mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 13:30:52 +00:00
Refactor k-mer index building to use disk-based KmerSetGroupBuilder
Refactor k-mer index building to use the new disk-based KmerSetGroupBuilder instead of the old KmerSet and FrequencyFilter approaches. This change introduces a more efficient and scalable approach to building k-mer indices by using partitioned disk storage with streaming operations.

- Replace BuildKmerIndex and BuildFrequencyFilterIndex with KmerSetGroupBuilder
- Add support for frequency filtering via the WithMinFrequency option
- Remove deprecated k-mer set persistence methods
- Update the CLI to use the new builder approach
- Add new disk-based k-mer operations (union, intersect, difference, quorum)
- Introduce the KDI (K-mer Delta Index) file format for efficient storage
- Add k-way merge operations for combining sorted k-mer streams
- Update documentation and examples to reflect the new API

This refactoring provides better memory usage, faster operations on large datasets, and more flexible k-mer set operations.
This commit is contained in:
53
pkg/obikmer/varint.go
Normal file
53
pkg/obikmer/varint.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package obikmer
|
||||
|
||||
import "io"
|
||||
|
||||
// EncodeVarint serializes v to w as an unsigned base-128 varint.
//
// The value is emitted least-significant group first, seven payload bits per
// byte; the top bit of every byte except the last is set to signal that more
// bytes follow (the same wire format as protobuf unsigned varints).
//
// The whole encoding is handed to w in a single Write call, and the byte
// count and error of that call are returned unchanged.
func EncodeVarint(w io.Writer, v uint64) (int, error) {
	// ceil(64/7) == 10, so ten bytes always suffice for a uint64.
	var scratch [10]byte

	last := 0
	for ; v > 0x7F; last++ {
		// Low seven bits plus the continuation flag.
		scratch[last] = byte(v&0x7F) | 0x80
		v >>= 7
	}
	// Final group: continuation flag left clear.
	scratch[last] = byte(v)

	return w.Write(scratch[:last+1])
}
|
||||
|
||||
// DecodeVarint reads one unsigned base-128 varint from r and returns its
// value. The expected format is the one produced by EncodeVarint: seven
// payload bits per byte, least-significant group first, with the high bit of
// a byte set when another byte follows.
//
// Bytes are pulled from r one at a time, so exactly the bytes belonging to
// the varint are consumed on success.
//
// Errors:
//   - io.EOF when r is exhausted before the first byte is read, i.e. a clean
//     end of stream;
//   - io.ErrUnexpectedEOF when r ends in the middle of a varint, when the
//     encoding runs past 10 bytes, or when it does not fit in 64 bits;
//   - any other error reported by r, returned as is.
//
// The returned value is 0 whenever the error is non-nil.
func DecodeVarint(r io.Reader) (uint64, error) {
	// A uint64 needs at most ceil(64/7) == 10 varint bytes.
	const maxLen = 10

	var (
		val uint64
		buf [1]byte
	)

	for i := uint(0); ; i++ {
		if _, err := io.ReadFull(r, buf[:]); err != nil {
			// With a one-byte buffer io.ReadFull reports a bare io.EOF even
			// when earlier bytes of this varint were already consumed;
			// surface that as truncation so callers can tell it apart from
			// a clean end of stream.
			if err == io.EOF && i > 0 {
				return 0, io.ErrUnexpectedEOF
			}
			return 0, err
		}
		b := buf[0]

		// The 10th byte sits at bit 63: only its lowest bit fits in a
		// uint64, and it must be the last byte. Anything else is an
		// overlong or overflowing encoding, not a value to truncate.
		if i == maxLen-1 && b > 1 {
			return 0, io.ErrUnexpectedEOF
		}

		val |= uint64(b&0x7F) << (7 * i)
		if b < 0x80 {
			return val, nil
		}
	}
}
|
||||
|
||||
// VarintLen reports how many bytes the varint encoding of v occupies:
// one byte for the first seven bits, plus one more byte for every further
// group of seven bits that is still non-zero. The result is between 1 and 10.
func VarintLen(v uint64) int {
	size := 1
	for rest := v >> 7; rest != 0; rest >>= 7 {
		size++
	}
	return size
}
|
||||
Reference in New Issue
Block a user