mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-26 14:00:51 +00:00
Refactor kmer index to disk-based partitioning with minimizer
Refactor the kmer index package to use disk-based partitioning with minimizers:

- Replace roaring64 bitmaps with a disk-based kmer index
- Implement partitioned kmer sets with delta-varint encoding
- Add support for frequency filtering during construction
- Introduce a new builder pattern for index construction
- Add streaming set operations (union, intersect, etc.)
- Add support for super-kmer encoding during construction
- Update the command line tool to use the new index format
- Remove the dependency on the roaring bitmap library

This change introduces a new architecture for kmer indexing that is more memory-efficient and scalable for large datasets.
This commit is contained in:
@@ -145,6 +145,14 @@ func (ksg *KmerSetGroup) AddSequences(sequences *obiseq.BioSequenceSlice, index
|
||||
ksg.sets[index].AddSequences(sequences)
|
||||
}
|
||||
|
||||
// AddSequenceSlice adds all k-mers from a slice of sequences to a specific KmerSet
|
||||
func (ksg *KmerSetGroup) AddSequenceSlice(sequences *obiseq.BioSequenceSlice, index int) {
|
||||
if index < 0 || index >= len(ksg.sets) {
|
||||
panic(fmt.Sprintf("Index out of bounds: %d (size: %d)", index, len(ksg.sets)))
|
||||
}
|
||||
ksg.sets[index].AddSequenceSlice(sequences)
|
||||
}
|
||||
|
||||
// Union returns the union of all KmerSet in the group
|
||||
// Optimization: starts from the largest set to minimize operations
|
||||
func (ksg *KmerSetGroup) Union() *KmerSet {
|
||||
|
||||
Reference in New Issue
Block a user