mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 13:30:52 +00:00
Refactor k-mer encoding and frequency filtering with KmerSet
This commit refactors the k-mer encoding logic to handle ambiguous bases more consistently and introduces a KmerSet type for better management of k-mer collections. The frequency filter now works with KmerSet instead of roaring bitmaps directly, and the API has been updated to support level-based frequency queries. Additionally, the commit updates the version and commit hash.
This commit is contained in:
@@ -7,7 +7,8 @@ import (
|
|||||||
|
|
||||||
// __single_base_code__ encodes DNA bases to 2-bit values.
|
// __single_base_code__ encodes DNA bases to 2-bit values.
|
||||||
// Standard bases: A=0, C=1, G=2, T/U=3
|
// Standard bases: A=0, C=1, G=2, T/U=3
|
||||||
// Ambiguous bases (N, R, Y, W, S, K, M, B, D, H, V): 0xFF (255) to signal error
|
// Ambiguous bases (N, R, Y, W, S, K, M, B, D, H, V) and other characters: encoded as 0 (A)
|
||||||
|
// Note: For error detection with ambiguous bases, use __single_base_code_err__ in encodekmer.go
|
||||||
|
|
||||||
var __single_base_code__ = []byte{0,
|
var __single_base_code__ = []byte{0,
|
||||||
// A, B, C, D,
|
// A, B, C, D,
|
||||||
|
|||||||
@@ -15,19 +15,19 @@ var __single_base_code_err__ = []byte{0,
|
|||||||
0xFF, 0xFF, 0xFF, 3,
|
0xFF, 0xFF, 0xFF, 3,
|
||||||
// U, V, W, X,
|
// U, V, W, X,
|
||||||
3, 0xFF, 0xFF, 0xFF,
|
3, 0xFF, 0xFF, 0xFF,
|
||||||
// Y, Z, ., .,
|
// Y, Z, ., .
|
||||||
0xFF, 0xFF, 0xFF, 0xFF,
|
0xFF, 0xFF, 0xFF, 0xFF,
|
||||||
0xFF, 0xFF, 0xFF,
|
0xFF, 0xFF, 0xFF,
|
||||||
}
|
}
|
||||||
|
|
||||||
const ambiguousBaseCode = byte(0xFF)
|
const ambiguousBaseCode = byte(0xFF)
|
||||||
|
|
||||||
|
|
||||||
// Error markers for k-mers of odd length ≤ 31
|
// Error markers for k-mers of odd length ≤ 31
|
||||||
// For odd k ≤ 31, only k*2 bits are used (max 62 bits), leaving 2 high bits
|
// For odd k ≤ 31, only k*2 bits are used (max 62 bits), leaving 2 high bits
|
||||||
// available for error coding in the top 2 bits (bits 62-63).
|
// available for error coding in the top 2 bits (bits 62-63).
|
||||||
//
|
//
|
||||||
// Error codes are simple integers:
|
// Error codes are simple integers:
|
||||||
|
//
|
||||||
// 0 = no error
|
// 0 = no error
|
||||||
// 1 = error type 1
|
// 1 = error type 1
|
||||||
// 2 = error type 2
|
// 2 = error type 2
|
||||||
@@ -95,31 +95,14 @@ func EncodeKmers(seq []byte, k int, buffer *[]uint64) []uint64 {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
n := len(seq) - k + 1
|
|
||||||
|
|
||||||
var result []uint64
|
var result []uint64
|
||||||
if buffer == nil {
|
if buffer == nil {
|
||||||
result = make([]uint64, 0, n)
|
result = make([]uint64, 0, len(seq)-k+1)
|
||||||
} else {
|
} else {
|
||||||
result = (*buffer)[:0]
|
result = (*buffer)[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask to keep only k*2 bits
|
for kmer := range IterKmers(seq, k) {
|
||||||
mask := uint64(1)<<(k*2) - 1
|
|
||||||
|
|
||||||
// Build the first k-mer
|
|
||||||
var kmer uint64
|
|
||||||
for i := 0; i < k; i++ {
|
|
||||||
kmer <<= 2
|
|
||||||
kmer |= uint64(__single_base_code__[seq[i]&31])
|
|
||||||
}
|
|
||||||
result = append(result, kmer)
|
|
||||||
|
|
||||||
// Slide through the rest of the sequence
|
|
||||||
for i := k; i < len(seq); i++ {
|
|
||||||
kmer <<= 2
|
|
||||||
kmer |= uint64(__single_base_code__[seq[i]&31])
|
|
||||||
kmer &= mask
|
|
||||||
result = append(result, kmer)
|
result = append(result, kmer)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,6 +121,7 @@ func EncodeKmers(seq []byte, k int, buffer *[]uint64) []uint64 {
|
|||||||
// - iterator yielding uint64 encoded k-mers
|
// - iterator yielding uint64 encoded k-mers
|
||||||
//
|
//
|
||||||
// Example:
|
// Example:
|
||||||
|
//
|
||||||
// for kmer := range IterKmers(seq, 21) {
|
// for kmer := range IterKmers(seq, 21) {
|
||||||
// bitmap.Add(kmer)
|
// bitmap.Add(kmer)
|
||||||
// }
|
// }
|
||||||
@@ -147,10 +131,8 @@ func IterKmers(seq []byte, k int) iter.Seq[uint64] {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask to keep only k*2 bits
|
|
||||||
mask := uint64(1)<<(k*2) - 1
|
mask := uint64(1)<<(k*2) - 1
|
||||||
|
|
||||||
// Build the first k-mer
|
|
||||||
var kmer uint64
|
var kmer uint64
|
||||||
for i := 0; i < k; i++ {
|
for i := 0; i < k; i++ {
|
||||||
kmer <<= 2
|
kmer <<= 2
|
||||||
@@ -161,7 +143,6 @@ func IterKmers(seq []byte, k int) iter.Seq[uint64] {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slide through the rest of the sequence
|
|
||||||
for i := k; i < len(seq); i++ {
|
for i := k; i < len(seq); i++ {
|
||||||
kmer <<= 2
|
kmer <<= 2
|
||||||
kmer |= uint64(__single_base_code__[seq[i]&31])
|
kmer |= uint64(__single_base_code__[seq[i]&31])
|
||||||
@@ -191,6 +172,7 @@ func IterKmers(seq []byte, k int) iter.Seq[uint64] {
|
|||||||
// - iterator yielding uint64 normalized k-mers with error markers
|
// - iterator yielding uint64 normalized k-mers with error markers
|
||||||
//
|
//
|
||||||
// Example:
|
// Example:
|
||||||
|
//
|
||||||
// for kmer := range IterNormalizedKmersWithErrors(seq, 21) {
|
// for kmer := range IterNormalizedKmersWithErrors(seq, 21) {
|
||||||
// if GetKmerError(kmer) == 0 {
|
// if GetKmerError(kmer) == 0 {
|
||||||
// bitmap.Add(kmer) // Only add clean k-mers
|
// bitmap.Add(kmer) // Only add clean k-mers
|
||||||
@@ -198,44 +180,35 @@ func IterKmers(seq []byte, k int) iter.Seq[uint64] {
|
|||||||
// }
|
// }
|
||||||
func IterNormalizedKmersWithErrors(seq []byte, k int) iter.Seq[uint64] {
|
func IterNormalizedKmersWithErrors(seq []byte, k int) iter.Seq[uint64] {
|
||||||
return func(yield func(uint64) bool) {
|
return func(yield func(uint64) bool) {
|
||||||
// Only valid for odd k ≤ 31
|
|
||||||
if k < 1 || k > 31 || k%2 == 0 || len(seq) < k {
|
if k < 1 || k > 31 || k%2 == 0 || len(seq) < k {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask to keep only k*2 bits
|
|
||||||
mask := uint64(1)<<(k*2) - 1
|
mask := uint64(1)<<(k*2) - 1
|
||||||
|
|
||||||
// Shift amount for adding to reverse complement (high position)
|
|
||||||
rcShift := uint((k - 1) * 2)
|
rcShift := uint((k - 1) * 2)
|
||||||
|
|
||||||
// Track ambiguous base count in sliding window
|
|
||||||
ambiguousCount := 0
|
ambiguousCount := 0
|
||||||
const ambiguousCode = byte(0xFF)
|
const ambiguousCode = byte(0xFF)
|
||||||
|
|
||||||
// Build the first k-mer (forward and reverse complement)
|
|
||||||
var fwd, rvc uint64
|
var fwd, rvc uint64
|
||||||
hasError := false
|
hasError := false
|
||||||
for i := 0; i < k; i++ {
|
for i := 0; i < k; i++ {
|
||||||
code := __single_base_code_err__[seq[i]&31]
|
code := __single_base_code_err__[seq[i]&31]
|
||||||
|
|
||||||
// Check for ambiguous base
|
|
||||||
if code == ambiguousCode {
|
if code == ambiguousCode {
|
||||||
ambiguousCount++
|
ambiguousCount++
|
||||||
hasError = true
|
hasError = true
|
||||||
code = 0 // Encode as A for the sequence bits
|
code = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
codeUint := uint64(code)
|
codeUint := uint64(code)
|
||||||
// Forward: shift left and add new code at low end
|
|
||||||
fwd <<= 2
|
fwd <<= 2
|
||||||
fwd |= codeUint
|
fwd |= codeUint
|
||||||
// Reverse complement: shift right and add complement at high end
|
|
||||||
rvc >>= 2
|
rvc >>= 2
|
||||||
rvc |= (codeUint ^ 3) << rcShift
|
rvc |= (codeUint ^ 3) << rcShift
|
||||||
}
|
}
|
||||||
|
|
||||||
// Yield normalized k-mer with error marker
|
|
||||||
var canonical uint64
|
var canonical uint64
|
||||||
if fwd <= rvc {
|
if fwd <= rvc {
|
||||||
canonical = fwd
|
canonical = fwd
|
||||||
@@ -243,7 +216,6 @@ func IterNormalizedKmersWithErrors(seq []byte, k int) iter.Seq[uint64] {
|
|||||||
canonical = rvc
|
canonical = rvc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set error code based on ambiguous count
|
|
||||||
if hasError {
|
if hasError {
|
||||||
errorCode := uint64(ambiguousCount)
|
errorCode := uint64(ambiguousCount)
|
||||||
if errorCode > 3 {
|
if errorCode > 3 {
|
||||||
@@ -256,40 +228,33 @@ func IterNormalizedKmersWithErrors(seq []byte, k int) iter.Seq[uint64] {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slide through the rest of the sequence
|
|
||||||
for i := k; i < len(seq); i++ {
|
for i := k; i < len(seq); i++ {
|
||||||
// Check outgoing base (position i-k)
|
outgoingCode := __single_base_code_err__[seq[i-k]&31]
|
||||||
outgoingCode := __single_base_code__[seq[i-k]&31]
|
|
||||||
if outgoingCode == ambiguousCode {
|
if outgoingCode == ambiguousCode {
|
||||||
ambiguousCount--
|
ambiguousCount--
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check incoming base (position i)
|
code := __single_base_code_err__[seq[i]&31]
|
||||||
code := __single_base_code__[seq[i]&31]
|
|
||||||
if code == ambiguousCode {
|
if code == ambiguousCode {
|
||||||
ambiguousCount++
|
ambiguousCount++
|
||||||
code = 0 // Encode as A for the sequence bits
|
code = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
codeUint := uint64(code)
|
codeUint := uint64(code)
|
||||||
|
|
||||||
// Update forward k-mer: shift left, add new code, mask
|
|
||||||
fwd <<= 2
|
fwd <<= 2
|
||||||
fwd |= codeUint
|
fwd |= codeUint
|
||||||
fwd &= mask
|
fwd &= mask
|
||||||
|
|
||||||
// Update reverse complement: shift right, add complement at high end
|
|
||||||
rvc >>= 2
|
rvc >>= 2
|
||||||
rvc |= (codeUint ^ 3) << rcShift
|
rvc |= (codeUint ^ 3) << rcShift
|
||||||
|
|
||||||
// Yield normalized k-mer
|
|
||||||
if fwd <= rvc {
|
if fwd <= rvc {
|
||||||
canonical = fwd
|
canonical = fwd
|
||||||
} else {
|
} else {
|
||||||
canonical = rvc
|
canonical = rvc
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set error code based on ambiguous count
|
|
||||||
if ambiguousCount > 0 {
|
if ambiguousCount > 0 {
|
||||||
errorCode := uint64(ambiguousCount)
|
errorCode := uint64(ambiguousCount)
|
||||||
if errorCode > 3 {
|
if errorCode > 3 {
|
||||||
@@ -316,6 +281,7 @@ func IterNormalizedKmersWithErrors(seq []byte, k int) iter.Seq[uint64] {
|
|||||||
// - iterator yielding uint64 normalized k-mers
|
// - iterator yielding uint64 normalized k-mers
|
||||||
//
|
//
|
||||||
// Example:
|
// Example:
|
||||||
|
//
|
||||||
// for canonical := range IterNormalizedKmers(seq, 21) {
|
// for canonical := range IterNormalizedKmers(seq, 21) {
|
||||||
// bitmap.Add(canonical)
|
// bitmap.Add(canonical)
|
||||||
// }
|
// }
|
||||||
@@ -325,25 +291,19 @@ func IterNormalizedKmers(seq []byte, k int) iter.Seq[uint64] {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask to keep only k*2 bits
|
|
||||||
mask := uint64(1)<<(k*2) - 1
|
mask := uint64(1)<<(k*2) - 1
|
||||||
|
|
||||||
// Shift amount for adding to reverse complement (high position)
|
|
||||||
rcShift := uint((k - 1) * 2)
|
rcShift := uint((k - 1) * 2)
|
||||||
|
|
||||||
// Build the first k-mer (forward and reverse complement)
|
|
||||||
var fwd, rvc uint64
|
var fwd, rvc uint64
|
||||||
for i := 0; i < k; i++ {
|
for i := 0; i < k; i++ {
|
||||||
code := uint64(__single_base_code__[seq[i]&31])
|
code := uint64(__single_base_code__[seq[i]&31])
|
||||||
// Forward: shift left and add new code at low end
|
|
||||||
fwd <<= 2
|
fwd <<= 2
|
||||||
fwd |= code
|
fwd |= code
|
||||||
// Reverse complement: shift right and add complement at high end
|
|
||||||
rvc >>= 2
|
rvc >>= 2
|
||||||
rvc |= (code ^ 3) << rcShift
|
rvc |= (code ^ 3) << rcShift
|
||||||
}
|
}
|
||||||
|
|
||||||
// Yield normalized k-mer
|
|
||||||
var canonical uint64
|
var canonical uint64
|
||||||
if fwd <= rvc {
|
if fwd <= rvc {
|
||||||
canonical = fwd
|
canonical = fwd
|
||||||
@@ -355,20 +315,16 @@ func IterNormalizedKmers(seq []byte, k int) iter.Seq[uint64] {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slide through the rest of the sequence
|
|
||||||
for i := k; i < len(seq); i++ {
|
for i := k; i < len(seq); i++ {
|
||||||
code := uint64(__single_base_code__[seq[i]&31])
|
code := uint64(__single_base_code__[seq[i]&31])
|
||||||
|
|
||||||
// Update forward k-mer: shift left, add new code, mask
|
|
||||||
fwd <<= 2
|
fwd <<= 2
|
||||||
fwd |= code
|
fwd |= code
|
||||||
fwd &= mask
|
fwd &= mask
|
||||||
|
|
||||||
// Update reverse complement: shift right, add complement at high end
|
|
||||||
rvc >>= 2
|
rvc >>= 2
|
||||||
rvc |= (code ^ 3) << rcShift
|
rvc |= (code ^ 3) << rcShift
|
||||||
|
|
||||||
// Yield normalized k-mer
|
|
||||||
if fwd <= rvc {
|
if fwd <= rvc {
|
||||||
canonical = fwd
|
canonical = fwd
|
||||||
} else {
|
} else {
|
||||||
@@ -424,15 +380,12 @@ type dequeItem struct {
|
|||||||
// Time complexity: O(n) where n is the sequence length
|
// Time complexity: O(n) where n is the sequence length
|
||||||
// Space complexity: O(k-m+1) for the deque + O(number of super k-mers) for results
|
// Space complexity: O(k-m+1) for the deque + O(number of super k-mers) for results
|
||||||
func ExtractSuperKmers(seq []byte, k int, m int, buffer *[]SuperKmer) []SuperKmer {
|
func ExtractSuperKmers(seq []byte, k int, m int, buffer *[]SuperKmer) []SuperKmer {
|
||||||
// Validate parameters
|
|
||||||
if m < 1 || m >= k || k < 2 || k > 31 || len(seq) < k {
|
if m < 1 || m >= k || k < 2 || k > 31 || len(seq) < k {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize result buffer
|
|
||||||
var result []SuperKmer
|
var result []SuperKmer
|
||||||
if buffer == nil {
|
if buffer == nil {
|
||||||
// Estimate: worst case is one super k-mer per k nucleotides
|
|
||||||
estimatedSize := len(seq) / k
|
estimatedSize := len(seq) / k
|
||||||
if estimatedSize < 1 {
|
if estimatedSize < 1 {
|
||||||
estimatedSize = 1
|
estimatedSize = 1
|
||||||
@@ -442,14 +395,11 @@ func ExtractSuperKmers(seq []byte, k int, m int, buffer *[]SuperKmer) []SuperKme
|
|||||||
result = (*buffer)[:0]
|
result = (*buffer)[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize monotone deque for tracking minimizers
|
|
||||||
deque := make([]dequeItem, 0, k-m+1)
|
deque := make([]dequeItem, 0, k-m+1)
|
||||||
|
|
||||||
// Masks for m-mer encoding
|
|
||||||
mMask := uint64(1)<<(m*2) - 1
|
mMask := uint64(1)<<(m*2) - 1
|
||||||
rcShift := uint((m - 1) * 2)
|
rcShift := uint((m - 1) * 2)
|
||||||
|
|
||||||
// Build first m-1 nucleotides (can't form complete m-mer yet)
|
|
||||||
var fwdMmer, rvcMmer uint64
|
var fwdMmer, rvcMmer uint64
|
||||||
for i := 0; i < m-1 && i < len(seq); i++ {
|
for i := 0; i < m-1 && i < len(seq); i++ {
|
||||||
code := uint64(__single_base_code__[seq[i]&31])
|
code := uint64(__single_base_code__[seq[i]&31])
|
||||||
@@ -457,19 +407,15 @@ func ExtractSuperKmers(seq []byte, k int, m int, buffer *[]SuperKmer) []SuperKme
|
|||||||
rvcMmer = (rvcMmer >> 2) | ((code ^ 3) << rcShift)
|
rvcMmer = (rvcMmer >> 2) | ((code ^ 3) << rcShift)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track super k-mer boundaries
|
|
||||||
superKmerStart := 0
|
superKmerStart := 0
|
||||||
var currentMinimizer uint64
|
var currentMinimizer uint64
|
||||||
firstKmer := true
|
firstKmer := true
|
||||||
|
|
||||||
// Slide through sequence, processing each position that completes an m-mer
|
|
||||||
for pos := m - 1; pos < len(seq); pos++ {
|
for pos := m - 1; pos < len(seq); pos++ {
|
||||||
// Add new nucleotide to m-mer
|
|
||||||
code := uint64(__single_base_code__[seq[pos]&31])
|
code := uint64(__single_base_code__[seq[pos]&31])
|
||||||
fwdMmer = ((fwdMmer << 2) | code) & mMask
|
fwdMmer = ((fwdMmer << 2) | code) & mMask
|
||||||
rvcMmer = (rvcMmer >> 2) | ((code ^ 3) << rcShift)
|
rvcMmer = (rvcMmer >> 2) | ((code ^ 3) << rcShift)
|
||||||
|
|
||||||
// Get canonical m-mer (minimum of forward and reverse complement)
|
|
||||||
canonical := fwdMmer
|
canonical := fwdMmer
|
||||||
if rvcMmer < fwdMmer {
|
if rvcMmer < fwdMmer {
|
||||||
canonical = rvcMmer
|
canonical = rvcMmer
|
||||||
@@ -477,9 +423,6 @@ func ExtractSuperKmers(seq []byte, k int, m int, buffer *[]SuperKmer) []SuperKme
|
|||||||
|
|
||||||
mmerPos := pos - m + 1
|
mmerPos := pos - m + 1
|
||||||
|
|
||||||
// Remove m-mers outside the current k-mer window from front of deque
|
|
||||||
// The k-mer at position pos spans from (pos-k+1) to pos
|
|
||||||
// It contains m-mers from position (pos-k+1) to (pos-m+1)
|
|
||||||
if pos >= k-1 {
|
if pos >= k-1 {
|
||||||
windowStart := pos - k + 1
|
windowStart := pos - k + 1
|
||||||
for len(deque) > 0 && deque[0].position < windowStart {
|
for len(deque) > 0 && deque[0].position < windowStart {
|
||||||
@@ -487,30 +430,20 @@ func ExtractSuperKmers(seq []byte, k int, m int, buffer *[]SuperKmer) []SuperKme
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Maintain monotone property: remove larger values from back
|
|
||||||
for len(deque) > 0 && deque[len(deque)-1].canonical >= canonical {
|
for len(deque) > 0 && deque[len(deque)-1].canonical >= canonical {
|
||||||
deque = deque[:len(deque)-1]
|
deque = deque[:len(deque)-1]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add new m-mer to deque
|
|
||||||
deque = append(deque, dequeItem{position: mmerPos, canonical: canonical})
|
deque = append(deque, dequeItem{position: mmerPos, canonical: canonical})
|
||||||
|
|
||||||
// Once we have processed the first k nucleotides, we have our first k-mer
|
|
||||||
if pos >= k-1 {
|
if pos >= k-1 {
|
||||||
// The minimizer is at the front of the deque
|
|
||||||
newMinimizer := deque[0].canonical
|
newMinimizer := deque[0].canonical
|
||||||
kmerStart := pos - k + 1 // Start position of current k-mer (ending at pos)
|
kmerStart := pos - k + 1
|
||||||
|
|
||||||
if firstKmer {
|
if firstKmer {
|
||||||
// Initialize first super k-mer
|
|
||||||
currentMinimizer = newMinimizer
|
currentMinimizer = newMinimizer
|
||||||
firstKmer = false
|
firstKmer = false
|
||||||
} else if newMinimizer != currentMinimizer {
|
} else if newMinimizer != currentMinimizer {
|
||||||
// Minimizer changed at this k-mer position
|
|
||||||
// Previous k-mer started at position kmerStart-1
|
|
||||||
// That k-mer is seq[kmerStart-1 : kmerStart-1+k] (Go slice notation)
|
|
||||||
// The last base of that k-mer is at kmerStart-1+k-1 = kmerStart+k-2
|
|
||||||
// In Go slice notation (exclusive end): kmerStart+k-1
|
|
||||||
endPos := kmerStart + k - 1
|
endPos := kmerStart + k - 1
|
||||||
superKmer := SuperKmer{
|
superKmer := SuperKmer{
|
||||||
Minimizer: currentMinimizer,
|
Minimizer: currentMinimizer,
|
||||||
@@ -520,14 +453,12 @@ func ExtractSuperKmers(seq []byte, k int, m int, buffer *[]SuperKmer) []SuperKme
|
|||||||
}
|
}
|
||||||
result = append(result, superKmer)
|
result = append(result, superKmer)
|
||||||
|
|
||||||
// New super k-mer starts at current k-mer position
|
|
||||||
superKmerStart = kmerStart
|
superKmerStart = kmerStart
|
||||||
currentMinimizer = newMinimizer
|
currentMinimizer = newMinimizer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit final super k-mer
|
|
||||||
if !firstKmer {
|
if !firstKmer {
|
||||||
superKmer := SuperKmer{
|
superKmer := SuperKmer{
|
||||||
Minimizer: currentMinimizer,
|
Minimizer: currentMinimizer,
|
||||||
@@ -556,26 +487,19 @@ func ExtractSuperKmers(seq []byte, k int, m int, buffer *[]SuperKmer) []SuperKme
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - the reverse complement of the k-mer with error bits preserved
|
// - the reverse complement of the k-mer with error bits preserved
|
||||||
func ReverseComplement(kmer uint64, k int) uint64 {
|
func ReverseComplement(kmer uint64, k int) uint64 {
|
||||||
// Step 0: Extract and preserve error bits
|
|
||||||
errorBits := kmer & KmerErrorMask
|
errorBits := kmer & KmerErrorMask
|
||||||
|
|
||||||
// Step 1: Complement - XOR with all 1s to flip A↔T and C↔G
|
|
||||||
// For a k-mer of size k, we only want to flip the lower k*2 bits
|
|
||||||
mask := uint64(1)<<(k*2) - 1
|
mask := uint64(1)<<(k*2) - 1
|
||||||
rc := (^kmer) & mask
|
rc := (^kmer) & mask
|
||||||
|
|
||||||
// Step 2: Reverse the order of 2-bit pairs
|
rc = ((rc & 0x3333333333333333) << 2) | ((rc & 0xCCCCCCCCCCCCCCCC) >> 2)
|
||||||
// We use a series of swaps at increasing granularity
|
rc = ((rc & 0x0F0F0F0F0F0F0F0F) << 4) | ((rc & 0xF0F0F0F0F0F0F0F0) >> 4)
|
||||||
rc = ((rc & 0x3333333333333333) << 2) | ((rc & 0xCCCCCCCCCCCCCCCC) >> 2) // Swap adjacent pairs
|
rc = ((rc & 0x00FF00FF00FF00FF) << 8) | ((rc & 0xFF00FF00FF00FF00) >> 8)
|
||||||
rc = ((rc & 0x0F0F0F0F0F0F0F0F) << 4) | ((rc & 0xF0F0F0F0F0F0F0F0) >> 4) // Swap nibbles
|
rc = ((rc & 0x0000FFFF0000FFFF) << 16) | ((rc & 0xFFFF0000FFFF0000) >> 16)
|
||||||
rc = ((rc & 0x00FF00FF00FF00FF) << 8) | ((rc & 0xFF00FF00FF00FF00) >> 8) // Swap bytes
|
rc = (rc << 32) | (rc >> 32)
|
||||||
rc = ((rc & 0x0000FFFF0000FFFF) << 16) | ((rc & 0xFFFF0000FFFF0000) >> 16) // Swap 16-bit words
|
|
||||||
rc = (rc << 32) | (rc >> 32) // Swap 32-bit words
|
|
||||||
|
|
||||||
// Step 3: Shift right to align the k-mer (we reversed all 32 pairs, need only k)
|
|
||||||
rc >>= (64 - k*2)
|
rc >>= (64 - k*2)
|
||||||
|
|
||||||
// Step 4: Restore error bits
|
|
||||||
rc |= errorBits
|
rc |= errorBits
|
||||||
|
|
||||||
return rc
|
return rc
|
||||||
@@ -621,112 +545,19 @@ func NormalizeKmer(kmer uint64, k int) uint64 {
|
|||||||
// - slice of uint64 normalized k-mers with error markers
|
// - slice of uint64 normalized k-mers with error markers
|
||||||
// - nil if sequence is shorter than k, k is invalid, or k is even
|
// - nil if sequence is shorter than k, k is invalid, or k is even
|
||||||
func EncodeNormalizedKmersWithErrors(seq []byte, k int, buffer *[]uint64) []uint64 {
|
func EncodeNormalizedKmersWithErrors(seq []byte, k int, buffer *[]uint64) []uint64 {
|
||||||
// Only valid for odd k ≤ 31
|
|
||||||
if k < 1 || k > 31 || k%2 == 0 || len(seq) < k {
|
if k < 1 || k > 31 || k%2 == 0 || len(seq) < k {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
n := len(seq) - k + 1
|
|
||||||
|
|
||||||
var result []uint64
|
var result []uint64
|
||||||
if buffer == nil {
|
if buffer == nil {
|
||||||
result = make([]uint64, 0, n)
|
result = make([]uint64, 0, len(seq)-k+1)
|
||||||
} else {
|
} else {
|
||||||
result = (*buffer)[:0]
|
result = (*buffer)[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask to keep only k*2 bits
|
for kmer := range IterNormalizedKmersWithErrors(seq, k) {
|
||||||
mask := uint64(1)<<(k*2) - 1
|
result = append(result, kmer)
|
||||||
|
|
||||||
// Shift amount for adding to reverse complement (high position)
|
|
||||||
rcShift := uint((k - 1) * 2)
|
|
||||||
|
|
||||||
// Track ambiguous base count in sliding window
|
|
||||||
ambiguousCount := 0
|
|
||||||
const ambiguousCode = byte(0xFF)
|
|
||||||
|
|
||||||
// Build the first k-mer (forward and reverse complement)
|
|
||||||
var fwd, rvc uint64
|
|
||||||
hasError := false
|
|
||||||
for i := 0; i < k; i++ {
|
|
||||||
code := __single_base_code_err__[seq[i]&31]
|
|
||||||
|
|
||||||
// Check for ambiguous base
|
|
||||||
if code == ambiguousCode {
|
|
||||||
ambiguousCount++
|
|
||||||
hasError = true
|
|
||||||
code = 0 // Encode as A for the sequence bits
|
|
||||||
}
|
|
||||||
|
|
||||||
codeUint := uint64(code)
|
|
||||||
// Forward: shift left and add new code at low end
|
|
||||||
fwd <<= 2
|
|
||||||
fwd |= codeUint
|
|
||||||
// Reverse complement: shift right and add complement at high end
|
|
||||||
rvc >>= 2
|
|
||||||
rvc |= (codeUint ^ 3) << rcShift
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the normalized (canonical) k-mer with error marker
|
|
||||||
var canonical uint64
|
|
||||||
if fwd <= rvc {
|
|
||||||
canonical = fwd
|
|
||||||
} else {
|
|
||||||
canonical = rvc
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set error code based on ambiguous count
|
|
||||||
if hasError {
|
|
||||||
errorCode := uint64(ambiguousCount)
|
|
||||||
if errorCode > 3 {
|
|
||||||
errorCode = 3
|
|
||||||
}
|
|
||||||
canonical = SetKmerError(canonical, errorCode)
|
|
||||||
}
|
|
||||||
result = append(result, canonical)
|
|
||||||
|
|
||||||
// Slide through the rest of the sequence
|
|
||||||
for i := k; i < len(seq); i++ {
|
|
||||||
// Check outgoing base (position i-k)
|
|
||||||
outgoingCode := __single_base_code__[seq[i-k]&31]
|
|
||||||
if outgoingCode == ambiguousCode {
|
|
||||||
ambiguousCount--
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check incoming base (position i)
|
|
||||||
code := __single_base_code__[seq[i]&31]
|
|
||||||
if code == ambiguousCode {
|
|
||||||
ambiguousCount++
|
|
||||||
code = 0 // Encode as A for the sequence bits
|
|
||||||
}
|
|
||||||
|
|
||||||
codeUint := uint64(code)
|
|
||||||
|
|
||||||
// Update forward k-mer: shift left, add new code, mask
|
|
||||||
fwd <<= 2
|
|
||||||
fwd |= codeUint
|
|
||||||
fwd &= mask
|
|
||||||
|
|
||||||
// Update reverse complement: shift right, add complement at high end
|
|
||||||
rvc >>= 2
|
|
||||||
rvc |= (codeUint ^ 3) << rcShift
|
|
||||||
|
|
||||||
// Store the normalized k-mer
|
|
||||||
if fwd <= rvc {
|
|
||||||
canonical = fwd
|
|
||||||
} else {
|
|
||||||
canonical = rvc
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set error code based on ambiguous count
|
|
||||||
if ambiguousCount > 0 {
|
|
||||||
errorCode := uint64(ambiguousCount)
|
|
||||||
if errorCode > 3 {
|
|
||||||
errorCode = 3
|
|
||||||
}
|
|
||||||
canonical = SetKmerError(canonical, errorCode)
|
|
||||||
}
|
|
||||||
result = append(result, canonical)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
@@ -753,62 +584,15 @@ func EncodeNormalizedKmers(seq []byte, k int, buffer *[]uint64) []uint64 {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
n := len(seq) - k + 1
|
|
||||||
|
|
||||||
var result []uint64
|
var result []uint64
|
||||||
if buffer == nil {
|
if buffer == nil {
|
||||||
result = make([]uint64, 0, n)
|
result = make([]uint64, 0, len(seq)-k+1)
|
||||||
} else {
|
} else {
|
||||||
result = (*buffer)[:0]
|
result = (*buffer)[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mask to keep only k*2 bits
|
for kmer := range IterNormalizedKmers(seq, k) {
|
||||||
mask := uint64(1)<<(k*2) - 1
|
result = append(result, kmer)
|
||||||
|
|
||||||
// Shift amount for adding to reverse complement (high position)
|
|
||||||
rcShift := uint((k - 1) * 2)
|
|
||||||
|
|
||||||
// Complement lookup: A(00)->T(11), C(01)->G(10), G(10)->C(01), T(11)->A(00)
|
|
||||||
// This is simply XOR with 3
|
|
||||||
|
|
||||||
// Build the first k-mer (forward and reverse complement)
|
|
||||||
var fwd, rvc uint64
|
|
||||||
for i := 0; i < k; i++ {
|
|
||||||
code := uint64(__single_base_code__[seq[i]&31])
|
|
||||||
// Forward: shift left and add new code at low end
|
|
||||||
fwd <<= 2
|
|
||||||
fwd |= code
|
|
||||||
// Reverse complement: shift right and add complement at high end
|
|
||||||
rvc >>= 2
|
|
||||||
rvc |= (code ^ 3) << rcShift
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the normalized (canonical) k-mer
|
|
||||||
if fwd <= rvc {
|
|
||||||
result = append(result, fwd)
|
|
||||||
} else {
|
|
||||||
result = append(result, rvc)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Slide through the rest of the sequence
|
|
||||||
for i := k; i < len(seq); i++ {
|
|
||||||
code := uint64(__single_base_code__[seq[i]&31])
|
|
||||||
|
|
||||||
// Update forward k-mer: shift left, add new code, mask
|
|
||||||
fwd <<= 2
|
|
||||||
fwd |= code
|
|
||||||
fwd &= mask
|
|
||||||
|
|
||||||
// Update reverse complement: shift right, add complement at high end
|
|
||||||
rvc >>= 2
|
|
||||||
rvc |= (code ^ 3) << rcShift
|
|
||||||
|
|
||||||
// Store the normalized k-mer
|
|
||||||
if fwd <= rvc {
|
|
||||||
result = append(result, fwd)
|
|
||||||
} else {
|
|
||||||
result = append(result, rvc)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package obikmer
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"github.com/RoaringBitmap/roaring/roaring64"
|
"github.com/RoaringBitmap/roaring/roaring64"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -36,8 +37,9 @@ func NewFrequencyFilter(k, minFreq int) *FrequencyFilter {
|
|||||||
|
|
||||||
// AddSequence ajoute tous les k-mers d'une séquence au filtre
|
// AddSequence ajoute tous les k-mers d'une séquence au filtre
|
||||||
// Utilise un itérateur pour éviter l'allocation d'un vecteur intermédiaire
|
// Utilise un itérateur pour éviter l'allocation d'un vecteur intermédiaire
|
||||||
func (ff *FrequencyFilter) AddSequence(seq []byte) {
|
func (ff *FrequencyFilter) AddSequence(seq *obiseq.BioSequence) {
|
||||||
for canonical := range IterNormalizedKmers(seq, ff.K) {
|
rawSeq := seq.Sequence()
|
||||||
|
for canonical := range IterNormalizedKmers(rawSeq, ff.K) {
|
||||||
ff.addKmer(canonical)
|
ff.addKmer(canonical)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -56,20 +58,20 @@ func (ff *FrequencyFilter) addKmer(kmer uint64) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetFilteredSet retourne la Roaring Bitmap des k-mers avec fréquence ≥ minFreq
|
// GetFilteredSet retourne un KmerSet des k-mers avec fréquence ≥ minFreq
|
||||||
func (ff *FrequencyFilter) GetFilteredSet() *roaring64.Bitmap {
|
func (ff *FrequencyFilter) GetFilteredSet() *KmerSet {
|
||||||
// Les k-mers filtrés sont dans le dernier niveau
|
// Les k-mers filtrés sont dans le dernier niveau
|
||||||
return ff.index[ff.MinFreq-1].Clone()
|
return NewKmerSetFromBitmap(ff.K, ff.index[ff.MinFreq-1].Clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetKmersAtLevel retourne la Roaring Bitmap des k-mers vus au moins (level+1) fois
|
// GetKmersAtLevel retourne un KmerSet des k-mers vus au moins (level+1) fois
|
||||||
// level doit être dans [0, minFreq-1]
|
// level doit être dans [0, minFreq-1]
|
||||||
func (ff *FrequencyFilter) GetKmersAtLevel(level int) *roaring64.Bitmap {
|
func (ff *FrequencyFilter) GetKmersAtLevel(level int) *KmerSet {
|
||||||
if level < 0 || level >= ff.MinFreq {
|
if level < 0 || level >= ff.MinFreq {
|
||||||
return roaring64.New()
|
return NewKmerSet(ff.K)
|
||||||
}
|
}
|
||||||
|
|
||||||
return ff.index[level].Clone()
|
return NewKmerSetFromBitmap(ff.K, ff.index[level].Clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stats retourne des statistiques sur les niveaux de fréquence
|
// Stats retourne des statistiques sur les niveaux de fréquence
|
||||||
@@ -143,8 +145,8 @@ func (ff *FrequencyFilter) Clear() {
|
|||||||
// ==================================
|
// ==================================
|
||||||
|
|
||||||
// AddSequences ajoute plusieurs séquences en batch
|
// AddSequences ajoute plusieurs séquences en batch
|
||||||
func (ff *FrequencyFilter) AddSequences(sequences [][]byte) {
|
func (ff *FrequencyFilter) AddSequences(sequences *obiseq.BioSequenceSlice) {
|
||||||
for _, seq := range sequences {
|
for _, seq := range *sequences {
|
||||||
ff.AddSequence(seq)
|
ff.AddSequence(seq)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -193,9 +195,22 @@ func (ff *FrequencyFilter) GetFrequency(kmer uint64) int {
|
|||||||
return freq
|
return freq
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cardinality retourne le nombre de k-mers filtrés
|
// Len retourne le nombre de k-mers filtrés ou à un niveau spécifique
|
||||||
func (ff *FrequencyFilter) Cardinality() uint64 {
|
// Sans argument: retourne le nombre de k-mers avec freq ≥ minFreq (dernier niveau)
|
||||||
|
// Avec argument level: retourne le nombre de k-mers avec freq ≥ (level+1)
|
||||||
|
// Exemple: Len() pour les k-mers filtrés, Len(2) pour freq ≥ 3
|
||||||
|
func (ff *FrequencyFilter) Len(level ...int) uint64 {
|
||||||
|
if len(level) == 0 {
|
||||||
|
// Sans argument: dernier niveau (k-mers filtrés)
|
||||||
return ff.index[ff.MinFreq-1].GetCardinality()
|
return ff.index[ff.MinFreq-1].GetCardinality()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avec argument: niveau spécifique
|
||||||
|
lvl := level[0]
|
||||||
|
if lvl < 0 || lvl >= ff.MinFreq {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return ff.index[lvl].GetCardinality()
|
||||||
}
|
}
|
||||||
|
|
||||||
// MemoryUsage retourne l'utilisation mémoire en bytes
|
// MemoryUsage retourne l'utilisation mémoire en bytes
|
||||||
|
|||||||
120
pkg/obikmer/kmer_set.go
Normal file
120
pkg/obikmer/kmer_set.go
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
package obikmer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"github.com/RoaringBitmap/roaring/roaring64"
|
||||||
|
)
|
||||||
|
|
||||||
|
// KmerSet encapsule un ensemble de k-mers stockés dans un Roaring Bitmap
|
||||||
|
// Fournit des méthodes utilitaires pour manipuler des ensembles de k-mers
|
||||||
|
type KmerSet struct {
|
||||||
|
K int // Taille des k-mers
|
||||||
|
bitmap *roaring64.Bitmap // Bitmap contenant les k-mers
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewKmerSet crée un nouveau KmerSet vide
|
||||||
|
func NewKmerSet(k int) *KmerSet {
|
||||||
|
return &KmerSet{
|
||||||
|
K: k,
|
||||||
|
bitmap: roaring64.New(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewKmerSetFromBitmap crée un KmerSet à partir d'un bitmap existant
|
||||||
|
func NewKmerSetFromBitmap(k int, bitmap *roaring64.Bitmap) *KmerSet {
|
||||||
|
return &KmerSet{
|
||||||
|
K: k,
|
||||||
|
bitmap: bitmap,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add ajoute un k-mer à l'ensemble
|
||||||
|
func (ks *KmerSet) Add(kmer uint64) {
|
||||||
|
ks.bitmap.Add(kmer)
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddSequence ajoute tous les k-mers d'une séquence à l'ensemble
|
||||||
|
// Utilise un itérateur pour éviter l'allocation d'un vecteur intermédiaire
|
||||||
|
func (ks *KmerSet) AddSequence(seq *obiseq.BioSequence) {
|
||||||
|
rawSeq := seq.Sequence()
|
||||||
|
for canonical := range IterNormalizedKmers(rawSeq, ks.K) {
|
||||||
|
ks.bitmap.Add(canonical)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddSequences ajoute tous les k-mers de plusieurs séquences en batch
|
||||||
|
func (ks *KmerSet) AddSequences(sequences *obiseq.BioSequenceSlice) {
|
||||||
|
for _, seq := range *sequences {
|
||||||
|
ks.AddSequence(seq)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Contains vérifie si un k-mer est dans l'ensemble
|
||||||
|
func (ks *KmerSet) Contains(kmer uint64) bool {
|
||||||
|
return ks.bitmap.Contains(kmer)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Len retourne le nombre de k-mers dans l'ensemble
|
||||||
|
func (ks *KmerSet) Len() uint64 {
|
||||||
|
return ks.bitmap.GetCardinality()
|
||||||
|
}
|
||||||
|
|
||||||
|
// MemoryUsage retourne l'utilisation mémoire en bytes
|
||||||
|
func (ks *KmerSet) MemoryUsage() uint64 {
|
||||||
|
return ks.bitmap.GetSizeInBytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear vide l'ensemble
|
||||||
|
func (ks *KmerSet) Clear() {
|
||||||
|
ks.bitmap.Clear()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clone crée une copie de l'ensemble
|
||||||
|
func (ks *KmerSet) Clone() *KmerSet {
|
||||||
|
return &KmerSet{
|
||||||
|
K: ks.K,
|
||||||
|
bitmap: ks.bitmap.Clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Union retourne l'union de cet ensemble avec un autre
|
||||||
|
func (ks *KmerSet) Union(other *KmerSet) *KmerSet {
|
||||||
|
if ks.K != other.K {
|
||||||
|
panic(fmt.Sprintf("Cannot union KmerSets with different k values: %d vs %d", ks.K, other.K))
|
||||||
|
}
|
||||||
|
result := ks.bitmap.Clone()
|
||||||
|
result.Or(other.bitmap)
|
||||||
|
return NewKmerSetFromBitmap(ks.K, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Intersect retourne l'intersection de cet ensemble avec un autre
|
||||||
|
func (ks *KmerSet) Intersect(other *KmerSet) *KmerSet {
|
||||||
|
if ks.K != other.K {
|
||||||
|
panic(fmt.Sprintf("Cannot intersect KmerSets with different k values: %d vs %d", ks.K, other.K))
|
||||||
|
}
|
||||||
|
result := ks.bitmap.Clone()
|
||||||
|
result.And(other.bitmap)
|
||||||
|
return NewKmerSetFromBitmap(ks.K, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Difference retourne la différence de cet ensemble avec un autre (this - other)
|
||||||
|
func (ks *KmerSet) Difference(other *KmerSet) *KmerSet {
|
||||||
|
if ks.K != other.K {
|
||||||
|
panic(fmt.Sprintf("Cannot subtract KmerSets with different k values: %d vs %d", ks.K, other.K))
|
||||||
|
}
|
||||||
|
result := ks.bitmap.Clone()
|
||||||
|
result.AndNot(other.bitmap)
|
||||||
|
return NewKmerSetFromBitmap(ks.K, result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterator retourne un itérateur sur tous les k-mers de l'ensemble
|
||||||
|
func (ks *KmerSet) Iterator() roaring64.IntIterable64 {
|
||||||
|
return ks.bitmap.Iterator()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bitmap retourne le bitmap sous-jacent (pour compatibilité)
|
||||||
|
func (ks *KmerSet) Bitmap() *roaring64.Bitmap {
|
||||||
|
return ks.bitmap
|
||||||
|
}
|
||||||
@@ -8,7 +8,7 @@ import (
|
|||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// commited
|
// commited
|
||||||
|
|
||||||
var _Commit = "28162ac"
|
var _Commit = "60f27c1"
|
||||||
var _Version = "Release 4.4.0"
|
var _Version = "Release 4.4.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
|||||||
Reference in New Issue
Block a user