🔧 Fix rolling k-mer and minimizer logic

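Fix the forward k-mer update of `rolling_k`, which shifted in the complemented nucleotide (`cnuc`) instead of the encoded nucleotide (`bnuc`); correct the shift used when inserting into the reverse-complement k-mer `rolling_rck` to `(self.k - 1) * 2` bits instead of `self.k * 2`; and rename `minimizer_canonical` to `canonical_minimizer` to match its semantics, building the returned `Kmer` with length `self.m` instead of `self.k`. Also add windowed minimizer cleanup: once the received length exceeds k, entries whose `position + k <= received` are popped from the front of `self.minimier`.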
This commit is contained in:
Eric Coissac
2026-04-20 15:43:44 +02:00
parent ae5e1152b9
commit f09b70b209
+10 -4
View File
@@ -85,9 +85,9 @@ impl RollingStat {
let bnuc = encode_nuc(nuc as u8); let bnuc = encode_nuc(nuc as u8);
let cnuc = bnuc ^ 3; let cnuc = bnuc ^ 3;
self.rolling_k = ((self.rolling_k << 2) | (cnuc as u64)) & self.k_mask; self.rolling_k = ((self.rolling_k << 2) | (bnuc as u64)) & self.k_mask;
self.rolling_rck = self.rolling_rck =
((self.rolling_rck >> 2) | ((cnuc as u64) << (self.k * 2))) & self.k_mask; ((self.rolling_rck >> 2) | ((cnuc as u64) << ((self.k -1) * 2))) & self.k_mask;
let canonical_k1 = entropy_norm_kmer(self.rolling_k & 3, 1, false); let canonical_k1 = entropy_norm_kmer(self.rolling_k & 3, 1, false);
@@ -108,6 +108,12 @@ impl RollingStat {
self.minimier.pop_back(); self.minimier.pop_back();
} }
self.minimier.push_back(MmerItem { position: possible_pos_m, canonical: possible_canonical_m }); self.minimier.push_back(MmerItem { position: possible_pos_m, canonical: possible_canonical_m });
if self.received > self.k {
while self.minimier.front().map_or(false, |it| it.position + self.k <= self.received) {
self.minimier.pop_front();
}
}
} }
if self.received > self.k { if self.received > self.k {
@@ -180,9 +186,9 @@ impl RollingStat {
} }
} }
pub fn minimizer_canonical(&self) -> Option<Kmer> { pub fn canonical_minimizer(&self) -> Option<Kmer> {
if self.ready() { if self.ready() {
self.minimier.front().map(|it| Kmer::from_raw_right(it.canonical, self.k)) self.minimier.front().map(|it| Kmer::from_raw_right(it.canonical, self.m))
} else { } else {
None None
} }