From f09b70b209b84bdfdfe496ad2aeaeb59f6f511fa Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Mon, 20 Apr 2026 15:43:44 +0200 Subject: [PATCH] :wrench: Fix rolling k-mer and minimizer logic Fix the `rolling_k` update so that it pushes the encoded nucleotide (`bnuc`) instead of its complement (`cnuc`), correct the bit shift used for the reverse-complement k-mer (`(self.k - 1) * 2`, not `self.k * 2`), and rename `minimizer_canonical` to `canonical_minimizer`, which now passes `self.m` instead of `self.k` to `Kmer::from_raw_right`. Also add windowed minimizer cleanup: once the received length exceeds k, drop entries from the front of the minimizer queue while `position + self.k <= self.received`. --- src/obiskbuilder/src/rolling_stat.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/obiskbuilder/src/rolling_stat.rs b/src/obiskbuilder/src/rolling_stat.rs index b3fc6f9..e33a9ca 100644 --- a/src/obiskbuilder/src/rolling_stat.rs +++ b/src/obiskbuilder/src/rolling_stat.rs @@ -85,9 +85,9 @@ impl RollingStat { let bnuc = encode_nuc(nuc as u8); let cnuc = bnuc ^ 3; - self.rolling_k = ((self.rolling_k << 2) | (cnuc as u64)) & self.k_mask; + self.rolling_k = ((self.rolling_k << 2) | (bnuc as u64)) & self.k_mask; self.rolling_rck = - ((self.rolling_rck >> 2) | ((cnuc as u64) << (self.k * 2))) & self.k_mask; + ((self.rolling_rck >> 2) | ((cnuc as u64) << ((self.k -1) * 2))) & self.k_mask; let canonical_k1 = entropy_norm_kmer(self.rolling_k & 3, 1, false); @@ -108,6 +108,12 @@ impl RollingStat { self.minimier.pop_back(); } self.minimier.push_back(MmerItem { position: possible_pos_m, canonical: possible_canonical_m }); + + if self.received > self.k { + while self.minimier.front().map_or(false, |it| it.position + self.k <= self.received) { + self.minimier.pop_front(); + } + } } if self.received > self.k { @@ -180,9 +186,9 @@ impl RollingStat { } } - pub fn minimizer_canonical(&self) -> Option { + pub fn canonical_minimizer(&self) -> Option { if self.ready() { - self.minimier.front().map(|it| Kmer::from_raw_right(it.canonical, self.k)) + self.minimier.front().map(|it| Kmer::from_raw_right(it.canonical, self.m)) } else { None }