From bba5147f0faf159a015e63b464eb77c55cb9939a Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 3 Jun 2026 15:10:15 +0200 Subject: [PATCH] fix: account for k-mer overlap in total_bases calculation Introduces a `kmer_overlap` variable (`k-1`) and modifies the `total_bases` accumulation to subtract this overlap from each sequence's length. This ensures the base count accurately reflects only valid k-mer starting positions rather than raw sequence length. --- src/obikmer/src/steps/scatter.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/obikmer/src/steps/scatter.rs b/src/obikmer/src/steps/scatter.rs index 80d789e..cb5a14b 100644 --- a/src/obikmer/src/steps/scatter.rs +++ b/src/obikmer/src/steps/scatter.rs @@ -95,10 +95,13 @@ pub fn scatter( let mut ema_rate: f64 = 0.0; let mut last_t = Instant::now(); let mut last_bases: u64 = 0; + let kmer_overlap = (k - 1) as u64; const ALPHA: f64 = 0.15; for batch in pipe.apply(throttled, n_workers, 1) { - total_bases += batch.iter().map(|sk| sk.seql() as u64).sum::<u64>(); + total_bases += batch.iter() + .map(|sk| (sk.seql() as u64).saturating_sub(kmer_overlap)) + .sum::<u64>(); let now = Instant::now(); let dt = now.duration_since(last_t).as_secs_f64(); if dt > 0.1 {