fix: account for k-mer overlap in total_bases calculation
Introduces a `kmer_overlap` variable (`k - 1`) and changes the `total_bases` accumulation to subtract this overlap from each sequence's length using a saturating subtraction, so sequences shorter than `k` contribute zero instead of underflowing. As a result, `total_bases` counts the valid k-mer start positions in each sequence (`len - k + 1`) rather than the raw sequence length.
This commit is contained in:
@@ -95,10 +95,13 @@ pub fn scatter(
|
||||
let mut ema_rate: f64 = 0.0;
|
||||
let mut last_t = Instant::now();
|
||||
let mut last_bases: u64 = 0;
|
||||
+ let kmer_overlap = (k - 1) as u64;
|
||||
const ALPHA: f64 = 0.15;
|
||||
|
||||
for batch in pipe.apply(throttled, n_workers, 1) {
|
||||
- total_bases += batch.iter().map(|sk| sk.seql() as u64).sum::<u64>();
|
||||
+ total_bases += batch.iter()
|
||||
+ .map(|sk| (sk.seql() as u64).saturating_sub(kmer_overlap))
|
||||
+ .sum::<u64>();
|
||||
let now = Instant::now();
|
||||
let dt = now.duration_since(last_t).as_secs_f64();
|
||||
if dt > 0.1 {
|
||||
|
||||
Reference in New Issue
Block a user