feat: dynamically compute seed expansion based on RSS

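Introduce a `peak_rss_bytes()` utility for accurate per-phase RAM measurement. Replace the genome-length heuristic (`worst_g_len * 16 * 1000 / worst_bytes`) with a dynamic seed expansion ratio computed from the actual RSS delta, i.e. the difference in `peak_rss_bytes()` sampled immediately before and after merging the pilot (largest) partition, divided by that partition's size in bytes; `FALLBACK_EXPANSION` is still used when the partition size or the measured delta is zero. Explicitly drop the `GraphDeBruijn` instance before MPHF construction to prevent resource contention and ensure proper memory management.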
This commit is contained in:
Eric Coissac
2026-06-12 16:28:03 +02:00
parent db9c604199
commit 2bc189e962
4 changed files with 19 additions and 6 deletions
+8 -4
View File
@@ -5,7 +5,7 @@ use std::path::Path;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use obisys::{MemoryBudget, Reporter, Stage, available_memory_bytes, progress_bar, spinner};
use obisys::{MemoryBudget, Reporter, Stage, available_memory_bytes, peak_rss_bytes, progress_bar, spinner};
use rayon::prelude::*;
use tracing::{debug, info};
@@ -200,21 +200,25 @@ impl KmerIndex {
let worst_id = order[0];
let worst_bytes = partition_sizes[worst_id];
let rss_before_pilot = peak_rss_bytes();
let worst_g_len = dst_partition
.merge_partition(worst_id, &srcs, mode, n_dst_genomes, block_bits, &evidence)
.map_err(OKIError::Partition)?;
let rss_after_pilot = peak_rss_bytes();
pb.inc(1);
let seed_expansion = if worst_bytes > 0 {
worst_g_len as u64 * 16 * 1000 / worst_bytes
let pilot_rss = rss_after_pilot.saturating_sub(rss_before_pilot);
let seed_expansion = if worst_bytes > 0 && pilot_rss > 0 {
pilot_rss * 1000 / worst_bytes
} else {
FALLBACK_EXPANSION
};
info!(
"merge_partitions: pilot partition {} — {} unitig bytes → {} new kmers, \
expansion {:.2}×",
RSS delta {}, expansion {:.2}×",
worst_id, worst_bytes, worst_g_len,
fmt_bytes(pilot_rss),
seed_expansion as f64 / 1000.0,
);