feat: dynamically compute seed expansion based on RSS

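Introduce a `peak_rss_bytes()` utility that reports the process peak RSS, so that the RAM cost of a phase can be estimated from the difference between the values read before and after it. Replace the genome-length heuristic for the seed expansion ratio with a ratio derived from the peak-RSS delta measured around the pilot partition merge, falling back to `FALLBACK_EXPANSION` when that delta (or the partition size) is zero. Explicitly drop the `GraphDeBruijn` instance before MPHF construction so that its memory is released before the layer build starts, preventing resource contention between the two.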
This commit is contained in:
Eric Coissac
2026-06-12 16:28:03 +02:00
parent db9c604199
commit 2bc189e962
4 changed files with 19 additions and 6 deletions
+8 -4
View File
@@ -5,7 +5,7 @@ use std::path::Path;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex};
use obisys::{MemoryBudget, Reporter, Stage, available_memory_bytes, progress_bar, spinner};
use obisys::{MemoryBudget, Reporter, Stage, available_memory_bytes, peak_rss_bytes, progress_bar, spinner};
use rayon::prelude::*;
use tracing::{debug, info};
@@ -200,21 +200,25 @@ impl KmerIndex {
let worst_id = order[0];
let worst_bytes = partition_sizes[worst_id];
let rss_before_pilot = peak_rss_bytes();
let worst_g_len = dst_partition
.merge_partition(worst_id, &srcs, mode, n_dst_genomes, block_bits, &evidence)
.map_err(OKIError::Partition)?;
let rss_after_pilot = peak_rss_bytes();
pb.inc(1);
let seed_expansion = if worst_bytes > 0 {
worst_g_len as u64 * 16 * 1000 / worst_bytes
let pilot_rss = rss_after_pilot.saturating_sub(rss_before_pilot);
let seed_expansion = if worst_bytes > 0 && pilot_rss > 0 {
pilot_rss * 1000 / worst_bytes
} else {
FALLBACK_EXPANSION
};
info!(
"merge_partitions: pilot partition {} — {} unitig bytes → {} new kmers, \
expansion {:.2}×",
RSS delta {}, expansion {:.2}×",
worst_id, worst_bytes, worst_g_len,
fmt_bytes(pilot_rss),
seed_expansion as f64 / 1000.0,
);
+4 -2
View File
@@ -223,12 +223,14 @@ impl KmerPartition {
uw.write(&unitig)
})?;
uw.close()?;
let n = g.len();
drop(g); // release GraphDeBruijn before MPHF build
Layer::<()>::build(&new_layer_dir, block_bits, evidence).map_err(olm_to_sk)?;
g.len()
n
} else {
drop(g);
0
};
drop(g);
let new_mphf = if any_new {
Some(MphfOnly::open(&new_layer_dir).map_err(olm_to_sk)?)
+1
View File
@@ -104,3 +104,4 @@ fn layer_dir(root: &Path, i: usize) -> PathBuf {
#[cfg(test)]
#[path = "tests/map.rs"]
mod tests;
+6
View File
@@ -119,6 +119,12 @@ use sysinfo::System;
///
/// On macOS, `available_memory()` can return 0 when the memory compressor
/// inflates the page count; in that case we fall back to half of total memory.
/// Reports the peak resident set size of the current process, in bytes.
///
/// The value is a high-water mark taken since process start, so it never
/// decreases. To estimate the RAM cost of a phase, read it before and after
/// the phase and subtract; a phase that stays below the previous peak yields
/// a delta of zero.
pub fn peak_rss_bytes() -> u64 {
    let usage = get_rusage();
    rss_to_bytes(&usage)
}
pub fn available_memory_bytes() -> u64 {
let sys = System::new_all();
let host_avail = match sys.available_memory() {