From 2bc189e96259c3c1d3c7abe0b4e1cfcc5da47e9e Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 12 Jun 2026 16:28:03 +0200 Subject: [PATCH] feat: dynamically compute seed expansion based on RSS Introduce a `peak_rss_bytes()` utility that returns the process peak RSS (high-water mark since process start), so a phase's RAM cost can be measured as the delta before/after it. Replace the genome-length heuristic with a dynamic seed expansion ratio computed from the pilot partition's peak-RSS delta, falling back to `FALLBACK_EXPANSION` when the delta or the partition size is zero. Explicitly drop the `GraphDeBruijn` instance before MPHF construction so it is released before the MPHF build. --- src/obikindex/src/merge.rs | 12 ++++++++---- src/obikpartitionner/src/merge_layer.rs | 6 ++++-- src/obilayeredmap/src/map.rs | 1 + src/obisys/src/lib.rs | 6 ++++++ 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/obikindex/src/merge.rs b/src/obikindex/src/merge.rs index 0857013..f7879f4 100644 --- a/src/obikindex/src/merge.rs +++ b/src/obikindex/src/merge.rs @@ -5,7 +5,7 @@ use std::path::Path; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; -use obisys::{MemoryBudget, Reporter, Stage, available_memory_bytes, progress_bar, spinner}; +use obisys::{MemoryBudget, Reporter, Stage, available_memory_bytes, peak_rss_bytes, progress_bar, spinner}; use rayon::prelude::*; use tracing::{debug, info}; @@ -200,21 +200,25 @@ impl KmerIndex { let worst_id = order[0]; let worst_bytes = partition_sizes[worst_id]; + let rss_before_pilot = peak_rss_bytes(); let worst_g_len = dst_partition .merge_partition(worst_id, &srcs, mode, n_dst_genomes, block_bits, &evidence) .map_err(OKIError::Partition)?; + let rss_after_pilot = peak_rss_bytes(); pb.inc(1); - let seed_expansion = if worst_bytes > 0 { - worst_g_len as u64 * 16 * 1000 / worst_bytes + let pilot_rss = rss_after_pilot.saturating_sub(rss_before_pilot); + let seed_expansion = if worst_bytes > 0 && pilot_rss > 0 { + pilot_rss * 1000 / worst_bytes } else { FALLBACK_EXPANSION }; info!( "merge_partitions: pilot partition {} — {} unitig bytes → {} new kmers, \ - expansion {:.2}×", + 
RSS delta {}, expansion {:.2}×", worst_id, worst_bytes, worst_g_len, + fmt_bytes(pilot_rss), seed_expansion as f64 / 1000.0, ); diff --git a/src/obikpartitionner/src/merge_layer.rs b/src/obikpartitionner/src/merge_layer.rs index 6b55f5d..3cc5a47 100644 --- a/src/obikpartitionner/src/merge_layer.rs +++ b/src/obikpartitionner/src/merge_layer.rs @@ -223,12 +223,14 @@ impl KmerPartition { uw.write(&unitig) })?; uw.close()?; + let n = g.len(); + drop(g); // release GraphDeBruijn before MPHF build Layer::<()>::build(&new_layer_dir, block_bits, evidence).map_err(olm_to_sk)?; - g.len() + n } else { + drop(g); 0 }; - drop(g); let new_mphf = if any_new { Some(MphfOnly::open(&new_layer_dir).map_err(olm_to_sk)?) diff --git a/src/obilayeredmap/src/map.rs b/src/obilayeredmap/src/map.rs index 18d3c55..31cf20c 100644 --- a/src/obilayeredmap/src/map.rs +++ b/src/obilayeredmap/src/map.rs @@ -104,3 +104,4 @@ fn layer_dir(root: &Path, i: usize) -> PathBuf { #[cfg(test)] #[path = "tests/map.rs"] mod tests; + diff --git a/src/obisys/src/lib.rs b/src/obisys/src/lib.rs index bf2e678..5f8a3d3 100644 --- a/src/obisys/src/lib.rs +++ b/src/obisys/src/lib.rs @@ -119,6 +119,12 @@ use sysinfo::System; /// /// On macOS, `available_memory()` can return 0 when the memory compressor /// inflates the page count; in that case we fall back to half of total memory. +/// Returns the process peak RSS (high-water mark since process start). +/// Monotonically increasing — use delta before/after a phase to measure its RAM cost. +pub fn peak_rss_bytes() -> u64 { + rss_to_bytes(&get_rusage()) +} + pub fn available_memory_bytes() -> u64 { let sys = System::new_all(); let host_avail = match sys.available_memory() {