feat: enhance memory budgeting and add rebuild diagnostics

This commit improves memory management by respecting Linux cgroup v1/v2 limits and introduces a configurable memory budget for the new `rebuild` subcommand to prevent OOM during index reconstruction. The rebuild process now supports filtering, compaction, and parallelization. Diagnostics are expanded with debug-level tracing of partition merges, tracking of k-mer expansion, and utility flags for renaming labels and for reporting matrix size breakdowns, per-genome counts, and the partition distribution. Accessor methods for active and remaining memory have also been added to the stats struct.
This commit is contained in:
Eric Coissac
2026-06-12 15:18:37 +02:00
parent 97e3fb9761
commit 52fd2cf801
3 changed files with 104 additions and 9 deletions
+23 -5
View File
@@ -7,7 +7,7 @@ use std::sync::{Arc, Mutex};
use obisys::{MemoryBudget, Reporter, Stage, available_memory_bytes, progress_bar, spinner};
use rayon::prelude::*;
use tracing::info;
use tracing::{debug, info};
use obilayeredmap::IndexMode;
@@ -250,6 +250,15 @@ impl KmerIndex {
let cost = ubytes * exp / 1000;
budget.acquire(cost);
debug!(
"partition {i}: start — est. {} ({:.2}×), \
{} workers active, {} budget remaining",
fmt_bytes(cost),
exp as f64 / 1000.0,
budget.active(),
fmt_bytes(budget.remaining()),
);
let result = dst_partition
.merge_partition(i, &srcs, mode, n_dst_genomes, block_bits, &evidence);
budget.release(cost);
@@ -257,10 +266,19 @@ impl KmerIndex {
match result {
Ok(g_len) => {
if ubytes > 0 {
let actual = g_len as u64 * 16 * 1000 / ubytes;
max_expansion.fetch_max(actual, Ordering::Relaxed);
}
let actual_exp = if ubytes > 0 {
g_len as u64 * 16 * 1000 / ubytes
} else {
0
};
max_expansion.fetch_max(actual_exp, Ordering::Relaxed);
debug!(
"partition {i}: done — {} new kmers, actual {:.2}× \
(estimated {:.2}×)",
g_len,
actual_exp as f64 / 1000.0,
exp as f64 / 1000.0,
);
part_stats.lock().unwrap().push(PartStat {
id: i,
unitig_bytes: ubytes,