feat: enhance memory budgeting and add rebuild diagnostics
This commit improves memory management by respecting Linux cgroup v1/v2 limits and introduces a configurable memory budget for the new `rebuild` subcommand to prevent OOM during index reconstruction. The rebuild process now supports filtering, compaction, and parallelization. Diagnostic capabilities are expanded with debug-level tracing for partition merges, k-mer expansion tracking, and utility flags for label renaming, matrix size breakdowns, per-genome counts, and partition distribution reporting. The accessor methods `active()` and `remaining()` have also been added to `MemoryBudget`.
This commit is contained in:
+66
-2
@@ -111,16 +111,78 @@ use sysinfo::System;
|
||||
|
||||
// ── Memory query ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// Returns the number of bytes available for allocation on this machine.
|
||||
/// Returns the number of bytes available for allocation in the current process context.
|
||||
///
|
||||
/// On Linux, cgroup memory limits (SLURM, containers) are checked first: the
|
||||
/// process may be constrained to far less than the host's available RAM.
|
||||
/// Returns `min(cgroup_available, host_available)` when a finite limit is found.
|
||||
///
|
||||
/// On macOS, `available_memory()` can return 0 when the memory compressor
|
||||
/// inflates the page count; in that case we fall back to half of total memory.
|
||||
pub fn available_memory_bytes() -> u64 {
|
||||
let sys = System::new_all();
|
||||
match sys.available_memory() {
|
||||
let host_avail = match sys.available_memory() {
|
||||
0 => sys.total_memory() / 2,
|
||||
n => n,
|
||||
};
|
||||
#[cfg(target_os = "linux")]
|
||||
if let Some(cg) = cgroup_v2_available().or_else(cgroup_v1_available) {
|
||||
return cg.min(host_avail);
|
||||
}
|
||||
host_avail
|
||||
}
|
||||
|
||||
/// cgroup v2 (unified hierarchy): computes the memory headroom for the current
/// process's cgroup from `memory.max` and `memory.current`.
///
/// A limit set on any ancestor cgroup also constrains the process, so every
/// level from the process's own cgroup up to the `/sys/fs/cgroup` mount root is
/// inspected and the smallest `limit - usage` is returned.
///
/// Returns `None` when `/proc/self/cgroup` cannot be read, has no `0::` entry,
/// or no level in the hierarchy exposes a finite, parseable limit.
#[cfg(target_os = "linux")]
fn cgroup_v2_available() -> Option<u64> {
    let cgroup = std::fs::read_to_string("/proc/self/cgroup").ok()?;
    let rel = cgroup_v2_relative_path(&cgroup)?;
    cgroup_v2_available_under(std::path::Path::new("/sys/fs/cgroup"), rel)
}

/// Extracts the cgroup path from the `0::<path>` line of `/proc/self/cgroup`
/// contents, with surrounding whitespace trimmed.
#[cfg(target_os = "linux")]
fn cgroup_v2_relative_path(cgroup: &str) -> Option<&str> {
    cgroup
        .lines()
        .find_map(|line| line.strip_prefix("0::"))
        .map(str::trim)
}

/// Walks from `root/rel` up to `root` and returns the tightest memory headroom
/// among the levels that carry a finite limit.
///
/// A level is skipped when its `memory.max` is `"max"` (no limit; the `u64`
/// parse fails) or when either file is missing or unparseable.
#[cfg(target_os = "linux")]
fn cgroup_v2_available_under(root: &std::path::Path, rel: &str) -> Option<u64> {
    let read_u64 = |file: std::path::PathBuf| -> Option<u64> {
        std::fs::read_to_string(file).ok()?.trim().parse().ok()
    };

    // `rel` is absolute within the cgroup filesystem; strip the leading slash
    // so that `join` appends it to `root` instead of replacing `root`.
    let leaf = root.join(rel.trim_start_matches('/'));

    leaf.ancestors()
        // Never look above the cgroup mount point.
        .take_while(|dir| dir.starts_with(root))
        .filter_map(|dir| {
            let limit = read_u64(dir.join("memory.max"))?;
            let used = read_u64(dir.join("memory.current"))?;
            // Usage can momentarily exceed the limit; clamp at zero.
            Some(limit.saturating_sub(used))
        })
        .min()
}
|
||||
|
||||
/// cgroup v1 (memory subsystem): computes the memory headroom for the current
/// process from `memory.limit_in_bytes` and `memory.usage_in_bytes`.
///
/// Returns `None` when `/proc/self/cgroup` has no memory-controller entry, when
/// the limit is the kernel's "unlimited" sentinel, or on any read/parse error.
#[cfg(target_os = "linux")]
fn cgroup_v1_available() -> Option<u64> {
    let cgroup = std::fs::read_to_string("/proc/self/cgroup").ok()?;
    let path = cgroup_v1_memory_path(&cgroup)?;
    let base = format!("/sys/fs/cgroup/memory{path}");

    let read_u64 = |name: &str| -> Option<u64> {
        std::fs::read_to_string(format!("{base}/{name}"))
            .ok()?
            .trim()
            .parse()
            .ok()
    };

    let limit = read_u64("memory.limit_in_bytes")?;
    // Kernel uses 2^63 (rounded to page) as "no limit" sentinel
    if limit > (1u64 << 62) {
        return None;
    }
    let used = read_u64("memory.usage_in_bytes")?;
    // Usage can momentarily exceed the limit; clamp at zero.
    Some(limit.saturating_sub(used))
}

/// Finds the cgroup path of the v1 `memory` controller in `/proc/self/cgroup`
/// contents.
///
/// Each line has the form `hierarchy-id:controller-list:path`. The controller
/// list may name several co-mounted controllers separated by commas, and the
/// path itself may contain `:`, so the line is split into at most three fields
/// and the controller list is matched entry by entry.
#[cfg(target_os = "linux")]
fn cgroup_v1_memory_path(cgroup: &str) -> Option<&str> {
    cgroup.lines().find_map(|line| {
        let mut fields = line.splitn(3, ':');
        let _hierarchy_id = fields.next()?;
        let controllers = fields.next()?;
        let path = fields.next()?;
        if controllers.split(',').any(|c| c == "memory") {
            Some(path)
        } else {
            None
        }
    })
}
|
||||
|
||||
// ── raw helpers ───────────────────────────────────────────────────────────────
|
||||
@@ -359,6 +421,8 @@ impl MemoryBudget {
|
||||
}
|
||||
|
||||
/// Returns a copy of this budget's `total` field.
pub fn total(&self) -> u64 {
    self.total
}
|
||||
/// Returns the `active` value currently stored behind the `inner` lock.
///
/// # Panics
///
/// Panics if acquiring the `inner` lock returns an error.
pub fn active(&self) -> usize {
    let state = self.inner.lock().unwrap();
    state.active
}
|
||||
/// Returns the `remaining` value currently stored behind the `inner` lock.
///
/// # Panics
///
/// Panics if acquiring the `inner` lock returns an error.
pub fn remaining(&self) -> u64 {
    let state = self.inner.lock().unwrap();
    state.remaining
}
|
||||
/// Returns the `peak_active` value currently stored behind the `inner` lock.
///
/// # Panics
///
/// Panics if acquiring the `inner` lock returns an error.
pub fn peak_active(&self) -> usize {
    let state = self.inner.lock().unwrap();
    state.peak_active
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user