feat: add memory-aware parallel merge scheduling and CLI flags

Introduces a memory-aware scheduling strategy for parallel partition merging that replaces unbounded concurrency with a First-Fit Decreasing approach gated by a thread-safe `MemoryBudget` semaphore. An adaptive expansion factor, seeded by a sequential pilot run, dynamically caps concurrent workers to prevent hashbrown OOMs. Adds a `--budget-fraction` CLI flag to configure RAM allocation, enhances the CLI to accept multiple indexes, and introduces comprehensive partition diagnostics including memory utilization tracking, concurrency metrics, and statistical summaries with ASCII histograms. Updates documentation and navigation accordingly.
This commit is contained in:
Eric Coissac
2026-06-12 11:26:24 +02:00
parent f44fe042bc
commit b5e027f23b
7 changed files with 742 additions and 81 deletions
+55
View File
@@ -1,5 +1,6 @@
use std::fmt;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Condvar, Mutex};
use std::time::{Duration, Instant};
use indicatif::{ProgressBar, ProgressStyle};
@@ -309,6 +310,60 @@ fn fmt_efficiency(par: f64, n_cores: usize) -> String {
// ── Display ───────────────────────────────────────────────────────────────────
// ── MemoryBudget ──────────────────────────────────────────────────────────────
/// Mutable bookkeeping of a [`MemoryBudget`]; only read or written while the
/// budget's mutex is held.
struct BudgetInner {
    /// Bytes of the budget not currently reserved by an active worker.
    remaining: u64,
    /// Number of workers that have acquired and not yet released.
    active: usize,
    /// Highest value `active` has reached since construction.
    peak_active: usize,
}

/// Counting semaphore that limits total concurrent estimated memory usage.
///
/// Each worker acquires a cost (bytes) before starting and releases it on
/// completion. Non-deadlock guarantee: when no worker is active the next
/// acquire always succeeds regardless of cost vs. remaining budget.
///
/// Budget is only handed back by [`MemoryBudget::release`]; a worker that
/// never calls it keeps its reservation, so callers should pair every
/// `acquire` with exactly one `release` of the same cost.
pub struct MemoryBudget {
    /// Full size of the budget in bytes; `remaining` never exceeds it.
    total: u64,
    /// Counters guarded by the lock that `condvar` waits on.
    inner: Mutex<BudgetInner>,
    /// Signalled on every release so blocked acquirers can re-check.
    condvar: Condvar,
}

impl MemoryBudget {
    /// Creates a budget of `total` bytes with no active worker.
    pub fn new(total: u64) -> Self {
        Self {
            total,
            inner: Mutex::new(BudgetInner { remaining: total, active: 0, peak_active: 0 }),
            condvar: Condvar::new(),
        }
    }

    /// Blocks until `cost` bytes can be reserved, then reserves them.
    ///
    /// The reservation is granted immediately when `cost` fits in the
    /// remaining budget, or when no worker is active at all. In the latter
    /// case an oversized `cost` simply drains the budget to zero, which keeps
    /// a single over-budget job from waiting forever.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn acquire(&self, cost: u64) {
        let guard = self.inner.lock().expect("MemoryBudget mutex poisoned");
        // Sleep while someone else is running AND the request does not fit.
        let mut state = self
            .condvar
            .wait_while(guard, |s| s.active != 0 && s.remaining < cost)
            .expect("MemoryBudget mutex poisoned");
        // Saturating: the idle bypass may admit a cost larger than `remaining`.
        state.remaining = state.remaining.saturating_sub(cost);
        state.active += 1;
        state.peak_active = state.peak_active.max(state.active);
    }

    /// Returns `cost` bytes to the budget and wakes every blocked acquirer.
    ///
    /// Both counters are clamped: `remaining` never exceeds `total` and
    /// `active` never drops below zero. An unmatched or oversized release
    /// therefore cannot overflow, panic under the lock, or wrap `active`
    /// (which would permanently disable the idle bypass in `acquire`).
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn release(&self, cost: u64) {
        {
            let mut state = self.inner.lock().expect("MemoryBudget mutex poisoned");
            state.remaining = state.remaining.saturating_add(cost).min(self.total);
            state.active = state.active.saturating_sub(1);
        }
        // Waiters may be asking for different amounts, so let all of them
        // re-evaluate; the lock is already dropped so they can proceed at once.
        self.condvar.notify_all();
    }

    /// Total size of the budget in bytes, as passed to [`MemoryBudget::new`].
    pub fn total(&self) -> u64 {
        self.total
    }

    /// Highest number of simultaneously active workers observed so far.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn peak_active(&self) -> usize {
        self.inner.lock().expect("MemoryBudget mutex poisoned").peak_active
    }
}
// ── Display ───────────────────────────────────────────────────────────────────
impl fmt::Display for Reporter {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.stages.is_empty() { return Ok(()); }