feat: add memory-aware parallel merge scheduling and CLI flags
Introduces a memory-aware scheduling strategy for parallel partition merging that replaces unbounded concurrency with a First-Fit Decreasing approach gated by a thread-safe `MemoryBudget` semaphore. An adaptive expansion factor, seeded by a sequential pilot run, dynamically caps concurrent workers to prevent hashbrown OOMs. Adds a `--budget-fraction` CLI flag to configure RAM allocation, enhances the CLI to accept multiple indexes, and introduces comprehensive partition diagnostics including memory utilization tracking, concurrency metrics, and statistical summaries with ASCII histograms. Updates documentation and navigation accordingly.
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
use std::fmt;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Condvar, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
@@ -309,6 +310,60 @@ fn fmt_efficiency(par: f64, n_cores: usize) -> String {
|
||||
|
||||
// ── Display ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// ── MemoryBudget ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// Mutable bookkeeping for [`MemoryBudget`]; only read or written while the
/// budget's mutex is held.
struct BudgetInner {
    /// Bytes of the budget not currently reserved by any worker.
    remaining: u64,
    /// Number of workers that have acquired and not yet released.
    active: usize,
    /// Largest value `active` has reached so far.
    peak_active: usize,
}
|
||||
|
||||
/// Counting semaphore that limits total concurrent estimated memory usage.
|
||||
///
|
||||
/// Each worker acquires a cost (bytes) before starting and releases it on
|
||||
/// completion. Non-deadlock guarantee: when no worker is active the next
|
||||
/// acquire always succeeds regardless of cost vs. remaining budget.
|
||||
pub struct MemoryBudget {
|
||||
total: u64,
|
||||
inner: Mutex<BudgetInner>,
|
||||
condvar: Condvar,
|
||||
}
|
||||
|
||||
impl MemoryBudget {
|
||||
pub fn new(total: u64) -> Self {
|
||||
Self {
|
||||
total,
|
||||
inner: Mutex::new(BudgetInner { remaining: total, active: 0, peak_active: 0 }),
|
||||
condvar: Condvar::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn acquire(&self, cost: u64) {
|
||||
let mut g = self.inner.lock().unwrap();
|
||||
loop {
|
||||
if g.active == 0 || g.remaining >= cost {
|
||||
g.remaining = g.remaining.saturating_sub(cost);
|
||||
g.active += 1;
|
||||
g.peak_active = g.peak_active.max(g.active);
|
||||
return;
|
||||
}
|
||||
g = self.condvar.wait(g).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn release(&self, cost: u64) {
|
||||
let mut g = self.inner.lock().unwrap();
|
||||
g.remaining = (g.remaining + cost).min(self.total);
|
||||
g.active -= 1;
|
||||
self.condvar.notify_all();
|
||||
}
|
||||
|
||||
pub fn total(&self) -> u64 { self.total }
|
||||
pub fn peak_active(&self) -> usize { self.inner.lock().unwrap().peak_active }
|
||||
}
|
||||
|
||||
// ── Display ───────────────────────────────────────────────────────────────────
|
||||
|
||||
impl fmt::Display for Reporter {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if self.stages.is_empty() { return Ok(()); }
|
||||
|
||||
Reference in New Issue
Block a user