feat: add CPU-aware parallel worker pool for partition merging

Introduce CpuSample to measure process-level CPU efficiency and wall-clock time. Use crossbeam-channel to distribute partition merging tasks to a dynamic worker pool that scales based on CPU utilization, capped at half the available cores. Update diagnostics to track pool usage.
This commit is contained in:
Eric Coissac
2026-06-13 11:32:12 +02:00
parent fb8c6e427c
commit bc14346f5f
4 changed files with 123 additions and 34 deletions
+33
View File
@@ -212,6 +212,39 @@ fn rss_to_bytes(ru: &rusage) -> u64 { ru.ru_maxrss as u64 * 1024 }
/// Returns `end - start` as an unsigned value, clamping negative results to 0.
///
/// The inputs are monotonically increasing counters, so a negative difference
/// would indicate a kernel bug; it is reported as 0 rather than wrapping.
fn delta(end: i64, start: i64) -> u64 {
    let diff = end - start;
    if diff > 0 {
        diff as u64
    } else {
        0
    }
}
// ── CpuSample ─────────────────────────────────────────────────────────────────
/// Snapshot of process-wide CPU time + wall clock at a point in time.
///
/// Use [`cpu_efficiency`](Self::cpu_efficiency) to measure the fraction of
/// available cores used since the snapshot was taken.
#[derive(Debug)]
pub struct CpuSample {
    /// Wall-clock instant at which the snapshot was taken.
    wall: Instant,
    /// Cumulative user-mode CPU time at snapshot time, in seconds.
    user_secs: f64,
    /// Cumulative system-mode CPU time at snapshot time, in seconds.
    sys_secs: f64,
}
impl CpuSample {
    /// Takes a snapshot of the current wall-clock instant together with the
    /// user and system CPU times reported by `get_rusage()`.
    pub fn now() -> Self {
        let ru = get_rusage();
        Self {
            wall: Instant::now(),
            user_secs: tv_to_secs(ru.ru_utime),
            sys_secs: tv_to_secs(ru.ru_stime),
        }
    }

    /// (user_delta + sys_delta) / (wall_delta × n_cores) since this snapshot.
    ///
    /// Returns 0.0 when no meaningful ratio can be computed:
    /// - less than 100 ms have elapsed (too noisy), or
    /// - `n_cores` is 0, which would otherwise divide by zero and yield
    ///   `inf`/`NaN`, silently breaking threshold comparisons in callers.
    pub fn cpu_efficiency(&self, n_cores: usize) -> f64 {
        let ru = get_rusage();
        let wall = self.wall.elapsed().as_secs_f64();
        if wall < 0.1 || n_cores == 0 {
            return 0.0;
        }
        // CPU seconds consumed (user + system) since the snapshot.
        let cpu = (tv_to_secs(ru.ru_utime) - self.user_secs)
            + (tv_to_secs(ru.ru_stime) - self.sys_secs);
        cpu / (wall * n_cores as f64)
    }
}
// ── public API ────────────────────────────────────────────────────────────────
/// Snapshot taken at the start of a pipeline stage.