feat: add CPU-aware parallel worker pool for partition merging
Introduce CpuSample to measure process-level CPU efficiency and wall-clock time. Use crossbeam-channel to distribute partition merging tasks to a dynamic worker pool that scales based on CPU utilization, capped at half the available cores. Update diagnostics to track pool usage.
This commit is contained in:
@@ -212,6 +212,39 @@ fn rss_to_bytes(ru: &rusage) -> u64 { ru.ru_maxrss as u64 * 1024 }
|
||||
// Monotonically increasing counters — negative delta would be a kernel bug.
|
||||
/// Non-negative difference `end - start` between two counter readings.
///
/// A negative difference is clamped to 0. The subtraction saturates instead of
/// overflowing, so extreme inputs cannot panic (debug) or wrap (release).
fn delta(end: i64, start: i64) -> u64 {
    // After `.max(0)` the value is in `0..=i64::MAX`, so the cast is lossless.
    end.saturating_sub(start).max(0) as u64
}
|
||||
|
||||
// ── CpuSample ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Snapshot of process-wide CPU time + wall clock at a point in time.
///
/// Use [`cpu_efficiency`](Self::cpu_efficiency) to measure the fraction of
/// available cores used since the snapshot was taken.
#[derive(Debug, Clone, Copy)]
pub struct CpuSample {
    /// Wall-clock instant at which the snapshot was taken.
    wall: Instant,
    /// User-mode CPU time (from `ru_utime`) at snapshot time, in seconds.
    user_secs: f64,
    /// System-mode CPU time (from `ru_stime`) at snapshot time, in seconds.
    sys_secs: f64,
}
|
||||
|
||||
impl CpuSample {
|
||||
pub fn now() -> Self {
|
||||
let ru = get_rusage();
|
||||
Self {
|
||||
wall: Instant::now(),
|
||||
user_secs: tv_to_secs(ru.ru_utime),
|
||||
sys_secs: tv_to_secs(ru.ru_stime),
|
||||
}
|
||||
}
|
||||
|
||||
/// (user_delta + sys_delta) / (wall_delta × n_cores) since this snapshot.
|
||||
/// Returns 0.0 if less than 100 ms have elapsed (too noisy).
|
||||
pub fn cpu_efficiency(&self, n_cores: usize) -> f64 {
|
||||
let ru = get_rusage();
|
||||
let wall = self.wall.elapsed().as_secs_f64();
|
||||
if wall < 0.1 { return 0.0; }
|
||||
let cpu = (tv_to_secs(ru.ru_utime) - self.user_secs)
|
||||
+ (tv_to_secs(ru.ru_stime) - self.sys_secs);
|
||||
cpu / (wall * n_cores as f64)
|
||||
}
|
||||
}
|
||||
|
||||
// ── public API ────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Snapshot taken at the start of a pipeline stage.
|
||||
|
||||
Reference in New Issue
Block a user