6 Commits

Author SHA1 Message Date
Eric Coissac c612132763 feat: simplify worker spawning logic and update macOS build workflow
Release / create-release (push) Successful in 2m59s
Release / build-linux-x86_64 (push) Successful in 8m13s
Release / build-macos-arm64 (push) Failing after 8s
CI / build (pull_request) Successful in 3m24s
Updates the release workflow to run macOS builds inside a Docker container with explicit registry authentication and adjusted artifact paths. Bumps the obikmer crate version to 1.1.29 and adds *.log to .gitignore. Simplifies NUMA worker spawning by lowering the activation threshold from 0.95 to 0.2, replacing complex stateful tracking with a direct efficiency check, and downgrading progress logging to debug level. Includes general code formatting improvements for readability.
2026-07-01 11:40:57 +02:00
coissac 19660f8cd0 Merge pull request 'ci: update registry auth and improve adaptive worker scaling' (#51) from push-qlpywtroutvx into main
Reviewed-on: #51
2026-06-26 13:16:23 +00:00
Eric Coissac 7b07540a69 ci: update registry auth and improve adaptive worker scaling
Release / create-release (push) Successful in 2m27s
CI / build (pull_request) Successful in 3m17s
Release / build-linux-x86_64 (push) Successful in 8m3s
Release / build-macos-arm64 (push) Failing after 1s
Refactor the release workflow to use a structured container object with authenticated pulls for macOS ARM64 builds. Replace single-worker activation with dynamic upfront provisioning based on node and worker counts. Implement an absolute efficiency gain threshold for scaling checks and add early termination to improve adaptive scaling stability. Bump obikmer crate version to 1.1.27.
2026-06-26 15:13:13 +02:00
coissac 89c43e28f5 Merge pull request 'ci: update release workflow and bump obikmer to 1.1.26' (#50) from push-npttlqpomtvz into main
Reviewed-on: #50
2026-06-24 13:55:40 +00:00
Eric Coissac b9b2e42ad2 ci: update release workflow and bump obikmer to 1.1.26
Release / create-release (push) Successful in 2m32s
CI / build (pull_request) Successful in 3m47s
Release / build-linux-x86_64 (push) Successful in 8m18s
Release / build-macos-arm64 (push) Failing after 0s
Replaces the macOS ARM64 cross-compilation container with a custom internal registry image. Adds explicit steps to install the `aarch64-apple-darwin` Rust target and `jq`, and updates the build command to use `--no-default-features`. Bumps the `obikmer` package version from 1.1.25 to 1.1.26.
2026-06-24 15:55:02 +02:00
coissac ca42fdff2f Merge pull request 'ci: update macOS ARM64 build workflow and bump obikmer version' (#49) from push-lllnsqlrqrut into main
Reviewed-on: #49
2026-06-23 13:15:20 +00:00
6 changed files with 239 additions and 162 deletions
+9 -14
View File
@@ -86,17 +86,11 @@ jobs:
build-macos-arm64: build-macos-arm64:
needs: create-release needs: create-release
runs-on: ubuntu-latest runs-on: ubuntu-latest
container: joseluisq/rust-linux-darwin-builder:latest
defaults:
run:
working-directory: src
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Add target and install jq - name: Login to registry
run: | run: echo "${{ secrets.REGISTRYTOKEN }}" | docker login registry.metabarcoding.org -u ${{ github.actor }} --password-stdin
rustup target add aarch64-apple-darwin
apt-get update -qq && apt-get install -y -qq jq
- name: Cache cargo registry - name: Cache cargo registry
uses: actions/cache@v4 uses: actions/cache@v4
@@ -109,11 +103,12 @@ jobs:
restore-keys: macos-arm64-cargo- restore-keys: macos-arm64-cargo-
- name: Build macOS binary - name: Build macOS binary
env: run: |
CC: aarch64-apple-darwin22.4-clang docker run --rm \
CXX: aarch64-apple-darwin22.4-clang++ -v "${{ github.workspace }}:/src" \
CARGO_TARGET_AARCH64_APPLE_DARWIN_LINKER: aarch64-apple-darwin22.4-clang -w /src/src \
run: cargo build --release --target aarch64-apple-darwin registry.metabarcoding.org/cibuilder/rustcrossosx:latest \
cargo build --release --target aarch64-apple-darwin --no-default-features
- name: Prepare and upload artifact - name: Prepare and upload artifact
env: env:
@@ -121,7 +116,7 @@ jobs:
RELEASE_ID: ${{ needs.create-release.outputs.release_id }} RELEASE_ID: ${{ needs.create-release.outputs.release_id }}
run: | run: |
mkdir -p /tmp/dist mkdir -p /tmp/dist
cp target/aarch64-apple-darwin/release/obikmer /tmp/dist/obikmer-macos-arm64 cp src/target/aarch64-apple-darwin/release/obikmer /tmp/dist/obikmer-macos-arm64
curl -s -X POST \ curl -s -X POST \
"${{ github.server_url }}/api/v1/repos/${{ github.repository }}/releases/$RELEASE_ID/assets" \ "${{ github.server_url }}/api/v1/repos/${{ github.repository }}/releases/$RELEASE_ID/assets" \
-H "Authorization: token $GITEA_TOKEN" \ -H "Authorization: token $GITEA_TOKEN" \
+1
View File
@@ -8,6 +8,7 @@ data-stress
*.pb *.pb
./**/*.json ./**/*.json
*.bin *.bin
*.log
Betula_exilis--IGA-24-33 Betula_exilis--IGA-24-33
benchmark/genomes benchmark/genomes
benchmark/simulated_data benchmark/simulated_data
+1 -1
View File
@@ -1704,7 +1704,7 @@ dependencies = [
[[package]] [[package]]
name = "obikmer" name = "obikmer"
version = "1.1.25" version = "1.1.29"
dependencies = [ dependencies = [
"clap", "clap",
"csv", "csv",
+8 -37
View File
@@ -217,13 +217,9 @@ impl PartitionRunner {
return Ok(()); return Ok(());
} }
const SPAWN_THRESHOLD: f64 = 0.95; const SPAWN_THRESHOLD: f64 = 0.2;
const TIMER_SECS: u64 = 30; const TIMER_SECS: u64 = 30;
let n_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
// ── Channels ────────────────────────────────────────────────────────── // ── Channels ──────────────────────────────────────────────────────────
let (part_tx, part_rx) = unbounded::<usize>(); let (part_tx, part_rx) = unbounded::<usize>();
let (activate_tx, activate_rx) = unbounded::<()>(); let (activate_tx, activate_rx) = unbounded::<()>();
@@ -287,10 +283,10 @@ impl PartitionRunner {
drop(event_tx); drop(event_tx);
// ── Controller ──────────────────────────────────────────────────── // ── Controller ────────────────────────────────────────────────────
activate_tx.send(()).ok(); let initial_workers = n_nodes.min(max_workers).min(n_total);
let mut n_active = 1usize; for _ in 0..initial_workers { activate_tx.send(()).ok(); }
let mut n_active = initial_workers;
let mut cpu_sample = CpuSample::now(); let mut cpu_sample = CpuSample::now();
let mut eff_at_last_spawn = 0.0f64; // 0 = no previous spawn to evaluate
let mut completed = 0usize; let mut completed = 0usize;
while completed < n_total { while completed < n_total {
@@ -307,15 +303,13 @@ impl PartitionRunner {
// Inline check: same logic as a timer tick. // Inline check: same logic as a timer tick.
maybe_activate( maybe_activate(
&activate_tx, &mut n_active, max_workers, &activate_tx, &mut n_active, max_workers,
&mut cpu_sample, &mut eff_at_last_spawn, &mut cpu_sample, SPAWN_THRESHOLD, completed, n_total,
n_cores, SPAWN_THRESHOLD, completed, n_total,
); );
} }
WorkerEvent::TimerTick => { WorkerEvent::TimerTick => {
maybe_activate( maybe_activate(
&activate_tx, &mut n_active, max_workers, &activate_tx, &mut n_active, max_workers,
&mut cpu_sample, &mut eff_at_last_spawn, &mut cpu_sample, SPAWN_THRESHOLD, completed, n_total,
n_cores, SPAWN_THRESHOLD, completed, n_total,
); );
} }
} }
@@ -346,38 +340,15 @@ fn maybe_activate(
n_active: &mut usize, n_active: &mut usize,
max_workers: usize, max_workers: usize,
cpu_sample: &mut CpuSample, cpu_sample: &mut CpuSample,
eff_at_last_spawn: &mut f64,
n_cores: usize,
threshold: f64, threshold: f64,
completed: usize, completed: usize,
n_total: usize, n_total: usize,
) { ) {
if *n_active >= max_workers || completed >= n_total { return; } if *n_active >= max_workers || completed >= n_total { return; }
let eff = cpu_sample.cpu_efficiency(n_cores); if cpu_sample.do_i_activate(threshold) {
if eff >= threshold { return; } // CPU already saturated
// Check that the previous activation was beneficial enough.
// Going from k-1 → k workers, the minimum acceptable speedup is (k-1+0.2)/(k-1).
// For the very first extra worker (n_active == 1, no previous spawn), skip this
// check: eff_at_last_spawn == 0 acts as the sentinel.
let last_spawn_was_beneficial = if *eff_at_last_spawn < 1e-9 {
true // first additional worker: no prior data to evaluate
} else {
let k_before = (*n_active - 1) as f64;
let min_speedup = (k_before + 0.2) / k_before;
let actual_speedup = eff / *eff_at_last_spawn;
actual_speedup >= min_speedup
};
if last_spawn_was_beneficial {
activate_tx.send(()).ok(); activate_tx.send(()).ok();
*eff_at_last_spawn = eff;
*n_active += 1; *n_active += 1;
*cpu_sample = CpuSample::now(); debug!("activated worker {}/{}", n_active, max_workers);
debug!(
"activated worker {}/{} — efficiency {:.0}%",
n_active, max_workers, eff * 100.0,
);
} }
} }
+1 -1
View File
@@ -1,6 +1,6 @@
[package] [package]
name = "obikmer" name = "obikmer"
version = "1.1.25" version = "1.1.29"
edition = "2024" edition = "2024"
[[bin]] [[bin]]
+157 -47
View File
@@ -4,7 +4,7 @@ use std::sync::{Condvar, Mutex};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
use tracing::{info, warn}; use tracing::{debug, info, warn};
const BRAILLE: &[&str] = &["", "", "", "", "", "", "", "", "", ""]; const BRAILLE: &[&str] = &["", "", "", "", "", "", "", "", "", ""];
@@ -31,7 +31,8 @@ impl TracedBar {
let pct10 = (pos * 10) / self.total; // 0..=10 let pct10 = (pos * 10) / self.total; // 0..=10
let last = self.last_pct.load(Ordering::Relaxed); let last = self.last_pct.load(Ordering::Relaxed);
if pct10 > last if pct10 > last
&& self.last_pct && self
.last_pct
.compare_exchange(last, pct10, Ordering::Relaxed, Ordering::Relaxed) .compare_exchange(last, pct10, Ordering::Relaxed, Ordering::Relaxed)
.is_ok() .is_ok()
{ {
@@ -49,14 +50,14 @@ impl TracedBar {
let msg = msg.into(); let msg = msg.into();
if self.pb.is_hidden() { if self.pb.is_hidden() {
if self.total > 0 { if self.total > 0 {
// bounded bar: always log (already rate-limited by 10% threshold in inc) debug!(stage = %self.label, "{msg}");
info!(stage = %self.label, "{msg}");
} else { } else {
// spinner: throttle to ~10 s // spinner: throttle to ~10 s
let now_ms = self.start.elapsed().as_millis() as u64; let now_ms = self.start.elapsed().as_millis() as u64;
let last = self.last_log_ms.load(Ordering::Relaxed); let last = self.last_log_ms.load(Ordering::Relaxed);
if now_ms >= last + 10_000 if now_ms >= last + 10_000
&& self.last_log_ms && self
.last_log_ms
.compare_exchange(last, now_ms, Ordering::Relaxed, Ordering::Relaxed) .compare_exchange(last, now_ms, Ordering::Relaxed, Ordering::Relaxed)
.is_ok() .is_ok()
{ {
@@ -83,8 +84,13 @@ pub fn spinner(label: &str) -> TracedBar {
); );
pb.enable_steady_tick(Duration::from_millis(100)); pb.enable_steady_tick(Duration::from_millis(100));
TracedBar { TracedBar {
pb, label: label.to_string(), unit: String::new(), total: 0, pb,
start: Instant::now(), last_pct: AtomicU64::new(0), last_log_ms: AtomicU64::new(0), label: label.to_string(),
unit: String::new(),
total: 0,
start: Instant::now(),
last_pct: AtomicU64::new(0),
last_log_ms: AtomicU64::new(0),
} }
} }
@@ -101,8 +107,13 @@ pub fn progress_bar(label: &str, n: u64, unit: &str) -> TracedBar {
); );
pb.enable_steady_tick(Duration::from_millis(100)); pb.enable_steady_tick(Duration::from_millis(100));
TracedBar { TracedBar {
pb, label: label.to_string(), unit: unit.to_string(), total: n, pb,
start: Instant::now(), last_pct: AtomicU64::new(0), last_log_ms: AtomicU64::new(0), label: label.to_string(),
unit: unit.to_string(),
total: n,
start: Instant::now(),
last_pct: AtomicU64::new(0),
last_log_ms: AtomicU64::new(0),
} }
} }
@@ -204,13 +215,19 @@ fn tv_to_secs(tv: timeval) -> f64 {
} }
#[cfg(target_os = "macos")] #[cfg(target_os = "macos")]
fn rss_to_bytes(ru: &rusage) -> u64 { ru.ru_maxrss as u64 } fn rss_to_bytes(ru: &rusage) -> u64 {
ru.ru_maxrss as u64
}
#[cfg(not(target_os = "macos"))] #[cfg(not(target_os = "macos"))]
fn rss_to_bytes(ru: &rusage) -> u64 { ru.ru_maxrss as u64 * 1024 } fn rss_to_bytes(ru: &rusage) -> u64 {
ru.ru_maxrss as u64 * 1024
}
// Monotonically increasing counters — negative delta would be a kernel bug. // Monotonically increasing counters — negative delta would be a kernel bug.
fn delta(end: i64, start: i64) -> u64 { (end - start).max(0) as u64 } fn delta(end: i64, start: i64) -> u64 {
(end - start).max(0) as u64
}
// ── CpuSample ───────────────────────────────────────────────────────────────── // ── CpuSample ─────────────────────────────────────────────────────────────────
@@ -221,6 +238,7 @@ pub struct CpuSample {
wall: Instant, wall: Instant,
user_secs: f64, user_secs: f64,
sys_secs: f64, sys_secs: f64,
previous: f64,
} }
impl CpuSample { impl CpuSample {
@@ -230,6 +248,7 @@ impl CpuSample {
wall: Instant::now(), wall: Instant::now(),
user_secs: tv_to_secs(ru.ru_utime), user_secs: tv_to_secs(ru.ru_utime),
sys_secs: tv_to_secs(ru.ru_stime), sys_secs: tv_to_secs(ru.ru_stime),
previous: 0.0,
} }
} }
@@ -238,11 +257,38 @@ impl CpuSample {
pub fn cpu_efficiency(&self, n_cores: usize) -> f64 { pub fn cpu_efficiency(&self, n_cores: usize) -> f64 {
let ru = get_rusage(); let ru = get_rusage();
let wall = self.wall.elapsed().as_secs_f64(); let wall = self.wall.elapsed().as_secs_f64();
if wall < 0.1 { return 0.0; } if wall < 0.1 {
let cpu = (tv_to_secs(ru.ru_utime) - self.user_secs) return 0.0;
+ (tv_to_secs(ru.ru_stime) - self.sys_secs); }
let cpu =
(tv_to_secs(ru.ru_utime) - self.user_secs) + (tv_to_secs(ru.ru_stime) - self.sys_secs);
cpu / (wall * n_cores as f64) cpu / (wall * n_cores as f64)
} }
pub fn do_i_activate(&mut self, threshold: f64) -> bool {
let n = CpuSample::now();
let delta_ru = (n.user_secs - self.user_secs) + (n.sys_secs - self.sys_secs);
let delta_wall = self.wall.elapsed().as_secs_f64();
let efficiency = delta_ru / delta_wall;
let activate = 0f64.max(efficiency - self.previous) >= threshold;
if activate {
debug!(
"Do I activate : {} -> {} = {} Activate: {}",
self.previous,
efficiency,
0f64.max(efficiency - self.previous),
activate
);
self.previous = efficiency;
self.user_secs = n.user_secs;
self.sys_secs = n.sys_secs;
self.wall = n.wall;
}
activate
}
} }
// ── public API ──────────────────────────────────────────────────────────────── // ── public API ────────────────────────────────────────────────────────────────
@@ -259,7 +305,11 @@ impl Stage {
pub fn start(label: impl Into<String>) -> Self { pub fn start(label: impl Into<String>) -> Self {
let label = label.into(); let label = label.into();
info!(stage = %label, "started"); info!(stage = %label, "started");
Self { label, wall: Instant::now(), ru: get_rusage() } Self {
label,
wall: Instant::now(),
ru: get_rusage(),
}
} }
pub fn stop(self) -> StageStats { pub fn stop(self) -> StageStats {
@@ -318,8 +368,11 @@ pub struct StageStats {
impl StageStats { impl StageStats {
/// (user + sys) / wall — effective thread count utilisation. /// (user + sys) / wall — effective thread count utilisation.
pub fn parallelism(&self) -> f64 { pub fn parallelism(&self) -> f64 {
if self.wall_secs > 1e-9 { (self.user_secs + self.sys_secs) / self.wall_secs } if self.wall_secs > 1e-9 {
else { 0.0 } (self.user_secs + self.sys_secs) / self.wall_secs
} else {
0.0
}
} }
/// parallelism / n_cores — fraction of available CPU power used (0..1+). /// parallelism / n_cores — fraction of available CPU power used (0..1+).
@@ -335,11 +388,19 @@ pub struct Reporter {
} }
impl Reporter { impl Reporter {
pub fn new() -> Self { Self::default() } pub fn new() -> Self {
pub fn push(&mut self, stats: StageStats) { self.stages.push(stats); } Self::default()
pub fn stages(&self) -> &[StageStats] { &self.stages } }
pub fn push(&mut self, stats: StageStats) {
self.stages.push(stats);
}
pub fn stages(&self) -> &[StageStats] {
&self.stages
}
/// Print the summary to stderr. /// Print the summary to stderr.
pub fn print(&self) { eprint!("{self}"); } pub fn print(&self) {
eprint!("{self}");
}
} }
// ── diagnosis ───────────────────────────────────────────────────────────────── // ── diagnosis ─────────────────────────────────────────────────────────────────
@@ -387,26 +448,43 @@ fn diagnose(s: &StageStats, n_cores: usize) -> Diagnosis {
)), )),
}; };
} }
Diagnosis { tag: "", detail: None } Diagnosis {
tag: "",
detail: None,
}
} }
// ── display helpers ─────────────────────────────────────────────────────────── // ── display helpers ───────────────────────────────────────────────────────────
fn fmt_secs(s: f64) -> String { fn fmt_secs(s: f64) -> String {
if s >= 100.0 { format!("{:.0}s", s) } if s >= 100.0 {
else if s >= 10.0 { format!("{:.1}s", s) } format!("{:.0}s", s)
else if s >= 1.0 { format!("{:.2}s", s) } } else if s >= 10.0 {
else { format!("{:.0}ms", s * 1000.0) } format!("{:.1}s", s)
} else if s >= 1.0 {
format!("{:.2}s", s)
} else {
format!("{:.0}ms", s * 1000.0)
}
} }
fn fmt_bytes(b: u64) -> String { fn fmt_bytes(b: u64) -> String {
if b >= 1 << 30 { format!("{:.1} GB", b as f64 / (1u64 << 30) as f64) } if b >= 1 << 30 {
else if b >= 1 << 20 { format!("{:.0} MB", b as f64 / (1u64 << 20) as f64) } format!("{:.1} GB", b as f64 / (1u64 << 30) as f64)
else { format!("{:.0} KB", b as f64 / 1024.0) } } else if b >= 1 << 20 {
format!("{:.0} MB", b as f64 / (1u64 << 20) as f64)
} else {
format!("{:.0} KB", b as f64 / 1024.0)
}
} }
fn fmt_efficiency(par: f64, n_cores: usize) -> String { fn fmt_efficiency(par: f64, n_cores: usize) -> String {
format!("{:.1}×/{} ({:.0}%)", par, n_cores, par / n_cores as f64 * 100.0) format!(
"{:.1}×/{} ({:.0}%)",
par,
n_cores,
par / n_cores as f64 * 100.0
)
} }
// ── Display ─────────────────────────────────────────────────────────────────── // ── Display ───────────────────────────────────────────────────────────────────
@@ -434,7 +512,11 @@ impl MemoryBudget {
pub fn new(total: u64) -> Self { pub fn new(total: u64) -> Self {
Self { Self {
total, total,
inner: Mutex::new(BudgetInner { remaining: total, active: 0, peak_active: 0 }), inner: Mutex::new(BudgetInner {
remaining: total,
active: 0,
peak_active: 0,
}),
condvar: Condvar::new(), condvar: Condvar::new(),
} }
} }
@@ -459,24 +541,40 @@ impl MemoryBudget {
self.condvar.notify_all(); self.condvar.notify_all();
} }
pub fn total(&self) -> u64 { self.total } pub fn total(&self) -> u64 {
pub fn active(&self) -> usize { self.inner.lock().unwrap().active } self.total
pub fn remaining(&self) -> u64 { self.inner.lock().unwrap().remaining } }
pub fn peak_active(&self) -> usize { self.inner.lock().unwrap().peak_active } pub fn active(&self) -> usize {
self.inner.lock().unwrap().active
}
pub fn remaining(&self) -> u64 {
self.inner.lock().unwrap().remaining
}
pub fn peak_active(&self) -> usize {
self.inner.lock().unwrap().peak_active
}
} }
// ── Display ─────────────────────────────────────────────────────────────────── // ── Display ───────────────────────────────────────────────────────────────────
impl fmt::Display for Reporter { impl fmt::Display for Reporter {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.stages.is_empty() { return Ok(()); } if self.stages.is_empty() {
return Ok(());
}
let n_cores = std::thread::available_parallelism() let n_cores = std::thread::available_parallelism()
.map(|n| n.get()) .map(|n| n.get())
.unwrap_or(1); .unwrap_or(1);
// column widths // column widths
let nw = self.stages.iter().map(|s| s.label.len()).max().unwrap_or(5).max(5); let nw = self
.stages
.iter()
.map(|s| s.label.len())
.max()
.unwrap_or(5)
.max(5);
// efficiency col: worst-case width for this run's n_cores value // efficiency col: worst-case width for this run's n_cores value
let ew = format!("{:.1}×/{} (100%)", 99.9f64, n_cores).len(); let ew = format!("{:.1}×/{} (100%)", 99.9f64, n_cores).len();
@@ -484,18 +582,21 @@ impl fmt::Display for Reporter {
let sep = "".repeat(sep_w); let sep = "".repeat(sep_w);
// header // header
writeln!(f, "{:<nw$} {:>7} {:>ew$} {:>8} status", writeln!(
"stage", "wall", "efficiency", "peak RSS")?; f,
"{:<nw$} {:>7} {:>ew$} {:>8} status",
"stage", "wall", "efficiency", "peak RSS"
)?;
writeln!(f, "{sep}")?; writeln!(f, "{sep}")?;
// compute all diagnoses up front (needed for both table and footnotes) // compute all diagnoses up front (needed for both table and footnotes)
let diagnoses: Vec<Diagnosis> = self.stages.iter() let diagnoses: Vec<Diagnosis> = self.stages.iter().map(|s| diagnose(s, n_cores)).collect();
.map(|s| diagnose(s, n_cores))
.collect();
// per-stage rows // per-stage rows
for (s, d) in self.stages.iter().zip(diagnoses.iter()) { for (s, d) in self.stages.iter().zip(diagnoses.iter()) {
writeln!(f, "{:<nw$} {:>7} {:>ew$} {:>8} {}", writeln!(
f,
"{:<nw$} {:>7} {:>ew$} {:>8} {}",
s.label, s.label,
fmt_secs(s.wall_secs), fmt_secs(s.wall_secs),
fmt_efficiency(s.parallelism(), n_cores), fmt_efficiency(s.parallelism(), n_cores),
@@ -508,11 +609,18 @@ impl fmt::Display for Reporter {
let tw = self.stages.iter().map(|s| s.wall_secs).sum::<f64>(); let tw = self.stages.iter().map(|s| s.wall_secs).sum::<f64>();
let tu = self.stages.iter().map(|s| s.user_secs).sum::<f64>(); let tu = self.stages.iter().map(|s| s.user_secs).sum::<f64>();
let ts = self.stages.iter().map(|s| s.sys_secs).sum::<f64>(); let ts = self.stages.iter().map(|s| s.sys_secs).sum::<f64>();
let trss = self.stages.iter().map(|s| s.max_rss_bytes).max().unwrap_or(0); let trss = self
.stages
.iter()
.map(|s| s.max_rss_bytes)
.max()
.unwrap_or(0);
let tpar = if tw > 1e-9 { (tu + ts) / tw } else { 0.0 }; let tpar = if tw > 1e-9 { (tu + ts) / tw } else { 0.0 };
writeln!(f, "{sep}")?; writeln!(f, "{sep}")?;
writeln!(f, "{:<nw$} {:>7} {:>ew$} {:>8}", writeln!(
f,
"{:<nw$} {:>7} {:>ew$} {:>8}",
"TOTAL", "TOTAL",
fmt_secs(tw), fmt_secs(tw),
fmt_efficiency(tpar, n_cores), fmt_efficiency(tpar, n_cores),
@@ -520,7 +628,9 @@ impl fmt::Display for Reporter {
)?; )?;
// bottleneck footnotes (only if at least one anomaly detected) // bottleneck footnotes (only if at least one anomaly detected)
let bottlenecks: Vec<(&str, &str)> = self.stages.iter() let bottlenecks: Vec<(&str, &str)> = self
.stages
.iter()
.zip(diagnoses.iter()) .zip(diagnoses.iter())
.filter_map(|(s, d)| d.detail.as_deref().map(|det| (s.label.as_str(), det))) .filter_map(|(s, d)| d.detail.as_deref().map(|det| (s.label.as_str(), det)))
.collect(); .collect();