feat: implement NUMA-aware worker pools for merge command

Replaces the global Rayon pool with per-NUMA-node thread pools that pin worker threads to their respective nodes, leveraging Linux first-touch allocation to reduce cross-NUMA memory contention and improve cache locality. Integrates the `hwlocality` crate with a vendored build, includes graceful fallbacks for single-socket or non-Linux systems, and updates dependency constraints. Also adds installation and architecture documentation, and corrects parallelism detection in the partitioner, which now derives its worker count from `rayon::current_num_threads()` (the size of the pool it is running in) instead of `std::thread::available_parallelism()`.
This commit is contained in:
Eric Coissac
2026-06-14 23:40:09 +02:00
parent f1d76f3203
commit ea767376bd
9 changed files with 654 additions and 34 deletions
+12 -5
View File
@@ -217,6 +217,9 @@ impl KmerPartition {
}
}
let n_src_layers = unitig_paths.len();
debug!("partition {i}: de Bruijn graph build start — {n_src_layers} source layer(s)");
enum Pass1Data {
File(PathBuf),
Batch(Vec<CanonicalKmer>),
@@ -224,7 +227,9 @@ impl KmerPartition {
}
const BATCH: usize = 4096;
let n_workers = std::thread::available_parallelism().map_or(4, |n| n.get());
// Inside pool.install() this returns the per-NUMA pool size; outside
// it returns the global pool size. Both are the right value here.
let n_workers = rayon::current_num_threads().max(1);
let capacity = n_workers * 8;
let dst_filter = Arc::clone(&dst_map);
@@ -311,18 +316,20 @@ impl KmerPartition {
fs::create_dir_all(&new_layer_dir)?;
let mut uw = Layer::<()>::unitig_writer(&new_layer_dir).map_err(olm_to_sk)?;
debug!("partition {i}: unitig traversal start — {} nodes", g.len());
let mut n_unitigs = 0usize;
g.try_for_each_unitig(|unitig| {
n_unitigs += 1;
uw.write(unitig)
})?;
debug!("partition {i}: unitig writer closing");
uw.close()?;
debug!("partition {i}: unitig writer closed — dropping graph ({} nodes)", g.len());
let n = g.len();
let n_nodes = g.len();
debug!("partition {i}: unitig writer closed — dropping graph ({n_nodes} nodes)");
drop(g);
debug!("partition {i}: graph dropped — starting MPHF build ({n} unitigs)");
debug!("partition {i}: graph dropped — starting MPHF build ({n_unitigs} unitigs)");
Layer::<()>::build(&new_layer_dir, block_bits, evidence).map_err(olm_to_sk)?;
debug!("partition {i}: MPHF build done");
n
n_nodes
} else {
drop(g);
0