refactor: Enforce Rayon parallelism and fix merge_layer concurrency

Updated memory guidelines and feedback docs to explicitly classify intra-partition phases as parallel, correcting prior assumptions of sequential execution. Refactored merge_layer.rs to wrap column builders in Arc<Mutex<ColBuilder>> and use Arc::try_unwrap for safe concurrent access, eliminating race conditions and preventing double-closes during pass2.
This commit is contained in:
Eric Coissac
2026-06-13 13:18:16 +02:00
parent 6d85387077
commit 547cb72d76
3 changed files with 39 additions and 14 deletions
+1
View File
@@ -2,3 +2,4 @@
- [Project domain](project_domain.md) — obikmer est pour la génomique (génomes individuels), pas la métagénomique
- [No architectural decisions without authorization](feedback_architectural_decisions.md) — toute décision architecturale (mémoire, algo, structure) requiert l'accord explicite de l'utilisateur avant toute action
- [Phases intra-partition parallèles](feedback_phases_parallelism.md) — graph build, compute_degrees, unitig traversal, MPHF utilisent Rayon — ne jamais les appeler "séquentielles"
+12
View File
@@ -0,0 +1,12 @@
---
name: feedback-phases-parallelism
description: Les phases intra-partition (graph build, compute_degrees, unitig traversal, MPHF) utilisent toutes Rayon — elles ne sont PAS séquentielles
metadata:
type: feedback
---
Ne jamais qualifier les phases intra-partition de "séquentielles". Chaque phase (graph build, compute_degrees, unitig traversal, MPHF build) utilise Rayon en interne et s'exécute en parallèle sur plusieurs cœurs.
**Why:** L'utilisateur a corrigé ce point plusieurs fois. Le décrire comme "séquentiel" est une erreur factuelle qui fausse l'analyse de performance.
**How to apply:** Quand on analyse l'efficacité CPU ou les 25% manquants, chercher la cause dans le déséquilibre de charge entre partitions, la contention Rayon entre workers, ou la latence inter-partitions — pas dans une prétendue sérialisation des phases.
+26 -14
View File
@@ -453,8 +453,18 @@ impl KmerPartition {
WriteBatch(Vec<(Option<usize>, usize, usize, u32)>),
}
let builders = Arc::new(Mutex::new((exist_builders, new_src_builders)));
let builders_sink = Arc::clone(&builders);
let exist_locked: Vec<Vec<Arc<Mutex<ColBuilder>>>> = exist_builders
.into_iter()
.map(|layer| layer.into_iter().map(|b| Arc::new(Mutex::new(b))).collect())
.collect();
let new_locked: Vec<Arc<Mutex<ColBuilder>>> = new_src_builders
.into_iter()
.map(|b| Arc::new(Mutex::new(b)))
.collect();
let exist_sink: Vec<Vec<Arc<Mutex<ColBuilder>>>> = exist_locked.iter()
.map(|layer| layer.iter().map(Arc::clone).collect())
.collect();
let new_sink: Vec<Arc<Mutex<ColBuilder>>> = new_locked.iter().map(Arc::clone).collect();
let dst_map_t2 = Arc::clone(&dst_map);
let new_mphf_t2 = new_mphf.clone();
let pass2_err: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
@@ -533,11 +543,10 @@ impl KmerPartition {
],
make_sink!(Pass2Data, {
move |ops: Vec<(Option<usize>, usize, usize, u32)>| {
let mut guard = builders_sink.lock().unwrap();
for (layer_opt, col, slot, val) in ops {
match layer_opt {
Some(l) => guard.0[l][col].set_val(slot, val),
None => guard.1[col].set_val(slot, val),
Some(l) => exist_sink[l][col].lock().unwrap().set_val(slot, val),
None => new_sink[col].lock().unwrap().set_val(slot, val),
}
}
}
@@ -555,17 +564,16 @@ impl KmerPartition {
return Err(SKError::InvalidData { context: "merge pass2", detail: msg });
}
let (exist_builders, new_src_builders) = Arc::try_unwrap(builders)
.unwrap_or_else(|_| panic!("pass2: builders not uniquely owned after pipeline"))
.into_inner()
.unwrap_or_else(|e| e.into_inner());
let t_close = std::time::Instant::now();
// ── Close builders and update metadata ────────────────────────────────
for (l, builders) in exist_builders.into_iter().enumerate() {
for (l, builders) in exist_locked.into_iter().enumerate() {
let layer_dir = dst_index_dir.join(format!("layer_{l}"));
for b in builders {
b.close()?;
Arc::try_unwrap(b)
.unwrap_or_else(|_| panic!("pass2: exist_builder[{l}] not uniquely owned"))
.into_inner()
.unwrap_or_else(|e| e.into_inner())
.close()?;
}
let n = dst_map.layer(l).n();
let data_dir = match mode {
@@ -575,8 +583,12 @@ impl KmerPartition {
write_matrix_meta(&data_dir, n, n_dst_genomes + n_src_total).map_err(SKError::Io)?;
}
for b in new_src_builders {
b.close()?;
for b in new_locked {
Arc::try_unwrap(b)
.unwrap_or_else(|_| panic!("pass2: new_builder not uniquely owned"))
.into_inner()
.unwrap_or_else(|e| e.into_inner())
.close()?;
}
if any_new {
let data_dir = match mode {