From 2e69b0b7fe0744ef0281f00ca6df94ab6a476111 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 12 Jun 2026 22:09:23 +0200 Subject: [PATCH] refactor: defer SrcLayerData lookups in RawBatch Replace eager resolution of `Vec<u32>` values with an `Arc<SrcLayerData>` handle passed alongside `Vec<CanonicalKmer>`. This shifts the lookup logic to the subsequent transform step, reducing memory overhead and enabling shared, thread-safe access to the source layer data. --- src/obikpartitionner/src/merge_layer.rs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/obikpartitionner/src/merge_layer.rs b/src/obikpartitionner/src/merge_layer.rs index 275027d..4733cf4 100644 --- a/src/obikpartitionner/src/merge_layer.rs +++ b/src/obikpartitionner/src/merge_layer.rs @@ -416,7 +416,7 @@ impl KmerPartition { enum Pass2Data { SrcLayer((usize, usize, PathBuf)), - RawBatch((usize, usize, Vec<(CanonicalKmer, Vec)>)), + RawBatch((usize, usize, Arc, Vec)), WriteBatch(Vec<(Option, usize, usize, u32)>), } @@ -432,7 +432,7 @@ impl KmerPartition { vec![ make_flat_transform!(Pass2Data, { move |(col_offset, src_n, src_layer_dir): (usize, usize, PathBuf)| - -> Vec<(usize, usize, Vec<(CanonicalKmer, Vec)>)> + -> Vec<(usize, usize, Arc, Vec)> { let reader = match UnitigFileReader::open_sequential( &src_layer_dir.join("unitigs.bin"), @@ -444,28 +444,29 @@ impl KmerPartition { } }; let src_data = match SrcLayerData::open(&src_layer_dir, mode) { - Ok(d) => d, + Ok(d) => Arc::new(d), Err(e) => { *err_cap2.lock().unwrap() = Some(e.to_string()); return vec![]; } }; - let all_items: Vec<(CanonicalKmer, Vec)> = reader + let all_kmers: Vec = reader .iter_indexed_canonical_kmers() - .map(|(kmer, _, _)| (kmer, src_data.lookup(kmer, src_n))) + .map(|(kmer, _, _)| kmer) .collect(); - all_items + all_kmers .chunks(BATCH) - .map(|c| (col_offset, src_n, c.to_vec())) + .map(|c| (col_offset, src_n, Arc::clone(&src_data), c.to_vec())) .collect() } }, SrcLayer, RawBatch), make_transform!(Pass2Data, { - move 
|(col_offset, _src_n, items): (usize, usize, Vec<(CanonicalKmer, Vec)>)| + move |(col_offset, src_n, src_data, kmers): (usize, usize, Arc, Vec)| -> Vec<(Option, usize, usize, u32)> { let mut ops: Vec<(Option, usize, usize, u32)> = Vec::new(); - for (kmer, values) in items { + for kmer in kmers { + let values = src_data.lookup(kmer, src_n); if let Some((dst_layer, hit)) = dst_map_t2.query(kmer) { for (g, val) in values.into_iter().enumerate() { ops.push((Some(dst_layer), col_offset + g, hit.slot, val));