refactor: defer SrcLayerData lookups in RawBatch #25

Merged
coissac merged 1 commit from push-nxrynoorswrw into main 2026-06-12 20:19:22 +00:00
Showing only changes of commit 2e69b0b7fe - Show all commits
+10 -9
View File
@@ -416,7 +416,7 @@ impl KmerPartition {
enum Pass2Data { enum Pass2Data {
SrcLayer((usize, usize, PathBuf)), SrcLayer((usize, usize, PathBuf)),
RawBatch((usize, usize, Vec<(CanonicalKmer, Vec<u32>)>)), RawBatch((usize, usize, Arc<SrcLayerData>, Vec<CanonicalKmer>)),
WriteBatch(Vec<(Option<usize>, usize, usize, u32)>), WriteBatch(Vec<(Option<usize>, usize, usize, u32)>),
} }
@@ -432,7 +432,7 @@ impl KmerPartition {
vec![ vec![
make_flat_transform!(Pass2Data, { make_flat_transform!(Pass2Data, {
move |(col_offset, src_n, src_layer_dir): (usize, usize, PathBuf)| move |(col_offset, src_n, src_layer_dir): (usize, usize, PathBuf)|
-> Vec<(usize, usize, Vec<(CanonicalKmer, Vec<u32>)>)> -> Vec<(usize, usize, Arc<SrcLayerData>, Vec<CanonicalKmer>)>
{ {
let reader = match UnitigFileReader::open_sequential( let reader = match UnitigFileReader::open_sequential(
&src_layer_dir.join("unitigs.bin"), &src_layer_dir.join("unitigs.bin"),
@@ -444,28 +444,29 @@ impl KmerPartition {
} }
}; };
let src_data = match SrcLayerData::open(&src_layer_dir, mode) { let src_data = match SrcLayerData::open(&src_layer_dir, mode) {
Ok(d) => d, Ok(d) => Arc::new(d),
Err(e) => { Err(e) => {
*err_cap2.lock().unwrap() = Some(e.to_string()); *err_cap2.lock().unwrap() = Some(e.to_string());
return vec![]; return vec![];
} }
}; };
let all_items: Vec<(CanonicalKmer, Vec<u32>)> = reader let all_kmers: Vec<CanonicalKmer> = reader
.iter_indexed_canonical_kmers() .iter_indexed_canonical_kmers()
.map(|(kmer, _, _)| (kmer, src_data.lookup(kmer, src_n))) .map(|(kmer, _, _)| kmer)
.collect(); .collect();
all_items all_kmers
.chunks(BATCH) .chunks(BATCH)
.map(|c| (col_offset, src_n, c.to_vec())) .map(|c| (col_offset, src_n, Arc::clone(&src_data), c.to_vec()))
.collect() .collect()
} }
}, SrcLayer, RawBatch), }, SrcLayer, RawBatch),
make_transform!(Pass2Data, { make_transform!(Pass2Data, {
move |(col_offset, _src_n, items): (usize, usize, Vec<(CanonicalKmer, Vec<u32>)>)| move |(col_offset, src_n, src_data, kmers): (usize, usize, Arc<SrcLayerData>, Vec<CanonicalKmer>)|
-> Vec<(Option<usize>, usize, usize, u32)> -> Vec<(Option<usize>, usize, usize, u32)>
{ {
let mut ops: Vec<(Option<usize>, usize, usize, u32)> = Vec::new(); let mut ops: Vec<(Option<usize>, usize, usize, u32)> = Vec::new();
for (kmer, values) in items { for kmer in kmers {
let values = src_data.lookup(kmer, src_n);
if let Some((dst_layer, hit)) = dst_map_t2.query(kmer) { if let Some((dst_layer, hit)) = dst_map_t2.query(kmer) {
for (g, val) in values.into_iter().enumerate() { for (g, val) in values.into_iter().enumerate() {
ops.push((Some(dst_layer), col_offset + g, hit.slot, val)); ops.push((Some(dst_layer), col_offset + g, hit.slot, val));