refactor: defer SrcLayerData lookups in RawBatch #25
@@ -416,7 +416,7 @@ impl KmerPartition {
|
||||
|
||||
enum Pass2Data {
|
||||
SrcLayer((usize, usize, PathBuf)),
|
||||
RawBatch((usize, usize, Vec<(CanonicalKmer, Vec<u32>)>)),
|
||||
RawBatch((usize, usize, Arc<SrcLayerData>, Vec<CanonicalKmer>)),
|
||||
WriteBatch(Vec<(Option<usize>, usize, usize, u32)>),
|
||||
}
|
||||
|
||||
@@ -432,7 +432,7 @@ impl KmerPartition {
|
||||
vec![
|
||||
make_flat_transform!(Pass2Data, {
|
||||
move |(col_offset, src_n, src_layer_dir): (usize, usize, PathBuf)|
|
||||
-> Vec<(usize, usize, Vec<(CanonicalKmer, Vec<u32>)>)>
|
||||
-> Vec<(usize, usize, Arc<SrcLayerData>, Vec<CanonicalKmer>)>
|
||||
{
|
||||
let reader = match UnitigFileReader::open_sequential(
|
||||
&src_layer_dir.join("unitigs.bin"),
|
||||
@@ -444,28 +444,29 @@ impl KmerPartition {
|
||||
}
|
||||
};
|
||||
let src_data = match SrcLayerData::open(&src_layer_dir, mode) {
|
||||
Ok(d) => d,
|
||||
Ok(d) => Arc::new(d),
|
||||
Err(e) => {
|
||||
*err_cap2.lock().unwrap() = Some(e.to_string());
|
||||
return vec![];
|
||||
}
|
||||
};
|
||||
let all_items: Vec<(CanonicalKmer, Vec<u32>)> = reader
|
||||
let all_kmers: Vec<CanonicalKmer> = reader
|
||||
.iter_indexed_canonical_kmers()
|
||||
.map(|(kmer, _, _)| (kmer, src_data.lookup(kmer, src_n)))
|
||||
.map(|(kmer, _, _)| kmer)
|
||||
.collect();
|
||||
all_items
|
||||
all_kmers
|
||||
.chunks(BATCH)
|
||||
.map(|c| (col_offset, src_n, c.to_vec()))
|
||||
.map(|c| (col_offset, src_n, Arc::clone(&src_data), c.to_vec()))
|
||||
.collect()
|
||||
}
|
||||
}, SrcLayer, RawBatch),
|
||||
make_transform!(Pass2Data, {
|
||||
move |(col_offset, _src_n, items): (usize, usize, Vec<(CanonicalKmer, Vec<u32>)>)|
|
||||
move |(col_offset, src_n, src_data, kmers): (usize, usize, Arc<SrcLayerData>, Vec<CanonicalKmer>)|
|
||||
-> Vec<(Option<usize>, usize, usize, u32)>
|
||||
{
|
||||
let mut ops: Vec<(Option<usize>, usize, usize, u32)> = Vec::new();
|
||||
for (kmer, values) in items {
|
||||
for kmer in kmers {
|
||||
let values = src_data.lookup(kmer, src_n);
|
||||
if let Some((dst_layer, hit)) = dst_map_t2.query(kmer) {
|
||||
for (g, val) in values.into_iter().enumerate() {
|
||||
ops.push((Some(dst_layer), col_offset + g, hit.slot, val));
|
||||
|
||||
Reference in New Issue
Block a user