refactor: defer SrcLayerData lookups in RawBatch #25
@@ -416,7 +416,7 @@ impl KmerPartition {
|
|||||||
|
|
||||||
enum Pass2Data {
|
enum Pass2Data {
|
||||||
SrcLayer((usize, usize, PathBuf)),
|
SrcLayer((usize, usize, PathBuf)),
|
||||||
RawBatch((usize, usize, Vec<(CanonicalKmer, Vec<u32>)>)),
|
RawBatch((usize, usize, Arc<SrcLayerData>, Vec<CanonicalKmer>)),
|
||||||
WriteBatch(Vec<(Option<usize>, usize, usize, u32)>),
|
WriteBatch(Vec<(Option<usize>, usize, usize, u32)>),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -432,7 +432,7 @@ impl KmerPartition {
|
|||||||
vec![
|
vec![
|
||||||
make_flat_transform!(Pass2Data, {
|
make_flat_transform!(Pass2Data, {
|
||||||
move |(col_offset, src_n, src_layer_dir): (usize, usize, PathBuf)|
|
move |(col_offset, src_n, src_layer_dir): (usize, usize, PathBuf)|
|
||||||
-> Vec<(usize, usize, Vec<(CanonicalKmer, Vec<u32>)>)>
|
-> Vec<(usize, usize, Arc<SrcLayerData>, Vec<CanonicalKmer>)>
|
||||||
{
|
{
|
||||||
let reader = match UnitigFileReader::open_sequential(
|
let reader = match UnitigFileReader::open_sequential(
|
||||||
&src_layer_dir.join("unitigs.bin"),
|
&src_layer_dir.join("unitigs.bin"),
|
||||||
@@ -444,28 +444,29 @@ impl KmerPartition {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
let src_data = match SrcLayerData::open(&src_layer_dir, mode) {
|
let src_data = match SrcLayerData::open(&src_layer_dir, mode) {
|
||||||
Ok(d) => d,
|
Ok(d) => Arc::new(d),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
*err_cap2.lock().unwrap() = Some(e.to_string());
|
*err_cap2.lock().unwrap() = Some(e.to_string());
|
||||||
return vec![];
|
return vec![];
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let all_items: Vec<(CanonicalKmer, Vec<u32>)> = reader
|
let all_kmers: Vec<CanonicalKmer> = reader
|
||||||
.iter_indexed_canonical_kmers()
|
.iter_indexed_canonical_kmers()
|
||||||
.map(|(kmer, _, _)| (kmer, src_data.lookup(kmer, src_n)))
|
.map(|(kmer, _, _)| kmer)
|
||||||
.collect();
|
.collect();
|
||||||
all_items
|
all_kmers
|
||||||
.chunks(BATCH)
|
.chunks(BATCH)
|
||||||
.map(|c| (col_offset, src_n, c.to_vec()))
|
.map(|c| (col_offset, src_n, Arc::clone(&src_data), c.to_vec()))
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
}, SrcLayer, RawBatch),
|
}, SrcLayer, RawBatch),
|
||||||
make_transform!(Pass2Data, {
|
make_transform!(Pass2Data, {
|
||||||
move |(col_offset, _src_n, items): (usize, usize, Vec<(CanonicalKmer, Vec<u32>)>)|
|
move |(col_offset, src_n, src_data, kmers): (usize, usize, Arc<SrcLayerData>, Vec<CanonicalKmer>)|
|
||||||
-> Vec<(Option<usize>, usize, usize, u32)>
|
-> Vec<(Option<usize>, usize, usize, u32)>
|
||||||
{
|
{
|
||||||
let mut ops: Vec<(Option<usize>, usize, usize, u32)> = Vec::new();
|
let mut ops: Vec<(Option<usize>, usize, usize, u32)> = Vec::new();
|
||||||
for (kmer, values) in items {
|
for kmer in kmers {
|
||||||
|
let values = src_data.lookup(kmer, src_n);
|
||||||
if let Some((dst_layer, hit)) = dst_map_t2.query(kmer) {
|
if let Some((dst_layer, hit)) = dst_map_t2.query(kmer) {
|
||||||
for (g, val) in values.into_iter().enumerate() {
|
for (g, val) in values.into_iter().enumerate() {
|
||||||
ops.push((Some(dst_layer), col_offset + g, hit.slot, val));
|
ops.push((Some(dst_layer), col_offset + g, hit.slot, val));
|
||||||
|
|||||||
Reference in New Issue
Block a user