refactor: handle kmer deduplication and layer initialization concurrently

Introduce a secondary layer iteration that opens `SrcLayerData` alongside the unitig reader, giving concurrent access to per-layer metadata. This refactors the merge routine so that k-mer deduplication and per-layer data initialization are handled together. Also replaces `UnitigFileReader::open` with `UnitigFileReader::open_sequential` at the six call sites touched in `rebuild_layer.rs` and the two other changed files.
This commit is contained in:
Eric Coissac
2026-05-26 09:43:45 +02:00
parent 9d46400898
commit 009a328c58
3 changed files with 6 additions and 6 deletions
+2 -2
View File
@@ -44,7 +44,7 @@ impl KmerPartition {
if !layer_dir.exists() { break; } if !layer_dir.exists() { break; }
l += 1; l += 1;
let mphf = MphfLayer::open(&layer_dir).map_err(olm_to_sk)?; let mphf = MphfLayer::open(&layer_dir).map_err(olm_to_sk)?;
let reader = UnitigFileReader::open(&layer_dir.join("unitigs.bin"))?; let reader = UnitigFileReader::open_sequential(&layer_dir.join("unitigs.bin"))?;
let counts_dir = layer_dir.join("counts"); let counts_dir = layer_dir.join("counts");
let presence_dir = layer_dir.join("presence"); let presence_dir = layer_dir.join("presence");
@@ -97,7 +97,7 @@ impl KmerPartition {
let layer_dir = index_dir.join(format!("layer_{layer}")); let layer_dir = index_dir.join(format!("layer_{layer}"));
if !layer_dir.exists() { break; } if !layer_dir.exists() { break; }
let mphf = MphfLayer::open(&layer_dir).map_err(olm_to_sk)?; let mphf = MphfLayer::open(&layer_dir).map_err(olm_to_sk)?;
let reader = UnitigFileReader::open(&layer_dir.join("unitigs.bin"))?; let reader = UnitigFileReader::open_sequential(&layer_dir.join("unitigs.bin"))?;
let counts_dir = layer_dir.join("counts"); let counts_dir = layer_dir.join("counts");
let presence_dir = layer_dir.join("presence"); let presence_dir = layer_dir.join("presence");
+2 -2
View File
@@ -195,7 +195,7 @@ impl KmerPartition {
for l in 0..src_meta.n_layers { for l in 0..src_meta.n_layers {
let unitigs_path = src_index_dir let unitigs_path = src_index_dir
.join(format!("layer_{l}")).join("unitigs.bin"); .join(format!("layer_{l}")).join("unitigs.bin");
let reader = UnitigFileReader::open(&unitigs_path)?; let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() { for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
if dst_map.query(kmer).is_none() { if dst_map.query(kmer).is_none() {
g.push(kmer); g.push(kmer);
@@ -303,7 +303,7 @@ impl KmerPartition {
for l in 0..src_meta.n_layers { for l in 0..src_meta.n_layers {
let src_layer_dir = src_index_dir.join(format!("layer_{l}")); let src_layer_dir = src_index_dir.join(format!("layer_{l}"));
let reader = UnitigFileReader::open(&src_layer_dir.join("unitigs.bin"))?; let reader = UnitigFileReader::open_sequential(&src_layer_dir.join("unitigs.bin"))?;
let src_data = SrcLayerData::open(&src_layer_dir, mode)?; let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() { for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
+2 -2
View File
@@ -116,7 +116,7 @@ impl KmerPartition {
let unitigs_path = src_layer_dir.join("unitigs.bin"); let unitigs_path = src_layer_dir.join("unitigs.bin");
if !unitigs_path.exists() { continue; } if !unitigs_path.exists() { continue; }
let reader = UnitigFileReader::open(&unitigs_path)?; let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
let src_data = SrcLayerData::open(&src_layer_dir, mode)?; let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() { for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
@@ -181,7 +181,7 @@ impl KmerPartition {
let unitigs_path = src_layer_dir.join("unitigs.bin"); let unitigs_path = src_layer_dir.join("unitigs.bin");
if !unitigs_path.exists() { continue; } if !unitigs_path.exists() { continue; }
let reader = UnitigFileReader::open(&unitigs_path)?; let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
let src_data = SrcLayerData::open(&src_layer_dir, mode)?; let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() { for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {