From 009a328c5818c781b0a3ce93608584c8fbafebbe Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 26 May 2026 09:43:45 +0200 Subject: [PATCH] refactor: open unitig files with UnitigFileReader::open_sequential Replace every `UnitigFileReader::open` call in `dump_layer.rs`, `merge_layer.rs`, and `rebuild_layer.rs` with `UnitigFileReader::open_sequential` (two call sites per file). The surrounding logic, including the existing `SrcLayerData::open` calls and the `iter_indexed_canonical_kmers()` loops, is unchanged. --- src/obikpartitionner/src/dump_layer.rs | 4 ++-- src/obikpartitionner/src/merge_layer.rs | 4 ++-- src/obikpartitionner/src/rebuild_layer.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/obikpartitionner/src/dump_layer.rs b/src/obikpartitionner/src/dump_layer.rs index eaf8231..6216360 100644 --- a/src/obikpartitionner/src/dump_layer.rs +++ b/src/obikpartitionner/src/dump_layer.rs @@ -44,7 +44,7 @@ impl KmerPartition { if !layer_dir.exists() { break; } l += 1; let mphf = MphfLayer::open(&layer_dir).map_err(olm_to_sk)?; - let reader = UnitigFileReader::open(&layer_dir.join("unitigs.bin"))?; + let reader = UnitigFileReader::open_sequential(&layer_dir.join("unitigs.bin"))?; let counts_dir = layer_dir.join("counts"); let presence_dir = layer_dir.join("presence"); @@ -97,7 +97,7 @@ impl KmerPartition { let layer_dir = index_dir.join(format!("layer_{layer}")); if !layer_dir.exists() { break; } let mphf = MphfLayer::open(&layer_dir).map_err(olm_to_sk)?; - let reader = UnitigFileReader::open(&layer_dir.join("unitigs.bin"))?; + let reader = UnitigFileReader::open_sequential(&layer_dir.join("unitigs.bin"))?; let counts_dir = layer_dir.join("counts"); let presence_dir = layer_dir.join("presence"); diff --git a/src/obikpartitionner/src/merge_layer.rs b/src/obikpartitionner/src/merge_layer.rs index d9d5a38..fa6b1be 100644 --- a/src/obikpartitionner/src/merge_layer.rs +++ 
b/src/obikpartitionner/src/merge_layer.rs @@ -195,7 +195,7 @@ impl KmerPartition { for l in 0..src_meta.n_layers { let unitigs_path = src_index_dir .join(format!("layer_{l}")).join("unitigs.bin"); - let reader = UnitigFileReader::open(&unitigs_path)?; + let reader = UnitigFileReader::open_sequential(&unitigs_path)?; for (kmer, _, _) in reader.iter_indexed_canonical_kmers() { if dst_map.query(kmer).is_none() { g.push(kmer); @@ -303,7 +303,7 @@ impl KmerPartition { for l in 0..src_meta.n_layers { let src_layer_dir = src_index_dir.join(format!("layer_{l}")); - let reader = UnitigFileReader::open(&src_layer_dir.join("unitigs.bin"))?; + let reader = UnitigFileReader::open_sequential(&src_layer_dir.join("unitigs.bin"))?; let src_data = SrcLayerData::open(&src_layer_dir, mode)?; for (kmer, _, _) in reader.iter_indexed_canonical_kmers() { diff --git a/src/obikpartitionner/src/rebuild_layer.rs b/src/obikpartitionner/src/rebuild_layer.rs index eb30cb2..29ca5d5 100644 --- a/src/obikpartitionner/src/rebuild_layer.rs +++ b/src/obikpartitionner/src/rebuild_layer.rs @@ -116,7 +116,7 @@ impl KmerPartition { let unitigs_path = src_layer_dir.join("unitigs.bin"); if !unitigs_path.exists() { continue; } - let reader = UnitigFileReader::open(&unitigs_path)?; + let reader = UnitigFileReader::open_sequential(&unitigs_path)?; let src_data = SrcLayerData::open(&src_layer_dir, mode)?; for (kmer, _, _) in reader.iter_indexed_canonical_kmers() { @@ -181,7 +181,7 @@ impl KmerPartition { let unitigs_path = src_layer_dir.join("unitigs.bin"); if !unitigs_path.exists() { continue; } - let reader = UnitigFileReader::open(&unitigs_path)?; + let reader = UnitigFileReader::open_sequential(&unitigs_path)?; let src_data = SrcLayerData::open(&src_layer_dir, mode)?; for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {