refactor: streamline merge pipeline and MPHF indexing

Replace mphf.find() with direct mphf.index() calls to eliminate absence checks and fallback vectors. Introduce a lightweight MphfOnly wrapper for faster index loading, and standardize k-mer iteration across merge and rebuild layers. Update IndexMeta configuration and n_new calculation to leverage MPHF cardinality, streamlining the overall merge pipeline.
This commit is contained in:
Eric Coissac
2026-06-01 13:56:48 +02:00
parent 1e2115a1b0
commit 0350ca855b
5 changed files with 51 additions and 31 deletions
+1 -1
View File
@@ -12,4 +12,4 @@ pub use layer::{Hit, Layer, LayerData};
pub use layered_store::LayeredStore;
pub use map::LayeredMap;
pub use meta::{IndexMode, PartitionMeta};
pub use mphf_layer::MphfLayer;
pub use mphf_layer::{MphfLayer, MphfOnly};
+24
View File
@@ -129,7 +129,31 @@ impl MphfLayer {
}
/// Returns the value held in this layer's `n` field.
///
/// NOTE(review): presumably the number of keys covered by the layer's
/// MPHF — confirm against the code that populates `n`.
pub fn n(&self) -> usize {
    self.n
}
}
// ── MphfOnly ──────────────────────────────────────────────────────────────────
/// Thin newtype around an [`Mphf`] loaded on its own, with no evidence or
/// unitig data attached.
///
/// Intended for callers that can guarantee every queried kmer belongs to the
/// MPHF domain (for example, walking the source's own unitigs during a merge).
pub struct MphfOnly(Mphf);

impl MphfOnly {
    /// Loads the MPHF stored as `MPHF_FILE` inside `dir`.
    ///
    /// # Errors
    ///
    /// Returns [`OLMError::InvalidLayer`] carrying the stringified loader
    /// error when the MPHF file cannot be loaded.
    pub fn open(dir: &Path) -> OLMResult<Self> {
        let mphf_path = dir.join(MPHF_FILE);
        match Mphf::load_full(&mphf_path) {
            Ok(loaded) => Ok(Self(loaded)),
            Err(cause) => Err(OLMError::InvalidLayer(cause.to_string())),
        }
    }

    /// Looks up the slot assigned to `kmer` by the wrapped MPHF.
    ///
    /// No membership check is performed here: the result is only valid when
    /// `kmer` is in the MPHF domain, which the caller must guarantee.
    #[inline]
    pub fn index(&self, kmer: CanonicalKmer) -> usize {
        let Self(mphf) = self;
        let raw_key = kmer.raw();
        mphf.index(&raw_key)
    }
}
impl MphfLayer {
// ── Build helpers ─────────────────────────────────────────────────────────
pub fn unitig_writer(dir: &Path) -> OLMResult<UnitigFileWriter> {