refactor: streamline merge pipeline and MPHF indexing
Replace mphf.find() with direct mphf.index() calls to eliminate absence checks and fallback vectors. Introduce a lightweight MphfOnly wrapper for faster index loading, and standardize k-mer iteration across merge and rebuild layers. Update IndexMeta configuration and n_new calculation to leverage MPHF cardinality, streamlining the overall merge pipeline.
This commit is contained in:
@@ -12,4 +12,4 @@ pub use layer::{Hit, Layer, LayerData};
|
||||
pub use layered_store::LayeredStore;
|
||||
pub use map::LayeredMap;
|
||||
pub use meta::{IndexMode, PartitionMeta};
|
||||
pub use mphf_layer::MphfLayer;
|
||||
pub use mphf_layer::{MphfLayer, MphfOnly};
|
||||
|
||||
@@ -129,7 +129,31 @@ impl MphfLayer {
|
||||
}
|
||||
|
||||
/// Returns the value held in this layer's `n` field.
///
/// NOTE(review): presumably the number of keys covered by the MPHF; confirm
/// against the constructor, which is not visible here.
pub fn n(&self) -> usize {
    self.n
}
|
||||
}
|
||||
|
||||
// ── MphfOnly ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Lightweight wrapper that loads only the MPHF file, without evidence or unitigs.
|
||||
///
|
||||
/// Use this when the caller guarantees that all queried kmers are in the MPHF
|
||||
/// domain (e.g. when iterating the source's own unitigs during merge).
|
||||
pub struct MphfOnly(Mphf);
|
||||
|
||||
impl MphfOnly {
|
||||
pub fn open(dir: &Path) -> OLMResult<Self> {
|
||||
let mphf: Mphf = Mphf::load_full(&dir.join(MPHF_FILE))
|
||||
.map_err(|e| OLMError::InvalidLayer(e.to_string()))?;
|
||||
Ok(Self(mphf))
|
||||
}
|
||||
|
||||
/// Return the slot for `kmer`. Only valid when `kmer` is in the MPHF domain.
|
||||
#[inline]
|
||||
pub fn index(&self, kmer: CanonicalKmer) -> usize {
|
||||
self.0.index(&kmer.raw())
|
||||
}
|
||||
}
|
||||
|
||||
impl MphfLayer {
|
||||
// ── Build helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
pub fn unitig_writer(dir: &Path) -> OLMResult<UnitigFileWriter> {
|
||||
|
||||
Reference in New Issue
Block a user