perf: enable zero-allocation queries and memory-mapped indexes
Introduce zero-allocation row extraction and query result buffers across `obicompactvec` and `obikpartitionner` to eliminate per-kmer heap allocations. Replace in-memory MPHF deserialization with memory-mapped, zero-copy views to reduce runtime memory footprint. Add configurable I/O chunking, a RAM-aware `--chunk-size` parameter, and system memory monitoring via the new `sysinfo` dependency. Re-export `PreloadedIndex` for external consumers.
This commit is contained in:
@@ -17,8 +17,13 @@ pub(crate) const UNITIGS_FILE: &str = "unitigs.bin";
|
||||
pub(crate) const EVIDENCE_FILE: &str = "evidence.bin";
|
||||
pub(crate) const FINGERPRINT_FILE: &str = "fingerprint.bin";
|
||||
|
||||
/// Owned MPHF — used only at build time (construction + store).
|
||||
pub(crate) type Mphf = PtrHash<u64, CubicEps, CachelineEfVec<Vec<CachelineEf>>, Xx64, Vec<u8>>;
|
||||
|
||||
/// Zero-copy MPHF for querying — ε-deserialized view into a memory-mapped file.
|
||||
/// `MemCase` owns the mmap backing; `'static` is sound because `MemCase` pins the memory.
|
||||
type MphfEps = PtrHash<u64, CubicEps, CachelineEfVec<&'static [CachelineEf]>, Xx64, &'static [u8]>;
|
||||
|
||||
// ── LayerEvidence ─────────────────────────────────────────────────────────────
|
||||
|
||||
enum LayerEvidence {
|
||||
@@ -36,7 +41,7 @@ enum LayerEvidence {
|
||||
/// - [`find_strict`](Self::find_strict) — always exact; O(1) on Exact/Hybrid layers,
|
||||
/// O(n) sequential scan on Approx layers.
|
||||
pub struct MphfLayer {
|
||||
mphf: Mphf,
|
||||
mphf: MemCase<MphfEps>,
|
||||
ev: LayerEvidence,
|
||||
n: usize,
|
||||
}
|
||||
@@ -45,7 +50,7 @@ impl MphfLayer {
|
||||
/// Open a layer using the index-level `mode` determined at `LayeredMap` open time.
|
||||
/// No per-layer metadata file is read.
|
||||
pub fn open(dir: &Path, mode: &IndexMode) -> OLMResult<Self> {
|
||||
let mphf: Mphf = Mphf::load_full(&dir.join(MPHF_FILE))
|
||||
let mphf: MemCase<MphfEps> = Mphf::mmap(&dir.join(MPHF_FILE), Flags::empty())
|
||||
.map_err(|e| OLMError::InvalidLayer(e.to_string()))?;
|
||||
let (ev, n) = match mode {
|
||||
IndexMode::Exact => {
|
||||
@@ -137,11 +142,11 @@ impl MphfLayer {
|
||||
///
|
||||
/// Use this when the caller guarantees that all queried kmers are in the MPHF
|
||||
/// domain (e.g. when iterating the source's own unitigs during merge).
|
||||
pub struct MphfOnly(Mphf);
|
||||
pub struct MphfOnly(MemCase<MphfEps>);
|
||||
|
||||
impl MphfOnly {
|
||||
pub fn open(dir: &Path) -> OLMResult<Self> {
|
||||
let mphf: Mphf = Mphf::load_full(&dir.join(MPHF_FILE))
|
||||
let mphf: MemCase<MphfEps> = Mphf::mmap(&dir.join(MPHF_FILE), Flags::empty())
|
||||
.map_err(|e| OLMError::InvalidLayer(e.to_string()))?;
|
||||
Ok(Self(mphf))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user