refactor: optimize MPHF construction and update legacy guidelines

Replaces the random-access unitig iteration used for MPHF construction, which required the `.idx` file at build time, with a sequential mmap-based k-mer iterator that is bridged into the parallel MPHF builder. The `.idx` file is now generated only after the layer's files have been persisted, so it is no longer a build-time dependency. Also updates `CLAUDE.md` to treat existing code as a hypothesis, mandating proactive removal of obsolete legacy constructs rather than preserving them out of inertia.
This commit is contained in:
Eric Coissac
2026-05-26 09:53:31 +02:00
parent 009a328c58
commit 1d880fdc5f
2 changed files with 16 additions and 9 deletions
+13 -9
View File
@@ -218,11 +218,15 @@ impl MphfLayer {
match evidence_kind {
// ── Exact path ────────────────────────────────────────────────────
// .idx is built LAST, once evidence.bin is written, so it is never
// present during construction — only at query time.
EvidenceKind::Exact => {
build_unitig_idx(&unitig_path, block_bits)?;
let unitigs = UnitigFileReader::open(&unitig_path)?;
let n = unitigs.n_kmers();
let n = UnitigFileReader::open_sequential(&unitig_path)?.n_kmers();
let keys = CanonicalKmerIter::new(&unitig_path)
.map_err(|e| match e {
obiskio::SKError::Io(io) => OLMError::Io(io),
e => OLMError::InvalidLayer(e.to_string()),
})?;
if n == 0 {
fs::File::create(dir.join(EVIDENCE_FILE))?;
@@ -232,15 +236,13 @@ impl MphfLayer {
mphf.store(&dir.join(MPHF_FILE))
.map_err(|e| OLMError::InvalidLayer(e.to_string()))?;
LayerMeta::exact().save(dir)?;
build_unitig_idx(&unitig_path, block_bits)?;
return Ok(0);
}
// Pass 1 — parallel MPHF via random access (.idx required)
let keys = (0..unitigs.len())
.into_par_iter()
.flat_map_iter(|ci| unitigs.unitig(ci).into_canonical_kmers().map(|km| km.raw()));
// Pass 1 — MPHF construction via clonable mmap iterator
let mphf: Mphf =
Mphf::new_from_par_iter(n, keys, PtrHashParams::<CubicEps>::default());
Mphf::new_from_par_iter(n, keys.map(|k| k.raw()).par_bridge(), PtrHashParams::<CubicEps>::default());
mphf.store(&dir.join(MPHF_FILE))
.map_err(|e| OLMError::InvalidLayer(e.to_string()))?;
@@ -266,6 +268,8 @@ impl MphfLayer {
ev.write(&dir.join(EVIDENCE_FILE))?;
LayerMeta::exact().save(dir)?;
// .idx built last: strictly for query-time kmer verification
build_unitig_idx(&unitig_path, block_bits)?;
Ok(n)
}