refactor: optimize MPHF construction and update legacy guidelines
Replaces the parallel random-access unitig iteration with a sequential mmap-based iterator (`CanonicalKmerIter`, bridged to rayon via `par_bridge()`) for MPHF construction. This eliminates the build-time `.idx` dependency: the index is now generated only after the MPHF, evidence, and layer metadata have been persisted, because it is needed solely for query-time k-mer verification. Also updates `CLAUDE.md` to treat existing code as a hypothesis, mandating proactive removal of obsolete legacy constructs rather than preserving them out of inertia.
This commit is contained in:
@@ -218,11 +218,15 @@ impl MphfLayer {
|
||||
|
||||
match evidence_kind {
|
||||
// ── Exact path ────────────────────────────────────────────────────
|
||||
// .idx is built LAST, once evidence.bin is written, so it is never
|
||||
// present during construction — only at query time.
|
||||
EvidenceKind::Exact => {
|
||||
build_unitig_idx(&unitig_path, block_bits)?;
|
||||
|
||||
let unitigs = UnitigFileReader::open(&unitig_path)?;
|
||||
let n = unitigs.n_kmers();
|
||||
let n = UnitigFileReader::open_sequential(&unitig_path)?.n_kmers();
|
||||
let keys = CanonicalKmerIter::new(&unitig_path)
|
||||
.map_err(|e| match e {
|
||||
obiskio::SKError::Io(io) => OLMError::Io(io),
|
||||
e => OLMError::InvalidLayer(e.to_string()),
|
||||
})?;
|
||||
|
||||
if n == 0 {
|
||||
fs::File::create(dir.join(EVIDENCE_FILE))?;
|
||||
@@ -232,15 +236,13 @@ impl MphfLayer {
|
||||
mphf.store(&dir.join(MPHF_FILE))
|
||||
.map_err(|e| OLMError::InvalidLayer(e.to_string()))?;
|
||||
LayerMeta::exact().save(dir)?;
|
||||
build_unitig_idx(&unitig_path, block_bits)?;
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Pass 1 — parallel MPHF via random access (.idx required)
|
||||
let keys = (0..unitigs.len())
|
||||
.into_par_iter()
|
||||
.flat_map_iter(|ci| unitigs.unitig(ci).into_canonical_kmers().map(|km| km.raw()));
|
||||
// Pass 1 — MPHF construction via clonable mmap iterator
|
||||
let mphf: Mphf =
|
||||
Mphf::new_from_par_iter(n, keys, PtrHashParams::<CubicEps>::default());
|
||||
Mphf::new_from_par_iter(n, keys.map(|k| k.raw()).par_bridge(), PtrHashParams::<CubicEps>::default());
|
||||
mphf.store(&dir.join(MPHF_FILE))
|
||||
.map_err(|e| OLMError::InvalidLayer(e.to_string()))?;
|
||||
|
||||
@@ -266,6 +268,8 @@ impl MphfLayer {
|
||||
|
||||
ev.write(&dir.join(EVIDENCE_FILE))?;
|
||||
LayerMeta::exact().save(dir)?;
|
||||
// .idx built last: strictly for query-time kmer verification
|
||||
build_unitig_idx(&unitig_path, block_bits)?;
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user