feat: support exact and approximate evidence in layer construction

Refactor `MphfLayer::build` to accept an `EvidenceKind` parameter that routes construction to either the exact pipeline (index-based, parallel MPHF, writes `evidence.bin`) or the approximate pipeline (sequential mmap iterator, writes `fingerprint.bin`). Introduce `CanonicalKmerIter` for memory-mapped, chunked k-mer iteration with O(1) resets via `Arc<Mmap>`. Update the layer and map APIs to forward the evidence kind, add `push_layer` for count matrices, and adjust tests and public exports accordingly.
This commit is contained in:
Eric Coissac
2026-05-26 09:41:13 +02:00
parent 036d044291
commit 9d46400898
9 changed files with 215 additions and 68 deletions
+2 -8
View File
@@ -110,7 +110,7 @@ impl KmerPartition {
uw.close()?;
if with_counts {
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, block_bits, |kmer| {
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, block_bits, evidence, |kmer| {
match (&mphf1_opt, &counts1_opt) {
(Some(mphf), Some(counts)) => counts.get(mphf.index(&kmer.raw())),
_ => 1,
@@ -118,13 +118,7 @@ impl KmerPartition {
})
.map_err(olm_to_sk)?;
} else {
Layer::<()>::build(&layer_dir, block_bits).map_err(olm_to_sk)?;
}
// For approximate evidence: replace the exact evidence bundle with a
// fingerprint. For exact evidence, build() already wrote it.
if let EvidenceKind::Approx { b, z } = evidence {
Layer::<()>::build_approx_evidence(&layer_dir, *b, *z).map_err(olm_to_sk)?;
Layer::<()>::build(&layer_dir, block_bits, evidence).map_err(olm_to_sk)?;
}
// Write meta.json in the index/ directory so LayeredMap::open works
+2 -2
View File
@@ -9,7 +9,7 @@ use obicompactvec::{PersistentBitMatrix, PersistentBitMatrixBuilder,
PersistentCompactIntVecBuilder};
use obikseq::CanonicalKmer;
use obiskio::{SKError, SKResult, UnitigFileReader};
use obilayeredmap::{Layer, LayeredMap, MphfLayer, OLMError};
use obilayeredmap::{EvidenceKind, Layer, LayeredMap, MphfLayer, OLMError};
use obilayeredmap::meta::PartitionMeta;
use crate::partition::KmerPartition;
@@ -217,7 +217,7 @@ impl KmerPartition {
uw.write(&unitig)?;
}
uw.close()?;
Layer::<()>::build(&new_layer_dir, block_bits).map_err(olm_to_sk)?;
Layer::<()>::build(&new_layer_dir, block_bits, &EvidenceKind::Exact).map_err(olm_to_sk)?;
}
drop(g);
+2 -2
View File
@@ -8,7 +8,7 @@ use obicompactvec::{PersistentBitMatrixBuilder,
PersistentCompactIntVecBuilder};
use obidebruinj::GraphDeBruijn;
use obiskio::{SKError, SKResult, UnitigFileReader};
use obilayeredmap::{Layer, MphfLayer, OLMError};
use obilayeredmap::{EvidenceKind, Layer, MphfLayer, OLMError};
use obilayeredmap::meta::PartitionMeta;
use crate::filter::KmerFilter;
@@ -146,7 +146,7 @@ impl KmerPartition {
uw.close()?;
drop(g);
Layer::<()>::build(&dst_layer_dir, block_bits).map_err(olm_to_sk)?;
Layer::<()>::build(&dst_layer_dir, block_bits, &EvidenceKind::Exact).map_err(olm_to_sk)?;
let dst_mphf = MphfLayer::open(&dst_layer_dir).map_err(olm_to_sk)?;
// ── Prepare matrix builders (one column per genome) ───────────────────