feat: add approximate evidence matching and index estimation CLI

Introduces a new `estimate` CLI subcommand to calculate Bloom filter size, evidence bits, and false-positive rates for approximate indexing. Updates the index building and querying pipelines to support both exact and approximate evidence types via a unified `EvidenceKind` abstraction. Refactors `MphfLayer` and partition index builders to route operations based on the selected evidence mode, and adds the required `obilayeredmap` dependency.
This commit is contained in:
Eric Coissac
2026-05-23 12:43:32 +02:00
parent 16a6b0d033
commit 876bc0127f
11 changed files with 243 additions and 47 deletions
+8 -1
View File
@@ -5,7 +5,7 @@ use cacheline_ef::{CachelineEf, CachelineEfVec};
use epserde::prelude::*;
use obicompactvec::{PersistentCompactIntMatrix, PersistentCompactIntVec};
use obidebruinj::GraphDeBruijn;
use obilayeredmap::{OLMError, layer::Layer};
use obilayeredmap::{EvidenceKind, OLMError, layer::Layer};
use obilayeredmap::meta::PartitionMeta;
use obiskio::{SKError, SKFileMeta, SKFileReader};
use ptr_hash::{PtrHash, bucket_fn::CubicEps, hash::Xx64};
@@ -44,6 +44,7 @@ impl KmerPartition {
min_ab: u32,
max_ab: Option<u32>,
with_counts: bool,
evidence: &EvidenceKind,
) -> Result<usize, SKError> {
let part_dir = self.part_dir(i);
let dedup_path = part_dir.join("dereplicated.skmer.zst");
@@ -119,6 +120,12 @@ impl KmerPartition {
Layer::<()>::build(&layer_dir).map_err(olm_to_sk)?;
}
// For approximate evidence: replace the exact evidence bundle with a
// fingerprint. For exact evidence, nothing more is needed here because
// Layer::build() has already written the exact evidence bundle.
if let EvidenceKind::Approx { b, z } = evidence {
Layer::<()>::build_approx_evidence(&layer_dir, *b, *z).map_err(olm_to_sk)?;
}
// Write meta.json in the index/ directory so LayeredMap::open works
// (e.g. for subsequent merge operations).
let index_dir = layer_dir.parent().expect("layer_dir has a parent");