feat: add approximate evidence matching and index estimation CLI
Introduces a new `estimate` CLI subcommand to calculate Bloom filter size, evidence bits, and false-positive rates for approximate indexing. Updates the index building and querying pipelines to support both exact and approximate evidence types via a unified `EvidenceKind` abstraction. Refactors `MphfLayer` and partition index builders to route operations based on the selected evidence mode, and adds the required `obilayeredmap` dependency.
This commit is contained in:
@@ -5,7 +5,7 @@ use cacheline_ef::{CachelineEf, CachelineEfVec};
|
||||
use epserde::prelude::*;
|
||||
use obicompactvec::{PersistentCompactIntMatrix, PersistentCompactIntVec};
|
||||
use obidebruinj::GraphDeBruijn;
|
||||
use obilayeredmap::{OLMError, layer::Layer};
|
||||
use obilayeredmap::{EvidenceKind, OLMError, layer::Layer};
|
||||
use obilayeredmap::meta::PartitionMeta;
|
||||
use obiskio::{SKError, SKFileMeta, SKFileReader};
|
||||
use ptr_hash::{PtrHash, bucket_fn::CubicEps, hash::Xx64};
|
||||
@@ -44,6 +44,7 @@ impl KmerPartition {
|
||||
min_ab: u32,
|
||||
max_ab: Option<u32>,
|
||||
with_counts: bool,
|
||||
evidence: &EvidenceKind,
|
||||
) -> Result<usize, SKError> {
|
||||
let part_dir = self.part_dir(i);
|
||||
let dedup_path = part_dir.join("dereplicated.skmer.zst");
|
||||
@@ -119,6 +120,12 @@ impl KmerPartition {
|
||||
Layer::<()>::build(&layer_dir).map_err(olm_to_sk)?;
|
||||
}
|
||||
|
||||
// For approximate evidence: replace the exact evidence bundle with a
|
||||
// fingerprint. For exact evidence, build() already wrote the exact evidence bundle.
|
||||
if let EvidenceKind::Approx { b, z } = evidence {
|
||||
Layer::<()>::build_approx_evidence(&layer_dir, *b, *z).map_err(olm_to_sk)?;
|
||||
}
|
||||
|
||||
// Write meta.json in the index/ directory so LayeredMap::open works
|
||||
// (e.g. for subsequent merge operations).
|
||||
let index_dir = layer_dir.parent().expect("layer_dir has a parent");
|
||||
|
||||
Reference in New Issue
Block a user