feat: centralize index configuration and add hybrid mode

Centralizes index configuration by storing a single `IndexMode` (`Exact`, `Approx`, or `Hybrid`) in `PartitionMeta`, eliminating per-layer metadata files. Introduces a `Hybrid` evidence mode and an `--approx` CLI flag to toggle between exact and probabilistic indexing. Refactors the build and query pipelines to dynamically dispatch based on the configured mode, deferring `.idx` generation to Pass 2 and only requiring it for Exact/Hybrid modes. Updates layer opening to load appropriate data structures, enforces strict parameter validation during merges, and clarifies performance trade-offs in documentation.
This commit is contained in:
Eric Coissac
2026-05-26 14:26:19 +02:00
19 changed files with 420 additions and 441 deletions
+7 -7
View File
@@ -8,7 +8,7 @@ use obicompactvec::{PersistentBitMatrixBuilder,
PersistentCompactIntVecBuilder};
use obidebruinj::GraphDeBruijn;
use obiskio::{SKError, SKResult, UnitigFileReader};
use obilayeredmap::{EvidenceKind, Layer, MphfLayer, OLMError};
use obilayeredmap::{IndexMode, Layer, MphfLayer, OLMError};
use obilayeredmap::meta::PartitionMeta;
use crate::filter::KmerFilter;
@@ -67,7 +67,7 @@ fn load_meta(dir: &Path) -> SKResult<PartitionMeta> {
Err(e) if matches!(e, OLMError::Io(ref io_e) if io_e.kind() == std::io::ErrorKind::NotFound) => {
let mut n = 0usize;
while dir.join(format!("layer_{n}")).exists() { n += 1; }
let m = PartitionMeta { n_layers: n };
let m = PartitionMeta { n_layers: n, mode: IndexMode::default() };
m.save(dir).map_err(olm_to_sk)?;
Ok(m)
}
@@ -117,7 +117,7 @@ impl KmerPartition {
if !unitigs_path.exists() { continue; }
let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
let src_data = SrcLayerData::open(&src_layer_dir, mode, &src_meta.mode)?;
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
let row = src_data.lookup(kmer, n_genomes);
@@ -146,8 +146,8 @@ impl KmerPartition {
uw.close()?;
drop(g);
Layer::<()>::build(&dst_layer_dir, block_bits, &EvidenceKind::Exact).map_err(olm_to_sk)?;
let dst_mphf = MphfLayer::open(&dst_layer_dir).map_err(olm_to_sk)?;
Layer::<()>::build(&dst_layer_dir, block_bits, &IndexMode::Exact).map_err(olm_to_sk)?;
let dst_mphf = MphfLayer::open(&dst_layer_dir, &IndexMode::Exact).map_err(olm_to_sk)?;
// ── Prepare matrix builders (one column per genome) ───────────────────
let data_dir = match mode {
@@ -182,7 +182,7 @@ impl KmerPartition {
if !unitigs_path.exists() { continue; }
let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
let src_data = SrcLayerData::open(&src_layer_dir, mode, &src_meta.mode)?;
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
let row = src_data.lookup(kmer, n_genomes);
@@ -199,7 +199,7 @@ impl KmerPartition {
for b in builders { b.close()?; }
write_matrix_meta(&data_dir, n_new, n_genomes).map_err(SKError::Io)?;
PartitionMeta { n_layers: 1 }.save(&dst_index_dir).map_err(olm_to_sk)?;
PartitionMeta { n_layers: 1, mode: IndexMode::Exact }.save(&dst_index_dir).map_err(olm_to_sk)?;
Ok(())
}