feat: centralize index configuration and add hybrid mode
Centralizes index configuration by storing a single `IndexMode` (`Exact`, `Approx`, or `Hybrid`) in `PartitionMeta`, eliminating per-layer metadata files. Introduces a `Hybrid` evidence mode and an `--approx` CLI flag to toggle between exact and probabilistic indexing. Refactors the build and query pipelines to dynamically dispatch based on the configured mode, deferring `.idx` generation to Pass 2 and only requiring it for Exact/Hybrid modes. Updates layer opening to load appropriate data structures, enforces strict parameter validation during merges, and clarifies performance trade-offs in documentation.
This commit is contained in:
@@ -8,7 +8,7 @@ use obicompactvec::{PersistentBitMatrixBuilder,
|
||||
PersistentCompactIntVecBuilder};
|
||||
use obidebruinj::GraphDeBruijn;
|
||||
use obiskio::{SKError, SKResult, UnitigFileReader};
|
||||
use obilayeredmap::{EvidenceKind, Layer, MphfLayer, OLMError};
|
||||
use obilayeredmap::{IndexMode, Layer, MphfLayer, OLMError};
|
||||
use obilayeredmap::meta::PartitionMeta;
|
||||
|
||||
use crate::filter::KmerFilter;
|
||||
@@ -67,7 +67,7 @@ fn load_meta(dir: &Path) -> SKResult<PartitionMeta> {
|
||||
Err(e) if matches!(e, OLMError::Io(ref io_e) if io_e.kind() == std::io::ErrorKind::NotFound) => {
|
||||
let mut n = 0usize;
|
||||
while dir.join(format!("layer_{n}")).exists() { n += 1; }
|
||||
let m = PartitionMeta { n_layers: n };
|
||||
let m = PartitionMeta { n_layers: n, mode: IndexMode::default() };
|
||||
m.save(dir).map_err(olm_to_sk)?;
|
||||
Ok(m)
|
||||
}
|
||||
@@ -117,7 +117,7 @@ impl KmerPartition {
|
||||
if !unitigs_path.exists() { continue; }
|
||||
|
||||
let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
|
||||
let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
|
||||
let src_data = SrcLayerData::open(&src_layer_dir, mode, &src_meta.mode)?;
|
||||
|
||||
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
|
||||
let row = src_data.lookup(kmer, n_genomes);
|
||||
@@ -146,8 +146,8 @@ impl KmerPartition {
|
||||
uw.close()?;
|
||||
drop(g);
|
||||
|
||||
Layer::<()>::build(&dst_layer_dir, block_bits, &EvidenceKind::Exact).map_err(olm_to_sk)?;
|
||||
let dst_mphf = MphfLayer::open(&dst_layer_dir).map_err(olm_to_sk)?;
|
||||
Layer::<()>::build(&dst_layer_dir, block_bits, &IndexMode::Exact).map_err(olm_to_sk)?;
|
||||
let dst_mphf = MphfLayer::open(&dst_layer_dir, &IndexMode::Exact).map_err(olm_to_sk)?;
|
||||
|
||||
// ── Prepare matrix builders (one column per genome) ───────────────────
|
||||
let data_dir = match mode {
|
||||
@@ -182,7 +182,7 @@ impl KmerPartition {
|
||||
if !unitigs_path.exists() { continue; }
|
||||
|
||||
let reader = UnitigFileReader::open_sequential(&unitigs_path)?;
|
||||
let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
|
||||
let src_data = SrcLayerData::open(&src_layer_dir, mode, &src_meta.mode)?;
|
||||
|
||||
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
|
||||
let row = src_data.lookup(kmer, n_genomes);
|
||||
@@ -199,7 +199,7 @@ impl KmerPartition {
|
||||
for b in builders { b.close()?; }
|
||||
write_matrix_meta(&data_dir, n_new, n_genomes).map_err(SKError::Io)?;
|
||||
|
||||
PartitionMeta { n_layers: 1 }.save(&dst_index_dir).map_err(olm_to_sk)?;
|
||||
PartitionMeta { n_layers: 1, mode: IndexMode::Exact }.save(&dst_index_dir).map_err(olm_to_sk)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user