feat: centralize index configuration and add hybrid mode

Centralizes index configuration by storing a single `IndexMode` (`Exact`, `Approx`, or `Hybrid`) in `PartitionMeta`, eliminating per-layer metadata files. Introduces a `Hybrid` evidence mode and an `--approx` CLI flag to toggle between exact and probabilistic indexing. Refactors the build and query pipelines to dynamically dispatch based on the configured mode, deferring `.idx` generation to Pass 2 and only requiring it for Exact/Hybrid modes. Updates layer opening to load appropriate data structures, enforces strict parameter validation during merges, and clarifies performance trade-offs in documentation.
This commit is contained in:
Eric Coissac
2026-05-26 14:26:19 +02:00
19 changed files with 420 additions and 441 deletions
+7 -5
View File
@@ -9,7 +9,7 @@ use obisys::{Reporter, Stage};
use rayon::prelude::*;
use tracing::info;
use obilayeredmap::EvidenceKind;
use obilayeredmap::IndexMode;
use crate::error::{OKIError, OKIResult};
use crate::index::KmerIndex;
@@ -271,14 +271,16 @@ fn partition_bar(n: u64) -> ProgressBar {
/// - all `Exact` → OK, returns `Exact`
/// - all `Approx { b, z }` same params → OK, returns `Approx { b, z }`
/// - all `Hybrid { b, z }` same params → OK
/// - mixed modes (exact/approx/hybrid) or different `b`/`z` params → `IncompatibleEvidence`
fn validate_evidence_compat(sources: &[&KmerIndex]) -> OKIResult<EvidenceKind> {
fn validate_evidence_compat(sources: &[&KmerIndex]) -> OKIResult<IndexMode> {
let ref_ev = &sources[0].meta.config.evidence;
for src in &sources[1..] {
let ev = &src.meta.config.evidence;
let compat = match (ref_ev, ev) {
(EvidenceKind::Exact, EvidenceKind::Exact) => true,
(EvidenceKind::Approx { b: b1, z: z1 },
EvidenceKind::Approx { b: b2, z: z2 }) => b1 == b2 && z1 == z2,
(IndexMode::Exact, IndexMode::Exact) => true,
(IndexMode::Approx { b: b1, z: z1 },
IndexMode::Approx { b: b2, z: z2 }) => b1 == b2 && z1 == z2,
(IndexMode::Hybrid { b: b1, z: z1 },
IndexMode::Hybrid { b: b2, z: z2 }) => b1 == b2 && z1 == z2,
_ => false,
};
if !compat {
+2 -2
View File
@@ -3,7 +3,7 @@ use std::fs;
use std::io;
use std::path::Path;
use obilayeredmap::EvidenceKind;
use obilayeredmap::IndexMode;
use serde::{Deserialize, Serialize};
pub const META_FILENAME: &str = "index.meta";
@@ -30,7 +30,7 @@ pub struct IndexConfig {
pub n_bits: usize,
pub with_counts: bool,
#[serde(default)]
pub evidence: EvidenceKind,
pub evidence: IndexMode,
/// Block size for the unitig index as a power-of-two exponent.
/// The `.idx` block covers 2^block_bits consecutive unitigs.
/// 0 = one entry per unitig (O(1) access, largest `.idx`).
+19 -11
View File
@@ -3,7 +3,7 @@ use std::path::Path;
use std::time::Duration;
use indicatif::{ProgressBar, ProgressStyle};
use obilayeredmap::{EvidenceKind, layer::Layer};
use obilayeredmap::{IndexMode, layer::Layer};
use obilayeredmap::meta::PartitionMeta;
use obisys::{Reporter, Stage};
use rayon::prelude::*;
@@ -31,7 +31,7 @@ impl KmerIndex {
/// `index.meta` is updated with the new index mode on success.
pub fn reindex(
&mut self,
target: EvidenceKind,
target: IndexMode,
block_bits: u8,
rep: &mut Reporter,
) -> OKIResult<()> {
@@ -75,7 +75,7 @@ impl KmerIndex {
}
self.meta.config.evidence = target;
if matches!(self.meta.config.evidence, EvidenceKind::Exact) {
if matches!(self.meta.config.evidence, IndexMode::Exact) {
self.meta.config.block_bits = block_bits;
}
self.meta.write(&self.root_path)?;
@@ -85,7 +85,7 @@ impl KmerIndex {
}
/// Process all layers of one partition's index directory.
fn reindex_partition(index_dir: &Path, target: &EvidenceKind, block_bits: u8) -> OKIResult<()> {
fn reindex_partition(index_dir: &Path, target: &IndexMode, block_bits: u8) -> OKIResult<()> {
if !index_dir.exists() {
return Ok(());
}
@@ -97,22 +97,30 @@ fn reindex_partition(index_dir: &Path, target: &EvidenceKind, block_bits: u8) ->
Ok(())
}
fn reindex_layer(layer_dir: &Path, target: &EvidenceKind, block_bits: u8) -> OKIResult<()> {
Layer::<()>::build_evidence(layer_dir, target, block_bits).map_err(olm_to_oki)?;
fn reindex_layer(layer_dir: &Path, target: &IndexMode, block_bits: u8) -> OKIResult<()> {
match target {
IndexMode::Exact => {
Layer::<()>::build_exact_evidence(layer_dir, block_bits).map_err(olm_to_oki)?;
}
IndexMode::Approx { b, z } | IndexMode::Hybrid { b, z } => {
Layer::<()>::build_approx_evidence(layer_dir, *b, *z).map_err(olm_to_oki)?;
}
}
remove_stale_evidence(layer_dir, target)
}
fn remove_stale_evidence(layer_dir: &Path, target: &EvidenceKind) -> OKIResult<()> {
fn remove_stale_evidence(layer_dir: &Path, target: &IndexMode) -> OKIResult<()> {
match target {
EvidenceKind::Exact => {
// fingerprint.bin is no longer valid
IndexMode::Exact => {
remove_if_exists(&layer_dir.join(FINGERPRINT_FILE));
}
EvidenceKind::Approx { .. } => {
// exact bundle is no longer valid
IndexMode::Approx { .. } => {
remove_if_exists(&layer_dir.join(EVIDENCE_FILE));
remove_if_exists(&layer_dir.join(UNITIG_IDX_FILE));
}
IndexMode::Hybrid { .. } => {
// both bundles kept — nothing to remove
}
}
Ok(())
}