feat: centralize index configuration and add hybrid mode
Centralizes index configuration by storing a single `IndexMode` (`Exact`, `Approx`, or `Hybrid`) in `PartitionMeta`, eliminating per-layer metadata files. Introduces a `Hybrid` evidence mode and an `--approx` CLI flag to toggle between exact and probabilistic indexing. Refactors the build and query pipelines to dynamically dispatch based on the configured mode, deferring `.idx` generation to Pass 2 and only requiring it for Exact/Hybrid modes. Updates layer opening to load appropriate data structures, enforces strict parameter validation during merges, and clarifies performance trade-offs in documentation.
This commit is contained in:
@@ -9,7 +9,7 @@ use obisys::{Reporter, Stage};
|
||||
use rayon::prelude::*;
|
||||
use tracing::info;
|
||||
|
||||
use obilayeredmap::EvidenceKind;
|
||||
use obilayeredmap::IndexMode;
|
||||
|
||||
use crate::error::{OKIError, OKIResult};
|
||||
use crate::index::KmerIndex;
|
||||
@@ -271,14 +271,16 @@ fn partition_bar(n: u64) -> ProgressBar {
|
||||
/// - all `Exact` → OK, returns `Exact`
|
||||
/// - all `Approx { b, z }` same params → OK, returns `Approx { b, z }`
|
||||
/// - mixed modes (exact/approx/hybrid) or same mode with different `b`/`z` params → `IncompatibleEvidence`
|
||||
fn validate_evidence_compat(sources: &[&KmerIndex]) -> OKIResult<EvidenceKind> {
|
||||
fn validate_evidence_compat(sources: &[&KmerIndex]) -> OKIResult<IndexMode> {
|
||||
let ref_ev = &sources[0].meta.config.evidence;
|
||||
for src in &sources[1..] {
|
||||
let ev = &src.meta.config.evidence;
|
||||
let compat = match (ref_ev, ev) {
|
||||
(EvidenceKind::Exact, EvidenceKind::Exact) => true,
|
||||
(EvidenceKind::Approx { b: b1, z: z1 },
|
||||
EvidenceKind::Approx { b: b2, z: z2 }) => b1 == b2 && z1 == z2,
|
||||
(IndexMode::Exact, IndexMode::Exact) => true,
|
||||
(IndexMode::Approx { b: b1, z: z1 },
|
||||
IndexMode::Approx { b: b2, z: z2 }) => b1 == b2 && z1 == z2,
|
||||
(IndexMode::Hybrid { b: b1, z: z1 },
|
||||
IndexMode::Hybrid { b: b2, z: z2 }) => b1 == b2 && z1 == z2,
|
||||
_ => false,
|
||||
};
|
||||
if !compat {
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::fs;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
use obilayeredmap::EvidenceKind;
|
||||
use obilayeredmap::IndexMode;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub const META_FILENAME: &str = "index.meta";
|
||||
@@ -30,7 +30,7 @@ pub struct IndexConfig {
|
||||
pub n_bits: usize,
|
||||
pub with_counts: bool,
|
||||
#[serde(default)]
|
||||
pub evidence: EvidenceKind,
|
||||
pub evidence: IndexMode,
|
||||
/// Block size for the unitig index as a power-of-two exponent.
|
||||
/// The `.idx` block covers 2^block_bits consecutive unitigs.
|
||||
/// 0 = one entry per unitig (O(1) access, largest `.idx`).
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use obilayeredmap::{EvidenceKind, layer::Layer};
|
||||
use obilayeredmap::{IndexMode, layer::Layer};
|
||||
use obilayeredmap::meta::PartitionMeta;
|
||||
use obisys::{Reporter, Stage};
|
||||
use rayon::prelude::*;
|
||||
@@ -31,7 +31,7 @@ impl KmerIndex {
|
||||
/// `index.meta` is updated with the new index mode on success.
|
||||
pub fn reindex(
|
||||
&mut self,
|
||||
target: EvidenceKind,
|
||||
target: IndexMode,
|
||||
block_bits: u8,
|
||||
rep: &mut Reporter,
|
||||
) -> OKIResult<()> {
|
||||
@@ -75,7 +75,7 @@ impl KmerIndex {
|
||||
}
|
||||
|
||||
self.meta.config.evidence = target;
|
||||
if matches!(self.meta.config.evidence, EvidenceKind::Exact) {
|
||||
if matches!(self.meta.config.evidence, IndexMode::Exact) {
|
||||
self.meta.config.block_bits = block_bits;
|
||||
}
|
||||
self.meta.write(&self.root_path)?;
|
||||
@@ -85,7 +85,7 @@ impl KmerIndex {
|
||||
}
|
||||
|
||||
/// Process all layers of one partition's index directory.
|
||||
fn reindex_partition(index_dir: &Path, target: &EvidenceKind, block_bits: u8) -> OKIResult<()> {
|
||||
fn reindex_partition(index_dir: &Path, target: &IndexMode, block_bits: u8) -> OKIResult<()> {
|
||||
if !index_dir.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
@@ -97,22 +97,30 @@ fn reindex_partition(index_dir: &Path, target: &EvidenceKind, block_bits: u8) ->
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn reindex_layer(layer_dir: &Path, target: &EvidenceKind, block_bits: u8) -> OKIResult<()> {
|
||||
Layer::<()>::build_evidence(layer_dir, target, block_bits).map_err(olm_to_oki)?;
|
||||
/// Rebuilds the evidence of a single layer directory for the requested `target` mode.
///
/// - `Exact` calls `Layer::build_exact_evidence` with `block_bits`.
/// - `Approx` and `Hybrid` both call `Layer::build_approx_evidence` with their
///   `b` / `z` parameters; `block_bits` is not used on that path.
///
/// Builder errors are converted with `olm_to_oki` and returned early. On success the
/// function finishes by calling `remove_stale_evidence` and returns its result.
// NOTE(review): `Hybrid` only triggers the approx build here, while
// `remove_stale_evidence` treats Hybrid as keeping both bundles. Confirm that the
// exact bundle is guaranteed to exist already (or is produced by
// `build_approx_evidence`) when a layer is reindexed into Hybrid.
fn reindex_layer(layer_dir: &Path, target: &IndexMode, block_bits: u8) -> OKIResult<()> {
|
||||
match target {
|
||||
IndexMode::Exact => {
|
||||
Layer::<()>::build_exact_evidence(layer_dir, block_bits).map_err(olm_to_oki)?;
|
||||
}
|
||||
IndexMode::Approx { b, z } | IndexMode::Hybrid { b, z } => {
|
||||
Layer::<()>::build_approx_evidence(layer_dir, *b, *z).map_err(olm_to_oki)?;
|
||||
}
|
||||
}
|
||||
// Tail expression: the cleanup result is the function's result.
remove_stale_evidence(layer_dir, target)
|
||||
}
|
||||
|
||||
fn remove_stale_evidence(layer_dir: &Path, target: &EvidenceKind) -> OKIResult<()> {
|
||||
/// Removes the evidence files in `layer_dir` that do not belong to the `target` mode.
///
/// - `Exact`: removes `FINGERPRINT_FILE`.
/// - `Approx`: removes `EVIDENCE_FILE` and `UNITIG_IDX_FILE`.
/// - `Hybrid`: removes nothing.
///
/// Always returns `Ok(())`: the `remove_if_exists` calls are plain statements, so
/// nothing they report is propagated from here.
fn remove_stale_evidence(layer_dir: &Path, target: &IndexMode) -> OKIResult<()> {
|
||||
match target {
|
||||
EvidenceKind::Exact => {
|
||||
// fingerprint.bin is no longer valid
|
||||
IndexMode::Exact => {
|
||||
remove_if_exists(&layer_dir.join(FINGERPRINT_FILE));
|
||||
}
|
||||
EvidenceKind::Approx { .. } => {
|
||||
// exact bundle is no longer valid
|
||||
IndexMode::Approx { .. } => {
|
||||
remove_if_exists(&layer_dir.join(EVIDENCE_FILE));
|
||||
remove_if_exists(&layer_dir.join(UNITIG_IDX_FILE));
|
||||
}
|
||||
IndexMode::Hybrid { .. } => {
|
||||
// both bundles kept — nothing to remove
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user