refactor: switch indexing to IndexMode and update metadata

Replace EvidenceKind with IndexMode (Exact, Approx, Hybrid) across layer construction and query dispatch. Update PartitionMeta and LayerMeta serialization to centralize index-wide configuration. Add flexible push_layer overloads to LayeredMap for dynamic index expansion without full rebuilds. Improve UnitigFileReader to fall back gracefully to sequential scanning when indexes are missing, eliminating panics.
This commit is contained in:
Eric Coissac
2026-05-26 10:04:25 +02:00
parent 1d880fdc5f
commit 7501b6e854
9 changed files with 284 additions and 315 deletions
+5 -7
View File
@@ -5,7 +5,7 @@ use cacheline_ef::{CachelineEf, CachelineEfVec};
use epserde::prelude::*;
use obicompactvec::{PersistentCompactIntMatrix, PersistentCompactIntVec};
use obidebruinj::GraphDeBruijn;
use obilayeredmap::{EvidenceKind, OLMError, layer::Layer};
use obilayeredmap::{IndexMode, OLMError, layer::Layer};
use obilayeredmap::meta::PartitionMeta;
use obiskio::{SKError, SKFileMeta, SKFileReader};
use ptr_hash::{PtrHash, bucket_fn::CubicEps, hash::Xx64};
@@ -44,7 +44,7 @@ impl KmerPartition {
min_ab: u32,
max_ab: Option<u32>,
with_counts: bool,
evidence: &EvidenceKind,
mode: &IndexMode,
block_bits: u8,
) -> Result<usize, SKError> {
let part_dir = self.part_dir(i);
@@ -110,7 +110,7 @@ impl KmerPartition {
uw.close()?;
if with_counts {
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, block_bits, evidence, |kmer| {
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, block_bits, mode, |kmer| {
match (&mphf1_opt, &counts1_opt) {
(Some(mphf), Some(counts)) => counts.get(mphf.index(&kmer.raw())),
_ => 1,
@@ -118,13 +118,11 @@ impl KmerPartition {
})
.map_err(olm_to_sk)?;
} else {
Layer::<()>::build(&layer_dir, block_bits, evidence).map_err(olm_to_sk)?;
Layer::<()>::build(&layer_dir, block_bits, mode).map_err(olm_to_sk)?;
}
// Write meta.json in the index/ directory so LayeredMap::open works
// (e.g. for subsequent merge operations).
let index_dir = layer_dir.parent().expect("layer_dir has a parent");
PartitionMeta { n_layers: 1 }.save(index_dir).map_err(olm_to_sk)?;
PartitionMeta { n_layers: 1, mode: mode.clone() }.save(index_dir).map_err(olm_to_sk)?;
Ok(n_kmers)
}
+10 -11
View File
@@ -9,7 +9,7 @@ use obicompactvec::{PersistentBitMatrix, PersistentBitMatrixBuilder,
PersistentCompactIntVecBuilder};
use obikseq::CanonicalKmer;
use obiskio::{SKError, SKResult, UnitigFileReader};
use obilayeredmap::{EvidenceKind, Layer, LayeredMap, MphfLayer, OLMError};
use obilayeredmap::{IndexMode, Layer, LayeredMap, MphfLayer, OLMError};
use obilayeredmap::meta::PartitionMeta;
use crate::partition::KmerPartition;
@@ -52,18 +52,17 @@ pub(crate) enum SrcLayerData {
}
impl SrcLayerData {
pub(crate) fn open(layer_dir: &Path, mode: MergeMode) -> SKResult<Self> {
pub(crate) fn open(layer_dir: &Path, merge_mode: MergeMode, index_mode: &IndexMode) -> SKResult<Self> {
let presence_dir = layer_dir.join("presence");
let counts_dir = layer_dir.join("counts");
match mode {
match merge_mode {
MergeMode::Presence => {
if presence_dir.exists() {
let mphf = MphfLayer::open(layer_dir).map_err(olm_to_sk)?;
let mphf = MphfLayer::open(layer_dir, index_mode).map_err(olm_to_sk)?;
let mat = PersistentBitMatrix::open(&presence_dir).map_err(SKError::Io)?;
Ok(SrcLayerData::Presence(mphf, mat))
} else if counts_dir.exists() {
// Source is a count index; treat count > 0 as present via ColBuilder::Bit.
let mphf = MphfLayer::open(layer_dir).map_err(olm_to_sk)?;
let mphf = MphfLayer::open(layer_dir, index_mode).map_err(olm_to_sk)?;
let mat = PersistentCompactIntMatrix::open(&counts_dir).map_err(SKError::Io)?;
Ok(SrcLayerData::Count(mphf, mat))
} else {
@@ -72,7 +71,7 @@ impl SrcLayerData {
}
MergeMode::Count => {
if counts_dir.exists() {
let mphf = MphfLayer::open(layer_dir).map_err(olm_to_sk)?;
let mphf = MphfLayer::open(layer_dir, index_mode).map_err(olm_to_sk)?;
let mat = PersistentCompactIntMatrix::open(&counts_dir).map_err(SKError::Io)?;
Ok(SrcLayerData::Count(mphf, mat))
} else {
@@ -116,7 +115,7 @@ fn load_meta(dir: &Path) -> SKResult<PartitionMeta> {
Err(e) if matches!(e, OLMError::Io(ref io_e) if io_e.kind() == std::io::ErrorKind::NotFound) => {
let mut n = 0usize;
while dir.join(format!("layer_{n}")).exists() { n += 1; }
let m = PartitionMeta { n_layers: n };
let m = PartitionMeta { n_layers: n, mode: IndexMode::default() };
m.save(dir).map_err(olm_to_sk)?;
Ok(m)
}
@@ -217,12 +216,12 @@ impl KmerPartition {
uw.write(&unitig)?;
}
uw.close()?;
Layer::<()>::build(&new_layer_dir, block_bits, &EvidenceKind::Exact).map_err(olm_to_sk)?;
Layer::<()>::build(&new_layer_dir, block_bits, &IndexMode::Exact).map_err(olm_to_sk)?;
}
drop(g);
let new_mphf = if any_new {
Some(MphfLayer::open(&new_layer_dir).map_err(olm_to_sk)?)
Some(MphfLayer::open(&new_layer_dir, &IndexMode::Exact).map_err(olm_to_sk)?)
} else {
None
};
@@ -304,7 +303,7 @@ impl KmerPartition {
for l in 0..src_meta.n_layers {
let src_layer_dir = src_index_dir.join(format!("layer_{l}"));
let reader = UnitigFileReader::open_sequential(&src_layer_dir.join("unitigs.bin"))?;
let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
let src_data = SrcLayerData::open(&src_layer_dir, mode, &src_meta.mode)?;
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
let values = src_data.lookup(kmer, *src_n);
+1 -1
View File
@@ -8,7 +8,7 @@ use obicompactvec::{PersistentBitMatrixBuilder,
PersistentCompactIntVecBuilder};
use obidebruinj::GraphDeBruijn;
use obiskio::{SKError, SKResult, UnitigFileReader};
use obilayeredmap::{EvidenceKind, Layer, MphfLayer, OLMError};
use obilayeredmap::{IndexMode, Layer, MphfLayer, OLMError};
use obilayeredmap::meta::PartitionMeta;
use crate::filter::KmerFilter;