refactor: switch indexing to IndexMode and update metadata
Replace EvidenceKind with IndexMode (Exact, Approx, Hybrid) across layer construction and query dispatch. Update PartitionMeta and LayerMeta serialization to centralize index-wide configuration. Add flexible push_layer overloads to LayeredMap for dynamic index expansion without full rebuilds. Improve UnitigFileReader to fall back gracefully to sequential scanning when indexes are missing, eliminating panics.
This commit is contained in:
@@ -5,7 +5,7 @@ use cacheline_ef::{CachelineEf, CachelineEfVec};
|
||||
use epserde::prelude::*;
|
||||
use obicompactvec::{PersistentCompactIntMatrix, PersistentCompactIntVec};
|
||||
use obidebruinj::GraphDeBruijn;
|
||||
use obilayeredmap::{EvidenceKind, OLMError, layer::Layer};
|
||||
use obilayeredmap::{IndexMode, OLMError, layer::Layer};
|
||||
use obilayeredmap::meta::PartitionMeta;
|
||||
use obiskio::{SKError, SKFileMeta, SKFileReader};
|
||||
use ptr_hash::{PtrHash, bucket_fn::CubicEps, hash::Xx64};
|
||||
@@ -44,7 +44,7 @@ impl KmerPartition {
|
||||
min_ab: u32,
|
||||
max_ab: Option<u32>,
|
||||
with_counts: bool,
|
||||
evidence: &EvidenceKind,
|
||||
mode: &IndexMode,
|
||||
block_bits: u8,
|
||||
) -> Result<usize, SKError> {
|
||||
let part_dir = self.part_dir(i);
|
||||
@@ -110,7 +110,7 @@ impl KmerPartition {
|
||||
uw.close()?;
|
||||
|
||||
if with_counts {
|
||||
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, block_bits, evidence, |kmer| {
|
||||
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, block_bits, mode, |kmer| {
|
||||
match (&mphf1_opt, &counts1_opt) {
|
||||
(Some(mphf), Some(counts)) => counts.get(mphf.index(&kmer.raw())),
|
||||
_ => 1,
|
||||
@@ -118,13 +118,11 @@ impl KmerPartition {
|
||||
})
|
||||
.map_err(olm_to_sk)?;
|
||||
} else {
|
||||
Layer::<()>::build(&layer_dir, block_bits, evidence).map_err(olm_to_sk)?;
|
||||
Layer::<()>::build(&layer_dir, block_bits, mode).map_err(olm_to_sk)?;
|
||||
}
|
||||
|
||||
// Write meta.json in the index/ directory so LayeredMap::open works
|
||||
// (e.g. for subsequent merge operations).
|
||||
let index_dir = layer_dir.parent().expect("layer_dir has a parent");
|
||||
PartitionMeta { n_layers: 1 }.save(index_dir).map_err(olm_to_sk)?;
|
||||
PartitionMeta { n_layers: 1, mode: mode.clone() }.save(index_dir).map_err(olm_to_sk)?;
|
||||
|
||||
Ok(n_kmers)
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ use obicompactvec::{PersistentBitMatrix, PersistentBitMatrixBuilder,
|
||||
PersistentCompactIntVecBuilder};
|
||||
use obikseq::CanonicalKmer;
|
||||
use obiskio::{SKError, SKResult, UnitigFileReader};
|
||||
use obilayeredmap::{EvidenceKind, Layer, LayeredMap, MphfLayer, OLMError};
|
||||
use obilayeredmap::{IndexMode, Layer, LayeredMap, MphfLayer, OLMError};
|
||||
use obilayeredmap::meta::PartitionMeta;
|
||||
|
||||
use crate::partition::KmerPartition;
|
||||
@@ -52,18 +52,17 @@ pub(crate) enum SrcLayerData {
|
||||
}
|
||||
|
||||
impl SrcLayerData {
|
||||
pub(crate) fn open(layer_dir: &Path, mode: MergeMode) -> SKResult<Self> {
|
||||
pub(crate) fn open(layer_dir: &Path, merge_mode: MergeMode, index_mode: &IndexMode) -> SKResult<Self> {
|
||||
let presence_dir = layer_dir.join("presence");
|
||||
let counts_dir = layer_dir.join("counts");
|
||||
match mode {
|
||||
match merge_mode {
|
||||
MergeMode::Presence => {
|
||||
if presence_dir.exists() {
|
||||
let mphf = MphfLayer::open(layer_dir).map_err(olm_to_sk)?;
|
||||
let mphf = MphfLayer::open(layer_dir, index_mode).map_err(olm_to_sk)?;
|
||||
let mat = PersistentBitMatrix::open(&presence_dir).map_err(SKError::Io)?;
|
||||
Ok(SrcLayerData::Presence(mphf, mat))
|
||||
} else if counts_dir.exists() {
|
||||
// Source is a count index; treat count > 0 as present via ColBuilder::Bit.
|
||||
let mphf = MphfLayer::open(layer_dir).map_err(olm_to_sk)?;
|
||||
let mphf = MphfLayer::open(layer_dir, index_mode).map_err(olm_to_sk)?;
|
||||
let mat = PersistentCompactIntMatrix::open(&counts_dir).map_err(SKError::Io)?;
|
||||
Ok(SrcLayerData::Count(mphf, mat))
|
||||
} else {
|
||||
@@ -72,7 +71,7 @@ impl SrcLayerData {
|
||||
}
|
||||
MergeMode::Count => {
|
||||
if counts_dir.exists() {
|
||||
let mphf = MphfLayer::open(layer_dir).map_err(olm_to_sk)?;
|
||||
let mphf = MphfLayer::open(layer_dir, index_mode).map_err(olm_to_sk)?;
|
||||
let mat = PersistentCompactIntMatrix::open(&counts_dir).map_err(SKError::Io)?;
|
||||
Ok(SrcLayerData::Count(mphf, mat))
|
||||
} else {
|
||||
@@ -116,7 +115,7 @@ fn load_meta(dir: &Path) -> SKResult<PartitionMeta> {
|
||||
Err(e) if matches!(e, OLMError::Io(ref io_e) if io_e.kind() == std::io::ErrorKind::NotFound) => {
|
||||
let mut n = 0usize;
|
||||
while dir.join(format!("layer_{n}")).exists() { n += 1; }
|
||||
let m = PartitionMeta { n_layers: n };
|
||||
let m = PartitionMeta { n_layers: n, mode: IndexMode::default() };
|
||||
m.save(dir).map_err(olm_to_sk)?;
|
||||
Ok(m)
|
||||
}
|
||||
@@ -217,12 +216,12 @@ impl KmerPartition {
|
||||
uw.write(&unitig)?;
|
||||
}
|
||||
uw.close()?;
|
||||
Layer::<()>::build(&new_layer_dir, block_bits, &EvidenceKind::Exact).map_err(olm_to_sk)?;
|
||||
Layer::<()>::build(&new_layer_dir, block_bits, &IndexMode::Exact).map_err(olm_to_sk)?;
|
||||
}
|
||||
drop(g);
|
||||
|
||||
let new_mphf = if any_new {
|
||||
Some(MphfLayer::open(&new_layer_dir).map_err(olm_to_sk)?)
|
||||
Some(MphfLayer::open(&new_layer_dir, &IndexMode::Exact).map_err(olm_to_sk)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -304,7 +303,7 @@ impl KmerPartition {
|
||||
for l in 0..src_meta.n_layers {
|
||||
let src_layer_dir = src_index_dir.join(format!("layer_{l}"));
|
||||
let reader = UnitigFileReader::open_sequential(&src_layer_dir.join("unitigs.bin"))?;
|
||||
let src_data = SrcLayerData::open(&src_layer_dir, mode)?;
|
||||
let src_data = SrcLayerData::open(&src_layer_dir, mode, &src_meta.mode)?;
|
||||
|
||||
for (kmer, _, _) in reader.iter_indexed_canonical_kmers() {
|
||||
let values = src_data.lookup(kmer, *src_n);
|
||||
|
||||
@@ -8,7 +8,7 @@ use obicompactvec::{PersistentBitMatrixBuilder,
|
||||
PersistentCompactIntVecBuilder};
|
||||
use obidebruinj::GraphDeBruijn;
|
||||
use obiskio::{SKError, SKResult, UnitigFileReader};
|
||||
use obilayeredmap::{EvidenceKind, Layer, MphfLayer, OLMError};
|
||||
use obilayeredmap::{IndexMode, Layer, MphfLayer, OLMError};
|
||||
use obilayeredmap::meta::PartitionMeta;
|
||||
|
||||
use crate::filter::KmerFilter;
|
||||
|
||||
Reference in New Issue
Block a user