refactor: abstract k-mer types and fix bit alignment
Abstracts k-mer storage using a `RawKmer` alias and `KMER_BITS` constant to simplify bit manipulation and enable future extension to larger types. Updates bit-shifting and masking logic across `kmer.rs` and `packed_seq.rs` to prevent overflow and improve type safety. Adapts the MPHF layer to iterate over indexed canonical k-mers with explicit slot bounds validation and bit-level encoding. Fixes test suite compilation errors by correcting method names, adding tuple destructuring, and passing the required `IndexMode::Exact` parameter.
This commit is contained in:
@@ -118,7 +118,7 @@ impl MphfLayer {
|
||||
}
|
||||
LayerEvidence::Approx { unitigs_path, .. } => {
|
||||
let reader = UnitigFileReader::open_sequential(unitigs_path).ok()?;
|
||||
for stored in reader.iter_canonical_kmers() {
|
||||
for (stored, _, _) in reader.iter_indexed_canonical_kmers() {
|
||||
if self.mphf.index(&stored.raw()) == slot {
|
||||
return if stored == kmer { Some(slot) } else { None };
|
||||
}
|
||||
@@ -196,7 +196,7 @@ impl MphfLayer {
|
||||
|
||||
let mut fw = FingerprintVecWriter::new(n, b);
|
||||
|
||||
for kmer in unitigs.iter_canonical_kmers() {
|
||||
for (kmer, _, _) in unitigs.iter_indexed_canonical_kmers() {
|
||||
let slot = mphf.index(&kmer.raw());
|
||||
if slot >= n {
|
||||
return Err(OLMError::Mphf("slot out of bounds".into()));
|
||||
@@ -281,7 +281,7 @@ impl MphfLayer {
|
||||
|
||||
IndexMode::Approx { b, .. } => {
|
||||
let mut fw = FingerprintVecWriter::new(n, *b);
|
||||
for kmer in unitigs2.iter_canonical_kmers() {
|
||||
for (kmer, _, _) in unitigs2.iter_indexed_canonical_kmers() {
|
||||
let slot = mphf.index(&kmer.raw());
|
||||
if slot >= n { return Err(OLMError::Mphf("slot out of bounds".into())); }
|
||||
let byte = slot / 8; let bit = 1u8 << (slot % 8);
|
||||
|
||||
@@ -2,7 +2,7 @@ use super::*;
|
||||
use obicompactvec::PersistentCompactIntMatrix;
|
||||
use obikseq::{set_k, Kmer, Sequence as _, Unitig};
|
||||
use obiskio::DEFAULT_BLOCK_BITS;
|
||||
use crate::meta::EvidenceKind;
|
||||
use crate::meta::IndexMode;
|
||||
use tempfile::tempdir;
|
||||
|
||||
fn write_unitigs(dir: &Path, seqs: &[&[u8]]) {
|
||||
@@ -15,7 +15,8 @@ fn write_unitigs(dir: &Path, seqs: &[&[u8]]) {
|
||||
|
||||
fn all_canonical_kmers(dir: &Path) -> Vec<CanonicalKmer> {
|
||||
UnitigFileReader::open_sequential(&dir.join(UNITIGS_FILE)).unwrap()
|
||||
.iter_canonical_kmers()
|
||||
.iter_indexed_canonical_kmers()
|
||||
.map(|(kmer, _, _)| kmer)
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -25,8 +26,8 @@ fn build_and_query_all_kmers_found() {
|
||||
let dir = tempdir().unwrap();
|
||||
write_unitigs(dir.path(), &[b"AAAACGT"]);
|
||||
let kmers = all_canonical_kmers(dir.path());
|
||||
Layer::<()>::build(dir.path(), DEFAULT_BLOCK_BITS, &EvidenceKind::Exact).unwrap();
|
||||
let layer = Layer::<()>::open(dir.path()).unwrap();
|
||||
Layer::<()>::build(dir.path(), DEFAULT_BLOCK_BITS, &IndexMode::Exact).unwrap();
|
||||
let layer = Layer::<()>::open(dir.path(), &IndexMode::Exact).unwrap();
|
||||
for kmer in kmers {
|
||||
assert!(layer.query(kmer).is_some(), "kmer should be present");
|
||||
}
|
||||
@@ -43,10 +44,10 @@ fn counts_are_stored_and_retrieved() {
|
||||
Layer::<PersistentCompactIntMatrix>::build(
|
||||
dir.path(),
|
||||
DEFAULT_BLOCK_BITS,
|
||||
&EvidenceKind::Exact,
|
||||
&IndexMode::Exact,
|
||||
|kmer| count_map.get(&kmer).copied().unwrap_or(0),
|
||||
).unwrap();
|
||||
let layer = Layer::<PersistentCompactIntMatrix>::open(dir.path()).unwrap();
|
||||
let layer = Layer::<PersistentCompactIntMatrix>::open(dir.path(), &IndexMode::Exact).unwrap();
|
||||
for kmer in &kmers {
|
||||
let hit = layer.query(*kmer).expect("kmer must be present");
|
||||
assert_eq!(hit.data[0], count_map[kmer]);
|
||||
@@ -58,8 +59,8 @@ fn query_absent_returns_none() {
|
||||
set_k(4);
|
||||
let dir = tempdir().unwrap();
|
||||
write_unitigs(dir.path(), &[b"AAAACGT"]);
|
||||
Layer::<()>::build(dir.path(), DEFAULT_BLOCK_BITS, &EvidenceKind::Exact).unwrap();
|
||||
let layer = Layer::<()>::open(dir.path()).unwrap();
|
||||
Layer::<()>::build(dir.path(), DEFAULT_BLOCK_BITS, &IndexMode::Exact).unwrap();
|
||||
let layer = Layer::<()>::open(dir.path(), &IndexMode::Exact).unwrap();
|
||||
let absent = Kmer::from_ascii(b"CCCC").unwrap().canonical();
|
||||
assert!(layer.query(absent).is_none());
|
||||
}
|
||||
@@ -69,9 +70,9 @@ fn open_after_build_is_consistent() {
|
||||
set_k(4);
|
||||
let dir = tempdir().unwrap();
|
||||
write_unitigs(dir.path(), &[b"AAAACGT"]);
|
||||
let n = Layer::<PersistentCompactIntMatrix>::build(dir.path(), DEFAULT_BLOCK_BITS, &EvidenceKind::Exact, |_| 7).unwrap();
|
||||
let n = Layer::<PersistentCompactIntMatrix>::build(dir.path(), DEFAULT_BLOCK_BITS, &IndexMode::Exact, |_| 7).unwrap();
|
||||
assert_eq!(n, 4);
|
||||
let layer = Layer::<PersistentCompactIntMatrix>::open(dir.path()).unwrap();
|
||||
let layer = Layer::<PersistentCompactIntMatrix>::open(dir.path(), &IndexMode::Exact).unwrap();
|
||||
let kmer = Kmer::from_ascii(b"AAAA").unwrap().canonical();
|
||||
let hit = layer.query(kmer).expect("AAAA must be present");
|
||||
assert_eq!(hit.data[0], 7);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use super::*;
|
||||
use obicompactvec::PersistentCompactIntMatrix;
|
||||
use obikseq::{set_k, Sequence as _, Unitig};
|
||||
use crate::meta::IndexMode;
|
||||
use tempfile::tempdir;
|
||||
|
||||
fn push_unitigs_and_layer(
|
||||
@@ -24,7 +25,7 @@ fn canonical(ascii: &[u8]) -> CanonicalKmer {
|
||||
fn create_empty_map() {
|
||||
set_k(4);
|
||||
let dir = tempdir().unwrap();
|
||||
let map = LayeredMap::<()>::create(dir.path()).unwrap();
|
||||
let map = LayeredMap::<()>::create(dir.path(), IndexMode::Exact).unwrap();
|
||||
assert_eq!(map.n_layers(), 0);
|
||||
}
|
||||
|
||||
@@ -33,7 +34,7 @@ fn open_reloads_layer_count() {
|
||||
set_k(4);
|
||||
let dir = tempdir().unwrap();
|
||||
{
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap();
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
|
||||
push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 1);
|
||||
}
|
||||
let map = LayeredMap::<PersistentCompactIntMatrix>::open(dir.path()).unwrap();
|
||||
@@ -44,7 +45,7 @@ fn open_reloads_layer_count() {
|
||||
fn query_finds_kmer_in_layer_zero() {
|
||||
set_k(4);
|
||||
let dir = tempdir().unwrap();
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap();
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
|
||||
push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 3);
|
||||
let kmer = canonical(b"AAAC");
|
||||
let (layer_idx, hit) = map.query(kmer).expect("kmer must be found");
|
||||
@@ -56,7 +57,7 @@ fn query_finds_kmer_in_layer_zero() {
|
||||
fn query_finds_kmer_in_correct_layer() {
|
||||
set_k(4);
|
||||
let dir = tempdir().unwrap();
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap();
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
|
||||
push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 1);
|
||||
push_unitigs_and_layer(&mut map, &[b"GGGACGT"], 2);
|
||||
assert_eq!(map.n_layers(), 2);
|
||||
@@ -74,7 +75,7 @@ fn query_finds_kmer_in_correct_layer() {
|
||||
fn query_absent_returns_none() {
|
||||
set_k(4);
|
||||
let dir = tempdir().unwrap();
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap();
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
|
||||
push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 1);
|
||||
let absent = canonical(b"CCCC");
|
||||
assert!(map.query(absent).is_none());
|
||||
@@ -84,7 +85,7 @@ fn query_absent_returns_none() {
|
||||
fn push_layer_from_map_convenience() {
|
||||
set_k(4);
|
||||
let dir = tempdir().unwrap();
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap();
|
||||
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
|
||||
let mut w = map.next_layer_writer().unwrap();
|
||||
w.write(&Unitig::from_ascii(b"AAAACGT")).unwrap();
|
||||
w.close().unwrap();
|
||||
|
||||
Reference in New Issue
Block a user