Push pnxswqpxlyso #13

Merged
coissac merged 5 commits from push-pnxswqpxlyso into main 2026-06-01 12:45:46 +00:00
6 changed files with 84 additions and 70 deletions
Showing only changes of commit 657f964dda - Show all commits
+54 -42
View File
@@ -1,7 +1,8 @@
//! Compact 2-bit kmer stored as a left-aligned u64. //! Compact 2-bit kmer stored as a left-aligned [`RawKmer`].
//! //!
//! Nucleotide 0 occupies bits 63–62, nucleotide i occupies bits 63−2i and 62−2i. //! Nucleotide 0 occupies bits KMER_BITS−1 and KMER_BITS−2, nucleotide i occupies
//! The low 64−2·len bits are always zero. //! bits KMER_BITS−1−2i and KMER_BITS−2−2i.
//! The low `KMER_BITS − 2·len` bits are always zero.
//! //!
//! The length is not stored in the struct — it is supplied by the [`KmerLength`] //! The length is not stored in the struct — it is supplied by the [`KmerLength`]
//! type parameter. Two public marker types cover the common cases: //! type parameter. Two public marker types cover the common cases:
@@ -15,6 +16,17 @@
//! Tests that need a fixed length independent of the global singletons can use //! Tests that need a fixed length independent of the global singletons can use
//! [`ConstLen<N>`]. //! [`ConstLen<N>`].
/// Underlying storage type for a raw (2-bit-per-base, left-aligned) kmer.
///
/// All kmer bit-width constants (`KMER_BITS`) and shift calculations are derived
/// from this type. To extend the maximum supported k beyond 32, change this alias
/// to `u128` and update `KMER_BITS` — callers that use `RawKmer` directly will
/// then pick up the wider type automatically.
pub type RawKmer = u64;
/// Bit width of [`RawKmer`]: the number of bits available for nucleotide encoding.
pub const KMER_BITS: usize = 64;
use serde::Serialize; use serde::Serialize;
use std::io::{self, Write}; use std::io::{self, Write};
use std::marker::PhantomData; use std::marker::PhantomData;
@@ -109,35 +121,35 @@ impl Annotation for KmerAnnotation {}
// ── KmerOf ──────────────────────────────────────────────────────────────────── // ── KmerOf ────────────────────────────────────────────────────────────────────
/// A DNA kmer of length `L::len()` encoded as a left-aligned u64 (2 bits/nucleotide, MSB-first). /// A DNA kmer of length `L::len()` encoded as a left-aligned [`RawKmer`] (2 bits/nucleotide, MSB-first).
/// The low `64 − 2·L::len()` bits are always zero. /// The low `KMER_BITS − 2·L::len()` bits are always zero.
#[repr(transparent)] #[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct KmerOf<L: KmerLength>(u64, PhantomData<L>); pub struct KmerOf<L: KmerLength>(RawKmer, PhantomData<L>);
impl<L: KmerLength> KmerOf<L> { impl<L: KmerLength> KmerOf<L> {
/// Wrap a raw left-aligned u64 value as a kmer. /// Wrap a raw left-aligned [`RawKmer`] value as a kmer.
#[inline] #[inline]
pub const fn from_raw(raw: u64) -> Self { pub const fn from_raw(raw: RawKmer) -> Self {
KmerOf(raw, PhantomData) KmerOf(raw, PhantomData)
} }
/// Wrap a raw right-aligned u64 value, shifting it into left-aligned position. /// Wrap a raw right-aligned [`RawKmer`] value, shifting it into left-aligned position.
#[inline] #[inline]
pub fn from_raw_right(raw: u64) -> Self { pub fn from_raw_right(raw: RawKmer) -> Self {
KmerOf(raw << (64 - 2 * L::len()), PhantomData) KmerOf(raw << (KMER_BITS - 2 * L::len()), PhantomData)
} }
/// Return the raw left-aligned u64 value. /// Return the raw left-aligned [`RawKmer`] value.
#[inline] #[inline]
pub fn raw(&self) -> u64 { pub fn raw(&self) -> RawKmer {
self.0 self.0
} }
/// Return the raw right-aligned u64 value. /// Return the raw right-aligned [`RawKmer`] value.
#[inline] #[inline]
pub fn raw_right(&self) -> u64 { pub fn raw_right(&self) -> RawKmer {
self.0 >> (64 - 2 * L::len()) self.0 >> (KMER_BITS - 2 * L::len())
} }
/// Encode the first `L::len()` nucleotides of an ASCII slice into a kmer. /// Encode the first `L::len()` nucleotides of an ASCII slice into a kmer.
@@ -153,11 +165,11 @@ impl<L: KmerLength> KmerOf<L> {
seql: ascii.len(), seql: ascii.len(),
}); });
} }
let mut val = 0u64; let mut val: RawKmer = 0;
for i in 0..k { for i in 0..k {
val = (val << 2) | encode_base(ascii[i]) as u64; val = (val << 2) | encode_base(ascii[i]) as RawKmer;
} }
Ok(KmerOf(val << (64 - 2 * k), PhantomData)) Ok(KmerOf(val << (KMER_BITS - 2 * k), PhantomData))
} }
/// Decode into a freshly allocated ASCII `Vec<u8>`. /// Decode into a freshly allocated ASCII `Vec<u8>`.
@@ -193,27 +205,27 @@ impl<L: KmerLength> KmerOf<L> {
let x = x.swap_bytes(); let x = x.swap_bytes();
let x = ((x >> 4) & 0x0F0F0F0F0F0F0F0F) | ((x & 0x0F0F0F0F0F0F0F0F) << 4); let x = ((x >> 4) & 0x0F0F0F0F0F0F0F0F) | ((x & 0x0F0F0F0F0F0F0F0F) << 4);
let x = ((x >> 2) & 0x3333333333333333) | ((x & 0x3333333333333333) << 2); let x = ((x >> 2) & 0x3333333333333333) | ((x & 0x3333333333333333) << 2);
KmerOf(x << (64 - 2 * k), PhantomData) KmerOf(x << (KMER_BITS - 2 * k), PhantomData)
} }
/// Slide the window one base to the right: drop nucleotide 0, append `nuc` at position `L::len()-1`. /// Slide the window one base to the right: drop nucleotide 0, append `nuc` at position `L::len()-1`.
pub fn push_right(self, nuc: u8) -> Self { pub fn push_right(self, nuc: u8) -> Self {
let k = L::len(); let k = L::len();
let shifted = self.0 << 2 & (!0u64 << (64 - 2 * (k - 1))); let shifted = self.0 << 2 & (RawKmer::MAX << (KMER_BITS - 2 * (k - 1)));
KmerOf(shifted | ((nuc as u64 & 3) << (64 - 2 * k)), PhantomData) KmerOf(shifted | ((nuc as RawKmer & 3) << (KMER_BITS - 2 * k)), PhantomData)
} }
/// Slide the window one base to the left: drop nucleotide `L::len()-1`, prepend `nuc` at position 0. /// Slide the window one base to the left: drop nucleotide `L::len()-1`, prepend `nuc` at position 0.
pub fn push_left(self, nuc: u8) -> Self { pub fn push_left(self, nuc: u8) -> Self {
let k = L::len(); let k = L::len();
let shifted = (self.0 >> 2) & (!0u64 << (64 - 2 * k)); let shifted = (self.0 >> 2) & (RawKmer::MAX << (KMER_BITS - 2 * k));
KmerOf(shifted | ((nuc as u64 & 3) << 62), PhantomData) KmerOf(shifted | ((nuc as RawKmer & 3) << (KMER_BITS - 2)), PhantomData)
} }
/// Returns `true` if the last `L::len()-1` nucleotides of `self` equal the first `L::len()-1` of `other`. /// Returns `true` if the last `L::len()-1` nucleotides of `self` equal the first `L::len()-1` of `other`.
pub fn is_overlapping(self, other: Self) -> bool { pub fn is_overlapping(self, other: Self) -> bool {
let k = L::len(); let k = L::len();
let mask = !0u64 << (64 - 2 * (k - 1)); let mask = RawKmer::MAX << (KMER_BITS - 2 * (k - 1));
(self.0 << 2 & mask) == (other.0 & mask) (self.0 << 2 & mask) == (other.0 & mask)
} }
} }
@@ -231,7 +243,7 @@ impl<L: KmerLength> Sequence for KmerOf<L> {
#[inline] #[inline]
fn nucleotide(&self, i: usize) -> u8 { fn nucleotide(&self, i: usize) -> u8 {
((self.0 >> (62 - 2 * i)) & 0b11) as u8 ((self.0 >> (KMER_BITS - 2 - 2 * i)) & 0b11) as u8
} }
#[inline] #[inline]
@@ -255,21 +267,21 @@ impl<L: KmerLength> Sequence for KmerOf<L> {
/// [`CanonicalKmerOf::from_raw_unchecked`] (trusted paths such as deserialisation). /// [`CanonicalKmerOf::from_raw_unchecked`] (trusted paths such as deserialisation).
#[repr(transparent)] #[repr(transparent)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct CanonicalKmerOf<L: KmerLength>(u64, PhantomData<L>); pub struct CanonicalKmerOf<L: KmerLength>(RawKmer, PhantomData<L>);
impl<L: KmerLength> CanonicalKmerOf<L> { impl<L: KmerLength> CanonicalKmerOf<L> {
/// Wrap a raw left-aligned u64 without verifying the canonical invariant. /// Wrap a raw left-aligned [`RawKmer`] without verifying the canonical invariant.
/// ///
/// # Safety (logical) /// # Safety (logical)
/// The caller must guarantee `raw == min(raw, revcomp(raw))`. /// The caller must guarantee `raw == min(raw, revcomp(raw))`.
#[inline] #[inline]
pub fn from_raw_unchecked(raw: u64) -> Self { pub fn from_raw_unchecked(raw: RawKmer) -> Self {
CanonicalKmerOf(raw, PhantomData) CanonicalKmerOf(raw, PhantomData)
} }
/// Return the raw left-aligned u64 value. /// Return the raw left-aligned [`RawKmer`] value.
#[inline] #[inline]
pub fn raw(&self) -> u64 { pub fn raw(&self) -> RawKmer {
self.0 self.0
} }
@@ -307,25 +319,25 @@ impl<L: KmerLength> CanonicalKmerOf<L> {
/// Return the four left canonical neighbours (each already canonical). /// Return the four left canonical neighbours (each already canonical).
pub fn left_canonical_neighbors(&self) -> [CanonicalKmerOf<L>; 4] { pub fn left_canonical_neighbors(&self) -> [CanonicalKmerOf<L>; 4] {
let k = L::len(); let k = L::len();
let shifted = (self.0 >> 2) & (!0u64 << (64 - 2 * k)); let shifted = (self.0 >> 2) & (RawKmer::MAX << (KMER_BITS - 2 * k));
[ [
KmerOf::<L>(shifted, PhantomData).canonical(), KmerOf::<L>(shifted, PhantomData).canonical(),
KmerOf::<L>(shifted | (1u64 << 62), PhantomData).canonical(), KmerOf::<L>(shifted | ((1 as RawKmer) << (KMER_BITS - 2)), PhantomData).canonical(),
KmerOf::<L>(shifted | (2u64 << 62), PhantomData).canonical(), KmerOf::<L>(shifted | ((2 as RawKmer) << (KMER_BITS - 2)), PhantomData).canonical(),
KmerOf::<L>(shifted | (3u64 << 62), PhantomData).canonical(), KmerOf::<L>(shifted | ((3 as RawKmer) << (KMER_BITS - 2)), PhantomData).canonical(),
] ]
} }
/// Return the four right canonical neighbours (each already canonical). /// Return the four right canonical neighbours (each already canonical).
pub fn right_canonical_neighbors(&self) -> [CanonicalKmerOf<L>; 4] { pub fn right_canonical_neighbors(&self) -> [CanonicalKmerOf<L>; 4] {
let k = L::len(); let k = L::len();
let shifted = self.0 << 2 & (!0u64 << (64 - 2 * (k - 1))); let shifted = self.0 << 2 & (RawKmer::MAX << (KMER_BITS - 2 * (k - 1)));
let shift = 64 - 2 * k; let shift = KMER_BITS - 2 * k;
[ [
KmerOf::<L>(shifted, PhantomData).canonical(), KmerOf::<L>(shifted, PhantomData).canonical(),
KmerOf::<L>(shifted | (1u64 << shift), PhantomData).canonical(), KmerOf::<L>(shifted | ((1 as RawKmer) << shift), PhantomData).canonical(),
KmerOf::<L>(shifted | (2u64 << shift), PhantomData).canonical(), KmerOf::<L>(shifted | ((2 as RawKmer) << shift), PhantomData).canonical(),
KmerOf::<L>(shifted | (3u64 << shift), PhantomData).canonical(), KmerOf::<L>(shifted | ((3 as RawKmer) << shift), PhantomData).canonical(),
] ]
} }
@@ -349,7 +361,7 @@ impl<L: KmerLength> Sequence for CanonicalKmerOf<L> {
#[inline] #[inline]
fn nucleotide(&self, i: usize) -> u8 { fn nucleotide(&self, i: usize) -> u8 {
((self.0 >> (62 - 2 * i)) & 0b11) as u8 ((self.0 >> (KMER_BITS - 2 - 2 * i)) & 0b11) as u8
} }
fn canonical(&self) -> Self::Canonical { fn canonical(&self) -> Self::Canonical {
@@ -386,7 +398,7 @@ pub type Minimizer = CanonicalKmerOf<MLen>;
/// 64-bit representation directly, which is useful when the canonical /// 64-bit representation directly, which is useful when the canonical
/// invariant is not required or has already been handled. /// invariant is not required or has already been handled.
#[inline] #[inline]
pub fn hash_kmer(raw: u64) -> u64 { pub fn hash_kmer(raw: RawKmer) -> u64 {
mix64(raw ^ 0x9e3779b97f4a7c15) mix64(raw ^ 0x9e3779b97f4a7c15)
} }
+1 -1
View File
@@ -20,7 +20,7 @@ pub mod superkmer;
pub mod unitig; pub mod unitig;
pub use annotations::Annotation; pub use annotations::Annotation;
pub use kmer::{CanonicalKmer, Kmer, Minimizer, hash_kmer}; pub use kmer::{CanonicalKmer, Kmer, Minimizer, RawKmer, KMER_BITS, hash_kmer};
pub use packed_seq::MAX_KMERS_PER_CHUNK; pub use packed_seq::MAX_KMERS_PER_CHUNK;
pub use params::{k, m, set_k, set_m}; pub use params::{k, m, set_k, set_m};
pub use routable::RoutableSuperKmer; pub use routable::RoutableSuperKmer;
+8 -8
View File
@@ -18,7 +18,7 @@ use bitvec::prelude::*;
use crate::Sequence; use crate::Sequence;
use crate::encoding::{DEC4, encode_base}; use crate::encoding::{DEC4, encode_base};
use crate::kmer::{CanonicalKmer, Kmer, KmerError, KLen, KmerLength, KmerOf, MLen, Minimizer}; use crate::kmer::{CanonicalKmer, Kmer, KmerError, KLen, KmerLength, KmerOf, MLen, Minimizer, RawKmer, KMER_BITS};
use crate::params::k; use crate::params::k;
use crate::revcomp_lookup::REVCOMP4; use crate::revcomp_lookup::REVCOMP4;
@@ -198,8 +198,8 @@ impl PackedSeq {
return Err(KmerError::OutOfBounds { position: i, k: len, seql }); return Err(KmerError::OutOfBounds { position: i, k: len, seql });
} }
let bits = self.seq.view_bits::<Msb0>(); let bits = self.seq.view_bits::<Msb0>();
let raw: u64 = bits[i * 2..(i + len) * 2].load_be(); let raw: RawKmer = bits[i * 2..(i + len) * 2].load_be();
Ok(KmerOf::from_raw(raw << (64 - 2 * len))) Ok(KmerOf::from_raw(raw << (KMER_BITS - 2 * len)))
} }
/// Extract the kmer of length `params::k()` at nucleotide position `i`. Zero allocation. /// Extract the kmer of length `params::k()` at nucleotide position `i`. Zero allocation.
@@ -322,9 +322,9 @@ impl AsRef<PackedSeq> for PackedSeq {
/// sequence into the iterator, so no reference escapes the closure. /// sequence into the iterator, so no reference escapes the closure.
pub struct PackedSeqKmerIter<S> { pub struct PackedSeqKmerIter<S> {
seq: S, seq: S,
mask: u64, mask: RawKmer,
lshift: usize, lshift: usize,
current: u64, current: RawKmer,
pos: usize, pos: usize,
max_pos: usize, max_pos: usize,
} }
@@ -341,8 +341,8 @@ impl<S: AsRef<PackedSeq>> PackedSeqKmerIter<S> {
let ps = seq.as_ref(); let ps = seq.as_ref();
let seql = ps.seql(); let seql = ps.seql();
let klen = k(); let klen = k();
let lshift = 64 - klen * 2; let lshift = KMER_BITS - klen * 2;
let mask = ((!0u128) << (lshift + 2)) as u64; let mask = ((!0u128) << (lshift + 2)) as RawKmer;
let current = if seql >= klen { let current = if seql >= klen {
ps.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0) ps.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0)
} else { } else {
@@ -362,7 +362,7 @@ impl<S: AsRef<PackedSeq>> Iterator for PackedSeqKmerIter<S> {
let result = Kmer::from_raw(self.current); let result = Kmer::from_raw(self.current);
if self.pos < self.max_pos { if self.pos < self.max_pos {
let inner_shift = 6 - 2 * (self.pos & 3); let inner_shift = 6 - 2 * (self.pos & 3);
let nuc = ((self.seq.as_ref().seq[self.pos / 4] >> inner_shift) & 3) as u64; let nuc = ((self.seq.as_ref().seq[self.pos / 4] >> inner_shift) & 3) as RawKmer;
self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift); self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift);
} }
self.pos += 1; self.pos += 1;
+3 -3
View File
@@ -118,7 +118,7 @@ impl MphfLayer {
} }
LayerEvidence::Approx { unitigs_path, .. } => { LayerEvidence::Approx { unitigs_path, .. } => {
let reader = UnitigFileReader::open_sequential(unitigs_path).ok()?; let reader = UnitigFileReader::open_sequential(unitigs_path).ok()?;
for stored in reader.iter_canonical_kmers() { for (stored, _, _) in reader.iter_indexed_canonical_kmers() {
if self.mphf.index(&stored.raw()) == slot { if self.mphf.index(&stored.raw()) == slot {
return if stored == kmer { Some(slot) } else { None }; return if stored == kmer { Some(slot) } else { None };
} }
@@ -196,7 +196,7 @@ impl MphfLayer {
let mut fw = FingerprintVecWriter::new(n, b); let mut fw = FingerprintVecWriter::new(n, b);
for kmer in unitigs.iter_canonical_kmers() { for (kmer, _, _) in unitigs.iter_indexed_canonical_kmers() {
let slot = mphf.index(&kmer.raw()); let slot = mphf.index(&kmer.raw());
if slot >= n { if slot >= n {
return Err(OLMError::Mphf("slot out of bounds".into())); return Err(OLMError::Mphf("slot out of bounds".into()));
@@ -281,7 +281,7 @@ impl MphfLayer {
IndexMode::Approx { b, .. } => { IndexMode::Approx { b, .. } => {
let mut fw = FingerprintVecWriter::new(n, *b); let mut fw = FingerprintVecWriter::new(n, *b);
for kmer in unitigs2.iter_canonical_kmers() { for (kmer, _, _) in unitigs2.iter_indexed_canonical_kmers() {
let slot = mphf.index(&kmer.raw()); let slot = mphf.index(&kmer.raw());
if slot >= n { return Err(OLMError::Mphf("slot out of bounds".into())); } if slot >= n { return Err(OLMError::Mphf("slot out of bounds".into())); }
let byte = slot / 8; let bit = 1u8 << (slot % 8); let byte = slot / 8; let bit = 1u8 << (slot % 8);
+11 -10
View File
@@ -2,7 +2,7 @@ use super::*;
use obicompactvec::PersistentCompactIntMatrix; use obicompactvec::PersistentCompactIntMatrix;
use obikseq::{set_k, Kmer, Sequence as _, Unitig}; use obikseq::{set_k, Kmer, Sequence as _, Unitig};
use obiskio::DEFAULT_BLOCK_BITS; use obiskio::DEFAULT_BLOCK_BITS;
use crate::meta::EvidenceKind; use crate::meta::IndexMode;
use tempfile::tempdir; use tempfile::tempdir;
fn write_unitigs(dir: &Path, seqs: &[&[u8]]) { fn write_unitigs(dir: &Path, seqs: &[&[u8]]) {
@@ -15,7 +15,8 @@ fn write_unitigs(dir: &Path, seqs: &[&[u8]]) {
fn all_canonical_kmers(dir: &Path) -> Vec<CanonicalKmer> { fn all_canonical_kmers(dir: &Path) -> Vec<CanonicalKmer> {
UnitigFileReader::open_sequential(&dir.join(UNITIGS_FILE)).unwrap() UnitigFileReader::open_sequential(&dir.join(UNITIGS_FILE)).unwrap()
.iter_canonical_kmers() .iter_indexed_canonical_kmers()
.map(|(kmer, _, _)| kmer)
.collect() .collect()
} }
@@ -25,8 +26,8 @@ fn build_and_query_all_kmers_found() {
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
write_unitigs(dir.path(), &[b"AAAACGT"]); write_unitigs(dir.path(), &[b"AAAACGT"]);
let kmers = all_canonical_kmers(dir.path()); let kmers = all_canonical_kmers(dir.path());
Layer::<()>::build(dir.path(), DEFAULT_BLOCK_BITS, &EvidenceKind::Exact).unwrap(); Layer::<()>::build(dir.path(), DEFAULT_BLOCK_BITS, &IndexMode::Exact).unwrap();
let layer = Layer::<()>::open(dir.path()).unwrap(); let layer = Layer::<()>::open(dir.path(), &IndexMode::Exact).unwrap();
for kmer in kmers { for kmer in kmers {
assert!(layer.query(kmer).is_some(), "kmer should be present"); assert!(layer.query(kmer).is_some(), "kmer should be present");
} }
@@ -43,10 +44,10 @@ fn counts_are_stored_and_retrieved() {
Layer::<PersistentCompactIntMatrix>::build( Layer::<PersistentCompactIntMatrix>::build(
dir.path(), dir.path(),
DEFAULT_BLOCK_BITS, DEFAULT_BLOCK_BITS,
&EvidenceKind::Exact, &IndexMode::Exact,
|kmer| count_map.get(&kmer).copied().unwrap_or(0), |kmer| count_map.get(&kmer).copied().unwrap_or(0),
).unwrap(); ).unwrap();
let layer = Layer::<PersistentCompactIntMatrix>::open(dir.path()).unwrap(); let layer = Layer::<PersistentCompactIntMatrix>::open(dir.path(), &IndexMode::Exact).unwrap();
for kmer in &kmers { for kmer in &kmers {
let hit = layer.query(*kmer).expect("kmer must be present"); let hit = layer.query(*kmer).expect("kmer must be present");
assert_eq!(hit.data[0], count_map[kmer]); assert_eq!(hit.data[0], count_map[kmer]);
@@ -58,8 +59,8 @@ fn query_absent_returns_none() {
set_k(4); set_k(4);
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
write_unitigs(dir.path(), &[b"AAAACGT"]); write_unitigs(dir.path(), &[b"AAAACGT"]);
Layer::<()>::build(dir.path(), DEFAULT_BLOCK_BITS, &EvidenceKind::Exact).unwrap(); Layer::<()>::build(dir.path(), DEFAULT_BLOCK_BITS, &IndexMode::Exact).unwrap();
let layer = Layer::<()>::open(dir.path()).unwrap(); let layer = Layer::<()>::open(dir.path(), &IndexMode::Exact).unwrap();
let absent = Kmer::from_ascii(b"CCCC").unwrap().canonical(); let absent = Kmer::from_ascii(b"CCCC").unwrap().canonical();
assert!(layer.query(absent).is_none()); assert!(layer.query(absent).is_none());
} }
@@ -69,9 +70,9 @@ fn open_after_build_is_consistent() {
set_k(4); set_k(4);
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
write_unitigs(dir.path(), &[b"AAAACGT"]); write_unitigs(dir.path(), &[b"AAAACGT"]);
let n = Layer::<PersistentCompactIntMatrix>::build(dir.path(), DEFAULT_BLOCK_BITS, &EvidenceKind::Exact, |_| 7).unwrap(); let n = Layer::<PersistentCompactIntMatrix>::build(dir.path(), DEFAULT_BLOCK_BITS, &IndexMode::Exact, |_| 7).unwrap();
assert_eq!(n, 4); assert_eq!(n, 4);
let layer = Layer::<PersistentCompactIntMatrix>::open(dir.path()).unwrap(); let layer = Layer::<PersistentCompactIntMatrix>::open(dir.path(), &IndexMode::Exact).unwrap();
let kmer = Kmer::from_ascii(b"AAAA").unwrap().canonical(); let kmer = Kmer::from_ascii(b"AAAA").unwrap().canonical();
let hit = layer.query(kmer).expect("AAAA must be present"); let hit = layer.query(kmer).expect("AAAA must be present");
assert_eq!(hit.data[0], 7); assert_eq!(hit.data[0], 7);
+7 -6
View File
@@ -1,6 +1,7 @@
use super::*; use super::*;
use obicompactvec::PersistentCompactIntMatrix; use obicompactvec::PersistentCompactIntMatrix;
use obikseq::{set_k, Sequence as _, Unitig}; use obikseq::{set_k, Sequence as _, Unitig};
use crate::meta::IndexMode;
use tempfile::tempdir; use tempfile::tempdir;
fn push_unitigs_and_layer( fn push_unitigs_and_layer(
@@ -24,7 +25,7 @@ fn canonical(ascii: &[u8]) -> CanonicalKmer {
fn create_empty_map() { fn create_empty_map() {
set_k(4); set_k(4);
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let map = LayeredMap::<()>::create(dir.path()).unwrap(); let map = LayeredMap::<()>::create(dir.path(), IndexMode::Exact).unwrap();
assert_eq!(map.n_layers(), 0); assert_eq!(map.n_layers(), 0);
} }
@@ -33,7 +34,7 @@ fn open_reloads_layer_count() {
set_k(4); set_k(4);
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
{ {
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap(); let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 1); push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 1);
} }
let map = LayeredMap::<PersistentCompactIntMatrix>::open(dir.path()).unwrap(); let map = LayeredMap::<PersistentCompactIntMatrix>::open(dir.path()).unwrap();
@@ -44,7 +45,7 @@ fn open_reloads_layer_count() {
fn query_finds_kmer_in_layer_zero() { fn query_finds_kmer_in_layer_zero() {
set_k(4); set_k(4);
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap(); let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 3); push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 3);
let kmer = canonical(b"AAAC"); let kmer = canonical(b"AAAC");
let (layer_idx, hit) = map.query(kmer).expect("kmer must be found"); let (layer_idx, hit) = map.query(kmer).expect("kmer must be found");
@@ -56,7 +57,7 @@ fn query_finds_kmer_in_layer_zero() {
fn query_finds_kmer_in_correct_layer() { fn query_finds_kmer_in_correct_layer() {
set_k(4); set_k(4);
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap(); let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 1); push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 1);
push_unitigs_and_layer(&mut map, &[b"GGGACGT"], 2); push_unitigs_and_layer(&mut map, &[b"GGGACGT"], 2);
assert_eq!(map.n_layers(), 2); assert_eq!(map.n_layers(), 2);
@@ -74,7 +75,7 @@ fn query_finds_kmer_in_correct_layer() {
fn query_absent_returns_none() { fn query_absent_returns_none() {
set_k(4); set_k(4);
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap(); let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 1); push_unitigs_and_layer(&mut map, &[b"AAAACGT"], 1);
let absent = canonical(b"CCCC"); let absent = canonical(b"CCCC");
assert!(map.query(absent).is_none()); assert!(map.query(absent).is_none());
@@ -84,7 +85,7 @@ fn query_absent_returns_none() {
fn push_layer_from_map_convenience() { fn push_layer_from_map_convenience() {
set_k(4); set_k(4);
let dir = tempdir().unwrap(); let dir = tempdir().unwrap();
let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path()).unwrap(); let mut map = LayeredMap::<PersistentCompactIntMatrix>::create(dir.path(), IndexMode::Exact).unwrap();
let mut w = map.next_layer_writer().unwrap(); let mut w = map.next_layer_writer().unwrap();
w.write(&Unitig::from_ascii(b"AAAACGT")).unwrap(); w.write(&Unitig::from_ascii(b"AAAACGT")).unwrap();
w.close().unwrap(); w.close().unwrap();