Add consuming k-mer iterators to PackedSeq and SuperKmer
Introduces `into_kmers()` and `into_canonical_kmers()` consuming methods to `PackedSeq` and `SuperKmer`, enabling zero-allocation sliding-window k-mer extraction via bitwise operations. This complements existing borrow-based iterators by allowing direct ownership transfer. Also includes minor documentation updates, whitespace fixes, and new unit tests to verify canonical k-mer iteration counts and output sequences.
This commit is contained in:
@@ -232,6 +232,18 @@ impl PackedSeq {
|
||||
self.iter_kmers().map(|km| km.canonical())
|
||||
}
|
||||
|
||||
/// Consuming iterator over all k-mers. Moves `self` into the iterator; zero allocation.
///
/// Takes the sequence by value and hands it to `OwnedPackedSeqKmerIter::new`, so the
/// returned [`OwnedPackedSeqKmerIter`] owns its data instead of borrowing from the caller.
/// The iterator yields [`Kmer`] values.
|
||||
#[inline]
|
||||
pub fn into_kmers(self) -> OwnedPackedSeqKmerIter {
|
||||
OwnedPackedSeqKmerIter::new(self)
|
||||
}
|
||||
|
||||
/// Consuming iterator over all canonical k-mers. Moves `self` into the iterator; zero allocation.
///
/// Built on [`Self::into_kmers`]: every k-mer it yields is converted with `canonical()`
/// before being returned, so items come out in the same order as `into_kmers()`.
|
||||
#[inline]
|
||||
pub fn into_canonical_kmers(self) -> impl Iterator<Item = CanonicalKmer> {
|
||||
self.into_kmers().map(|km| km.canonical())
|
||||
}
|
||||
|
||||
/// Extract nucleotides `[start, end)` as a new [`PackedSeq`]. Allocates.
|
||||
pub fn sub(&self, start: usize, end: usize) -> Self {
|
||||
debug_assert!(end > start && end <= self.seql());
|
||||
@@ -317,6 +329,51 @@ impl Iterator for PackedSeqKmerIter<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
// ── OwnedPackedSeqKmerIter ────────────────────────────────────────────────────
|
||||
|
||||
/// Sliding-window kmer iterator that owns its [`PackedSeq`]. Zero allocation.
///
/// Each step shifts the previous window left by one nucleotide (2 bits) and inserts the
/// next nucleotide read from `seq`, so a window is never re-extracted from scratch.
|
||||
pub struct OwnedPackedSeqKmerIter {
|
||||
/// The sequence being iterated, owned by the iterator.
seq: PackedSeq,
|
||||
/// Bit mask applied after the 2-bit left shift; clears every bit below `lshift + 2`.
mask: u64,
|
||||
/// Bit offset at which an incoming nucleotide is inserted (`64 - 2 * klen`).
lshift: usize,
|
||||
/// Raw value of the k-mer that the next call to `next()` will return.
current: u64,
|
||||
/// Index of the next nucleotide to shift into the window; starts at `klen`.
pos: usize,
|
||||
/// Sequence length in nucleotides; iteration ends once `pos` exceeds it.
max_pos: usize,
|
||||
}
|
||||
|
||||
impl OwnedPackedSeqKmerIter {
|
||||
/// Builds the iterator from an owned sequence and preloads the first window.
///
/// If the sequence is shorter than the k-mer length, `pos` (= klen) already exceeds
/// `max_pos` (= seql), so the iterator yields nothing.
fn new(seq: PackedSeq) -> Self {
|
||||
let seql = seq.seql();
|
||||
// presumably the globally configured k-mer length — TODO confirm against `k()`.
let klen = k();
|
||||
// Offset of the last nucleotide slot of a window stored in the top `2 * klen` bits.
// NOTE(review): this subtraction underflows if klen > 32; presumably `k()` guarantees
// klen <= 32 — confirm.
let lshift = 64 - klen * 2;
|
||||
// Keeps only the bits at and above `lshift + 2`. The shift is done in u128 because
// `lshift + 2` reaches 64 when klen == 1, which would overflow a u64 shift.
let mask = ((!0u128) << (lshift + 2)) as u64;
|
||||
// Preload the window starting at nucleotide 0 when the sequence is long enough.
// NOTE(review): if `extract` yields no value here, the first k-mer silently becomes
// raw 0 — confirm that this cannot happen when seql >= klen.
let current = if seql >= klen {
|
||||
seq.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
// `pos` starts at klen: the first nucleotide not yet part of the preloaded window.
Self { seq, mask, lshift, current, pos: klen, max_pos: seql }
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for OwnedPackedSeqKmerIter {
|
||||
type Item = Kmer;
|
||||
|
||||
/// Returns the current window as a [`Kmer`], then slides the window forward by one
/// nucleotide. Returns `None` once `pos` has passed `max_pos`; `pos` is not advanced
/// on that path, so further calls keep returning `None`.
fn next(&mut self) -> Option<Kmer> {
|
||||
// `pos` runs from klen to `max_pos` inclusive, one step per yielded k-mer.
if self.pos > self.max_pos {
|
||||
return None;
|
||||
}
|
||||
// The value to return was prepared by `new` or by the previous call.
let result = Kmer::from_raw(self.current);
|
||||
// Prepare the following window, unless this was the last one: there is no
// nucleotide at index `max_pos` to shift in.
if self.pos < self.max_pos {
|
||||
// Four nucleotides per byte, 2 bits each; nucleotide `pos & 3 == 0` sits in the
// top two bits of its byte, hence the shift of 6 counting down by 2.
let inner_shift = 6 - 2 * (self.pos & 3);
|
||||
let nuc = ((self.seq.seq[self.pos / 4] >> inner_shift) & 3) as u64;
|
||||
// Drop the oldest nucleotide (shift left 2), clear the vacated low bits with
// `mask`, and insert the new nucleotide at bit offset `lshift`.
self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift);
|
||||
}
|
||||
self.pos += 1;
|
||||
Some(result)
|
||||
}
|
||||
}
|
||||
|
||||
// ── varint (LEB128) ───────────────────────────────────────────────────────────
|
||||
|
||||
pub(crate) fn write_varint<W: Write>(w: &mut W, mut val: u64) -> io::Result<()> {
|
||||
|
||||
@@ -207,6 +207,12 @@ impl SuperKmer {
|
||||
pub fn iter_canonical_kmers(&self) -> impl Iterator<Item = CanonicalKmer> + '_ {
|
||||
self.inner.iter_canonical_kmers()
|
||||
}
|
||||
|
||||
/// Consuming iterator over all canonical k-mers. Moves `self`; zero allocation.
///
/// Delegates to `into_canonical_kmers()` on the `inner` field, which is moved out of
/// `self` (presumably a [`PackedSeq`] — confirm against the struct definition).
|
||||
#[inline]
|
||||
pub fn into_canonical_kmers(self) -> impl Iterator<Item = CanonicalKmer> {
|
||||
self.inner.into_canonical_kmers()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
Reference in New Issue
Block a user