refactor(packed_seq): unify kmer iterators with generic storage
Merge PackedSeqKmerIter and OwnedPackedSeqKmerIter into a single generic PackedSeqKmerIter<S> parameterized over the storage type. Add an AsRef<PackedSeq> implementation to PackedSeq to enable this abstraction, allowing the zero-allocation sliding-window kmer iterator to accept both borrowed and owned sequences without code duplication. OwnedPackedSeqKmerIter is kept as a type alias for PackedSeqKmerIter<PackedSeq>, so the old name remains available.
This commit is contained in:
@@ -222,7 +222,7 @@ impl PackedSeq {
|
|||||||
|
|
||||||
/// Iterate over all kmers of length `params::k()` in order. Zero allocation.
|
/// Iterate over all kmers of length `params::k()` in order. Zero allocation.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn iter_kmers(&self) -> PackedSeqKmerIter<'_> {
|
pub fn iter_kmers(&self) -> PackedSeqKmerIter<&PackedSeq> {
|
||||||
PackedSeqKmerIter::new(self)
|
PackedSeqKmerIter::new(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -282,11 +282,18 @@ impl PackedSeq {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl AsRef<PackedSeq> for PackedSeq {
|
||||||
|
fn as_ref(&self) -> &PackedSeq { self }
|
||||||
|
}
|
||||||
|
|
||||||
// ── PackedSeqKmerIter ─────────────────────────────────────────────────────────
|
// ── PackedSeqKmerIter ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/// Sliding-window kmer iterator over a [`PackedSeq`]. Zero allocation.
|
/// Sliding-window kmer iterator over a [`PackedSeq`]. Zero allocation.
|
||||||
pub struct PackedSeqKmerIter<'a> {
|
///
|
||||||
seq: &'a PackedSeq,
|
/// `S` is the storage: `&PackedSeq` (borrowing) or `PackedSeq` (owning).
|
||||||
|
/// `PackedSeq` implements `AsRef<PackedSeq>` directly; `&PackedSeq` gets it from the std blanket impl for references.
|
||||||
|
pub struct PackedSeqKmerIter<S> {
|
||||||
|
seq: S,
|
||||||
mask: u64,
|
mask: u64,
|
||||||
lshift: usize,
|
lshift: usize,
|
||||||
current: u64,
|
current: u64,
|
||||||
@@ -294,61 +301,18 @@ pub struct PackedSeqKmerIter<'a> {
|
|||||||
max_pos: usize,
|
max_pos: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> PackedSeqKmerIter<'a> {
|
/// Owning alias: the iterator moves the [`PackedSeq`] into itself.
|
||||||
fn new(seq: &'a PackedSeq) -> Self {
|
pub type OwnedPackedSeqKmerIter = PackedSeqKmerIter<PackedSeq>;
|
||||||
let seql = seq.seql();
|
|
||||||
let klen = k();
|
|
||||||
let lshift = 64 - klen * 2;
|
|
||||||
let mask = ((!0u128) << (lshift + 2)) as u64;
|
|
||||||
Self {
|
|
||||||
seq,
|
|
||||||
mask,
|
|
||||||
lshift,
|
|
||||||
current: if seql >= klen { seq.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0) } else { 0 },
|
|
||||||
pos: klen,
|
|
||||||
max_pos: seql,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Iterator for PackedSeqKmerIter<'_> {
|
impl<S: AsRef<PackedSeq>> PackedSeqKmerIter<S> {
|
||||||
type Item = Kmer;
|
fn new(seq: S) -> Self {
|
||||||
|
let ps = seq.as_ref();
|
||||||
fn next(&mut self) -> Option<Kmer> {
|
let seql = ps.seql();
|
||||||
if self.pos > self.max_pos {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
let result = Kmer::from_raw(self.current);
|
|
||||||
if self.pos < self.max_pos {
|
|
||||||
let inner_shift = 6 - 2 * (self.pos & 3);
|
|
||||||
let nuc = ((self.seq.seq[self.pos / 4] >> inner_shift) & 3) as u64;
|
|
||||||
self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift);
|
|
||||||
}
|
|
||||||
self.pos += 1;
|
|
||||||
Some(result)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ── OwnedPackedSeqKmerIter ────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
/// Sliding-window kmer iterator that owns its [`PackedSeq`]. Zero allocation.
|
|
||||||
pub struct OwnedPackedSeqKmerIter {
|
|
||||||
seq: PackedSeq,
|
|
||||||
mask: u64,
|
|
||||||
lshift: usize,
|
|
||||||
current: u64,
|
|
||||||
pos: usize,
|
|
||||||
max_pos: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl OwnedPackedSeqKmerIter {
|
|
||||||
fn new(seq: PackedSeq) -> Self {
|
|
||||||
let seql = seq.seql();
|
|
||||||
let klen = k();
|
let klen = k();
|
||||||
let lshift = 64 - klen * 2;
|
let lshift = 64 - klen * 2;
|
||||||
let mask = ((!0u128) << (lshift + 2)) as u64;
|
let mask = ((!0u128) << (lshift + 2)) as u64;
|
||||||
let current = if seql >= klen {
|
let current = if seql >= klen {
|
||||||
seq.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0)
|
ps.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0)
|
||||||
} else {
|
} else {
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
@@ -356,7 +320,7 @@ impl OwnedPackedSeqKmerIter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for OwnedPackedSeqKmerIter {
|
impl<S: AsRef<PackedSeq>> Iterator for PackedSeqKmerIter<S> {
|
||||||
type Item = Kmer;
|
type Item = Kmer;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Kmer> {
|
fn next(&mut self) -> Option<Kmer> {
|
||||||
@@ -366,7 +330,7 @@ impl Iterator for OwnedPackedSeqKmerIter {
|
|||||||
let result = Kmer::from_raw(self.current);
|
let result = Kmer::from_raw(self.current);
|
||||||
if self.pos < self.max_pos {
|
if self.pos < self.max_pos {
|
||||||
let inner_shift = 6 - 2 * (self.pos & 3);
|
let inner_shift = 6 - 2 * (self.pos & 3);
|
||||||
let nuc = ((self.seq.seq[self.pos / 4] >> inner_shift) & 3) as u64;
|
let nuc = ((self.seq.as_ref().seq[self.pos / 4] >> inner_shift) & 3) as u64;
|
||||||
self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift);
|
self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift);
|
||||||
}
|
}
|
||||||
self.pos += 1;
|
self.pos += 1;
|
||||||
|
|||||||
Reference in New Issue
Block a user