refactor(packed_seq): unify kmer iterators with generic storage
Merge PackedSeqKmerIter and OwnedPackedSeqKmerIter into a single generic PackedSeqKmerIter<S> parameterized over the storage type. Add an AsRef<PackedSeq> implementation to PackedSeq to enable this abstraction, allowing the zero-allocation sliding-window kmer iterator to seamlessly accept both borrowed and owned sequences without code duplication. The OwnedPackedSeqKmerIter name is kept as a type alias for PackedSeqKmerIter<PackedSeq>, so existing references to it continue to resolve.
This commit is contained in:
@@ -222,7 +222,7 @@ impl PackedSeq {
|
||||
|
||||
/// Iterate over all kmers of length `params::k()` in order. Zero allocation.
|
||||
#[inline]
|
||||
pub fn iter_kmers(&self) -> PackedSeqKmerIter<'_> {
|
||||
pub fn iter_kmers(&self) -> PackedSeqKmerIter<&PackedSeq> {
|
||||
PackedSeqKmerIter::new(self)
|
||||
}
|
||||
|
||||
@@ -282,11 +282,18 @@ impl PackedSeq {
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<PackedSeq> for PackedSeq {
|
||||
fn as_ref(&self) -> &PackedSeq { self }
|
||||
}
|
||||
|
||||
// ── PackedSeqKmerIter ─────────────────────────────────────────────────────────
|
||||
|
||||
/// Sliding-window kmer iterator over a [`PackedSeq`]. Zero allocation.
|
||||
pub struct PackedSeqKmerIter<'a> {
|
||||
seq: &'a PackedSeq,
|
||||
///
|
||||
/// `S` is the storage: `&PackedSeq` (borrowing) or `PackedSeq` (owning).
|
||||
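/// `PackedSeq` implements `AsRef<PackedSeq>` through its own explicit impl (std has no
/// reflexive `AsRef<T> for T`); `&PackedSeq` then gets it from std's blanket impl for references.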
|
||||
pub struct PackedSeqKmerIter<S> {
|
||||
seq: S,
|
||||
mask: u64,
|
||||
lshift: usize,
|
||||
current: u64,
|
||||
@@ -294,61 +301,18 @@ pub struct PackedSeqKmerIter<'a> {
|
||||
max_pos: usize,
|
||||
}
|
||||
|
||||
impl<'a> PackedSeqKmerIter<'a> {
|
||||
fn new(seq: &'a PackedSeq) -> Self {
|
||||
let seql = seq.seql();
|
||||
let klen = k();
|
||||
let lshift = 64 - klen * 2;
|
||||
let mask = ((!0u128) << (lshift + 2)) as u64;
|
||||
Self {
|
||||
seq,
|
||||
mask,
|
||||
lshift,
|
||||
current: if seql >= klen { seq.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0) } else { 0 },
|
||||
pos: klen,
|
||||
max_pos: seql,
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Owning alias: the iterator moves the [`PackedSeq`] into itself.
|
||||
pub type OwnedPackedSeqKmerIter = PackedSeqKmerIter<PackedSeq>;
|
||||
|
||||
impl Iterator for PackedSeqKmerIter<'_> {
|
||||
type Item = Kmer;
|
||||
|
||||
fn next(&mut self) -> Option<Kmer> {
|
||||
if self.pos > self.max_pos {
|
||||
return None;
|
||||
}
|
||||
let result = Kmer::from_raw(self.current);
|
||||
if self.pos < self.max_pos {
|
||||
let inner_shift = 6 - 2 * (self.pos & 3);
|
||||
let nuc = ((self.seq.seq[self.pos / 4] >> inner_shift) & 3) as u64;
|
||||
self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift);
|
||||
}
|
||||
self.pos += 1;
|
||||
Some(result)
|
||||
}
|
||||
}
|
||||
|
||||
// ── OwnedPackedSeqKmerIter ────────────────────────────────────────────────────
|
||||
|
||||
/// Sliding-window kmer iterator that owns its [`PackedSeq`]. Zero allocation.
|
||||
pub struct OwnedPackedSeqKmerIter {
|
||||
seq: PackedSeq,
|
||||
mask: u64,
|
||||
lshift: usize,
|
||||
current: u64,
|
||||
pos: usize,
|
||||
max_pos: usize,
|
||||
}
|
||||
|
||||
impl OwnedPackedSeqKmerIter {
|
||||
fn new(seq: PackedSeq) -> Self {
|
||||
let seql = seq.seql();
|
||||
impl<S: AsRef<PackedSeq>> PackedSeqKmerIter<S> {
|
||||
fn new(seq: S) -> Self {
|
||||
let ps = seq.as_ref();
|
||||
let seql = ps.seql();
|
||||
let klen = k();
|
||||
let lshift = 64 - klen * 2;
|
||||
let mask = ((!0u128) << (lshift + 2)) as u64;
|
||||
let current = if seql >= klen {
|
||||
seq.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0)
|
||||
ps.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
@@ -356,7 +320,7 @@ impl OwnedPackedSeqKmerIter {
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for OwnedPackedSeqKmerIter {
|
||||
impl<S: AsRef<PackedSeq>> Iterator for PackedSeqKmerIter<S> {
|
||||
type Item = Kmer;
|
||||
|
||||
fn next(&mut self) -> Option<Kmer> {
|
||||
@@ -366,7 +330,7 @@ impl Iterator for OwnedPackedSeqKmerIter {
|
||||
let result = Kmer::from_raw(self.current);
|
||||
if self.pos < self.max_pos {
|
||||
let inner_shift = 6 - 2 * (self.pos & 3);
|
||||
let nuc = ((self.seq.seq[self.pos / 4] >> inner_shift) & 3) as u64;
|
||||
let nuc = ((self.seq.as_ref().seq[self.pos / 4] >> inner_shift) & 3) as u64;
|
||||
self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift);
|
||||
}
|
||||
self.pos += 1;
|
||||
|
||||
Reference in New Issue
Block a user