refactor(packed_seq): unify kmer iterators with generic storage

Merge PackedSeqKmerIter and OwnedPackedSeqKmerIter into a single generic PackedSeqKmerIter<S> parameterized over the storage type. Add an AsRef<PackedSeq> implementation to PackedSeq to enable this abstraction, allowing the zero-allocation sliding-window kmer iterator to seamlessly accept both borrowed and owned sequences without code duplication.
This commit is contained in:
Eric Coissac
2026-05-11 11:13:30 +08:00
parent 6687911d60
commit 7bc9aa9af5
+19 -55
View File
@@ -222,7 +222,7 @@ impl PackedSeq {
/// Iterate over all kmers of length `params::k()` in order. Zero allocation.
#[inline]
pub fn iter_kmers(&self) -> PackedSeqKmerIter<'_> {
pub fn iter_kmers(&self) -> PackedSeqKmerIter<&PackedSeq> {
PackedSeqKmerIter::new(self)
}
@@ -282,11 +282,18 @@ impl PackedSeq {
}
}
/// Identity conversion: borrowing a `PackedSeq` as a `&PackedSeq`.
///
/// The standard library provides no reflexive `AsRef<T> for T`, so this explicit
/// impl is what lets an owned `PackedSeq` satisfy the `S: AsRef<PackedSeq>` bound
/// used by `PackedSeqKmerIter<S>`.
impl AsRef<PackedSeq> for PackedSeq {
fn as_ref(&self) -> &PackedSeq { self }
}
// ── PackedSeqKmerIter ─────────────────────────────────────────────────────────
/// Sliding-window kmer iterator over a [`PackedSeq`]. Zero allocation.
pub struct PackedSeqKmerIter<'a> {
seq: &'a PackedSeq,
///
/// `S` is the storage: `&PackedSeq` (borrowing) or `PackedSeq` (owning).
/// Both implement `AsRef<PackedSeq>`: `PackedSeq` through its own explicit impl,
/// and `&PackedSeq` through the std blanket impl of `AsRef` for references.
pub struct PackedSeqKmerIter<S> {
seq: S,
mask: u64,
lshift: usize,
current: u64,
@@ -294,61 +301,18 @@ pub struct PackedSeqKmerIter<'a> {
max_pos: usize,
}
impl<'a> PackedSeqKmerIter<'a> {
fn new(seq: &'a PackedSeq) -> Self {
let seql = seq.seql();
let klen = k();
let lshift = 64 - klen * 2;
let mask = ((!0u128) << (lshift + 2)) as u64;
Self {
seq,
mask,
lshift,
current: if seql >= klen { seq.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0) } else { 0 },
pos: klen,
max_pos: seql,
}
}
}
/// Owning alias: the iterator moves the [`PackedSeq`] into itself.
pub type OwnedPackedSeqKmerIter = PackedSeqKmerIter<PackedSeq>;
impl Iterator for PackedSeqKmerIter<'_> {
type Item = Kmer;
fn next(&mut self) -> Option<Kmer> {
if self.pos > self.max_pos {
return None;
}
let result = Kmer::from_raw(self.current);
if self.pos < self.max_pos {
let inner_shift = 6 - 2 * (self.pos & 3);
let nuc = ((self.seq.seq[self.pos / 4] >> inner_shift) & 3) as u64;
self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift);
}
self.pos += 1;
Some(result)
}
}
// ── OwnedPackedSeqKmerIter ────────────────────────────────────────────────────
/// Sliding-window kmer iterator that owns its [`PackedSeq`]. Zero allocation.
pub struct OwnedPackedSeqKmerIter {
seq: PackedSeq,
mask: u64,
lshift: usize,
current: u64,
pos: usize,
max_pos: usize,
}
impl OwnedPackedSeqKmerIter {
fn new(seq: PackedSeq) -> Self {
let seql = seq.seql();
impl<S: AsRef<PackedSeq>> PackedSeqKmerIter<S> {
fn new(seq: S) -> Self {
let ps = seq.as_ref();
let seql = ps.seql();
let klen = k();
let lshift = 64 - klen * 2;
let mask = ((!0u128) << (lshift + 2)) as u64;
let current = if seql >= klen {
seq.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0)
ps.extract::<KLen>(0).map(|km| km.raw()).unwrap_or(0)
} else {
0
};
@@ -356,7 +320,7 @@ impl OwnedPackedSeqKmerIter {
}
}
impl Iterator for OwnedPackedSeqKmerIter {
impl<S: AsRef<PackedSeq>> Iterator for PackedSeqKmerIter<S> {
type Item = Kmer;
fn next(&mut self) -> Option<Kmer> {
@@ -366,7 +330,7 @@ impl Iterator for OwnedPackedSeqKmerIter {
let result = Kmer::from_raw(self.current);
if self.pos < self.max_pos {
let inner_shift = 6 - 2 * (self.pos & 3);
let nuc = ((self.seq.seq[self.pos / 4] >> inner_shift) & 3) as u64;
let nuc = ((self.seq.as_ref().seq[self.pos / 4] >> inner_shift) & 3) as u64;
self.current = ((self.current << 2) & self.mask) | (nuc << self.lshift);
}
self.pos += 1;