add PhantomData import for generic type safety

- Added `use std::marker::PhantomData;` to prepare for generic scheduler implementations
- Ensures type safety and avoids unused lifetime/type parameter warnings
This commit is contained in:
Eric Coissac
2026-04-27 23:27:42 +02:00
parent ebbfe35cbc
commit 4c19882f03
10 changed files with 328 additions and 271 deletions
+174 -2
View File
@@ -333,8 +333,6 @@ impl SuperKmer {
/// Extract the canonical kmer of length k starting at nucleotide position i (0-based).
///
/// Equivalent to `self.kmer(i, k)?.canonical(k)` but avoids the redundant `revcomp` call
/// when the super-kmer is already in canonical form (which is the normal case).
/// Returns an error if k is invalid (0 or > 32) or if position i + k exceeds the sequence length.
pub fn canonical_kmer(&self, i: usize, k: usize) -> Result<Kmer, KmerError> {
Ok(self.kmer(i, k)?.canonical(k))
@@ -367,6 +365,16 @@ impl SuperKmer {
true
}
/// Iterate over all kmers of length `k` in order, yielding each as a left-aligned [`Kmer`].
///
/// The k-mers are produced by a sliding window: the first item is the k-mer at
/// nucleotide position 0, and each following item starts one nucleotide further,
/// so a sequence of `seql` nucleotides yields `seql - k + 1` items. When the
/// sequence is shorter than `k`, nothing is yielded.
///
/// NOTE(review): `k` is presumably expected to be a valid k-mer length
/// (1 to 32, as accepted by `kmer`) — confirm against callers.
pub fn iter_kmers(&self, k: usize) -> impl Iterator<Item = Kmer> + '_ {
SKKmerIter::new(self, k)
}
/// Iterate over the canonical form of every kmer of length `k`, in sequence order.
///
/// Each kmer produced by [`Self::iter_kmers`] is passed through
/// `Kmer::canonical(k)` before being yielded, so the number and order of items
/// are the same as for `iter_kmers(k)`.
pub fn iter_canonical_kmers(&self, k: usize) -> impl Iterator<Item = Kmer> + '_ {
    let kmers = self.iter_kmers(k);
    kmers.map(move |kmer| kmer.canonical(k))
}
/// Returns the XXH3 hash of the super-kmer sequence.
pub fn hash(&self) -> u64 {
if self.is_canonical() {
@@ -379,6 +387,53 @@ impl SuperKmer {
}
}
/// Sliding-window iterator over the k-mers of a [`SuperKmer`].
///
/// The window is kept as the raw, left-aligned 64-bit value of the current
/// k-mer; advancing shifts it left by one nucleotide (2 bits) and inserts the
/// next nucleotide read from the super-kmer's packed `seq` bytes.
struct SKKmerIter<'a> {
/// Super-kmer whose packed sequence bytes (`seq`) are read while sliding.
skmer: &'a SuperKmer,
/// Bit mask applied to `current << 2`: it clears the low `lshift + 2` bits,
/// i.e. everything below the first `k - 1` nucleotides of the window.
mask: u64,
/// `64 - 2 * k`: left shift that places a 2-bit nucleotide code in the last
/// slot of a left-aligned k-mer.
lshift: usize,
/// Raw bits of the k-mer that the next call to `next` will yield.
current: u64,
/// Index of the next nucleotide to shift into the window; starts at `k`.
pos: usize,
/// Sequence length in nucleotides (`seql()`); iteration ends once `pos` exceeds it.
max_pos: usize,
}
impl<'a> SKKmerIter<'a> {
    /// Build an iterator positioned on the first k-mer of `skmer`.
    ///
    /// A k-mer is packed left-aligned in a `u64` at 2 bits per nucleotide, so a
    /// k-mer only exists when `1 <= k <= 32` and `k <= skmer.seql()`. For any
    /// other `k` the returned iterator is already exhausted and yields nothing.
    /// This gives the same result in debug and release builds, avoiding the
    /// build-dependent overflow of `64 - 2 * k` for `k > 32`.
    fn new(skmer: &'a SuperKmer, k: usize) -> Self {
        let seql = skmer.seql();

        if k == 0 || k > 32 || k > seql {
            // `pos > max_pos` is the "exhausted" state checked by `next`.
            return Self {
                skmer,
                mask: 0,
                lshift: 0,
                current: 0,
                pos: 1,
                max_pos: 0,
            };
        }

        // Safe: k <= 32 was checked above, so this cannot underflow.
        let lshift = 64 - 2 * k;
        // Shift in 128 bits because `lshift + 2` reaches 64 when k == 1, which
        // would overflow a 64-bit shift; the truncating cast then gives 0.
        let mask = ((!0u128) << (lshift + 2)) as u64;
        let current = skmer
            .kmer(0, k)
            .expect("k is in 1..=32 and k <= seql, so the first k-mer exists")
            .raw();

        Self {
            skmer,
            mask,
            lshift,
            current,
            pos: k,
            max_pos: seql,
        }
    }
}
impl<'a> Iterator for SKKmerIter<'a> {
    type Item = Kmer;

    /// Yield the current k-mer, then slide the window one nucleotide forward.
    ///
    /// Returns `None` once `pos` has moved past `max_pos`; since `pos` is never
    /// decreased, every later call returns `None` as well.
    fn next(&mut self) -> Option<Self::Item> {
        if self.pos > self.max_pos {
            return None;
        }
        // Emit current kmer first, then slide the window forward.
        let result = Kmer::from_raw(self.current);
        if self.pos < self.max_pos {
            // Four 2-bit nucleotides are packed per byte.
            let byte_pos = self.pos / 4;
            // Nucleotide at position r within its byte occupies bits 7-2r (MSB) and 6-2r (LSB).
            // Extract right-aligned, then place at lshift.
            let inner_shift = 6 - 2 * (self.pos & 3);
            let nuc = (((self.skmer.seq[byte_pos] >> inner_shift) & 3) as u64) << self.lshift;
            // Drop the oldest nucleotide, clear the freed slot, insert the new one.
            self.current = ((self.current << 2) & self.mask) | nuc;
        }
        self.pos += 1;
        Some(result)
    }

    /// Exact number of k-mers still to be yielded.
    ///
    /// One k-mer is emitted for each value of `pos` from its current value up to
    /// and including `max_pos`, so the count is known without iterating. This
    /// lets consumers such as `collect` reserve the right capacity up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = if self.pos > self.max_pos {
            0
        } else {
            self.max_pos - self.pos + 1
        };
        (remaining, Some(remaining))
    }
}
// ── helpers ───────────────────────────────────────────────────────────────────
fn complement(base: u8) -> u8 {
@@ -755,4 +810,121 @@ mod tests {
sk.set_minimizer_pos(3);
assert_eq!(sk.to_ascii(), ascii);
}
// ── iter_kmers ────────────────────────────────────────────────────────────
#[test]
fn iter_kmers_count() {
    // A sequence of n nucleotides must yield exactly n - k + 1 kmers.
    let ascii = b"ACGTACGTACGT";
    let sk = SuperKmer::from_ascii(ascii);
    for k in [1usize, 3, 4, 5, 8, 12] {
        let expected = ascii.len() - k + 1;
        let n = sk.iter_kmers(k).count();
        assert_eq!(n, expected, "count mismatch for k={k}");
    }
}
#[test]
fn iter_kmers_first_is_kmer_0() {
    // Whatever k is, the first yielded kmer is the one starting at position 0.
    let ascii = b"ACGTACGT";
    let sk = SuperKmer::from_ascii(ascii);
    for k in 1..=ascii.len() {
        let expected = sk.kmer(0, k).unwrap();
        let first = sk.iter_kmers(k).next().unwrap();
        assert_eq!(first, expected, "k={k}");
    }
}
#[test]
fn iter_kmers_matches_kmer_at_each_position() {
    // The i-th yielded kmer must equal the kmer extracted directly at position i.
    let ascii = b"ACGTACGTACGT";
    let sk = SuperKmer::from_ascii(ascii);
    let k = 4;
    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
    assert_eq!(kmers.len(), ascii.len() - k + 1);
    for (i, km) in kmers.into_iter().enumerate() {
        assert_eq!(km, sk.kmer(i, k).unwrap(), "mismatch at pos {i}");
    }
}
#[test]
fn iter_kmers_single_when_seql_eq_k() {
    // With k equal to the sequence length there is exactly one kmer: the whole sequence.
    let ascii = b"ACGTACGT";
    let sk = SuperKmer::from_ascii(ascii);
    let k = ascii.len();
    let whole = sk.kmer(0, k).unwrap();
    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
    assert_eq!(kmers.len(), 1);
    assert_eq!(kmers[0], whole);
}
#[test]
fn iter_kmers_two_when_seql_eq_k_plus_one() {
    // With k one less than the sequence length there are exactly two kmers.
    let ascii = b"ACGTACGT";
    let sk = SuperKmer::from_ascii(ascii);
    let k = ascii.len() - 1;
    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
    assert_eq!(kmers.len(), 2);
    for (i, km) in kmers.iter().enumerate() {
        assert_eq!(*km, sk.kmer(i, k).unwrap());
    }
}
#[test]
fn iter_kmers_all_k_values() {
    // Exhaustive check: for each k from 1 to the sequence length, the iterator
    // must agree with kmer(i, k) at every position.
    let ascii = b"ACGTACGTACGT";
    let sk = SuperKmer::from_ascii(ascii);
    let seql = ascii.len();
    for k in 1..=seql {
        let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
        assert_eq!(kmers.len(), seql - k + 1, "k={k}");
        for (i, km) in kmers.into_iter().enumerate() {
            assert_eq!(km, sk.kmer(i, k).unwrap(), "k={k}, pos={i}");
        }
    }
}
#[test]
fn iter_kmers_crosses_byte_boundary() {
    // Four nucleotides are packed per byte, so the steps 3→4 and 7→8 move the
    // window onto a new byte; check the kmers on both sides of each step.
    let ascii = b"ACGTACGTACGT";
    let sk = SuperKmer::from_ascii(ascii);
    let k = 3;
    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
    for boundary in [3usize, 4, 7, 8] {
        let next = boundary + 1;
        if next >= kmers.len() {
            continue;
        }
        assert_eq!(
            kmers[boundary],
            sk.kmer(boundary, k).unwrap(),
            "pos={boundary}"
        );
        assert_eq!(kmers[next], sk.kmer(next, k).unwrap(), "pos={}", next);
    }
}
#[test]
fn iter_kmers_k1_yields_all_nucleotides() {
    // With k = 1 every nucleotide is its own kmer.
    let ascii = b"ACGT";
    let sk = SuperKmer::from_ascii(ascii);
    let kmers: Vec<Kmer> = sk.iter_kmers(1).collect();
    assert_eq!(kmers.len(), 4);
    for (i, km) in kmers.into_iter().enumerate() {
        assert_eq!(km, sk.kmer(i, 1).unwrap(), "pos={i}");
    }
}
#[test]
fn iter_kmers_long_sequence() {
    // Same position-by-position check on a sequence built by `make_seq(20)`, with k = 7.
    let ascii = make_seq(20);
    let sk = SuperKmer::from_ascii(&ascii);
    let k = 7;
    let kmers: Vec<Kmer> = sk.iter_kmers(k).collect();
    assert_eq!(kmers.len(), ascii.len() - k + 1);
    for (i, km) in kmers.into_iter().enumerate() {
        assert_eq!(km, sk.kmer(i, k).unwrap(), "pos={i}");
    }
}
}