feat: enforce canonical k-mer representation throughout the codebase

Refactor core types to consistently use `CanonicalKMer` (lexicographically minimal of k-mer and its reverse complement) as the canonical representation, ensuring deterministic behavior in graph traversal (unitig decomposition), neighbor resolution (`unique_neighbor` with `[CanonicalKmer; 4]` input) and scatter output generation. Introduce `RoutableSuperKmer`, add `.seq_hash()` support, fix type syntax errors in unitig extraction methods and deduplication tests. Update all k-mer construction to use canonical-aware APIs, including unsafe unchecked constructors for performance-critical paths.
This commit is contained in:
Eric Coissac
2026-05-01 13:34:55 +02:00
parent 21ddbf1674
commit defeeb9460
12 changed files with 235 additions and 113 deletions
+3 -2
View File
@@ -16,6 +16,7 @@
//! | super-kmer length = 256| k |
use obikrope::{ForwardCursor, Rope, RopeCursor};
use obikseq::kmer::CanonicalKmer;
use obikseq::RoutableSuperKmer;
use crate::rolling_stat::RollingStat;
@@ -29,7 +30,7 @@ pub struct SuperKmerIter<'a> {
theta: f64,
scratch: SuperKmerScratch,
stat: RollingStat,
prev_min: Option<u64>,
prev_min: Option<CanonicalKmer>,
prev_min_pos: usize,
}
@@ -107,7 +108,7 @@ impl Iterator for SuperKmerIter<'_> {
continue;
}
let min = self.stat.canonical_minimizer_raw().unwrap_or(0);
let min = self.stat.canonical_minimizer().unwrap(); // always Some after ready()
let min_pos = self.stat.minimizer_position().unwrap_or(0);
// ── 2. Minimizer change check ─────────────────────────────────────
+7 -1
View File
@@ -1,4 +1,4 @@
use obikseq::kmer::Kmer;
use obikseq::kmer::{CanonicalKmer, Kmer};
use crate::encoding::encode_nuc;
use crate::entropy_table::{WS_MAX, emax, entropy_norm_kmer, ln_class_size, log_nwords, n_log_n};
@@ -283,6 +283,12 @@ impl RollingStat {
}
}
pub fn canonical_minimizer(&self) -> Option<CanonicalKmer> {
self.canonical_minimizer_raw().map(|raw| {
CanonicalKmer::from_raw_unchecked(Kmer::from_raw_right(raw, self.m).raw())
})
}
pub fn entropy(&self, order: usize) -> Option<f64> {
if !self.ready() {
return None;