feat: enforce canonical k-mer representation throughout the codebase
Refactor core types to consistently use `CanonicalKmer` (the lexicographically smaller of a k-mer and its reverse complement) as the canonical representation, ensuring deterministic behavior in graph traversal (unitig decomposition), neighbor resolution (`unique_neighbor` with `[CanonicalKmer; 4]` input), and scatter output generation. Introduce `RoutableSuperKmer`, add `.seq_hash()` support, and fix type syntax errors in unitig extraction methods and deduplication tests. Update all k-mer construction to use canonical-aware APIs, including unsafe unchecked constructors for performance-critical paths.
This commit is contained in:
@@ -16,6 +16,7 @@
|
||||
//! | super-kmer length = 256| k |
|
||||
|
||||
use obikrope::{ForwardCursor, Rope, RopeCursor};
|
||||
use obikseq::kmer::CanonicalKmer;
|
||||
use obikseq::RoutableSuperKmer;
|
||||
|
||||
use crate::rolling_stat::RollingStat;
|
||||
@@ -29,7 +30,7 @@ pub struct SuperKmerIter<'a> {
|
||||
theta: f64,
|
||||
scratch: SuperKmerScratch,
|
||||
stat: RollingStat,
|
||||
prev_min: Option<u64>,
|
||||
prev_min: Option<CanonicalKmer>,
|
||||
prev_min_pos: usize,
|
||||
}
|
||||
|
||||
@@ -107,7 +108,7 @@ impl Iterator for SuperKmerIter<'_> {
|
||||
continue;
|
||||
}
|
||||
|
||||
let min = self.stat.canonical_minimizer_raw().unwrap_or(0);
|
||||
let min = self.stat.canonical_minimizer().unwrap(); // always Some after ready()
|
||||
let min_pos = self.stat.minimizer_position().unwrap_or(0);
|
||||
|
||||
// ── 2. Minimizer change check ─────────────────────────────────────
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use obikseq::kmer::Kmer;
|
||||
use obikseq::kmer::{CanonicalKmer, Kmer};
|
||||
|
||||
use crate::encoding::encode_nuc;
|
||||
use crate::entropy_table::{WS_MAX, emax, entropy_norm_kmer, ln_class_size, log_nwords, n_log_n};
|
||||
@@ -283,6 +283,12 @@ impl RollingStat {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the current canonical minimizer as a typed [`CanonicalKmer`].
///
/// This is a thin wrapper over `canonical_minimizer_raw()`: it yields `None`
/// exactly when that call yields `None`, and otherwise converts the raw value
/// into a `CanonicalKmer`.
///
/// NOTE(review): `from_raw_unchecked` presumably skips canonicality
/// validation and relies on `canonical_minimizer_raw()` already returning the
/// canonical form — confirm against `obikseq::kmer`.
pub fn canonical_minimizer(&self) -> Option<CanonicalKmer> {
|
||||
self.canonical_minimizer_raw().map(|raw| {
|
||||
// Build a `Kmer` from `raw` with length `self.m`, then hand its raw
// representation to the unchecked constructor.
// NOTE(review): presumably `from_raw_right` re-aligns a right-aligned
// value to the `Kmer` internal layout — TODO confirm.
CanonicalKmer::from_raw_unchecked(Kmer::from_raw_right(raw, self.m).raw())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn entropy(&self, order: usize) -> Option<f64> {
|
||||
if !self.ready() {
|
||||
return None;
|
||||
|
||||
Reference in New Issue
Block a user