refactor: centralize k-mer config and introduce packed sequences
Centralize k-mer and minimizer configuration using a thread-safe global module, and replace manual bit-packing with a memory-efficient `PackedSeq` type. Refactor core sequence and k-mer types to use compile-time length enforcement and centralized hashing. Introduce a new De Bruijn graph implementation with compact node encoding and traversal iterators. Update I/O, partitioning, and builder modules to align with the new architecture, and add the `xxhash-rust` dependency.
This commit is contained in:
@@ -7,3 +7,6 @@ edition = "2024"
|
||||
obikseq = { path = "../obikseq" }
|
||||
obikrope = { path = "../obikrope" }
|
||||
lazy_static = "1.5.0"
|
||||
|
||||
[dev-dependencies]
|
||||
obikseq = { path = "../obikseq", features = ["test-utils"] }
|
||||
|
||||
@@ -21,7 +21,6 @@ pub(crate) static LN_CARD_ROT5: LazyLock<[f64; 1024]> =
|
||||
pub(crate) static LN_CARD_ROT6: LazyLock<[f64; 4096]> =
|
||||
LazyLock::new(|| build_log_class_size::<4096>(&NORMK6));
|
||||
|
||||
|
||||
/// Natural logarithm with the convention `ln0(0.0) == 0.0`.
///
/// Returns `0.0` when `x` compares equal to zero and `x.ln()` for every other input.
fn ln0(x: f64) -> f64 {
|
||||
// The zero guard avoids the `-inf` that `f64::ln` would return for a zero input.
if x == 0.0 { 0.0 } else { x.ln() }
|
||||
}
|
||||
@@ -47,7 +46,7 @@ fn build_normalized_kmer<const N: usize>() -> [u64; N] {
|
||||
for i in 0..N {
|
||||
let la = (i as u64) << shift;
|
||||
let ra = i as u64;
|
||||
let rc_ra = Kmer::from_raw(la).revcomp(k).raw() >> shift;
|
||||
let rc_ra = Kmer::from_raw(la).revcomp().raw() >> shift;
|
||||
let circ = normalize_circular(ra, k);
|
||||
let circ_rc = normalize_circular(rc_ra, k);
|
||||
result[i] = circ.min(circ_rc);
|
||||
@@ -107,12 +106,10 @@ pub(crate) const K_MAX: usize = 32;
|
||||
pub(crate) const WS_MAX: usize = 6;
|
||||
|
||||
/// n·ln(n), with n_log_n[0] = 0. Indexed by n = 0..=K_MAX.
|
||||
pub(crate) static N_LOG_N: LazyLock<[f64; K_MAX + 1]> =
|
||||
LazyLock::new(|| build_n_log_n());
|
||||
pub(crate) static N_LOG_N: LazyLock<[f64; K_MAX + 1]> = LazyLock::new(|| build_n_log_n());
|
||||
|
||||
/// H_max[k][ws]: maximum entropy for kmer length k and word size ws.
|
||||
pub(crate) static EMAX: LazyLock<[[f64; WS_MAX + 1]; K_MAX + 1]> =
|
||||
LazyLock::new(|| build_emax());
|
||||
pub(crate) static EMAX: LazyLock<[[f64; WS_MAX + 1]; K_MAX + 1]> = LazyLock::new(|| build_emax());
|
||||
|
||||
/// ln(k − ws + 1): log of the number of ws-words in a kmer of length k.
|
||||
pub(crate) static LOG_NWORDS: LazyLock<[[f64; WS_MAX + 1]; K_MAX + 1]> =
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
//! | super-kmer length = 256| k |
|
||||
|
||||
use obikrope::{ForwardCursor, Rope, RopeCursor};
|
||||
use obikseq::kmer::CanonicalKmer;
|
||||
use obikseq::RoutableSuperKmer;
|
||||
use obikseq::kmer::Minimizer;
|
||||
|
||||
use crate::rolling_stat::RollingStat;
|
||||
use crate::scratch::SuperKmerScratch;
|
||||
@@ -26,11 +26,10 @@ use crate::scratch::SuperKmerScratch;
|
||||
pub struct SuperKmerIter<'a> {
|
||||
cursor: ForwardCursor<'a>,
|
||||
k: usize,
|
||||
m: usize,
|
||||
theta: f64,
|
||||
scratch: SuperKmerScratch,
|
||||
stat: RollingStat,
|
||||
prev_min: Option<CanonicalKmer>,
|
||||
prev_min: Option<Minimizer>,
|
||||
prev_min_pos: usize,
|
||||
}
|
||||
|
||||
@@ -41,14 +40,13 @@ impl<'a> SuperKmerIter<'a> {
|
||||
/// - `m`: minimizer size (1 < m < k); not an argument of the updated `new` — it is read from `obikseq::params`
|
||||
/// - `level_max`: maximum sub-word size for entropy (1–6)
|
||||
/// - `theta`: entropy threshold; k-mers with score ≤ theta are rejected
|
||||
pub fn new(rope: &'a Rope, k: usize, m: usize, level_max: usize, theta: f64) -> Self {
|
||||
pub fn new(rope: &'a Rope, k: usize, level_max: usize, theta: f64) -> Self {
|
||||
Self {
|
||||
cursor: rope.fw_cursor(),
|
||||
k,
|
||||
m,
|
||||
theta,
|
||||
scratch: SuperKmerScratch::new(),
|
||||
stat: RollingStat::new(k, m, level_max),
|
||||
stat: RollingStat::new(level_max),
|
||||
prev_min: None,
|
||||
prev_min_pos: 0,
|
||||
}
|
||||
@@ -66,7 +64,7 @@ impl<'a> SuperKmerIter<'a> {
|
||||
return None;
|
||||
}
|
||||
self.prev_min?;
|
||||
Some(self.scratch.emit(self.prev_min_pos, self.m))
|
||||
Some(self.scratch.emit(self.prev_min_pos))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,26 +147,31 @@ mod tests {
|
||||
use super::*;
|
||||
use obikrope::Rope;
|
||||
|
||||
/// Test helper: sets the global k-mer parameters to `k = K` and `m = 5`.
///
/// Call it before building a `SuperKmerIter`; the `RollingStat` it creates reads
/// `params::k()` and `params::m()` instead of taking them as arguments.
fn setup() {
|
||||
obikseq::params::set_k(K);
|
||||
obikseq::params::set_m(5);
|
||||
}
|
||||
|
||||
/// Test helper: builds a `Rope` holding a copy of `data`, pushed as a single slice.
fn make_rope(data: &[u8]) -> Rope {
|
||||
// NOTE(review): the meaning of the `None` argument to `Rope::new` is not visible here — confirm.
let mut r = Rope::new(None);
|
||||
// `to_vec` copies the bytes so the rope owns its data.
r.push(data.to_vec());
|
||||
r
|
||||
}
|
||||
|
||||
fn run_nofilter(data: &[u8], k: usize, m: usize) -> Vec<Vec<u8>> {
|
||||
fn run_nofilter(data: &[u8], k: usize) -> Vec<Vec<u8>> {
|
||||
let rope = make_rope(data);
|
||||
SuperKmerIter::new(&rope, k, m, 1, 0.0)
|
||||
SuperKmerIter::new(&rope, k, 1, 0.0)
|
||||
.map(|rsk| rsk.superkmer().to_ascii())
|
||||
.collect()
|
||||
}
|
||||
|
||||
// k=11, m=5 — the project's minimum values (k ∈ [11,31])
|
||||
const K: usize = 11;
|
||||
const M: usize = 5;
|
||||
|
||||
#[test]
|
||||
fn single_segment_one_superkmer() {
|
||||
let out = run_nofilter(b"ACGTACGTACGTACGTACGT\x00", K, M);
|
||||
setup();
|
||||
let out = run_nofilter(b"ACGTACGTACGTACGTACGT\x00", K);
|
||||
assert!(!out.is_empty());
|
||||
let total: Vec<u8> = out.into_iter().flatten().collect();
|
||||
assert!(total.len() >= K);
|
||||
@@ -176,29 +179,33 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn segment_shorter_than_k_emits_nothing() {
|
||||
let out = run_nofilter(b"ACGTACGT\x00", K, M);
|
||||
setup();
|
||||
let out = run_nofilter(b"ACGTACGT\x00", K);
|
||||
assert_eq!(out, Vec::<Vec<u8>>::new());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_input_emits_nothing() {
|
||||
let out = run_nofilter(b"", K, M);
|
||||
setup();
|
||||
let out = run_nofilter(b"", K);
|
||||
assert_eq!(out, Vec::<Vec<u8>>::new());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn two_segments_both_emitted() {
|
||||
let out = run_nofilter(b"ACGTACGTACGTACGT\x00TGCATGCATGCATGCA\x00", K, M);
|
||||
setup();
|
||||
let out = run_nofilter(b"ACGTACGTACGTACGT\x00TGCATGCATGCATGCA\x00", K);
|
||||
assert!(!out.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn low_complexity_kmer_is_rejected() {
|
||||
let out_pass = run_nofilter(b"AAAAAAAAAAAACGTACGTACGT\x00", K, M);
|
||||
setup();
|
||||
let out_pass = run_nofilter(b"AAAAAAAAAAAACGTACGTACGT\x00", K);
|
||||
assert!(!out_pass.is_empty());
|
||||
|
||||
let rope = make_rope(b"AAAAAAAAAAAAAAAAAAAA\x00");
|
||||
let out_reject: Vec<Vec<u8>> = SuperKmerIter::new(&rope, K, M, 6, 0.9)
|
||||
let out_reject: Vec<Vec<u8>> = SuperKmerIter::new(&rope, K, 6, 0.9)
|
||||
.map(|rsk| rsk.superkmer().to_ascii())
|
||||
.collect();
|
||||
assert!(out_reject.is_empty());
|
||||
@@ -206,12 +213,13 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn multi_slice_rope() {
|
||||
setup();
|
||||
let data = b"ACGTACGTACGTACGTACGT\x00";
|
||||
let mid = data.len() / 2;
|
||||
let mut rope = Rope::new(None);
|
||||
rope.push(data[..mid].to_vec());
|
||||
rope.push(data[mid..].to_vec());
|
||||
let out: Vec<Vec<u8>> = SuperKmerIter::new(&rope, K, M, 1, 0.0)
|
||||
let out: Vec<Vec<u8>> = SuperKmerIter::new(&rope, K, 1, 0.0)
|
||||
.map(|rsk| rsk.superkmer().to_ascii())
|
||||
.collect();
|
||||
assert!(!out.is_empty());
|
||||
@@ -219,8 +227,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn yields_minimizer_value() {
|
||||
setup();
|
||||
let rope = make_rope(b"ACGTACGTACGTACGTACGT\x00");
|
||||
let results: Vec<RoutableSuperKmer> = SuperKmerIter::new(&rope, K, M, 1, 0.0).collect();
|
||||
let results: Vec<RoutableSuperKmer> = SuperKmerIter::new(&rope, K, 1, 0.0).collect();
|
||||
assert!(!results.is_empty());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,6 +19,11 @@ use obikrope::Rope;
|
||||
use obikseq::RoutableSuperKmer;
|
||||
|
||||
/// Collect all super-kmers from a normalised rope chunk.
|
||||
pub fn build_superkmers(rope: Rope, k: usize, m: usize, level_max: usize, theta: f64) -> Vec<RoutableSuperKmer> {
|
||||
SuperKmerIter::new(&rope, k, m, level_max, theta).collect()
|
||||
pub fn build_superkmers(
|
||||
rope: Rope,
|
||||
k: usize,
|
||||
level_max: usize,
|
||||
theta: f64,
|
||||
) -> Vec<RoutableSuperKmer> {
|
||||
SuperKmerIter::new(&rope, k, level_max, theta).collect()
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use obikseq::kmer::{CanonicalKmer, Kmer};
|
||||
use obikseq::kmer::{Minimizer, hash_kmer};
|
||||
use obikseq::params;
|
||||
|
||||
use crate::encoding::encode_nuc;
|
||||
use crate::entropy_table::{WS_MAX, emax, entropy_norm_kmer, ln_class_size, log_nwords, n_log_n};
|
||||
@@ -13,22 +14,7 @@ struct MmerItem {
|
||||
hash: u64,
|
||||
}
|
||||
|
||||
/// Bijective hash used to randomise the minimizer ordering.
|
||||
/// The XOR seed (2^64/φ) breaks the mix64 fixed point at 0,
|
||||
/// preventing poly-A/T kmers (canonical = 0) from always winning.
|
||||
///
/// `canonical` is the raw canonical m-mer value; the return value is its 64-bit hash.
#[inline(always)]
|
||||
fn hash_mmer(canonical: u64) -> u64 {
|
||||
// Seed first: without this XOR an input of 0 would stay 0 through every round below.
let x = canonical ^ 0x9e3779b97f4a7c15;
|
||||
// Round 1: xorshift by 30, then multiply (wrapping) by an odd constant.
let x = x ^ (x >> 30);
|
||||
let x = x.wrapping_mul(0xbf58476d1ce4e5b9);
|
||||
// Round 2: xorshift by 27, then multiply (wrapping) by a second odd constant.
let x = x ^ (x >> 27);
|
||||
let x = x.wrapping_mul(0x94d049bb133111eb);
|
||||
// Final xorshift; the shifts and multipliers match the SplitMix64 finalizer.
x ^ (x >> 31)
|
||||
}
|
||||
|
||||
pub struct RollingStat {
|
||||
k: usize,
|
||||
m: usize,
|
||||
entropy_max_k: usize,
|
||||
rolling_k: u64,
|
||||
rolling_rck: u64,
|
||||
@@ -53,15 +39,15 @@ pub struct RollingStat {
|
||||
}
|
||||
|
||||
impl RollingStat {
|
||||
pub fn new(k: usize, m: usize, entropy_max_k: usize) -> Self {
|
||||
pub fn new(entropy_max_k: usize) -> Self {
|
||||
let k = params::k();
|
||||
let m = params::m();
|
||||
Self {
|
||||
k,
|
||||
m,
|
||||
entropy_max_k,
|
||||
rolling_k: 0,
|
||||
rolling_rck: 0,
|
||||
k_mask: (!0) >> (64 - k * 2),
|
||||
m_mask: (!0) >> (64 - m * 2),
|
||||
k_mask: (!0u64) >> (64 - k * 2),
|
||||
m_mask: (!0u64) >> (64 - m * 2),
|
||||
received: 0,
|
||||
k1q: std::collections::VecDeque::with_capacity(k),
|
||||
k2q: std::collections::VecDeque::with_capacity(k - 1),
|
||||
@@ -85,12 +71,24 @@ impl RollingStat {
|
||||
self.rolling_k = 0;
|
||||
self.rolling_rck = 0;
|
||||
self.received = 0;
|
||||
for &i in &self.k1q { self.k1c[i as usize] = 0; }
|
||||
for &i in &self.k2q { self.k2c[i as usize] = 0; }
|
||||
for &i in &self.k3q { self.k3c[i as usize] = 0; }
|
||||
for &i in &self.k4q { self.k4c[i as usize] = 0; }
|
||||
for &i in &self.k5q { self.k5c[i as usize] = 0; }
|
||||
for &i in &self.k6q { self.k6c[i as usize] = 0; }
|
||||
for &i in &self.k1q {
|
||||
self.k1c[i as usize] = 0;
|
||||
}
|
||||
for &i in &self.k2q {
|
||||
self.k2c[i as usize] = 0;
|
||||
}
|
||||
for &i in &self.k3q {
|
||||
self.k3c[i as usize] = 0;
|
||||
}
|
||||
for &i in &self.k4q {
|
||||
self.k4c[i as usize] = 0;
|
||||
}
|
||||
for &i in &self.k5q {
|
||||
self.k5c[i as usize] = 0;
|
||||
}
|
||||
for &i in &self.k6q {
|
||||
self.k6c[i as usize] = 0;
|
||||
}
|
||||
self.k1q.clear();
|
||||
self.k2q.clear();
|
||||
self.k3q.clear();
|
||||
@@ -127,12 +125,15 @@ impl RollingStat {
|
||||
}
|
||||
|
||||
pub fn push(&mut self, nuc: u8) {
|
||||
let k = params::k();
|
||||
let m = params::m();
|
||||
|
||||
let bnuc = encode_nuc(nuc);
|
||||
let cnuc = bnuc ^ 3;
|
||||
|
||||
self.rolling_k = ((self.rolling_k << 2) | (bnuc as u64)) & self.k_mask;
|
||||
self.rolling_rck =
|
||||
((self.rolling_rck >> 2) | ((cnuc as u64) << ((self.k - 1) * 2))) & self.k_mask;
|
||||
((self.rolling_rck >> 2) | ((cnuc as u64) << ((k - 1) * 2))) & self.k_mask;
|
||||
|
||||
let canonical_k1 = entropy_norm_kmer(self.rolling_k & 3, 1, false);
|
||||
let canonical_k2 = entropy_norm_kmer(self.rolling_k & 15, 2, false);
|
||||
@@ -143,30 +144,37 @@ impl RollingStat {
|
||||
|
||||
self.received += 1;
|
||||
|
||||
if self.received >= self.m {
|
||||
if self.received >= m {
|
||||
let possible_canonical_m =
|
||||
(self.rolling_k & self.m_mask).min(self.rolling_rck >> ((self.k - self.m) * 2));
|
||||
let possible_hash_m = hash_mmer(possible_canonical_m);
|
||||
let possible_pos_m = self.received - self.m;
|
||||
(self.rolling_k & self.m_mask).min(self.rolling_rck >> ((k - m) * 2));
|
||||
let possible_hash_m = hash_kmer(possible_canonical_m << 64 - m * 2);
|
||||
let possible_pos_m = self.received - m;
|
||||
|
||||
while self.minimier.back().map_or(false, |it| it.hash >= possible_hash_m) {
|
||||
while self
|
||||
.minimier
|
||||
.back()
|
||||
.map_or(false, |it| it.hash >= possible_hash_m)
|
||||
{
|
||||
self.minimier.pop_back();
|
||||
}
|
||||
self.minimier
|
||||
.push_back(MmerItem { position: possible_pos_m, canonical: possible_canonical_m, hash: possible_hash_m });
|
||||
self.minimier.push_back(MmerItem {
|
||||
position: possible_pos_m,
|
||||
canonical: possible_canonical_m,
|
||||
hash: possible_hash_m,
|
||||
});
|
||||
|
||||
if self.received > self.k {
|
||||
if self.received > k {
|
||||
while self
|
||||
.minimier
|
||||
.front()
|
||||
.map_or(false, |it| it.position + self.k < self.received)
|
||||
.map_or(false, |it| it.position + k < self.received)
|
||||
{
|
||||
self.minimier.pop_front();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.received > self.k {
|
||||
if self.received > k {
|
||||
let old1 = self.k1q.pop_front().unwrap();
|
||||
let f1 = self.k1c[old1 as usize];
|
||||
Self::update_sums_decrement(&mut self.sum_f_log_f, &mut self.sum_f_log_s, 1, old1, f1);
|
||||
@@ -199,37 +207,73 @@ impl RollingStat {
|
||||
}
|
||||
|
||||
let g1 = self.k1c[canonical_k1 as usize];
|
||||
Self::update_sums_increment(&mut self.sum_f_log_f, &mut self.sum_f_log_s, 1, canonical_k1, g1);
|
||||
Self::update_sums_increment(
|
||||
&mut self.sum_f_log_f,
|
||||
&mut self.sum_f_log_s,
|
||||
1,
|
||||
canonical_k1,
|
||||
g1,
|
||||
);
|
||||
self.k1c[canonical_k1 as usize] += 1;
|
||||
self.k1q.push_back(canonical_k1);
|
||||
|
||||
if self.received >= 2 {
|
||||
let g2 = self.k2c[canonical_k2 as usize];
|
||||
Self::update_sums_increment(&mut self.sum_f_log_f, &mut self.sum_f_log_s, 2, canonical_k2, g2);
|
||||
Self::update_sums_increment(
|
||||
&mut self.sum_f_log_f,
|
||||
&mut self.sum_f_log_s,
|
||||
2,
|
||||
canonical_k2,
|
||||
g2,
|
||||
);
|
||||
self.k2c[canonical_k2 as usize] += 1;
|
||||
self.k2q.push_back(canonical_k2);
|
||||
|
||||
if self.received >= 3 {
|
||||
let g3 = self.k3c[canonical_k3 as usize];
|
||||
Self::update_sums_increment(&mut self.sum_f_log_f, &mut self.sum_f_log_s, 3, canonical_k3, g3);
|
||||
Self::update_sums_increment(
|
||||
&mut self.sum_f_log_f,
|
||||
&mut self.sum_f_log_s,
|
||||
3,
|
||||
canonical_k3,
|
||||
g3,
|
||||
);
|
||||
self.k3c[canonical_k3 as usize] += 1;
|
||||
self.k3q.push_back(canonical_k3);
|
||||
|
||||
if self.received >= 4 {
|
||||
let g4 = self.k4c[canonical_k4 as usize];
|
||||
Self::update_sums_increment(&mut self.sum_f_log_f, &mut self.sum_f_log_s, 4, canonical_k4, g4);
|
||||
Self::update_sums_increment(
|
||||
&mut self.sum_f_log_f,
|
||||
&mut self.sum_f_log_s,
|
||||
4,
|
||||
canonical_k4,
|
||||
g4,
|
||||
);
|
||||
self.k4c[canonical_k4 as usize] += 1;
|
||||
self.k4q.push_back(canonical_k4);
|
||||
|
||||
if self.received >= 5 {
|
||||
let g5 = self.k5c[canonical_k5 as usize];
|
||||
Self::update_sums_increment(&mut self.sum_f_log_f, &mut self.sum_f_log_s, 5, canonical_k5, g5);
|
||||
Self::update_sums_increment(
|
||||
&mut self.sum_f_log_f,
|
||||
&mut self.sum_f_log_s,
|
||||
5,
|
||||
canonical_k5,
|
||||
g5,
|
||||
);
|
||||
self.k5c[canonical_k5 as usize] += 1;
|
||||
self.k5q.push_back(canonical_k5);
|
||||
|
||||
if self.received >= 6 {
|
||||
let g6 = self.k6c[canonical_k6 as usize];
|
||||
Self::update_sums_increment(&mut self.sum_f_log_f, &mut self.sum_f_log_s, 6, canonical_k6, g6);
|
||||
Self::update_sums_increment(
|
||||
&mut self.sum_f_log_f,
|
||||
&mut self.sum_f_log_s,
|
||||
6,
|
||||
canonical_k6,
|
||||
g6,
|
||||
);
|
||||
self.k6c[canonical_k6 as usize] += 1;
|
||||
self.k6q.push_back(canonical_k6);
|
||||
}
|
||||
@@ -240,31 +284,7 @@ impl RollingStat {
|
||||
}
|
||||
|
||||
pub fn ready(&self) -> bool {
|
||||
self.received >= self.k
|
||||
}
|
||||
|
||||
pub fn kmer(&self) -> Option<Kmer> {
|
||||
if self.ready() {
|
||||
Some(Kmer::from_raw_right(self.rolling_k, self.k))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn revcomp_kmer(&self) -> Option<Kmer> {
|
||||
if self.ready() {
|
||||
Some(Kmer::from_raw_right(self.rolling_rck, self.k))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn canonical_kmer(&self) -> Option<Kmer> {
|
||||
if self.ready() {
|
||||
Some(Kmer::from_raw_right(self.rolling_k.min(self.rolling_rck), self.k))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
self.received >= params::k()
|
||||
}
|
||||
|
||||
pub fn minimizer_position(&self) -> Option<usize> {
|
||||
@@ -283,22 +303,22 @@ impl RollingStat {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn canonical_minimizer(&self) -> Option<CanonicalKmer> {
|
||||
self.canonical_minimizer_raw().map(|raw| {
|
||||
CanonicalKmer::from_raw_unchecked(Kmer::from_raw_right(raw, self.m).raw())
|
||||
})
|
||||
pub fn canonical_minimizer(&self) -> Option<Minimizer> {
|
||||
self.canonical_minimizer_raw()
|
||||
.map(|raw| Minimizer::from_raw_unchecked(raw << (64 - params::m() * 2)))
|
||||
}
|
||||
|
||||
pub fn entropy(&self, order: usize) -> Option<f64> {
|
||||
if !self.ready() {
|
||||
return None;
|
||||
}
|
||||
let em = emax(self.k, order);
|
||||
let k = params::k();
|
||||
let em = emax(k, order);
|
||||
if em <= 0.0 {
|
||||
return Some(1.0);
|
||||
}
|
||||
let nwords = self.k - order + 1;
|
||||
let log_nw = log_nwords(self.k, order);
|
||||
let nwords = k - order + 1;
|
||||
let log_nw = log_nwords(k, order);
|
||||
let nw_f = nwords as f64;
|
||||
let h_corr = log_nw + (self.sum_f_log_s[order] - self.sum_f_log_f[order]) / nw_f;
|
||||
Some((h_corr / em).max(0.0))
|
||||
|
||||
@@ -56,14 +56,14 @@ impl SuperKmerScratch {
|
||||
///
|
||||
/// The heap allocation (`Box<[u8]>`) is exactly sized to the sequence.
|
||||
/// Resets the buffer to empty afterward.
|
||||
pub fn emit(&mut self, min_pos: usize, m: usize) -> RoutableSuperKmer {
|
||||
pub fn emit(&mut self, min_pos: usize) -> RoutableSuperKmer {
|
||||
let seql = self.len;
|
||||
debug_assert!(seql >= 1 && seql <= MAX_SUPERKMER_LEN);
|
||||
let n = (seql + 3) / 4;
|
||||
let seq: Box<[u8]> = self.buf[..n].into();
|
||||
self.buf[..n].fill(0);
|
||||
self.len = 0;
|
||||
RoutableSuperKmer::build(min_pos, m, seql as u8, seq)
|
||||
RoutableSuperKmer::build(min_pos, seql, seq)
|
||||
}
|
||||
/// Discard all accumulated nucleotides without producing a [`SuperKmer`].
|
||||
pub fn reset(&mut self) {
|
||||
|
||||
Reference in New Issue
Block a user