refactor: replace explicit collect with Unitig::from_nucleotides

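Introduce a thread-local buffer that materializes each unitig's nucleotide iterator into a contiguous slice. `GraphDeBruijn::try_for_each_unitig` (debruijn layer) now fills this buffer and passes the callback a `&[u8]` instead of a `UnitigNucIter`. Its callers in the index, merge, and rebuild layers are updated to build each `Unitig` directly via `Unitig::from_nucleotides()` instead of explicitly collecting the iterator. This eliminates intermediate allocations, since the buffer is reused across calls on the same thread, and aligns the test code with the new approach.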
This commit is contained in:
Eric Coissac
2026-06-13 10:09:07 +02:00
parent 8b563d0804
commit 5f98d2ef96
5 changed files with 21 additions and 15 deletions
+11 -2
View File
@@ -3,6 +3,7 @@ use hashbrown::HashMap;
use obikseq::k; use obikseq::k;
use obikseq::{CanonicalKmer, Sequence}; use obikseq::{CanonicalKmer, Sequence};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use std::cell::RefCell;
use std::fmt; use std::fmt;
use std::sync::atomic::{AtomicU8, Ordering}; use std::sync::atomic::{AtomicU8, Ordering};
use xxhash_rust::xxh3::Xxh3Builder; use xxhash_rust::xxh3::Xxh3Builder;
@@ -450,18 +451,26 @@ impl GraphDeBruijn {
pub fn try_for_each_unitig<E, F>(&self, f: F) -> Result<(), E> pub fn try_for_each_unitig<E, F>(&self, f: F) -> Result<(), E>
where where
E: Send, E: Send,
F: FnMut(UnitigNucIter<'_>) -> Result<(), E> + Send, F: FnMut(&[u8]) -> Result<(), E> + Send,
{ {
thread_local! {
static BUF: std::cell::RefCell<Vec<u8>> = RefCell::new(Vec::with_capacity(4096));
}
let error = std::sync::Mutex::new(None::<E>); let error = std::sync::Mutex::new(None::<E>);
let f = std::sync::Mutex::new(f); let f = std::sync::Mutex::new(f);
self.for_each_unitig(|iter| { self.for_each_unitig(|iter| {
if error.lock().unwrap().is_some() { if error.lock().unwrap().is_some() {
return; return;
} }
if let Err(e) = f.lock().unwrap()(iter) { BUF.with(|buf| {
let mut buf = buf.borrow_mut();
buf.clear();
buf.extend(iter);
if let Err(e) = f.lock().unwrap()(&buf) {
*error.lock().unwrap() = Some(e); *error.lock().unwrap() = Some(e);
} }
}); });
});
error.into_inner().unwrap().map_or(Ok(()), Err) error.into_inner().unwrap().map_or(Ok(()), Err)
} }
+2 -2
View File
@@ -24,8 +24,8 @@ fn canonical_kmers(seq: &[u8]) -> Vec<CanonicalKmer> {
fn collect_unitigs(g: &GraphDeBruijn) -> Vec<Unitig> { fn collect_unitigs(g: &GraphDeBruijn) -> Vec<Unitig> {
let mut unitigs = Vec::new(); let mut unitigs = Vec::new();
g.try_for_each_unitig(|nuc_iter| -> Result<(), std::convert::Infallible> { g.try_for_each_unitig(|nucs| -> Result<(), std::convert::Infallible> {
unitigs.push(nuc_iter.collect()); unitigs.push(Unitig::from_nucleotides(nucs));
Ok(()) Ok(())
}) })
.unwrap(); .unwrap();
+2 -3
View File
@@ -107,9 +107,8 @@ impl KmerPartition {
fs::create_dir_all(&layer_dir)?; fs::create_dir_all(&layer_dir)?;
let mut uw = Layer::<()>::unitig_writer(&layer_dir).map_err(olm_to_sk)?; let mut uw = Layer::<()>::unitig_writer(&layer_dir).map_err(olm_to_sk)?;
g.try_for_each_unitig(|nuc_iter| { g.try_for_each_unitig(|nucs| {
let unitig: obikseq::unitig::Unitig = nuc_iter.collect(); uw.write(&obikseq::unitig::Unitig::from_nucleotides(nucs))
uw.write(&unitig)
})?; })?;
uw.close()?; uw.close()?;
+2 -3
View File
@@ -307,9 +307,8 @@ impl KmerPartition {
g.compute_degrees_and_mark_starts(); g.compute_degrees_and_mark_starts();
fs::create_dir_all(&new_layer_dir)?; fs::create_dir_all(&new_layer_dir)?;
let mut uw = Layer::<()>::unitig_writer(&new_layer_dir).map_err(olm_to_sk)?; let mut uw = Layer::<()>::unitig_writer(&new_layer_dir).map_err(olm_to_sk)?;
g.try_for_each_unitig(|nuc_iter| { g.try_for_each_unitig(|nucs| {
let unitig: obikseq::unitig::Unitig = nuc_iter.collect(); uw.write(&obikseq::unitig::Unitig::from_nucleotides(nucs))
uw.write(&unitig)
})?; })?;
uw.close()?; uw.close()?;
let n = g.len(); let n = g.len();
+2 -3
View File
@@ -168,9 +168,8 @@ impl KmerPartition {
fs::create_dir_all(&dst_layer_dir)?; fs::create_dir_all(&dst_layer_dir)?;
let mut uw = Layer::<()>::unitig_writer(&dst_layer_dir).map_err(olm_to_sk)?; let mut uw = Layer::<()>::unitig_writer(&dst_layer_dir).map_err(olm_to_sk)?;
g.try_for_each_unitig(|nuc_iter| { g.try_for_each_unitig(|nucs| {
let unitig: obikseq::unitig::Unitig = nuc_iter.collect(); uw.write(&obikseq::unitig::Unitig::from_nucleotides(nucs))
uw.write(&unitig)
})?; })?;
uw.close()?; uw.close()?;
drop(g); drop(g);