From 5f98d2ef96b349986964985a26aa1bd0e646bb81 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sat, 13 Jun 2026 10:09:07 +0200 Subject: [PATCH] refactor: replace explicit collect with Unitig::from_nucleotides Introduce a thread-local buffer to materialize nucleotide iterators into contiguous slices. Update `try_for_each_unitig` across the debruijn, index, merge, and rebuild layers to directly instantiate `Unitig` via `from_nucleotides()` instead of explicitly collecting iterators. This eliminates intermediate allocations and aligns test code with the new approach. --- src/obidebruinj/src/debruijn.rs | 17 +++++++++++++---- src/obidebruinj/src/tests/debruijn.rs | 4 ++-- src/obikpartitionner/src/index_layer.rs | 5 ++--- src/obikpartitionner/src/merge_layer.rs | 5 ++--- src/obikpartitionner/src/rebuild_layer.rs | 5 ++--- 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/obidebruinj/src/debruijn.rs b/src/obidebruinj/src/debruijn.rs index cebe17c..865567e 100644 --- a/src/obidebruinj/src/debruijn.rs +++ b/src/obidebruinj/src/debruijn.rs @@ -3,6 +3,7 @@ use hashbrown::HashMap; use obikseq::k; use obikseq::{CanonicalKmer, Sequence}; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use std::cell::RefCell; use std::fmt; use std::sync::atomic::{AtomicU8, Ordering}; use xxhash_rust::xxh3::Xxh3Builder; @@ -450,17 +451,25 @@ impl GraphDeBruijn { pub fn try_for_each_unitig(&self, f: F) -> Result<(), E> where E: Send, - F: FnMut(UnitigNucIter<'_>) -> Result<(), E> + Send, + F: FnMut(&[u8]) -> Result<(), E> + Send, { + thread_local! { + static BUF: std::cell::RefCell> = RefCell::new(Vec::with_capacity(4096)); + } let error = std::sync::Mutex::new(None::); let f = std::sync::Mutex::new(f); self.for_each_unitig(|iter| { if error.lock().unwrap().is_some() { return; } - if let Err(e) = f.lock().unwrap()(iter) { - *error.lock().unwrap() = Some(e); - } + BUF.with(|buf| { + let mut buf = buf.borrow_mut(); + buf.clear(); + buf.extend(iter); + if let Err(e) = f.lock().unwrap()(&buf) { + *error.lock().unwrap() = Some(e); + } + }); }); error.into_inner().unwrap().map_or(Ok(()), Err) } diff --git a/src/obidebruinj/src/tests/debruijn.rs b/src/obidebruinj/src/tests/debruijn.rs index 7b911b3..17f4b2f 100644 --- a/src/obidebruinj/src/tests/debruijn.rs +++ b/src/obidebruinj/src/tests/debruijn.rs @@ -24,8 +24,8 @@ fn canonical_kmers(seq: &[u8]) -> Vec { fn collect_unitigs(g: &GraphDeBruijn) -> Vec { let mut unitigs = Vec::new(); - g.try_for_each_unitig(|nuc_iter| -> Result<(), std::convert::Infallible> { - unitigs.push(nuc_iter.collect()); + g.try_for_each_unitig(|nucs| -> Result<(), std::convert::Infallible> { + unitigs.push(Unitig::from_nucleotides(nucs)); Ok(()) }) .unwrap(); diff --git a/src/obikpartitionner/src/index_layer.rs b/src/obikpartitionner/src/index_layer.rs index 1b097a0..acc8623 100644 --- a/src/obikpartitionner/src/index_layer.rs +++ b/src/obikpartitionner/src/index_layer.rs @@ -107,9 +107,8 @@ impl KmerPartition { fs::create_dir_all(&layer_dir)?; let mut uw = Layer::<()>::unitig_writer(&layer_dir).map_err(olm_to_sk)?; - g.try_for_each_unitig(|nuc_iter| { - let unitig: obikseq::unitig::Unitig = nuc_iter.collect(); - uw.write(&unitig) + g.try_for_each_unitig(|nucs| { + uw.write(&obikseq::unitig::Unitig::from_nucleotides(nucs)) })?; uw.close()?; diff --git a/src/obikpartitionner/src/merge_layer.rs b/src/obikpartitionner/src/merge_layer.rs index 2b67dd4..d38042a 100644 --- a/src/obikpartitionner/src/merge_layer.rs +++ b/src/obikpartitionner/src/merge_layer.rs @@ -307,9 +307,8 @@ impl KmerPartition { g.compute_degrees_and_mark_starts(); fs::create_dir_all(&new_layer_dir)?; let mut uw = Layer::<()>::unitig_writer(&new_layer_dir).map_err(olm_to_sk)?; - g.try_for_each_unitig(|nuc_iter| { - let unitig: obikseq::unitig::Unitig = nuc_iter.collect(); - uw.write(&unitig) + g.try_for_each_unitig(|nucs| { + uw.write(&obikseq::unitig::Unitig::from_nucleotides(nucs)) })?; uw.close()?; let n = g.len(); diff --git a/src/obikpartitionner/src/rebuild_layer.rs b/src/obikpartitionner/src/rebuild_layer.rs index 7c39246..1a4c238 100644 --- a/src/obikpartitionner/src/rebuild_layer.rs +++ b/src/obikpartitionner/src/rebuild_layer.rs @@ -168,9 +168,8 @@ impl KmerPartition { fs::create_dir_all(&dst_layer_dir)?; let mut uw = Layer::<()>::unitig_writer(&dst_layer_dir).map_err(olm_to_sk)?; - g.try_for_each_unitig(|nuc_iter| { - let unitig: obikseq::unitig::Unitig = nuc_iter.collect(); - uw.write(&unitig) + g.try_for_each_unitig(|nucs| { + uw.write(&obikseq::unitig::Unitig::from_nucleotides(nucs)) })?; uw.close()?; drop(g);