refactor: optimize unitig iteration and graph traversal

Switches unitig processing to a lazy, fallible `try_for_each_unitig` API across partitioner layers, reducing intermediate allocations and enabling proper error propagation. Refactors de Bruijn graph traversal into a two-pass algorithm with explicit node flags, named constants, and diagnostic logging. Introduces parallel chain processing and staged performance profiling for the unitig command, and adds a memory-efficient `FromIterator` implementation for packed nucleotide sequences.
This commit is contained in:
Eric Coissac
2026-06-05 18:31:52 +02:00
parent ea2c594c86
commit 27088ab810
6 changed files with 511 additions and 274 deletions
+23
View File
@@ -115,6 +115,29 @@ impl PackedSeq {
}
Self::new(count_to_tail(seql), seq.into_boxed_slice())
}
}
impl FromIterator<u8> for PackedSeq {
    /// Collect a stream of 2-bit nucleotide codes (0=A…3=T) into a `PackedSeq`.
    ///
    /// Nucleotides are packed four per byte as they arrive, the first one of
    /// each group occupying the two most significant bits. Only the two low
    /// bits of every input value are kept. The output buffer is pre-sized from
    /// the lower bound of the iterator's `size_hint`, so no intermediate
    /// unpacked `Vec` of nucleotides is built.
    ///
    /// The iterator is expected to yield at least one nucleotide; this is only
    /// checked in debug builds.
    fn from_iter<I: IntoIterator<Item = u8>>(iter: I) -> Self {
        let nucleotides = iter.into_iter();
        // Reserve for at least one nucleotide, even when the hint is zero.
        let expected = nucleotides.size_hint().0.max(1);
        let mut packed: Vec<u8> = Vec::with_capacity(byte_len(expected));
        let mut total = 0usize;

        for (idx, nuc) in nucleotides.enumerate() {
            // Position inside the current byte: 0 is the high pair, 3 the low pair.
            let slot = idx % 4;
            let bits = (nuc & 0b11) << (6 - 2 * slot);
            if slot == 0 {
                // Every fourth nucleotide opens a fresh byte.
                packed.push(bits);
            } else {
                // A byte was necessarily opened by the preceding slot-0 nucleotide.
                *packed.last_mut().unwrap() |= bits;
            }
            total = idx + 1;
        }

        debug_assert!(total >= 1, "PackedSeq requires at least one nucleotide");
        Self::new(count_to_tail(total), packed.into_boxed_slice())
    }
}
impl PackedSeq {
/// Write ASCII nucleotides into `writer`. Zero allocation.
pub fn write_ascii<W: Write>(&self, writer: &mut W) -> io::Result<()> {