refactor: optimize unitig iteration and graph traversal
Switches unitig processing to a lazy, fallible `try_for_each_unitig` API across partitioner layers, reducing intermediate allocations and enabling proper error propagation. Refactors de Bruijn graph traversal into a two-pass algorithm with explicit node flags, named constants, and diagnostic logging. Introduces parallel chain processing and staged performance profiling for the unitig command, and adds a memory-efficient `FromIterator` implementation for packed nucleotide sequences.
This commit is contained in:
@@ -115,6 +115,29 @@ impl PackedSeq {
|
||||
}
|
||||
Self::new(count_to_tail(seql), seq.into_boxed_slice())
|
||||
}
|
||||
}
|
||||
|
||||
impl FromIterator<u8> for PackedSeq {
|
||||
/// Build from a stream of 2-bit nucleotide values (0=A…3=T).
|
||||
/// Packs on the fly — one allocation, no intermediate Vec.
|
||||
fn from_iter<I: IntoIterator<Item = u8>>(iter: I) -> Self {
|
||||
let iter = iter.into_iter();
|
||||
let (lower, _) = iter.size_hint();
|
||||
let mut seq: Vec<u8> = Vec::with_capacity(byte_len(lower.max(1)));
|
||||
let mut seql = 0usize;
|
||||
for nuc in iter {
|
||||
if seql % 4 == 0 {
|
||||
seq.push(0);
|
||||
}
|
||||
*seq.last_mut().unwrap() |= (nuc & 0b11) << (6 - 2 * (seql % 4));
|
||||
seql += 1;
|
||||
}
|
||||
debug_assert!(seql >= 1, "PackedSeq requires at least one nucleotide");
|
||||
Self::new(count_to_tail(seql), seq.into_boxed_slice())
|
||||
}
|
||||
}
|
||||
|
||||
impl PackedSeq {
|
||||
|
||||
/// Write ASCII nucleotides into `writer`. Zero allocation.
|
||||
pub fn write_ascii<W: Write>(&self, writer: &mut W) -> io::Result<()> {
|
||||
|
||||
Reference in New Issue
Block a user