perf: optimize packed_seq sub() with direct bit-slice copying

Replaces per-nucleotide iteration with direct bit-slice copying via `bitvec`. This eliminates per-element decoding overhead and intermediate allocations by computing the target byte length, copying the packed bit range `[start*2, end*2)` directly into a pre-allocated buffer, and constructing the result in a single pass.
This commit is contained in:
Eric Coissac
2026-05-12 22:30:45 +08:00
parent ff75c9198d
commit 84ed752b78
+7 -2
View File
@@ -261,8 +261,13 @@ impl PackedSeq {
/// Extract nucleotides `[start, end)` as a new [`PackedSeq`]. Allocates.
///
/// The packed bit range `[start * 2, end * 2)` of `self.seq` (two bits per
/// nucleotide, viewed most-significant-bit first) is copied directly into a
/// freshly zeroed byte buffer, so no nucleotide is decoded individually and
/// no intermediate nucleotide vector is built.
///
/// # Panics
///
/// In debug builds, panics unless `start < end <= self.seql()`.
pub fn sub(&self, start: usize, end: usize) -> Self {
    debug_assert!(end > start && end <= self.seql());
    let seql = end - start;
    // Four 2-bit nucleotides fit in one byte; round up for a partial last byte.
    let n = (seql + 3) / 4;
    // Zero-initialised, so any bits past `seql * 2` in the last byte stay 0.
    let mut out = vec![0u8; n];
    let src = self.seq.view_bits::<Msb0>();
    let dst = out.view_bits_mut::<Msb0>();
    // The source range may start mid-byte (when `start % 4 != 0`); the
    // bit-level copy realigns it to bit 0 of the destination.
    dst[..seql * 2].copy_from_bitslice(&src[start * 2..end * 2]);
    // NOTE(review): `count_to_tail` presumably turns the nucleotide count into
    // the tail descriptor expected by `Self::new` — confirm against its definition.
    Self::new(count_to_tail(seql), out.into_boxed_slice())
}
/// Serialise one chunk to binary.