feat: add configurable block sizes and in-place reindex command

Propagate configurable block size (`block_bits`) through index and layer construction to control unitig chunking and optimize memory/performance trade-offs. Introduce an in-place `reindex` command and library method to convert indices between exact and approximate evidence formats. Add validation to reject merging indexes with mismatched evidence types, and update parallel kmer counting to use `AtomicUsize` for thread-safe aggregation. Includes CLI argument parsing, metadata persistence, and updated tests.
This commit is contained in:
Eric Coissac
2026-05-23 12:50:03 +02:00
parent 876bc0127f
commit bc51cd9861
21 changed files with 318 additions and 51 deletions
+6 -1
View File
@@ -21,7 +21,7 @@ use crate::error::{SKError, SKResult};
const MAGIC: [u8; 4] = *b"UIX3";
/// Default block granularity used by [`UnitigFileWriter::create`].
pub const DEFAULT_BLOCK_BITS: u8 = 6;
pub const DEFAULT_BLOCK_BITS: u8 = 0;
fn idx_path(path: &Path) -> PathBuf {
crate::append_path_suffix(path, ".idx")
@@ -325,6 +325,11 @@ impl UnitigFileReader {
})
}
/// Iterate all unitigs sequentially. Works without `.idx` (sequential open).
///
/// Thin wrapper that forwards to `iter_chunks_sequential` and returns its
/// iterator unchanged. Each item is a `(usize, Unitig)` pair; the `usize` is
/// presumably the unitig's position in the file — TODO confirm against
/// `iter_chunks_sequential`. The returned iterator borrows `self`.
pub fn iter_unitigs(&self) -> impl Iterator<Item = (usize, Unitig)> + '_ {
self.iter_chunks_sequential()
}
pub fn iter_kmers(&self) -> impl Iterator<Item = Kmer> + '_ {
self.iter_chunks_sequential()
.flat_map(|(_, u)| u.into_kmers())