feat: add configurable block sizes and in-place reindex command

Propagate a configurable block size (`block_bits`) through index and layer construction to control unitig chunking and tune the memory/performance trade-off. Introduce an in-place `reindex` command and library method to convert indexes between exact and approximate evidence formats. Add validation to reject merging indexes with mismatched evidence types, and update parallel k-mer counting to use `AtomicUsize` for thread-safe aggregation. This commit also includes CLI argument parsing, metadata persistence, and updated tests.
This commit is contained in:
Eric Coissac
2026-05-23 12:50:03 +02:00
parent 876bc0127f
commit bc51cd9861
21 changed files with 318 additions and 51 deletions
+2 -1
View File
@@ -96,6 +96,7 @@ impl KmerPartition {
filters: &[Box<dyn KmerFilter>],
mode: MergeMode,
n_genomes: usize,
block_bits: u8,
) -> SKResult<()> {
let src_index_dir = src.part_dir(i).join(INDEX_SUBDIR);
if !src_index_dir.exists() {
@@ -145,7 +146,7 @@ impl KmerPartition {
uw.close()?;
drop(g);
Layer::<()>::build(&dst_layer_dir).map_err(olm_to_sk)?;
Layer::<()>::build(&dst_layer_dir, block_bits).map_err(olm_to_sk)?;
let dst_mphf = MphfLayer::open(&dst_layer_dir).map_err(olm_to_sk)?;
// ── Prepare matrix builders (one column per genome) ───────────────────