feat: add configurable block sizes and in-place reindex command

Propagate a configurable block size (`block_bits`) through index and layer construction to control unitig chunking and optimize memory/performance trade-offs. Introduce an in-place `reindex` command and library method to convert indexes between exact and approximate evidence formats. Add validation to reject merging indexes with mismatched evidence types, and update parallel k-mer counting to use `AtomicUsize` for thread-safe aggregation. This commit also includes CLI argument parsing, metadata persistence, and updated tests.
This commit is contained in:
Eric Coissac
2026-05-23 12:50:03 +02:00
parent 876bc0127f
commit bc51cd9861
21 changed files with 318 additions and 51 deletions
+3 -2
View File
@@ -45,6 +45,7 @@ impl KmerPartition {
max_ab: Option<u32>,
with_counts: bool,
evidence: &EvidenceKind,
block_bits: u8,
) -> Result<usize, SKError> {
let part_dir = self.part_dir(i);
let dedup_path = part_dir.join("dereplicated.skmer.zst");
@@ -109,7 +110,7 @@ impl KmerPartition {
uw.close()?;
if with_counts {
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, |kmer| {
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, block_bits, |kmer| {
match (&mphf1_opt, &counts1_opt) {
(Some(mphf), Some(counts)) => counts.get(mphf.index(&kmer.raw())),
_ => 1,
@@ -117,7 +118,7 @@ impl KmerPartition {
})
.map_err(olm_to_sk)?;
} else {
Layer::<()>::build(&layer_dir).map_err(olm_to_sk)?;
Layer::<()>::build(&layer_dir, block_bits).map_err(olm_to_sk)?;
}
// For approximate evidence: replace the exact evidence bundle with a
+2 -1
View File
@@ -161,6 +161,7 @@ impl KmerPartition {
sources: &[(&KmerPartition, usize)],
mode: MergeMode,
n_dst_genomes: usize,
block_bits: u8,
) -> SKResult<()> {
let dst_index_dir = self.part_dir(i).join(INDEX_SUBDIR);
if !dst_index_dir.exists() {
@@ -216,7 +217,7 @@ impl KmerPartition {
uw.write(&unitig)?;
}
uw.close()?;
Layer::<()>::build(&new_layer_dir).map_err(olm_to_sk)?;
Layer::<()>::build(&new_layer_dir, block_bits).map_err(olm_to_sk)?;
}
drop(g);
+2 -1
View File
@@ -96,6 +96,7 @@ impl KmerPartition {
filters: &[Box<dyn KmerFilter>],
mode: MergeMode,
n_genomes: usize,
block_bits: u8,
) -> SKResult<()> {
let src_index_dir = src.part_dir(i).join(INDEX_SUBDIR);
if !src_index_dir.exists() {
@@ -145,7 +146,7 @@ impl KmerPartition {
uw.close()?;
drop(g);
Layer::<()>::build(&dst_layer_dir).map_err(olm_to_sk)?;
Layer::<()>::build(&dst_layer_dir, block_bits).map_err(olm_to_sk)?;
let dst_mphf = MphfLayer::open(&dst_layer_dir).map_err(olm_to_sk)?;
// ── Prepare matrix builders (one column per genome) ───────────────────