feat: add configurable block sizes and in-place reindex command
Propagate a configurable block size (`block_bits`) through index and layer construction to control unitig chunking and tune the memory/performance trade-off. Introduce an in-place `reindex` command and a matching library method to convert indexes between the exact and approximate evidence formats. Add validation that rejects merging indexes with mismatched evidence types, and update parallel k-mer counting to use `AtomicUsize` for thread-safe aggregation. Includes CLI argument parsing, metadata persistence, and updated tests.
This commit is contained in:
@@ -45,6 +45,7 @@ impl KmerPartition {
|
||||
max_ab: Option<u32>,
|
||||
with_counts: bool,
|
||||
evidence: &EvidenceKind,
|
||||
block_bits: u8,
|
||||
) -> Result<usize, SKError> {
|
||||
let part_dir = self.part_dir(i);
|
||||
let dedup_path = part_dir.join("dereplicated.skmer.zst");
|
||||
@@ -109,7 +110,7 @@ impl KmerPartition {
|
||||
uw.close()?;
|
||||
|
||||
if with_counts {
|
||||
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, |kmer| {
|
||||
Layer::<PersistentCompactIntMatrix>::build(&layer_dir, block_bits, |kmer| {
|
||||
match (&mphf1_opt, &counts1_opt) {
|
||||
(Some(mphf), Some(counts)) => counts.get(mphf.index(&kmer.raw())),
|
||||
_ => 1,
|
||||
@@ -117,7 +118,7 @@ impl KmerPartition {
|
||||
})
|
||||
.map_err(olm_to_sk)?;
|
||||
} else {
|
||||
Layer::<()>::build(&layer_dir).map_err(olm_to_sk)?;
|
||||
Layer::<()>::build(&layer_dir, block_bits).map_err(olm_to_sk)?;
|
||||
}
|
||||
|
||||
// For approximate evidence: replace the exact evidence bundle with a
|
||||
|
||||
@@ -161,6 +161,7 @@ impl KmerPartition {
|
||||
sources: &[(&KmerPartition, usize)],
|
||||
mode: MergeMode,
|
||||
n_dst_genomes: usize,
|
||||
block_bits: u8,
|
||||
) -> SKResult<()> {
|
||||
let dst_index_dir = self.part_dir(i).join(INDEX_SUBDIR);
|
||||
if !dst_index_dir.exists() {
|
||||
@@ -216,7 +217,7 @@ impl KmerPartition {
|
||||
uw.write(&unitig)?;
|
||||
}
|
||||
uw.close()?;
|
||||
Layer::<()>::build(&new_layer_dir).map_err(olm_to_sk)?;
|
||||
Layer::<()>::build(&new_layer_dir, block_bits).map_err(olm_to_sk)?;
|
||||
}
|
||||
drop(g);
|
||||
|
||||
|
||||
@@ -96,6 +96,7 @@ impl KmerPartition {
|
||||
filters: &[Box<dyn KmerFilter>],
|
||||
mode: MergeMode,
|
||||
n_genomes: usize,
|
||||
block_bits: u8,
|
||||
) -> SKResult<()> {
|
||||
let src_index_dir = src.part_dir(i).join(INDEX_SUBDIR);
|
||||
if !src_index_dir.exists() {
|
||||
@@ -145,7 +146,7 @@ impl KmerPartition {
|
||||
uw.close()?;
|
||||
drop(g);
|
||||
|
||||
Layer::<()>::build(&dst_layer_dir).map_err(olm_to_sk)?;
|
||||
Layer::<()>::build(&dst_layer_dir, block_bits).map_err(olm_to_sk)?;
|
||||
let dst_mphf = MphfLayer::open(&dst_layer_dir).map_err(olm_to_sk)?;
|
||||
|
||||
// ── Prepare matrix builders (one column per genome) ───────────────────
|
||||
|
||||
Reference in New Issue
Block a user