feat: add merge command to consolidate k-mer indexes

Introduces a new `merge` CLI subcommand and underlying implementation to consolidate multiple pre-indexed k-mer indexes into a single output. Adds `append_column` methods to persistent bit and int matrices to enable incremental genome column expansion without rebuilding the MPHF. Includes new error variants for index readiness and configuration mismatches, adds a `--force` flag to the index command, and updates documentation and navigation structure accordingly.
This commit is contained in:
Eric Coissac
2026-05-21 05:53:55 +02:00
parent bfa436ad15
commit e1d59fde54
17 changed files with 799 additions and 8 deletions
+20
View File
@@ -101,6 +101,26 @@ impl PersistentBitMatrix {
}
}
// ── Column append ─────────────────────────────────────────────────────────────
impl PersistentBitMatrix {
    /// Extend an on-disk bit matrix with one additional column.
    ///
    /// The matrix metadata (`meta.json`) is loaded from `dir` to learn the
    /// number of slots `n` and the current column count `n_cols`. A new column
    /// file (`col_{n_cols:06}.pbiv`, as produced by `col_path`) is then built
    /// with bit `slot` set to `value_of(slot)` for every `slot` in `0..n`.
    ///
    /// The column count in the metadata is bumped and written back only after
    /// the column builder has been closed successfully, so an error while
    /// building the column returns early without touching `meta.json`.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error raised while loading the metadata, creating or
    /// closing the column builder, or saving the updated metadata.
    pub fn append_column(dir: &Path, value_of: impl Fn(usize) -> bool) -> io::Result<()> {
        let mut meta = MatrixMeta::load(dir)?;
        let slot_count = meta.n;

        // The new column takes the next free index, i.e. the current count.
        let column_file = col_path(dir, meta.n_cols);
        let mut builder = PersistentBitVecBuilder::new(slot_count, &column_file)?;
        (0..slot_count).for_each(|slot| {
            builder.set(slot, value_of(slot));
        });
        builder.close()?;

        // Publish the new column by recording it in the metadata.
        meta.n_cols += 1;
        meta.save(dir)
    }
}
/// Enumerate every index pair `(i, j)` with `i < j < n`.
///
/// Pairs are produced in row-major order: all pairs starting with `0`, then
/// all pairs starting with `1`, and so on. For `n < 2` the result is empty.
fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
    let mut pairs = Vec::new();
    for row in 0..n {
        for col in (row + 1)..n {
            pairs.push((row, col));
        }
    }
    pairs
}
+20
View File
@@ -203,6 +203,26 @@ where
m
}
// ── Column append ─────────────────────────────────────────────────────────────
impl PersistentCompactIntMatrix {
    /// Extend an on-disk compact integer matrix with one additional column.
    ///
    /// The matrix metadata (`meta.json`) is loaded from `dir` to learn the
    /// number of slots `n` and the current column count `n_cols`. A new column
    /// file (`col_{n_cols:06}.pciv`, as produced by `col_path`) is then built
    /// with entry `slot` set to `value_of(slot)` for every `slot` in `0..n`.
    ///
    /// The column count in the metadata is bumped and written back only after
    /// the column builder has been closed successfully, so an error while
    /// building the column returns early without touching `meta.json`.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error raised while loading the metadata, creating or
    /// closing the column builder, or saving the updated metadata.
    pub fn append_column(dir: &Path, value_of: impl Fn(usize) -> u32) -> io::Result<()> {
        let mut meta = MatrixMeta::load(dir)?;
        let slot_count = meta.n;

        // The new column takes the next free index, i.e. the current count.
        let column_file = col_path(dir, meta.n_cols);
        let mut builder = PersistentCompactIntVecBuilder::new(slot_count, &column_file)?;
        (0..slot_count).for_each(|slot| {
            builder.set(slot, value_of(slot));
        });
        builder.close()?;

        // Publish the new column by recording it in the metadata.
        meta.n_cols += 1;
        meta.save(dir)
    }
}
// ── Trait impls ───────────────────────────────────────────────────────────────
use crate::traits::{ColumnWeights, CountPartials};