feat: add merge command to consolidate k-mer indexes
Introduces a new `merge` CLI subcommand and underlying implementation to consolidate multiple pre-indexed k-mer indexes into a single output. Adds `append_column` methods to persistent bit and int matrices to enable incremental genome column expansion without rebuilding the MPHF. Includes new error variants for index readiness and configuration mismatches, adds a `--force` flag to the index command, and updates documentation and navigation structure accordingly.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use obicompactvec::{
|
||||
@@ -83,6 +84,22 @@ impl Layer<()> {
|
||||
/// Build a layer under `out_dir` without attaching any per-entry data.
///
/// This is a thin wrapper around `MphfLayer::build`: the callback handed to it
/// ignores both of its arguments and always returns `Ok(())`.
///
/// Returns the value produced by `MphfLayer::build` unchanged (presumably the
/// number of k-mers indexed in the layer — TODO confirm against
/// `MphfLayer::build`).
pub fn build(out_dir: &Path) -> OLMResult<usize> {
|
||||
// No-op callback: both arguments are discarded.
MphfLayer::build(out_dir, &mut |_, _| Ok(()))
|
||||
}
|
||||
|
||||
/// Create a presence matrix for a set-membership layer (first merge).
|
||||
///
|
||||
/// All `n_kmers` slots are set to `true`: every kmer in this layer belongs
|
||||
/// to genome_0, so genome_0 is present at every slot.
|
||||
pub fn init_presence_matrix(layer_dir: &Path, n_kmers: usize) -> OLMResult<()> {
|
||||
let presence_dir = layer_dir.join(PRESENCE_DIR);
|
||||
fs::create_dir_all(&presence_dir).map_err(OLMError::Io)?;
|
||||
let mut mb = PersistentBitMatrixBuilder::new(n_kmers, &presence_dir).map_err(OLMError::Io)?;
|
||||
let mut col = mb.add_col().map_err(OLMError::Io)?;
|
||||
for slot in 0..n_kmers {
|
||||
col.set(slot, true);
|
||||
}
|
||||
col.close().map_err(OLMError::Io)?;
|
||||
mb.close().map_err(OLMError::Io)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Mode 2 — count matrix (1 column per layer) ────────────────────────────────
|
||||
@@ -111,9 +128,31 @@ impl Layer<PersistentCompactIntMatrix> {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Mode 2 — count matrix column append ──────────────────────────────────────
|
||||
|
||||
impl Layer<PersistentCompactIntMatrix> {
|
||||
/// Append a genome column to an existing count matrix.
|
||||
pub fn append_genome_column(
|
||||
layer_dir: &Path,
|
||||
value_of: impl Fn(usize) -> u32,
|
||||
) -> OLMResult<()> {
|
||||
PersistentCompactIntMatrix::append_column(&layer_dir.join(COUNTS_DIR), value_of)
|
||||
.map_err(OLMError::Io)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Mode 3 — presence/absence matrix (1 column per genome) ───────────────────
|
||||
|
||||
impl Layer<PersistentBitMatrix> {
|
||||
/// Append a genome column to an existing presence matrix.
|
||||
pub fn append_genome_column(
|
||||
layer_dir: &Path,
|
||||
value_of: impl Fn(usize) -> bool,
|
||||
) -> OLMResult<()> {
|
||||
PersistentBitMatrix::append_column(&layer_dir.join(PRESENCE_DIR), value_of)
|
||||
.map_err(OLMError::Io)
|
||||
}
|
||||
|
||||
pub fn build_presence(
|
||||
out_dir: &Path,
|
||||
n_genomes: usize,
|
||||
|
||||
Reference in New Issue
Block a user