feat: add CLI command to export indexed k-mers to CSV
This change introduces a new `dump` subcommand that exports all indexed k-mers to a CSV stream. The implementation spans multiple crates, adding core export logic to `obikindex` and partition iteration to `obikpartitionner`. The command supports a `--force-presence` flag to output binary presence/absence data instead of stored counts, and includes necessary module registrations and structural updates across the codebase.
This commit is contained in:
@@ -0,0 +1,50 @@
|
||||
use std::io::Write;
|
||||
|
||||
use crate::error::{OKIError, OKIResult};
|
||||
use crate::index::KmerIndex;
|
||||
|
||||
impl KmerIndex {
|
||||
/// Write a CSV table of all indexed kmers to `out`.
|
||||
///
|
||||
/// Columns: `kmer`, then one column per genome (in index order).
|
||||
/// Values are counts (u32) when `use_counts = true`, otherwise 0/1.
|
||||
///
|
||||
/// `force_presence` overrides `with_counts`: even if the index stores counts,
|
||||
/// the output uses 0/1 presence columns.
|
||||
///
|
||||
/// The caller must have set the global kmer length (`obikseq::set_k`) before
|
||||
/// calling this method.
|
||||
pub fn dump<W: Write>(&self, out: &mut W, force_presence: bool) -> OKIResult<()> {
|
||||
|
||||
let genomes = &self.meta.genomes;
|
||||
let use_counts = self.meta.config.with_counts && !force_presence;
|
||||
let n_genomes = genomes.len().max(1);
|
||||
|
||||
// ── Header ────────────────────────────────────────────────────────────
|
||||
write!(out, "kmer")?;
|
||||
for g in genomes {
|
||||
write!(out, ",{g}")?;
|
||||
}
|
||||
writeln!(out)?;
|
||||
|
||||
// ── Rows ──────────────────────────────────────────────────────────────
|
||||
let n = self.n_partitions();
|
||||
for i in 0..n {
|
||||
self.partition
|
||||
.iter_partition_kmers(i, use_counts, n_genomes, |kmer, row| {
|
||||
let seq = String::from_utf8(kmer.to_ascii())
|
||||
.unwrap_or_else(|_| "?".repeat(self.kmer_size()));
|
||||
// write is infallible inside a closure — propagate via a flag if needed
|
||||
let _ = write!(out, "{seq}");
|
||||
for &v in row.iter() {
|
||||
let _ = write!(out, ",{v}");
|
||||
}
|
||||
let _ = writeln!(out);
|
||||
})
|
||||
.map_err(OKIError::Partition)?;
|
||||
}
|
||||
|
||||
out.flush()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
pub mod error;
|
||||
pub mod meta;
|
||||
pub mod state;
|
||||
mod dump;
|
||||
mod index;
|
||||
mod merge;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user