feat: add selective k-mer filtering to dump and rebuild commands
Add the `obidebruinj` dependency and introduce `FilterArgs`, a set of CLI arguments for ingroup/outgroup predicates and count/fraction thresholds. Extend `GroupFilterParams` to support outgroup filtering, and integrate the filter collection into the `KmerIndex::dump` method and the `rebuild` command. This enables selective k-mer filtering during index operations and CSV exports.
This commit is contained in:
+12
-10
@@ -2,6 +2,7 @@ use std::io::Write;
|
||||
|
||||
use crate::error::{OKIError, OKIResult};
|
||||
use crate::index::KmerIndex;
|
||||
use obikpartitionner::KmerFilter;
|
||||
|
||||
impl KmerIndex {
|
||||
/// Write a CSV table of all indexed kmers to `out`.
|
||||
@@ -14,8 +15,13 @@ impl KmerIndex {
|
||||
///
|
||||
/// The caller must have set the global kmer length (`obikseq::set_k`) before
|
||||
/// calling this method.
|
||||
pub fn dump<W: Write>(&self, out: &mut W, force_presence: bool, debug: bool) -> OKIResult<()> {
|
||||
|
||||
pub fn dump<W: Write>(
|
||||
&self,
|
||||
out: &mut W,
|
||||
force_presence: bool,
|
||||
debug: bool,
|
||||
filters: &[Box<dyn KmerFilter>],
|
||||
) -> OKIResult<()> {
|
||||
let genomes = &self.meta.genomes;
|
||||
let use_counts = self.meta.config.with_counts && !force_presence;
|
||||
let n_genomes = genomes.len().max(1);
|
||||
@@ -36,25 +42,21 @@ impl KmerIndex {
|
||||
for i in 0..n {
|
||||
if debug {
|
||||
self.partition
|
||||
.iter_partition_kmers_located(i, use_counts, n_genomes, &[], |part, layer, kmer, row| {
|
||||
.iter_partition_kmers_located(i, use_counts, n_genomes, filters, |part, layer, kmer, row| {
|
||||
let seq = String::from_utf8(kmer.to_ascii())
|
||||
.unwrap_or_else(|_| "?".repeat(kmer_size));
|
||||
let _ = write!(out, "{part},{layer},{seq}");
|
||||
for &v in row.iter() {
|
||||
let _ = write!(out, ",{v}");
|
||||
}
|
||||
for &v in row.iter() { let _ = write!(out, ",{v}"); }
|
||||
let _ = writeln!(out);
|
||||
})
|
||||
.map_err(OKIError::Partition)?;
|
||||
} else {
|
||||
self.partition
|
||||
.iter_partition_kmers(i, use_counts, n_genomes, &[], |kmer, row| {
|
||||
.iter_partition_kmers(i, use_counts, n_genomes, filters, |kmer, row| {
|
||||
let seq = String::from_utf8(kmer.to_ascii())
|
||||
.unwrap_or_else(|_| "?".repeat(kmer_size));
|
||||
let _ = write!(out, "{seq}");
|
||||
for &v in row.iter() {
|
||||
let _ = write!(out, ",{v}");
|
||||
}
|
||||
for &v in row.iter() { let _ = write!(out, ",{v}"); }
|
||||
let _ = writeln!(out);
|
||||
})
|
||||
.map_err(OKIError::Partition)?;
|
||||
|
||||
Reference in New Issue
Block a user