feat: add --head and --presence-threshold to dump and distance

Introduces `--head N` to the `dump` command, which stops iteration once N rows have been written, and `--presence-threshold N` to the `distance` command for Jaccard filtering on count indexes. Filter defaults now adapt based on explicit ingroup/outgroup declarations. Fixes a Rust type mismatch in the unitig closure and updates the partition iteration callbacks to return `bool` so that iteration can terminate early. Documentation is updated accordingly.
This commit is contained in:
Eric Coissac
2026-06-09 09:47:44 +02:00
parent 650eea43b6
commit d626d42ec7
7 changed files with 105 additions and 29 deletions
+12 -4
View File
@@ -20,6 +20,7 @@ impl KmerIndex {
out: &mut W,
force_presence: bool,
debug: bool,
head: Option<usize>,
filters: &[Box<dyn KmerFilter>],
) -> OKIResult<()> {
let genomes = &self.meta.genomes;
@@ -39,8 +40,10 @@ impl KmerIndex {
// ── Rows ──────────────────────────────────────────────────────────────
let n = self.n_partitions();
let mut remaining = head.unwrap_or(usize::MAX);
for i in 0..n {
if debug {
if remaining == 0 { break; }
let cont = if debug {
self.partition
.iter_partition_kmers_located(i, use_counts, n_genomes, filters, |part, layer, kmer, row| {
let seq = String::from_utf8(kmer.to_ascii())
@@ -48,8 +51,10 @@ impl KmerIndex {
let _ = write!(out, "{part},{layer},{seq}");
for &v in row.iter() { let _ = write!(out, ",{v}"); }
let _ = writeln!(out);
remaining -= 1;
remaining > 0
})
.map_err(OKIError::Partition)?;
.map_err(OKIError::Partition)?
} else {
self.partition
.iter_partition_kmers(i, use_counts, n_genomes, filters, |kmer, row| {
@@ -58,9 +63,12 @@ impl KmerIndex {
let _ = write!(out, "{seq}");
for &v in row.iter() { let _ = write!(out, ",{v}"); }
let _ = writeln!(out);
remaining -= 1;
remaining > 0
})
.map_err(OKIError::Partition)?;
}
.map_err(OKIError::Partition)?
};
if !cont { break; }
}
out.flush()?;