chore: add logging infrastructure to merge routine
Adds comprehensive logging for source metadata, merge modes, and forced approximation detection. Introduces `format_evidence` and `is_trivial` helpers to format `IndexMode` variants and identify single-genome presence indices. The core merge algorithm remains unmodified, with all changes focused on enhanced runtime observability.
This commit is contained in:
@@ -63,8 +63,36 @@ impl KmerIndex {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Choose base: largest source in the output evidence mode ───────────
|
||||
// ── Log source characteristics and choose base ────────────────────────
|
||||
let mode_str = if mode == MergeMode::Presence { "presence" } else { "count" };
|
||||
info!(
|
||||
"merge: {} source(s), smer-size={}, mode={}",
|
||||
sources.len(), sources[0].kmer_size(), mode_str,
|
||||
);
|
||||
for (i, src) in sources.iter().enumerate() {
|
||||
let genome_str = if src.meta.genomes.len() == 1 { "mono-genome".to_string() }
|
||||
else { format!("{} genomes", src.meta.genomes.len()) };
|
||||
let trivial_str = if is_trivial(src, mode) { " [trivial: no data approximation]" } else { "" };
|
||||
info!(
|
||||
" [{}] {} — {}, {}, {}{}",
|
||||
i, src.root_path.display(),
|
||||
format_evidence(&src.meta.config.evidence),
|
||||
genome_str, mode_str, trivial_str,
|
||||
);
|
||||
}
|
||||
|
||||
let base_idx = choose_base(sources, mode);
|
||||
let needs_approx = sources.iter().any(|src| {
|
||||
!is_trivial(src, mode)
|
||||
&& matches!(src.meta.config.evidence, IndexMode::Approx { .. } | IndexMode::Hybrid { .. })
|
||||
});
|
||||
info!(
|
||||
"output evidence: {} ({}base: [{}] {})",
|
||||
format_evidence(&sources[base_idx].meta.config.evidence),
|
||||
if needs_approx { "forced approx — " } else { "" },
|
||||
base_idx, sources[base_idx].root_path.display(),
|
||||
);
|
||||
|
||||
let mut ordered: Vec<&KmerIndex> = Vec::with_capacity(sources.len());
|
||||
ordered.push(sources[base_idx]);
|
||||
for (i, &src) in sources.iter().enumerate() {
|
||||
@@ -272,6 +300,14 @@ fn partition_bar(n: u64) -> ProgressBar {
|
||||
pb
|
||||
}
|
||||
|
||||
fn format_evidence(ev: &IndexMode) -> String {
|
||||
match ev {
|
||||
IndexMode::Exact => "exact".to_string(),
|
||||
IndexMode::Approx { b, z } => format!("approx (b={b}, z={z})"),
|
||||
IndexMode::Hybrid { b, z } => format!("hybrid (b={b}, z={z})"),
|
||||
}
|
||||
}
|
||||
|
||||
/// A source is "trivial" if its presence/count values carry no approximation:
|
||||
/// single-genome presence index (SetMembership — all values are 1 by construction).
|
||||
fn is_trivial(src: &KmerIndex, mode: MergeMode) -> bool {
|
||||
|
||||
Reference in New Issue
Block a user