refactor: centralize k-mer filtering logic and add validation
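Refactor the shared `build_group_filter` to return a `Result<GroupQuorumFilter, String>` with explicit validation: fraction values must lie in [0.0, 1.0], each min fraction must not exceed its max, and the same min/max ordering is enforced for the ingroup and outgroup counts. `FilterArgs` now reports a validation failure on stderr and exits with status 1. The conditional defaults for `--min-frac` (1.0 when ingroup predicates are given) and `--max-outgroup-count` (0 when outgroup predicates are given) now apply only when no explicit quorum flag for that group is set, preventing silent configuration conflicts. Update documentation and MkDocs navigation to reflect the new centralized k-mer filtering system across the `rebuild`, `dump`, and `unitig` commands.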
This commit is contained in:
@@ -207,7 +207,7 @@ impl FilterArgs {
|
||||
std::process::exit(1);
|
||||
}))
|
||||
.collect();
|
||||
vec![Box::new(build_group_filter(
|
||||
let filter = build_group_filter(
|
||||
genomes,
|
||||
&ingroup_preds,
|
||||
&outgroup_preds,
|
||||
@@ -222,7 +222,11 @@ impl FilterArgs {
|
||||
min_outgroup_frac: self.min_outgroup_frac,
|
||||
max_outgroup_frac: self.max_outgroup_frac,
|
||||
},
|
||||
))]
|
||||
).unwrap_or_else(|e| {
|
||||
eprintln!("error in filter parameters: {e}");
|
||||
std::process::exit(1);
|
||||
});
|
||||
vec![Box::new(filter)]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -243,7 +247,7 @@ pub fn build_group_filter(
|
||||
ingroup_preds: &[MetaPred],
|
||||
outgroup_preds: &[MetaPred],
|
||||
p: GroupFilterParams,
|
||||
) -> GroupQuorumFilter {
|
||||
) -> Result<GroupQuorumFilter, String> {
|
||||
let (ingroup_idx, outgroup_idx) = if ingroup_preds.is_empty() && outgroup_preds.is_empty() {
|
||||
((0..genomes.len()).collect(), vec![])
|
||||
} else {
|
||||
@@ -260,20 +264,52 @@ pub fn build_group_filter(
|
||||
let in_size = ingroup_idx.len();
|
||||
let out_size = outgroup_idx.len();
|
||||
|
||||
let default_min_frac = if !ingroup_preds.is_empty() { 1.0 } else { 0.0 };
|
||||
let default_max_outgroup_count = if !outgroup_preds.is_empty() { 0 } else { out_size };
|
||||
let ingroup_quorum_explicit = p.min_count.is_some() || p.max_count.is_some()
|
||||
|| p.min_frac.is_some() || p.max_frac.is_some();
|
||||
let outgroup_quorum_explicit = p.min_outgroup_count.is_some() || p.max_outgroup_count.is_some()
|
||||
|| p.min_outgroup_frac.is_some() || p.max_outgroup_frac.is_some();
|
||||
|
||||
GroupQuorumFilter {
|
||||
let default_min_frac = if !ingroup_preds.is_empty() && !ingroup_quorum_explicit { 1.0 } else { 0.0 };
|
||||
let default_max_outgroup_count = if !outgroup_preds.is_empty() && !outgroup_quorum_explicit { 0 } else { out_size };
|
||||
|
||||
let min_count = p.min_count.unwrap_or(0);
|
||||
let max_count = p.max_count.unwrap_or(in_size);
|
||||
let min_frac = p.min_frac.unwrap_or(default_min_frac);
|
||||
let max_frac = p.max_frac.unwrap_or(1.0);
|
||||
let min_outgroup_count = p.min_outgroup_count.unwrap_or(0);
|
||||
let max_outgroup_count = p.max_outgroup_count.unwrap_or(default_max_outgroup_count);
|
||||
let min_outgroup_frac = p.min_outgroup_frac.unwrap_or(0.0);
|
||||
let max_outgroup_frac = p.max_outgroup_frac.unwrap_or(1.0);
|
||||
|
||||
for (v, lo, hi) in [
|
||||
("--min-frac/--max-frac", min_frac, max_frac),
|
||||
("--min-outgroup-frac/--max-outgroup-frac", min_outgroup_frac, max_outgroup_frac),
|
||||
] {
|
||||
if !(0.0..=1.0).contains(&lo) || !(0.0..=1.0).contains(&hi) {
|
||||
return Err(format!("{v}: fraction values must be in [0.0, 1.0]"));
|
||||
}
|
||||
if lo > hi {
|
||||
return Err(format!("{v}: min ({lo}) is greater than max ({hi})"));
|
||||
}
|
||||
}
|
||||
if min_count > max_count {
|
||||
return Err(format!("--min-count/--max-count: min ({min_count}) is greater than max ({max_count})"));
|
||||
}
|
||||
if min_outgroup_count > max_outgroup_count {
|
||||
return Err(format!("--min-outgroup-count/--max-outgroup-count: min ({min_outgroup_count}) is greater than max ({max_outgroup_count})"));
|
||||
}
|
||||
|
||||
Ok(GroupQuorumFilter {
|
||||
ingroup_idx,
|
||||
outgroup_idx,
|
||||
threshold: p.threshold,
|
||||
min_count: p.min_count.unwrap_or(0),
|
||||
max_count: p.max_count.unwrap_or(in_size),
|
||||
min_frac: p.min_frac.unwrap_or(default_min_frac),
|
||||
max_frac: p.max_frac.unwrap_or(1.0),
|
||||
min_outgroup_count: p.min_outgroup_count.unwrap_or(0),
|
||||
max_outgroup_count: p.max_outgroup_count.unwrap_or(default_max_outgroup_count),
|
||||
min_outgroup_frac: p.min_outgroup_frac.unwrap_or(0.0),
|
||||
max_outgroup_frac: p.max_outgroup_frac.unwrap_or(1.0),
|
||||
}
|
||||
threshold: p.threshold,
|
||||
min_count,
|
||||
max_count,
|
||||
min_frac,
|
||||
max_frac,
|
||||
min_outgroup_count,
|
||||
max_outgroup_count,
|
||||
min_outgroup_frac,
|
||||
max_outgroup_frac,
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user