fix(bitmatrix): explicitly compute diagonal entries for self-similarity

The pairwise matrix functions now explicitly calculate and overwrite diagonal entries using `f(i,i)`, replacing previous implicit symmetric mirroring or default values. Documentation has been updated to clarify that diagonals represent self-comparison weights, ensuring accurate self-similarity calculations. Additionally, the obikmer crate version has been bumped to 1.1.34.
This commit is contained in:
Eric Coissac
2026-07-03 13:01:39 +02:00
parent 67b4e4da53
commit c3ea10afeb
3 changed files with 19 additions and 5 deletions
+1 -1
View File
@@ -1704,7 +1704,7 @@ dependencies = [
[[package]]
name = "obikmer"
version = "1.1.33"
version = "1.1.34"
dependencies = [
"clap",
"csv",
+17 -3
View File
@@ -500,17 +500,26 @@ where T: Clone + Default {
}
/// Compute a symmetric `n×n` matrix in parallel by evaluating `f(i,j)` for
/// all upper-triangle pairs. `T: Copy` avoids the `.clone()` needed for the
/// lower-triangle mirror.
/// all upper-triangle pairs, plus `f(i,i)` for the diagonal. `T: Copy` avoids
/// the `.clone()` needed for the lower-triangle mirror.
///
/// The diagonal is *not* generally `T::default()`: for a self-comparison,
/// `f(i,i)` is often the column's own weight (e.g. intersection-with-self —
/// see `pairwise2_matrix`), not zero. Distance finalisations that need a
/// zero diagonal (self-distance) already overwrite it explicitly.
pub(crate) fn pairwise_matrix<T>(n: usize, f: impl Fn(usize, usize) -> T + Sync) -> Array2<T>
where T: Copy + Default + Send {
let results: Vec<(usize, usize, T)> = upper_pairs(n)
.into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
let mut m = fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)));
for i in 0..n { m[[i, i]] = f(i, i); }
m
}
/// Same as `pairwise_matrix` but `f` returns two values that fill two
/// symmetric matrices simultaneously (e.g. intersection + union for Jaccard).
/// The diagonal is `f(i,i)` (e.g. a genome's kmer count intersected with
/// itself), not `T::default()` — see `pairwise_matrix` for why that matters.
pub(crate) fn pairwise2_matrix<T>(n: usize, f: impl Fn(usize, usize) -> (T, T) + Sync) -> (Array2<T>, Array2<T>)
where T: Copy + Default + Send {
let results: Vec<(usize, usize, T, T)> = upper_pairs(n)
@@ -523,5 +532,10 @@ where T: Copy + Default + Send {
m0[[i, j]] = a; m0[[j, i]] = a;
m1[[i, j]] = b; m1[[j, i]] = b;
}
for i in 0..n {
let (a, b) = f(i, i);
m0[[i, i]] = a;
m1[[i, i]] = b;
}
(m0, m1)
}
+1 -1
View File
@@ -1,6 +1,6 @@
[package]
name = "obikmer"
version = "1.1.33"
version = "1.1.34"
edition = "2024"
[[bin]]