fix(bitmatrix): explicitly compute diagonal entries for self-similarity
The pairwise matrix functions now explicitly calculate and overwrite diagonal entries using `f(i,i)`, replacing previous implicit symmetric mirroring or default values. Documentation has been updated to clarify that diagonals represent self-comparison weights, ensuring accurate self-similarity calculations. Additionally, the obikmer crate version has been bumped to 1.1.34.
This commit is contained in:
Generated
+1
-1
@@ -1704,7 +1704,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "obikmer"
|
||||
version = "1.1.33"
|
||||
version = "1.1.34"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"csv",
|
||||
|
||||
@@ -500,17 +500,26 @@ where T: Clone + Default {
|
||||
}
|
||||
|
||||
/// Compute a symmetric `n×n` matrix in parallel by evaluating `f(i,j)` for
|
||||
/// all upper-triangle pairs. `T: Copy` avoids the `.clone()` needed for the
|
||||
/// lower-triangle mirror.
|
||||
/// all upper-triangle pairs, plus `f(i,i)` for the diagonal. `T: Copy` avoids
|
||||
/// the `.clone()` needed for the lower-triangle mirror.
|
||||
///
|
||||
/// The diagonal is *not* generally `T::default()`: for a self-comparison,
|
||||
/// `f(i,i)` is often the column's own weight (e.g. intersection-with-self —
|
||||
/// see `pairwise2_matrix`), not zero. Distance finalisations that need a
|
||||
/// zero diagonal (self-distance) already overwrite it explicitly.
|
||||
pub(crate) fn pairwise_matrix<T>(n: usize, f: impl Fn(usize, usize) -> T + Sync) -> Array2<T>
|
||||
where T: Copy + Default + Send {
|
||||
let results: Vec<(usize, usize, T)> = upper_pairs(n)
|
||||
.into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
|
||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||
let mut m = fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)));
|
||||
for i in 0..n { m[[i, i]] = f(i, i); }
|
||||
m
|
||||
}
|
||||
|
||||
/// Same as `pairwise_matrix` but `f` returns two values that fill two
|
||||
/// symmetric matrices simultaneously (e.g. intersection + union for Jaccard).
|
||||
/// The diagonal is `f(i,i)` (e.g. a genome's kmer count intersected with
|
||||
/// itself), not `T::default()` — see `pairwise_matrix` for why that matters.
|
||||
pub(crate) fn pairwise2_matrix<T>(n: usize, f: impl Fn(usize, usize) -> (T, T) + Sync) -> (Array2<T>, Array2<T>)
|
||||
where T: Copy + Default + Send {
|
||||
let results: Vec<(usize, usize, T, T)> = upper_pairs(n)
|
||||
@@ -523,5 +532,10 @@ where T: Copy + Default + Send {
|
||||
m0[[i, j]] = a; m0[[j, i]] = a;
|
||||
m1[[i, j]] = b; m1[[j, i]] = b;
|
||||
}
|
||||
for i in 0..n {
|
||||
let (a, b) = f(i, i);
|
||||
m0[[i, i]] = a;
|
||||
m1[[i, i]] = b;
|
||||
}
|
||||
(m0, m1)
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "obikmer"
|
||||
version = "1.1.33"
|
||||
version = "1.1.34"
|
||||
edition = "2024"
|
||||
|
||||
[[bin]]
|
||||
|
||||
Reference in New Issue
Block a user