diff --git a/src/Cargo.lock b/src/Cargo.lock index 129707c..057ddea 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1704,7 +1704,7 @@ dependencies = [ [[package]] name = "obikmer" -version = "1.1.33" +version = "1.1.34" dependencies = [ "clap", "csv", diff --git a/src/obicompactvec/src/bitmatrix.rs b/src/obicompactvec/src/bitmatrix.rs index 72f8b05..89121da 100644 --- a/src/obicompactvec/src/bitmatrix.rs +++ b/src/obicompactvec/src/bitmatrix.rs @@ -500,17 +500,26 @@ where T: Clone + Default { } /// Compute a symmetric `n×n` matrix in parallel by evaluating `f(i,j)` for -/// all upper-triangle pairs. `T: Copy` avoids the `.clone()` needed for the -/// lower-triangle mirror. +/// all upper-triangle pairs, plus `f(i,i)` for the diagonal. `T: Copy` avoids +/// the `.clone()` needed for the lower-triangle mirror. +/// +/// The diagonal is *not* generally `T::default()`: for a self-comparison, +/// `f(i,i)` is often the column's own weight (e.g. intersection-with-self — +/// see `pairwise2_matrix`), not zero. Distance finalisations that need a +/// zero diagonal (self-distance) already overwrite it explicitly. pub(crate) fn pairwise_matrix(n: usize, f: impl Fn(usize, usize) -> T + Sync) -> Array2 where T: Copy + Default + Send { let results: Vec<(usize, usize, T)> = upper_pairs(n) .into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect(); - fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v))) + let mut m = fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v))); + for i in 0..n { m[[i, i]] = f(i, i); } + m } /// Same as `pairwise_matrix` but `f` returns two values that fill two /// symmetric matrices simultaneously (e.g. intersection + union for Jaccard). +/// The diagonal is `f(i,i)` (e.g. a genome's kmer count intersected with +/// itself), not `T::default()` — see `pairwise_matrix` for why that matters. pub(crate) fn pairwise2_matrix(n: usize, f: impl Fn(usize, usize) -> (T, T) + Sync) -> (Array2, Array2) where T: Copy + Default + Send { let results: Vec<(usize, usize, T, T)> = upper_pairs(n) @@ -523,5 +532,10 @@ where T: Copy + Default + Send { m0[[i, j]] = a; m0[[j, i]] = a; m1[[i, j]] = b; m1[[j, i]] = b; } + for i in 0..n { + let (a, b) = f(i, i); + m0[[i, i]] = a; + m1[[i, i]] = b; + } (m0, m1) } diff --git a/src/obikmer/Cargo.toml b/src/obikmer/Cargo.toml index 5278946..b164593 100644 --- a/src/obikmer/Cargo.toml +++ b/src/obikmer/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "obikmer" -version = "1.1.33" +version = "1.1.34" edition = "2024" [[bin]]