feat: add parallel distance matrix computation for bit and int matrices
Introduce parallel distance matrix generation using `ndarray` and `rayon` for both `BitMatrix` and `IntMatrix`. Adds full and additive-partial variants for Jaccard, Hamming, Bray-Curtis, Euclidean, and Hellinger metrics. Includes comprehensive unit tests verifying matrix symmetry, zero diagonals, and numerical correctness against pairwise calculations.
This commit is contained in:
Generated
+42
@@ -1460,6 +1460,16 @@ dependencies = [
|
||||
"regex-automata",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matrixmultiply"
|
||||
version = "0.3.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"rawpointer",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mem_dbg"
|
||||
version = "0.3.4"
|
||||
@@ -1546,6 +1556,21 @@ version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "729eb334247daa1803e0a094d0a5c55711b85571179f5ec6e53eccfdf7008958"
|
||||
|
||||
[[package]]
|
||||
name = "ndarray"
|
||||
version = "0.16.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841"
|
||||
dependencies = [
|
||||
"matrixmultiply",
|
||||
"num-complex",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"portable-atomic",
|
||||
"portable-atomic-util",
|
||||
"rawpointer",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "niffler"
|
||||
version = "2.7.0"
|
||||
@@ -1646,6 +1671,15 @@ dependencies = [
|
||||
"itoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.46"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
@@ -1660,6 +1694,8 @@ name = "obicompactvec"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"memmap2",
|
||||
"ndarray",
|
||||
"rayon",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
@@ -2226,6 +2262,12 @@ dependencies = [
|
||||
"getrandom 0.3.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rawpointer"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.12.0"
|
||||
|
||||
@@ -5,6 +5,8 @@ edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
memmap2 = "0.9"
|
||||
ndarray = "0.16"
|
||||
rayon = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
use std::{fs, io, path::{Path, PathBuf}};
|
||||
|
||||
use ndarray::{Array2};
|
||||
use rayon::prelude::*;
|
||||
|
||||
use crate::bitvec::{PersistentBitVec, PersistentBitVecBuilder};
|
||||
use crate::meta::MatrixMeta;
|
||||
|
||||
@@ -28,8 +31,85 @@ impl PersistentBitMatrix {
|
||||
pub fn row(&self, slot: usize) -> Box<[bool]> {
|
||||
self.cols.iter().map(|c| c.get(slot)).collect()
|
||||
}
|
||||
|
||||
// ── Distance matrices ─────────────────────────────────────────────────────
|
||||
|
||||
pub fn jaccard_dist_matrix(&self) -> Array2<f64> {
|
||||
self.pairwise_f64(|i, j| self.col(i).jaccard_dist(self.col(j)))
|
||||
}
|
||||
|
||||
pub fn hamming_dist_matrix(&self) -> Array2<u64> {
|
||||
self.pairwise_u64(|i, j| self.col(i).hamming_dist(self.col(j)))
|
||||
}
|
||||
|
||||
// ── Partial matrices (additively decomposable across layers) ──────────────
|
||||
|
||||
/// Returns `(inter[n×n], union[n×n])`.
|
||||
/// Reduce across layers by element-wise addition before computing `1 - inter/union`.
|
||||
pub fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
|
||||
let n = self.n_cols();
|
||||
let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
|
||||
.into_par_iter()
|
||||
.map(|(i, j)| {
|
||||
let (inter, union) = self.col(i).partial_jaccard_dist(self.col(j));
|
||||
(i, j, inter, union)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut inter_m = Array2::zeros((n, n));
|
||||
let mut union_m = Array2::zeros((n, n));
|
||||
for (i, j, inter, union) in results {
|
||||
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
|
||||
union_m[[i, j]] = union; union_m[[j, i]] = union;
|
||||
}
|
||||
(inter_m, union_m)
|
||||
}
|
||||
|
||||
/// Returns `hamming[n×n]` (number of differing bits per pair).
|
||||
/// Additive across layers — reduce by element-wise addition.
|
||||
pub fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
|
||||
self.pairwise_u64(|i, j| self.col(i).hamming_dist(self.col(j)))
|
||||
}
|
||||
|
||||
// ── Private helpers ───────────────────────────────────────────────────────
|
||||
|
||||
fn pairwise_f64(&self, f: impl Fn(usize, usize) -> f64 + Sync) -> Array2<f64> {
|
||||
let n = self.n_cols();
|
||||
let results: Vec<(usize, usize, f64)> = upper_pairs(n)
|
||||
.into_par_iter()
|
||||
.map(|(i, j)| (i, j, f(i, j)))
|
||||
.collect();
|
||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||
}
|
||||
|
||||
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
|
||||
let n = self.n_cols();
|
||||
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
|
||||
.into_par_iter()
|
||||
.map(|(i, j)| (i, j, f(i, j)))
|
||||
.collect();
|
||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||
}
|
||||
}
|
||||
|
||||
fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
|
||||
(0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
|
||||
}
|
||||
|
||||
fn fill_symmetric<T>(n: usize, vals: impl Iterator<Item = (usize, usize, T, T)>) -> Array2<T>
|
||||
where
|
||||
T: Clone + Default,
|
||||
{
|
||||
let mut m = Array2::from_elem((n, n), T::default());
|
||||
for (i, j, vij, vji) in vals {
|
||||
m[[i, j]] = vij;
|
||||
m[[j, i]] = vji;
|
||||
}
|
||||
m
|
||||
}
|
||||
|
||||
// ── Builder ───────────────────────────────────────────────────────────────────
|
||||
|
||||
pub struct PersistentBitMatrixBuilder {
|
||||
dir: PathBuf,
|
||||
n: usize,
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
use std::{fs, io, path::{Path, PathBuf}};
|
||||
|
||||
use ndarray::{Array1, Array2};
|
||||
use rayon::prelude::*;
|
||||
|
||||
use crate::builder::PersistentCompactIntVecBuilder;
|
||||
use crate::meta::MatrixMeta;
|
||||
use crate::reader::PersistentCompactIntVec;
|
||||
@@ -29,8 +32,130 @@ impl PersistentCompactIntMatrix {
|
||||
pub fn row(&self, slot: usize) -> Box<[u32]> {
|
||||
self.cols.iter().map(|c| c.get(slot)).collect()
|
||||
}
|
||||
|
||||
// ── Distance matrices ─────────────────────────────────────────────────────
|
||||
|
||||
pub fn bray_dist_matrix(&self) -> Array2<f64> {
|
||||
self.pairwise(|i, j| self.col(i).bray_dist(self.col(j)))
|
||||
}
|
||||
|
||||
pub fn relfreq_bray_dist_matrix(&self) -> Array2<f64> {
|
||||
self.pairwise(|i, j| self.col(i).relfreq_bray_dist(self.col(j)))
|
||||
}
|
||||
|
||||
pub fn euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||
self.pairwise(|i, j| self.col(i).euclidean_dist(self.col(j)))
|
||||
}
|
||||
|
||||
pub fn relfreq_euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||
self.pairwise(|i, j| self.col(i).relfreq_euclidean_dist(self.col(j)))
|
||||
}
|
||||
|
||||
pub fn hellinger_dist_matrix(&self) -> Array2<f64> {
|
||||
self.pairwise(|i, j| self.col(i).hellinger_dist(self.col(j)))
|
||||
}
|
||||
|
||||
pub fn jaccard_dist_matrix(&self) -> Array2<f64> {
|
||||
self.pairwise(|i, j| self.col(i).jaccard_dist(self.col(j)))
|
||||
}
|
||||
|
||||
pub fn threshold_jaccard_dist_matrix(&self, threshold: u32) -> Array2<f64> {
|
||||
self.pairwise(|i, j| self.col(i).threshold_jaccard_dist(self.col(j), threshold))
|
||||
}
|
||||
|
||||
// ── Partial matrices (additively decomposable across layers) ──────────────
|
||||
|
||||
/// Returns `(sum_min[n×n], col_sums[n])`.
|
||||
/// `sum_min[i,j]` = Σ_slot min(col_i[slot], col_j[slot]).
|
||||
/// `col_sums[k]` = Σ_slot col_k[slot].
|
||||
/// Reduce across layers by element-wise addition before computing the final distance.
|
||||
pub fn partial_bray_dist_matrix(&self) -> (Array2<u64>, Array1<u64>) {
|
||||
let n = self.n_cols();
|
||||
|
||||
let col_sums: Vec<u64> = (0..n)
|
||||
.into_par_iter()
|
||||
.map(|i| self.col(i).sum())
|
||||
.collect();
|
||||
|
||||
let sum_min = self.pairwise_u64(|i, j| {
|
||||
self.col(i).partial_bray_dist(self.col(j)).0
|
||||
});
|
||||
|
||||
(sum_min, Array1::from_vec(col_sums))
|
||||
}
|
||||
|
||||
/// Returns sum of squared differences `[n×n]`.
|
||||
/// Reduce across layers by element-wise addition, then take `sqrt` for the final distance.
|
||||
pub fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||
self.pairwise(|i, j| self.col(i).partial_euclidean_dist(self.col(j)))
|
||||
}
|
||||
|
||||
/// Returns `(inter[n×n], union[n×n])` for threshold-Jaccard.
|
||||
/// Reduce across layers by element-wise addition before computing `1 - inter/union`.
|
||||
pub fn partial_threshold_jaccard_dist_matrix(
|
||||
&self,
|
||||
threshold: u32,
|
||||
) -> (Array2<u64>, Array2<u64>) {
|
||||
let n = self.n_cols();
|
||||
let pairs = upper_pairs(n);
|
||||
|
||||
let results: Vec<(usize, usize, u64, u64)> = pairs
|
||||
.into_par_iter()
|
||||
.map(|(i, j)| {
|
||||
let (inter, union) =
|
||||
self.col(i).partial_threshold_jaccard_dist(self.col(j), threshold);
|
||||
(i, j, inter, union)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut inter_m = Array2::zeros((n, n));
|
||||
let mut union_m = Array2::zeros((n, n));
|
||||
for (i, j, inter, union) in results {
|
||||
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
|
||||
union_m[[i, j]] = union; union_m[[j, i]] = union;
|
||||
}
|
||||
(inter_m, union_m)
|
||||
}
|
||||
|
||||
// ── Private helpers ───────────────────────────────────────────────────────
|
||||
|
||||
fn pairwise(&self, f: impl Fn(usize, usize) -> f64 + Sync) -> Array2<f64> {
|
||||
let n = self.n_cols();
|
||||
let results: Vec<(usize, usize, f64)> = upper_pairs(n)
|
||||
.into_par_iter()
|
||||
.map(|(i, j)| (i, j, f(i, j)))
|
||||
.collect();
|
||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||
}
|
||||
|
||||
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
|
||||
let n = self.n_cols();
|
||||
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
|
||||
.into_par_iter()
|
||||
.map(|(i, j)| (i, j, f(i, j)))
|
||||
.collect();
|
||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||
}
|
||||
}
|
||||
|
||||
fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
|
||||
(0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
|
||||
}
|
||||
|
||||
fn fill_symmetric<T>(n: usize, vals: impl Iterator<Item = (usize, usize, T, T)>) -> Array2<T>
|
||||
where
|
||||
T: Clone + Default,
|
||||
{
|
||||
let mut m = Array2::from_elem((n, n), T::default());
|
||||
for (i, j, vij, vji) in vals {
|
||||
m[[i, j]] = vij;
|
||||
m[[j, i]] = vji;
|
||||
}
|
||||
m
|
||||
}
|
||||
|
||||
// ── Builder ───────────────────────────────────────────────────────────────────
|
||||
|
||||
pub struct PersistentCompactIntMatrixBuilder {
|
||||
dir: PathBuf,
|
||||
n: usize,
|
||||
|
||||
@@ -2,6 +2,22 @@ use tempfile::tempdir;
|
||||
|
||||
use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder};
|
||||
|
||||
fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
|
||||
let n = cols.first().map_or(0, |c| c.len());
|
||||
let dir = tempdir().unwrap();
|
||||
let mut b = PersistentBitMatrixBuilder::new(n, dir.path()).unwrap();
|
||||
for &col in cols {
|
||||
let mut cb = b.add_col().unwrap();
|
||||
for (slot, &v) in col.iter().enumerate() {
|
||||
cb.set(slot, v);
|
||||
}
|
||||
cb.close().unwrap();
|
||||
}
|
||||
b.close().unwrap();
|
||||
let m = PersistentBitMatrix::open(dir.path()).unwrap();
|
||||
(dir, m)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn single_col_roundtrip() {
|
||||
let dir = tempdir().unwrap();
|
||||
@@ -67,3 +83,102 @@ fn zero_cols_roundtrip() {
|
||||
assert_eq!(m.n_cols(), 0);
|
||||
assert_eq!(m.n(), 8);
|
||||
}
|
||||
|
||||
// ── Distance matrix tests ─────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn jaccard_dist_matrix_symmetry_and_diagonal() {
|
||||
let (_d, m) = make_matrix(&[
|
||||
&[true, false, true],
|
||||
&[true, true, false],
|
||||
&[false, true, true],
|
||||
]);
|
||||
let dm = m.jaccard_dist_matrix();
|
||||
let n = m.n_cols();
|
||||
for i in 0..n { assert_eq!(dm[[i, i]], 0.0, "diagonal"); }
|
||||
for i in 0..n {
|
||||
for j in 0..n {
|
||||
assert!((dm[[i, j]] - dm[[j, i]]).abs() < 1e-12, "symmetry [{i},{j}]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaccard_dist_matrix_values_match_pairwise() {
|
||||
let (_d, m) = make_matrix(&[
|
||||
&[true, false, true],
|
||||
&[true, true, false],
|
||||
&[false, true, true],
|
||||
]);
|
||||
let dm = m.jaccard_dist_matrix();
|
||||
for i in 0..m.n_cols() {
|
||||
for j in 0..m.n_cols() {
|
||||
let expected = m.col(i).jaccard_dist(m.col(j));
|
||||
assert!((dm[[i, j]] - expected).abs() < 1e-12, "[{i},{j}]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hamming_dist_matrix_symmetry_and_diagonal() {
|
||||
let (_d, m) = make_matrix(&[
|
||||
&[true, false, true],
|
||||
&[true, true, false],
|
||||
&[false, true, true],
|
||||
]);
|
||||
let dm = m.hamming_dist_matrix();
|
||||
let n = m.n_cols();
|
||||
for i in 0..n { assert_eq!(dm[[i, i]], 0, "diagonal"); }
|
||||
for i in 0..n {
|
||||
for j in 0..n {
|
||||
assert_eq!(dm[[i, j]], dm[[j, i]], "symmetry [{i},{j}]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hamming_dist_matrix_values_match_pairwise() {
|
||||
let (_d, m) = make_matrix(&[
|
||||
&[true, false, true],
|
||||
&[true, true, false],
|
||||
&[false, true, true],
|
||||
]);
|
||||
let dm = m.hamming_dist_matrix();
|
||||
for i in 0..m.n_cols() {
|
||||
for j in 0..m.n_cols() {
|
||||
assert_eq!(dm[[i, j]], m.col(i).hamming_dist(m.col(j)), "[{i},{j}]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_jaccard_consistent() {
|
||||
let (_d, m) = make_matrix(&[
|
||||
&[true, false, true, true],
|
||||
&[true, true, false, true],
|
||||
&[false, true, true, false],
|
||||
]);
|
||||
let (inter, union) = m.partial_jaccard_dist_matrix();
|
||||
let n = m.n_cols();
|
||||
for i in 0..n {
|
||||
for j in i + 1..n {
|
||||
let (ei, eu) = m.col(i).partial_jaccard_dist(m.col(j));
|
||||
assert_eq!(inter[[i, j]], ei, "inter [{i},{j}]");
|
||||
assert_eq!(union[[i, j]], eu, "union [{i},{j}]");
|
||||
assert_eq!(inter[[j, i]], ei, "inter symmetry");
|
||||
assert_eq!(union[[j, i]], eu, "union symmetry");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_hamming_matches_hamming() {
|
||||
let (_d, m) = make_matrix(&[
|
||||
&[true, false, true],
|
||||
&[false, true, true],
|
||||
&[true, true, false],
|
||||
]);
|
||||
let partial = m.partial_hamming_dist_matrix();
|
||||
let full = m.hamming_dist_matrix();
|
||||
assert_eq!(partial, full);
|
||||
}
|
||||
|
||||
@@ -2,6 +2,22 @@ use tempfile::tempdir;
|
||||
|
||||
use crate::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder};
|
||||
|
||||
fn make_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
|
||||
let n = cols.first().map_or(0, |c| c.len());
|
||||
let dir = tempdir().unwrap();
|
||||
let mut b = PersistentCompactIntMatrixBuilder::new(n, dir.path()).unwrap();
|
||||
for &col in cols {
|
||||
let mut cb = b.add_col().unwrap();
|
||||
for (slot, &v) in col.iter().enumerate() {
|
||||
cb.set(slot, v);
|
||||
}
|
||||
cb.close().unwrap();
|
||||
}
|
||||
b.close().unwrap();
|
||||
let m = PersistentCompactIntMatrix::open(dir.path()).unwrap();
|
||||
(dir, m)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn single_col_roundtrip() {
|
||||
let dir = tempdir().unwrap();
|
||||
@@ -66,3 +82,111 @@ fn zero_cols_roundtrip() {
|
||||
assert_eq!(m.n_cols(), 0);
|
||||
assert_eq!(m.n(), 10);
|
||||
}
|
||||
|
||||
// ── Distance matrix tests ─────────────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn bray_dist_matrix_symmetry_and_diagonal() {
|
||||
// col0=[1,0,1], col1=[1,1,0], col2=[0,1,1]
|
||||
let (_d, m) = make_matrix(&[&[1, 0, 1], &[1, 1, 0], &[0, 1, 1]]);
|
||||
let dm = m.bray_dist_matrix();
|
||||
let n = m.n_cols();
|
||||
for i in 0..n { assert_eq!(dm[[i, i]], 0.0, "diagonal"); }
|
||||
for i in 0..n {
|
||||
for j in 0..n {
|
||||
assert!((dm[[i, j]] - dm[[j, i]]).abs() < 1e-12, "symmetry");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bray_dist_matrix_values_match_pairwise() {
|
||||
let (_d, m) = make_matrix(&[&[1, 0, 1], &[1, 1, 0], &[0, 1, 1]]);
|
||||
let dm = m.bray_dist_matrix();
|
||||
for i in 0..m.n_cols() {
|
||||
for j in 0..m.n_cols() {
|
||||
let expected = m.col(i).bray_dist(m.col(j));
|
||||
assert!((dm[[i, j]] - expected).abs() < 1e-12, "[{i},{j}]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jaccard_dist_matrix_values_match_pairwise() {
|
||||
let (_d, m) = make_matrix(&[&[1, 0, 2], &[0, 1, 1], &[1, 1, 0]]);
|
||||
let dm = m.jaccard_dist_matrix();
|
||||
for i in 0..m.n_cols() {
|
||||
for j in 0..m.n_cols() {
|
||||
let expected = m.col(i).jaccard_dist(m.col(j));
|
||||
assert!((dm[[i, j]] - expected).abs() < 1e-12, "[{i},{j}]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_bray_dist_matrix_consistent() {
|
||||
let (_d, m) = make_matrix(&[&[1, 0, 1], &[1, 1, 0], &[0, 1, 1]]);
|
||||
let (sum_min, col_sums) = m.partial_bray_dist_matrix();
|
||||
let n = m.n_cols();
|
||||
|
||||
// symmetry of sum_min
|
||||
for i in 0..n {
|
||||
for j in 0..n {
|
||||
assert_eq!(sum_min[[i, j]], sum_min[[j, i]]);
|
||||
}
|
||||
}
|
||||
|
||||
// col_sums correct
|
||||
for k in 0..n {
|
||||
assert_eq!(col_sums[k], m.col(k).sum());
|
||||
}
|
||||
|
||||
// reconstruct distance from partials and compare to direct method
|
||||
for i in 0..n {
|
||||
for j in i + 1..n {
|
||||
let denom = col_sums[i] + col_sums[j];
|
||||
let dist = if denom == 0 { 0.0 } else { 1.0 - 2.0 * sum_min[[i, j]] as f64 / denom as f64 };
|
||||
let expected = m.col(i).bray_dist(m.col(j));
|
||||
assert!((dist - expected).abs() < 1e-12, "[{i},{j}]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_euclidean_dist_matrix_consistent() {
|
||||
let (_d, m) = make_matrix(&[&[3, 0], &[0, 4], &[1, 1]]);
|
||||
let sq = m.partial_euclidean_dist_matrix();
|
||||
let n = m.n_cols();
|
||||
for i in 0..n {
|
||||
for j in i + 1..n {
|
||||
let expected = m.col(i).partial_euclidean_dist(m.col(j));
|
||||
assert!((sq[[i, j]] - expected).abs() < 1e-12, "[{i},{j}]");
|
||||
assert!((sq[[j, i]] - expected).abs() < 1e-12, "symmetry [{j},{i}]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn partial_threshold_jaccard_consistent() {
|
||||
let (_d, m) = make_matrix(&[&[3, 0, 2], &[1, 2, 0], &[0, 3, 1]]);
|
||||
let threshold = 2u32;
|
||||
let (inter, union) = m.partial_threshold_jaccard_dist_matrix(threshold);
|
||||
let n = m.n_cols();
|
||||
for i in 0..n {
|
||||
for j in i + 1..n {
|
||||
let (ei, eu) = m.col(i).partial_threshold_jaccard_dist(m.col(j), threshold);
|
||||
assert_eq!(inter[[i, j]], ei);
|
||||
assert_eq!(union[[i, j]], eu);
|
||||
assert_eq!(inter[[j, i]], ei, "symmetry inter");
|
||||
assert_eq!(union[[j, i]], eu, "symmetry union");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn single_col_distance_matrix_is_zero() {
|
||||
let (_d, m) = make_matrix(&[&[1, 2, 3]]);
|
||||
let dm = m.bray_dist_matrix();
|
||||
assert_eq!(dm.shape(), &[1, 1]);
|
||||
assert_eq!(dm[[0, 0]], 0.0);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user