feat: add parallel distance matrix computation for bit and int matrices
Introduce parallel distance matrix generation using `ndarray` and `rayon` for both `BitMatrix` and `IntMatrix`. Adds full and additive-partial variants for Jaccard, Hamming, Bray-Curtis, Euclidean, and Hellinger metrics. Includes comprehensive unit tests verifying matrix symmetry, zero diagonals, and numerical correctness against pairwise calculations.
This commit is contained in:
Generated
+42
@@ -1460,6 +1460,16 @@ dependencies = [
|
|||||||
"regex-automata",
|
"regex-automata",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "matrixmultiply"
|
||||||
|
version = "0.3.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"rawpointer",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mem_dbg"
|
name = "mem_dbg"
|
||||||
version = "0.3.4"
|
version = "0.3.4"
|
||||||
@@ -1546,6 +1556,21 @@ version = "0.6.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "729eb334247daa1803e0a094d0a5c55711b85571179f5ec6e53eccfdf7008958"
|
checksum = "729eb334247daa1803e0a094d0a5c55711b85571179f5ec6e53eccfdf7008958"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ndarray"
|
||||||
|
version = "0.16.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841"
|
||||||
|
dependencies = [
|
||||||
|
"matrixmultiply",
|
||||||
|
"num-complex",
|
||||||
|
"num-integer",
|
||||||
|
"num-traits",
|
||||||
|
"portable-atomic",
|
||||||
|
"portable-atomic-util",
|
||||||
|
"rawpointer",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "niffler"
|
name = "niffler"
|
||||||
version = "2.7.0"
|
version = "2.7.0"
|
||||||
@@ -1646,6 +1671,15 @@ dependencies = [
|
|||||||
"itoa",
|
"itoa",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-integer"
|
||||||
|
version = "0.1.46"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
|
||||||
|
dependencies = [
|
||||||
|
"num-traits",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-traits"
|
name = "num-traits"
|
||||||
version = "0.2.19"
|
version = "0.2.19"
|
||||||
@@ -1660,6 +1694,8 @@ name = "obicompactvec"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memmap2",
|
"memmap2",
|
||||||
|
"ndarray",
|
||||||
|
"rayon",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -2226,6 +2262,12 @@ dependencies = [
|
|||||||
"getrandom 0.3.4",
|
"getrandom 0.3.4",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rawpointer"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rayon"
|
name = "rayon"
|
||||||
version = "1.12.0"
|
version = "1.12.0"
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ edition = "2024"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
memmap2 = "0.9"
|
memmap2 = "0.9"
|
||||||
|
ndarray = "0.16"
|
||||||
|
rayon = "1"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
use std::{fs, io, path::{Path, PathBuf}};
|
use std::{fs, io, path::{Path, PathBuf}};
|
||||||
|
|
||||||
|
use ndarray::{Array2};
|
||||||
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::bitvec::{PersistentBitVec, PersistentBitVecBuilder};
|
use crate::bitvec::{PersistentBitVec, PersistentBitVecBuilder};
|
||||||
use crate::meta::MatrixMeta;
|
use crate::meta::MatrixMeta;
|
||||||
|
|
||||||
@@ -28,8 +31,85 @@ impl PersistentBitMatrix {
|
|||||||
pub fn row(&self, slot: usize) -> Box<[bool]> {
|
pub fn row(&self, slot: usize) -> Box<[bool]> {
|
||||||
self.cols.iter().map(|c| c.get(slot)).collect()
|
self.cols.iter().map(|c| c.get(slot)).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Distance matrices ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
pub fn jaccard_dist_matrix(&self) -> Array2<f64> {
|
||||||
|
self.pairwise_f64(|i, j| self.col(i).jaccard_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn hamming_dist_matrix(&self) -> Array2<u64> {
|
||||||
|
self.pairwise_u64(|i, j| self.col(i).hamming_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Partial matrices (additively decomposable across layers) ──────────────
|
||||||
|
|
||||||
|
/// Returns `(inter[n×n], union[n×n])`.
|
||||||
|
/// Reduce across layers by element-wise addition before computing `1 - inter/union`.
|
||||||
|
pub fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
|
||||||
|
let n = self.n_cols();
|
||||||
|
let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
|
||||||
|
.into_par_iter()
|
||||||
|
.map(|(i, j)| {
|
||||||
|
let (inter, union) = self.col(i).partial_jaccard_dist(self.col(j));
|
||||||
|
(i, j, inter, union)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut inter_m = Array2::zeros((n, n));
|
||||||
|
let mut union_m = Array2::zeros((n, n));
|
||||||
|
for (i, j, inter, union) in results {
|
||||||
|
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
|
||||||
|
union_m[[i, j]] = union; union_m[[j, i]] = union;
|
||||||
|
}
|
||||||
|
(inter_m, union_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `hamming[n×n]` (number of differing bits per pair).
|
||||||
|
/// Additive across layers — reduce by element-wise addition.
|
||||||
|
pub fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
|
||||||
|
self.pairwise_u64(|i, j| self.col(i).hamming_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Private helpers ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
fn pairwise_f64(&self, f: impl Fn(usize, usize) -> f64 + Sync) -> Array2<f64> {
|
||||||
|
let n = self.n_cols();
|
||||||
|
let results: Vec<(usize, usize, f64)> = upper_pairs(n)
|
||||||
|
.into_par_iter()
|
||||||
|
.map(|(i, j)| (i, j, f(i, j)))
|
||||||
|
.collect();
|
||||||
|
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
|
||||||
|
let n = self.n_cols();
|
||||||
|
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
|
||||||
|
.into_par_iter()
|
||||||
|
.map(|(i, j)| (i, j, f(i, j)))
|
||||||
|
.collect();
|
||||||
|
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
|
||||||
|
(0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fill_symmetric<T>(n: usize, vals: impl Iterator<Item = (usize, usize, T, T)>) -> Array2<T>
|
||||||
|
where
|
||||||
|
T: Clone + Default,
|
||||||
|
{
|
||||||
|
let mut m = Array2::from_elem((n, n), T::default());
|
||||||
|
for (i, j, vij, vji) in vals {
|
||||||
|
m[[i, j]] = vij;
|
||||||
|
m[[j, i]] = vji;
|
||||||
|
}
|
||||||
|
m
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Builder ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
pub struct PersistentBitMatrixBuilder {
|
pub struct PersistentBitMatrixBuilder {
|
||||||
dir: PathBuf,
|
dir: PathBuf,
|
||||||
n: usize,
|
n: usize,
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
use std::{fs, io, path::{Path, PathBuf}};
|
use std::{fs, io, path::{Path, PathBuf}};
|
||||||
|
|
||||||
|
use ndarray::{Array1, Array2};
|
||||||
|
use rayon::prelude::*;
|
||||||
|
|
||||||
use crate::builder::PersistentCompactIntVecBuilder;
|
use crate::builder::PersistentCompactIntVecBuilder;
|
||||||
use crate::meta::MatrixMeta;
|
use crate::meta::MatrixMeta;
|
||||||
use crate::reader::PersistentCompactIntVec;
|
use crate::reader::PersistentCompactIntVec;
|
||||||
@@ -29,8 +32,130 @@ impl PersistentCompactIntMatrix {
|
|||||||
pub fn row(&self, slot: usize) -> Box<[u32]> {
|
pub fn row(&self, slot: usize) -> Box<[u32]> {
|
||||||
self.cols.iter().map(|c| c.get(slot)).collect()
|
self.cols.iter().map(|c| c.get(slot)).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Distance matrices ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
pub fn bray_dist_matrix(&self) -> Array2<f64> {
|
||||||
|
self.pairwise(|i, j| self.col(i).bray_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn relfreq_bray_dist_matrix(&self) -> Array2<f64> {
|
||||||
|
self.pairwise(|i, j| self.col(i).relfreq_bray_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||||
|
self.pairwise(|i, j| self.col(i).euclidean_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn relfreq_euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||||
|
self.pairwise(|i, j| self.col(i).relfreq_euclidean_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn hellinger_dist_matrix(&self) -> Array2<f64> {
|
||||||
|
self.pairwise(|i, j| self.col(i).hellinger_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn jaccard_dist_matrix(&self) -> Array2<f64> {
|
||||||
|
self.pairwise(|i, j| self.col(i).jaccard_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn threshold_jaccard_dist_matrix(&self, threshold: u32) -> Array2<f64> {
|
||||||
|
self.pairwise(|i, j| self.col(i).threshold_jaccard_dist(self.col(j), threshold))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Partial matrices (additively decomposable across layers) ──────────────
|
||||||
|
|
||||||
|
/// Returns `(sum_min[n×n], col_sums[n])`.
|
||||||
|
/// `sum_min[i,j]` = Σ_slot min(col_i[slot], col_j[slot]).
|
||||||
|
/// `col_sums[k]` = Σ_slot col_k[slot].
|
||||||
|
/// Reduce across layers by element-wise addition before computing the final distance.
|
||||||
|
pub fn partial_bray_dist_matrix(&self) -> (Array2<u64>, Array1<u64>) {
|
||||||
|
let n = self.n_cols();
|
||||||
|
|
||||||
|
let col_sums: Vec<u64> = (0..n)
|
||||||
|
.into_par_iter()
|
||||||
|
.map(|i| self.col(i).sum())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let sum_min = self.pairwise_u64(|i, j| {
|
||||||
|
self.col(i).partial_bray_dist(self.col(j)).0
|
||||||
|
});
|
||||||
|
|
||||||
|
(sum_min, Array1::from_vec(col_sums))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns sum of squared differences `[n×n]`.
|
||||||
|
/// Reduce across layers by element-wise addition, then take `sqrt` for the final distance.
|
||||||
|
pub fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||||
|
self.pairwise(|i, j| self.col(i).partial_euclidean_dist(self.col(j)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `(inter[n×n], union[n×n])` for threshold-Jaccard.
|
||||||
|
/// Reduce across layers by element-wise addition before computing `1 - inter/union`.
|
||||||
|
pub fn partial_threshold_jaccard_dist_matrix(
|
||||||
|
&self,
|
||||||
|
threshold: u32,
|
||||||
|
) -> (Array2<u64>, Array2<u64>) {
|
||||||
|
let n = self.n_cols();
|
||||||
|
let pairs = upper_pairs(n);
|
||||||
|
|
||||||
|
let results: Vec<(usize, usize, u64, u64)> = pairs
|
||||||
|
.into_par_iter()
|
||||||
|
.map(|(i, j)| {
|
||||||
|
let (inter, union) =
|
||||||
|
self.col(i).partial_threshold_jaccard_dist(self.col(j), threshold);
|
||||||
|
(i, j, inter, union)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut inter_m = Array2::zeros((n, n));
|
||||||
|
let mut union_m = Array2::zeros((n, n));
|
||||||
|
for (i, j, inter, union) in results {
|
||||||
|
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
|
||||||
|
union_m[[i, j]] = union; union_m[[j, i]] = union;
|
||||||
|
}
|
||||||
|
(inter_m, union_m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Private helpers ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
fn pairwise(&self, f: impl Fn(usize, usize) -> f64 + Sync) -> Array2<f64> {
|
||||||
|
let n = self.n_cols();
|
||||||
|
let results: Vec<(usize, usize, f64)> = upper_pairs(n)
|
||||||
|
.into_par_iter()
|
||||||
|
.map(|(i, j)| (i, j, f(i, j)))
|
||||||
|
.collect();
|
||||||
|
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
|
||||||
|
let n = self.n_cols();
|
||||||
|
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
|
||||||
|
.into_par_iter()
|
||||||
|
.map(|(i, j)| (i, j, f(i, j)))
|
||||||
|
.collect();
|
||||||
|
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
|
||||||
|
(0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fill_symmetric<T>(n: usize, vals: impl Iterator<Item = (usize, usize, T, T)>) -> Array2<T>
|
||||||
|
where
|
||||||
|
T: Clone + Default,
|
||||||
|
{
|
||||||
|
let mut m = Array2::from_elem((n, n), T::default());
|
||||||
|
for (i, j, vij, vji) in vals {
|
||||||
|
m[[i, j]] = vij;
|
||||||
|
m[[j, i]] = vji;
|
||||||
|
}
|
||||||
|
m
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Builder ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
pub struct PersistentCompactIntMatrixBuilder {
|
pub struct PersistentCompactIntMatrixBuilder {
|
||||||
dir: PathBuf,
|
dir: PathBuf,
|
||||||
n: usize,
|
n: usize,
|
||||||
|
|||||||
@@ -2,6 +2,22 @@ use tempfile::tempdir;
|
|||||||
|
|
||||||
use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder};
|
use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder};
|
||||||
|
|
||||||
|
fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
|
||||||
|
let n = cols.first().map_or(0, |c| c.len());
|
||||||
|
let dir = tempdir().unwrap();
|
||||||
|
let mut b = PersistentBitMatrixBuilder::new(n, dir.path()).unwrap();
|
||||||
|
for &col in cols {
|
||||||
|
let mut cb = b.add_col().unwrap();
|
||||||
|
for (slot, &v) in col.iter().enumerate() {
|
||||||
|
cb.set(slot, v);
|
||||||
|
}
|
||||||
|
cb.close().unwrap();
|
||||||
|
}
|
||||||
|
b.close().unwrap();
|
||||||
|
let m = PersistentBitMatrix::open(dir.path()).unwrap();
|
||||||
|
(dir, m)
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn single_col_roundtrip() {
|
fn single_col_roundtrip() {
|
||||||
let dir = tempdir().unwrap();
|
let dir = tempdir().unwrap();
|
||||||
@@ -67,3 +83,102 @@ fn zero_cols_roundtrip() {
|
|||||||
assert_eq!(m.n_cols(), 0);
|
assert_eq!(m.n_cols(), 0);
|
||||||
assert_eq!(m.n(), 8);
|
assert_eq!(m.n(), 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Distance matrix tests ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn jaccard_dist_matrix_symmetry_and_diagonal() {
|
||||||
|
let (_d, m) = make_matrix(&[
|
||||||
|
&[true, false, true],
|
||||||
|
&[true, true, false],
|
||||||
|
&[false, true, true],
|
||||||
|
]);
|
||||||
|
let dm = m.jaccard_dist_matrix();
|
||||||
|
let n = m.n_cols();
|
||||||
|
for i in 0..n { assert_eq!(dm[[i, i]], 0.0, "diagonal"); }
|
||||||
|
for i in 0..n {
|
||||||
|
for j in 0..n {
|
||||||
|
assert!((dm[[i, j]] - dm[[j, i]]).abs() < 1e-12, "symmetry [{i},{j}]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn jaccard_dist_matrix_values_match_pairwise() {
|
||||||
|
let (_d, m) = make_matrix(&[
|
||||||
|
&[true, false, true],
|
||||||
|
&[true, true, false],
|
||||||
|
&[false, true, true],
|
||||||
|
]);
|
||||||
|
let dm = m.jaccard_dist_matrix();
|
||||||
|
for i in 0..m.n_cols() {
|
||||||
|
for j in 0..m.n_cols() {
|
||||||
|
let expected = m.col(i).jaccard_dist(m.col(j));
|
||||||
|
assert!((dm[[i, j]] - expected).abs() < 1e-12, "[{i},{j}]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn hamming_dist_matrix_symmetry_and_diagonal() {
|
||||||
|
let (_d, m) = make_matrix(&[
|
||||||
|
&[true, false, true],
|
||||||
|
&[true, true, false],
|
||||||
|
&[false, true, true],
|
||||||
|
]);
|
||||||
|
let dm = m.hamming_dist_matrix();
|
||||||
|
let n = m.n_cols();
|
||||||
|
for i in 0..n { assert_eq!(dm[[i, i]], 0, "diagonal"); }
|
||||||
|
for i in 0..n {
|
||||||
|
for j in 0..n {
|
||||||
|
assert_eq!(dm[[i, j]], dm[[j, i]], "symmetry [{i},{j}]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn hamming_dist_matrix_values_match_pairwise() {
|
||||||
|
let (_d, m) = make_matrix(&[
|
||||||
|
&[true, false, true],
|
||||||
|
&[true, true, false],
|
||||||
|
&[false, true, true],
|
||||||
|
]);
|
||||||
|
let dm = m.hamming_dist_matrix();
|
||||||
|
for i in 0..m.n_cols() {
|
||||||
|
for j in 0..m.n_cols() {
|
||||||
|
assert_eq!(dm[[i, j]], m.col(i).hamming_dist(m.col(j)), "[{i},{j}]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn partial_jaccard_consistent() {
|
||||||
|
let (_d, m) = make_matrix(&[
|
||||||
|
&[true, false, true, true],
|
||||||
|
&[true, true, false, true],
|
||||||
|
&[false, true, true, false],
|
||||||
|
]);
|
||||||
|
let (inter, union) = m.partial_jaccard_dist_matrix();
|
||||||
|
let n = m.n_cols();
|
||||||
|
for i in 0..n {
|
||||||
|
for j in i + 1..n {
|
||||||
|
let (ei, eu) = m.col(i).partial_jaccard_dist(m.col(j));
|
||||||
|
assert_eq!(inter[[i, j]], ei, "inter [{i},{j}]");
|
||||||
|
assert_eq!(union[[i, j]], eu, "union [{i},{j}]");
|
||||||
|
assert_eq!(inter[[j, i]], ei, "inter symmetry");
|
||||||
|
assert_eq!(union[[j, i]], eu, "union symmetry");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn partial_hamming_matches_hamming() {
|
||||||
|
let (_d, m) = make_matrix(&[
|
||||||
|
&[true, false, true],
|
||||||
|
&[false, true, true],
|
||||||
|
&[true, true, false],
|
||||||
|
]);
|
||||||
|
let partial = m.partial_hamming_dist_matrix();
|
||||||
|
let full = m.hamming_dist_matrix();
|
||||||
|
assert_eq!(partial, full);
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,6 +2,22 @@ use tempfile::tempdir;
|
|||||||
|
|
||||||
use crate::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder};
|
use crate::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder};
|
||||||
|
|
||||||
|
fn make_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
|
||||||
|
let n = cols.first().map_or(0, |c| c.len());
|
||||||
|
let dir = tempdir().unwrap();
|
||||||
|
let mut b = PersistentCompactIntMatrixBuilder::new(n, dir.path()).unwrap();
|
||||||
|
for &col in cols {
|
||||||
|
let mut cb = b.add_col().unwrap();
|
||||||
|
for (slot, &v) in col.iter().enumerate() {
|
||||||
|
cb.set(slot, v);
|
||||||
|
}
|
||||||
|
cb.close().unwrap();
|
||||||
|
}
|
||||||
|
b.close().unwrap();
|
||||||
|
let m = PersistentCompactIntMatrix::open(dir.path()).unwrap();
|
||||||
|
(dir, m)
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn single_col_roundtrip() {
|
fn single_col_roundtrip() {
|
||||||
let dir = tempdir().unwrap();
|
let dir = tempdir().unwrap();
|
||||||
@@ -66,3 +82,111 @@ fn zero_cols_roundtrip() {
|
|||||||
assert_eq!(m.n_cols(), 0);
|
assert_eq!(m.n_cols(), 0);
|
||||||
assert_eq!(m.n(), 10);
|
assert_eq!(m.n(), 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Distance matrix tests ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bray_dist_matrix_symmetry_and_diagonal() {
|
||||||
|
// col0=[1,0,1], col1=[1,1,0], col2=[0,1,1]
|
||||||
|
let (_d, m) = make_matrix(&[&[1, 0, 1], &[1, 1, 0], &[0, 1, 1]]);
|
||||||
|
let dm = m.bray_dist_matrix();
|
||||||
|
let n = m.n_cols();
|
||||||
|
for i in 0..n { assert_eq!(dm[[i, i]], 0.0, "diagonal"); }
|
||||||
|
for i in 0..n {
|
||||||
|
for j in 0..n {
|
||||||
|
assert!((dm[[i, j]] - dm[[j, i]]).abs() < 1e-12, "symmetry");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn bray_dist_matrix_values_match_pairwise() {
|
||||||
|
let (_d, m) = make_matrix(&[&[1, 0, 1], &[1, 1, 0], &[0, 1, 1]]);
|
||||||
|
let dm = m.bray_dist_matrix();
|
||||||
|
for i in 0..m.n_cols() {
|
||||||
|
for j in 0..m.n_cols() {
|
||||||
|
let expected = m.col(i).bray_dist(m.col(j));
|
||||||
|
assert!((dm[[i, j]] - expected).abs() < 1e-12, "[{i},{j}]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn jaccard_dist_matrix_values_match_pairwise() {
|
||||||
|
let (_d, m) = make_matrix(&[&[1, 0, 2], &[0, 1, 1], &[1, 1, 0]]);
|
||||||
|
let dm = m.jaccard_dist_matrix();
|
||||||
|
for i in 0..m.n_cols() {
|
||||||
|
for j in 0..m.n_cols() {
|
||||||
|
let expected = m.col(i).jaccard_dist(m.col(j));
|
||||||
|
assert!((dm[[i, j]] - expected).abs() < 1e-12, "[{i},{j}]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn partial_bray_dist_matrix_consistent() {
|
||||||
|
let (_d, m) = make_matrix(&[&[1, 0, 1], &[1, 1, 0], &[0, 1, 1]]);
|
||||||
|
let (sum_min, col_sums) = m.partial_bray_dist_matrix();
|
||||||
|
let n = m.n_cols();
|
||||||
|
|
||||||
|
// symmetry of sum_min
|
||||||
|
for i in 0..n {
|
||||||
|
for j in 0..n {
|
||||||
|
assert_eq!(sum_min[[i, j]], sum_min[[j, i]]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// col_sums correct
|
||||||
|
for k in 0..n {
|
||||||
|
assert_eq!(col_sums[k], m.col(k).sum());
|
||||||
|
}
|
||||||
|
|
||||||
|
// reconstruct distance from partials and compare to direct method
|
||||||
|
for i in 0..n {
|
||||||
|
for j in i + 1..n {
|
||||||
|
let denom = col_sums[i] + col_sums[j];
|
||||||
|
let dist = if denom == 0 { 0.0 } else { 1.0 - 2.0 * sum_min[[i, j]] as f64 / denom as f64 };
|
||||||
|
let expected = m.col(i).bray_dist(m.col(j));
|
||||||
|
assert!((dist - expected).abs() < 1e-12, "[{i},{j}]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn partial_euclidean_dist_matrix_consistent() {
|
||||||
|
let (_d, m) = make_matrix(&[&[3, 0], &[0, 4], &[1, 1]]);
|
||||||
|
let sq = m.partial_euclidean_dist_matrix();
|
||||||
|
let n = m.n_cols();
|
||||||
|
for i in 0..n {
|
||||||
|
for j in i + 1..n {
|
||||||
|
let expected = m.col(i).partial_euclidean_dist(m.col(j));
|
||||||
|
assert!((sq[[i, j]] - expected).abs() < 1e-12, "[{i},{j}]");
|
||||||
|
assert!((sq[[j, i]] - expected).abs() < 1e-12, "symmetry [{j},{i}]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn partial_threshold_jaccard_consistent() {
|
||||||
|
let (_d, m) = make_matrix(&[&[3, 0, 2], &[1, 2, 0], &[0, 3, 1]]);
|
||||||
|
let threshold = 2u32;
|
||||||
|
let (inter, union) = m.partial_threshold_jaccard_dist_matrix(threshold);
|
||||||
|
let n = m.n_cols();
|
||||||
|
for i in 0..n {
|
||||||
|
for j in i + 1..n {
|
||||||
|
let (ei, eu) = m.col(i).partial_threshold_jaccard_dist(m.col(j), threshold);
|
||||||
|
assert_eq!(inter[[i, j]], ei);
|
||||||
|
assert_eq!(union[[i, j]], eu);
|
||||||
|
assert_eq!(inter[[j, i]], ei, "symmetry inter");
|
||||||
|
assert_eq!(union[[j, i]], eu, "symmetry union");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn single_col_distance_matrix_is_zero() {
|
||||||
|
let (_d, m) = make_matrix(&[&[1, 2, 3]]);
|
||||||
|
let dm = m.bray_dist_matrix();
|
||||||
|
assert_eq!(dm.shape(), &[1, 1]);
|
||||||
|
assert_eq!(dm[[0, 0]], 0.0);
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user