f91c5a3f79
Refactors column and matrix access to use unified `BitSliceView` and `IntSliceView` abstractions, replacing legacy `PackedCol`/`IntColView` types. Introduces `BitSlice`/`IntSlice` traits for zero-copy, trait-based bitwise and arithmetic operations across persistent and temporary vector types. Removes deprecated in-memory `MemoryBitVec` and `MemoryIntVec` implementations and their tests, while updating dependent crates to use the new view-based API and `BitSliceMut` trait.
260 lines
8.0 KiB
Rust
260 lines
8.0 KiB
Rust
use tempfile::tempdir;
|
|
|
|
use crate::{pack_bit_matrix, PersistentBitMatrix, PersistentBitMatrixBuilder};
|
|
use crate::traits::BitPartials;
|
|
|
|
fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
|
|
let n = cols.first().map_or(0, |c| c.len());
|
|
let dir = tempdir().unwrap();
|
|
let presence = dir.path().join("presence");
|
|
let mut b = PersistentBitMatrixBuilder::new(n, &presence).unwrap();
|
|
for &col in cols {
|
|
let mut cb = b.add_col().unwrap();
|
|
for (slot, &v) in col.iter().enumerate() {
|
|
cb.set(slot, v);
|
|
}
|
|
cb.close().unwrap();
|
|
}
|
|
b.close().unwrap();
|
|
let m = PersistentBitMatrix::open(dir.path()).unwrap();
|
|
(dir, m)
|
|
}
|
|
|
|
/// Writes one four-slot column through the builder, reopens the matrix, and
/// checks the reported dimensions and the single-element content of each row.
#[test]
fn single_col_roundtrip() {
    let tmp = tempdir().unwrap();
    let presence_path = tmp.path().join("presence");

    let mut builder = PersistentBitMatrixBuilder::new(4, &presence_path).unwrap();
    let mut writer = builder.add_col().unwrap();
    for (slot, bit) in [true, false, true, true].iter().copied().enumerate() {
        writer.set(slot, bit);
    }
    writer.close().unwrap();
    builder.close().unwrap();

    let matrix = PersistentBitMatrix::open(tmp.path()).unwrap();
    assert_eq!(matrix.n_cols(), 1);
    assert_eq!(matrix.n(), 4);
    // With a single column, every row is a one-element slice holding that column's bit.
    for (row, expected) in [(0, true), (1, false), (2, true), (3, true)] {
        assert_eq!(&*matrix.row(row), &[expected]);
    }
}
|
|
|
|
/// Writes two three-slot columns, reopens the matrix, and checks that each row
/// reads back as `[col0[row], col1[row]]`.
#[test]
fn two_cols_roundtrip() {
    let tmp = tempdir().unwrap();
    let presence_path = tmp.path().join("presence");
    let mut builder = PersistentBitMatrixBuilder::new(3, &presence_path).unwrap();

    // Columns are written in order; each writer is closed before the next one is requested.
    for bits in [[true, false, true], [false, true, false]] {
        let mut writer = builder.add_col().unwrap();
        for (slot, bit) in bits.iter().copied().enumerate() {
            writer.set(slot, bit);
        }
        writer.close().unwrap();
    }
    builder.close().unwrap();

    let matrix = PersistentBitMatrix::open(tmp.path()).unwrap();
    assert_eq!(matrix.n_cols(), 2);
    for (row, expected) in [
        (0, [true, false]),
        (1, [false, true]),
        (2, [true, false]),
    ] {
        assert_eq!(&*matrix.row(row), &expected);
    }
}
|
|
|
|
/// Reads a single stored column back through `col(0)` and checks each slot
/// with `get`.
#[test]
fn col_accessor() {
    // `make_matrix` runs the same builder sequence (new → add_col → set… → close → open)
    // for a single three-slot column.
    let (_tmp, matrix) = make_matrix(&[&[true, false, true]]);

    assert!(matrix.col(0).get(0));
    assert!(!matrix.col(0).get(1));
    assert!(matrix.col(0).get(2));
}
|
|
|
|
/// Closes a builder without adding any column and checks that the reopened
/// matrix reports zero columns while keeping the row count of 8 given at
/// construction.
#[test]
fn zero_cols_roundtrip() {
    let tmp = tempdir().unwrap();
    let presence_path = tmp.path().join("presence");

    // No `add_col` call: the builder is closed straight after creation.
    PersistentBitMatrixBuilder::new(8, &presence_path)
        .unwrap()
        .close()
        .unwrap();

    let matrix = PersistentBitMatrix::open(tmp.path()).unwrap();
    assert_eq!(matrix.n_cols(), 0);
    assert_eq!(matrix.n(), 8);
}
|
|
|
|
// ── count_ones ────────────────────────────────────────────────────────────────
|
|
|
|
/// Checks that `count_ones` on the matrix yields one count per column, equal
/// to the number of `true` slots written to that column.
#[test]
fn count_ones_per_column() {
    let columns: &[&[bool]] = &[
        &[true, false, true, true],     // three set bits
        &[false, false, false, false],  // none
        &[true, true, true, false],     // three set bits
    ];
    let (_tmp, matrix) = make_matrix(columns);

    let counts = matrix.count_ones();
    assert_eq!([counts[0], counts[1], counts[2]], [3, 0, 3]);
}
|
|
|
|
// ── Distance matrix tests ─────────────────────────────────────────────────────
|
|
|
|
/// Checks two structural properties of `jaccard_dist_matrix` on a 3-column
/// matrix: every diagonal entry is exactly `0.0`, and `[i, j]` equals `[j, i]`
/// to within `1e-12`.
#[test]
fn jaccard_dist_matrix_symmetry_and_diagonal() {
    let (_tmp, matrix) = make_matrix(&[
        &[true, false, true],
        &[true, true, false],
        &[false, true, true],
    ]);
    let dist = matrix.jaccard_dist_matrix();
    let width = matrix.n_cols();

    // Diagonal first, then every ordered pair (including i == j).
    for k in 0..width {
        assert_eq!(dist[[k, k]], 0.0, "diagonal");
    }
    let ordered_pairs = (0..width).flat_map(|i| (0..width).map(move |j| (i, j)));
    for (i, j) in ordered_pairs {
        let gap = (dist[[i, j]] - dist[[j, i]]).abs();
        assert!(gap < 1e-12, "symmetry [{i},{j}]");
    }
}
|
|
|
|
/// Checks every entry of `jaccard_dist_matrix` against the column-level
/// `jaccard_dist` of the same column pair, within `1e-12`.
#[test]
fn jaccard_dist_matrix_values_match_pairwise() {
    let (_tmp, matrix) = make_matrix(&[
        &[true, false, true],
        &[true, true, false],
        &[false, true, true],
    ]);
    let dist = matrix.jaccard_dist_matrix();
    let width = matrix.n_cols();

    for i in 0..width {
        for j in 0..width {
            let pairwise = matrix.col(i).jaccard_dist(matrix.col(j));
            let gap = (dist[[i, j]] - pairwise).abs();
            assert!(gap < 1e-12, "[{i},{j}]");
        }
    }
}
|
|
|
|
/// Checks two structural properties of `hamming_dist_matrix` on a 3-column
/// matrix: every diagonal entry is `0`, and `[i, j]` equals `[j, i]`.
#[test]
fn hamming_dist_matrix_symmetry_and_diagonal() {
    let (_tmp, matrix) = make_matrix(&[
        &[true, false, true],
        &[true, true, false],
        &[false, true, true],
    ]);
    let dist = matrix.hamming_dist_matrix();
    let width = matrix.n_cols();

    // Diagonal first, then every ordered pair (including i == j).
    for k in 0..width {
        assert_eq!(dist[[k, k]], 0, "diagonal");
    }
    let ordered_pairs = (0..width).flat_map(|i| (0..width).map(move |j| (i, j)));
    for (i, j) in ordered_pairs {
        assert_eq!(dist[[i, j]], dist[[j, i]], "symmetry [{i},{j}]");
    }
}
|
|
|
|
/// Checks every entry of `hamming_dist_matrix` against the column-level
/// `hamming_dist` of the same column pair.
#[test]
fn hamming_dist_matrix_values_match_pairwise() {
    let (_tmp, matrix) = make_matrix(&[
        &[true, false, true],
        &[true, true, false],
        &[false, true, true],
    ]);
    let dist = matrix.hamming_dist_matrix();
    let width = matrix.n_cols();

    for i in 0..width {
        for j in 0..width {
            let pairwise = matrix.col(i).hamming_dist(matrix.col(j));
            assert_eq!(dist[[i, j]], pairwise, "[{i},{j}]");
        }
    }
}
|
|
|
|
/// Compares the two matrices returned by `partial_jaccard_dist_matrix` with
/// the pair returned by the column-level `partial_jaccard_dist`.
///
/// Only pairs with `i < j` are computed column-wise; each expected value is
/// then checked at both `[i, j]` and the mirrored `[j, i]` position. Diagonal
/// entries are not examined.
#[test]
fn partial_jaccard_consistent() {
    let (_tmp, matrix) = make_matrix(&[
        &[true, false, true, true],
        &[true, true, false, true],
        &[false, true, true, false],
    ]);
    let (inter, union) = matrix.partial_jaccard_dist_matrix();
    let width = matrix.n_cols();

    // Strict upper triangle.
    let upper_pairs = (0..width).flat_map(|i| (i + 1..width).map(move |j| (i, j)));
    for (i, j) in upper_pairs {
        let (expected_inter, expected_union) =
            matrix.col(i).partial_jaccard_dist(matrix.col(j));
        assert_eq!(inter[[i, j]], expected_inter, "inter [{i},{j}]");
        assert_eq!(union[[i, j]], expected_union, "union [{i},{j}]");
        assert_eq!(inter[[j, i]], expected_inter, "inter symmetry");
        assert_eq!(union[[j, i]], expected_union, "union symmetry");
    }
}
|
|
|
|
/// Checks that `partial_hamming_dist_matrix` and `hamming_dist_matrix` return
/// equal results for the same 3-column matrix.
#[test]
fn partial_hamming_matches_hamming() {
    let columns: &[&[bool]] = &[
        &[true, false, true],
        &[false, true, true],
        &[true, true, false],
    ];
    let (_tmp, matrix) = make_matrix(columns);

    // Left operand is evaluated first, so the partial matrix is still computed before the full one.
    assert_eq!(
        matrix.partial_hamming_dist_matrix(),
        matrix.hamming_dist_matrix()
    );
}
|
|
|
|
// ── col_view on Packed ────────────────────────────────────────────────────────
|
|
|
|
/// Builds a 4×2 matrix, runs `pack_bit_matrix` on its `presence` path,
/// reopens it, and checks that `col_view` reports the written length, bits,
/// and `count_ones` for both columns.
#[test]
fn col_view_packed_values() {
    // The `_` pattern does not bind, so the matrix opened by `make_matrix` is
    // dropped at the end of this statement, before the files are packed.
    let (dir, _) = make_matrix(&[
        &[true, false, true, true],
        &[false, true, false, true],
    ]);
    pack_bit_matrix(&dir.path().join("presence")).unwrap();
    let m = PersistentBitMatrix::open(dir.path()).unwrap();

    // col 0: [T, F, T, T]
    let v0 = m.col_view(0);
    assert_eq!(v0.len(), 4);
    assert!(v0.get(0));
    assert!(!v0.get(1));
    assert!(v0.get(2));
    assert!(v0.get(3));
    assert_eq!(v0.count_ones(), 3);

    // col 1: [F, T, F, T]
    let v1 = m.col_view(1);
    // Same length check as column 0, so both views are validated alike.
    assert_eq!(v1.len(), 4);
    assert!(!v1.get(0));
    assert!(v1.get(1));
    assert!(!v1.get(2));
    assert!(v1.get(3));
    assert_eq!(v1.count_ones(), 2);
}
|
|
|
|
/// Builds the same 8×3 data twice, packs one copy with `pack_bit_matrix`, and
/// checks that the packed matrix's `col_view` agrees with the unpacked
/// matrix's `col` for every column: length, each slot, `count_ones`, and
/// `words`.
#[test]
fn col_view_packed_matches_columnar() {
    let data: &[&[bool]] = &[
        &[true, false, true, false, true, true, false, true],
        &[false, false, true, true, false, true, true, false],
        &[true, true, true, false, false, false, true, true],
    ];

    // `_dir_col` is held only to keep the unpacked copy's directory alive. It
    // is declared before `m_col`, so at scope exit the matrix is dropped
    // first and the directory is removed afterwards (the same arrangement as
    // the `(_d, m)` bindings used by the other tests), rather than deleting
    // the directory while the matrix is still open.
    let (_dir_col, m_col) = make_matrix(data);

    // The `_` pattern does not bind, so this second matrix is dropped right
    // away, before its files are packed and reopened.
    let (dir_pack, _) = make_matrix(data);
    pack_bit_matrix(&dir_pack.path().join("presence")).unwrap();
    let m_pack = PersistentBitMatrix::open(dir_pack.path()).unwrap();

    for c in 0..data.len() {
        let col_ref = m_col.col(c);
        let col_view = m_pack.col_view(c);
        assert_eq!(col_view.len(), col_ref.len(), "col={c} len");
        for s in 0..col_ref.len() {
            assert_eq!(col_view.get(s), col_ref.get(s), "col={c} slot={s}");
        }
        assert_eq!(col_view.count_ones(), col_ref.count_ones(), "col={c} count_ones");
        assert_eq!(col_view.words(), col_ref.words(), "col={c} words");
    }
}
|