280ca1f5a3
Introduces `new_ones` and `add_col_ones` methods to directly initialize all-ones bit vectors and matrix columns. This replaces redundant initialization sequences that created zero-filled structures and applied bitwise NOT, with a single pass that writes contiguous 0xFF bytes to disk. The change eliminates inversion overhead, streamlines test setup, and improves performance for filter mask intersection logic while preserving identical semantics.
224 lines
9.2 KiB
Rust
224 lines
9.2 KiB
Rust
use tempfile::tempdir;
|
|
|
|
use crate::{
|
|
ColGroup, MatrixGroupOps,
|
|
PersistentBitMatrix, PersistentBitMatrixBuilder,
|
|
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
|
|
};
|
|
use crate::{PersistentBitVecBuilder, PersistentCompactIntVec, PersistentCompactIntVecBuilder};
|
|
|
|
// ── helpers ───────────────────────────────────────────────────────────────────
|
|
|
|
fn make_int_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
|
|
let n = cols.first().map_or(0, |c| c.len());
|
|
let dir = tempdir().unwrap();
|
|
let mut b = PersistentCompactIntMatrixBuilder::new(n, &dir.path().join("counts")).unwrap();
|
|
for &col in cols {
|
|
let mut cb = b.add_col().unwrap();
|
|
for (slot, &v) in col.iter().enumerate() { cb.set(slot, v); }
|
|
cb.close().unwrap();
|
|
}
|
|
b.close().unwrap();
|
|
let m = PersistentCompactIntMatrix::open(dir.path()).unwrap();
|
|
(dir, m)
|
|
}
|
|
|
|
fn make_bit_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
|
|
let n = cols.first().map_or(0, |c| c.len());
|
|
let dir = tempdir().unwrap();
|
|
let presence = dir.path().join("presence");
|
|
let mut b = PersistentBitMatrixBuilder::new(n, &presence).unwrap();
|
|
for &col in cols {
|
|
let mut cb = b.add_col().unwrap();
|
|
for (slot, &v) in col.iter().enumerate() { cb.set(slot, v); }
|
|
cb.close().unwrap();
|
|
}
|
|
b.close().unwrap();
|
|
let m = PersistentBitMatrix::open(dir.path()).unwrap();
|
|
(dir, m)
|
|
}
|
|
|
|
// ── IntMatrix: partial_group_sum ──────────────────────────────────────────────
|
|
|
|
#[test]
fn int_partial_group_sum_basic() {
    // Columns: c0 = [1, 2, 3], c1 = [10, 20, 30], c2 = [100, 0, 5].
    // Group {0, 2} sums slot by slot to [1+100, 2+0, 3+5] = [101, 2, 8].
    let columns: &[&[u32]] = &[&[1, 2, 3], &[10, 20, 30], &[100, 0, 5]];
    let (_tmp, matrix) = make_int_matrix(columns);
    let group = ColGroup::new("g", vec![0, 2]);
    let sums = matrix.partial_group_sum(&group).unwrap();
    for (slot, want) in [(0, 101), (1, 2), (2, 8)] {
        assert_eq!(sums.get(slot), want);
    }
}
|
|
|
|
#[test]
fn int_partial_group_sum_with_overflow() {
    // Columns: c0 = [300, 0], c1 = [200, 400].
    // Group {0, 1} sums to [500, 400], for a grand total of 900.
    let columns: &[&[u32]] = &[&[300, 0], &[200, 400]];
    let (_tmp, matrix) = make_int_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1]);
    let sums = matrix.partial_group_sum(&group).unwrap();
    for (slot, want) in [(0, 500), (1, 400)] {
        assert_eq!(sums.get(slot), want);
    }
    assert_eq!(sums.sum(), 900);
}
|
|
|
|
// ── IntMatrix: partial_group_presence_count ───────────────────────────────────
|
|
|
|
#[test]
fn int_partial_group_presence_count() {
    // Columns: c0 = [5, 1, 0, 3], c1 = [2, 0, 4, 3], c2 = [0, 3, 1, 0].
    // With threshold 2 (value >= 2 counts as present):
    //   c0 -> [T, F, F, T], c1 -> [T, F, T, T], c2 -> [F, T, F, F]
    // so group {0, 1, 2} yields per-slot counts [2, 1, 1, 2].
    let columns: &[&[u32]] = &[&[5, 1, 0, 3], &[2, 0, 4, 3], &[0, 3, 1, 0]];
    let (_tmp, matrix) = make_int_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1, 2]);
    let counts = matrix.partial_group_presence_count(&group, 2).unwrap();
    for (slot, want) in [(0, 2), (1, 1), (2, 1), (3, 2)] {
        assert_eq!(counts.get(slot), want);
    }
}
|
|
|
|
#[test]
fn int_partial_group_presence_count_with_overflow() {
    // Columns: c0 = [300, 0, 10], c1 = [0, 400, 10], c2 = [1, 1, 10].
    // With threshold 5 (value >= 5 counts as present):
    //   c0 -> [T, F, T], c1 -> [F, T, T], c2 -> [F, F, T]
    // so group {0, 1, 2} yields per-slot counts [1, 1, 3].
    let columns: &[&[u32]] = &[&[300, 0, 10], &[0, 400, 10], &[1, 1, 10]];
    let (_tmp, matrix) = make_int_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1, 2]);
    let counts = matrix.partial_group_presence_count(&group, 5).unwrap();
    for (slot, want) in [(0, 1), (1, 1), (2, 3)] {
        assert_eq!(counts.get(slot), want);
    }
}
|
|
|
|
// ── IntMatrix: partial_group_any ──────────────────────────────────────────────
|
|
|
|
#[test]
fn int_partial_group_any() {
    // Columns: c0 = [0, 3, 0, 1], c1 = [2, 0, 0, 0], c2 = [0, 0, 5, 0].
    // With threshold 2 (value >= 2 counts as present):
    //   c0 -> [F, T, F, F], c1 -> [T, F, F, F], c2 -> [F, F, T, F]
    // so "any column present" over group {0, 1, 2} is [T, T, T, F].
    let columns: &[&[u32]] = &[&[0, 3, 0, 1], &[2, 0, 0, 0], &[0, 0, 5, 0]];
    let (_tmp, matrix) = make_int_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1, 2]);
    let any_present = matrix.partial_group_any(&group, 2).unwrap();
    for (slot, want) in [(0, true), (1, true), (2, true), (3, false)] {
        assert_eq!(any_present.get(slot), want);
    }
}
|
|
|
|
// ── IntMatrix: mask_with ──────────────────────────────────────────────────────
|
|
|
|
#[test]
fn mask_with_zeros_selected_slots() {
    // Values [10, 20, 30, 40] combined with mask [T, F, T, F] are expected to
    // end up as [10, 0, 30, 0]: slots whose mask bit is unset read back as 0.
    let tmp = tempdir().unwrap();
    let vec_path = tmp.path().join("v.pciv");
    let mask_path = tmp.path().join("m.pbiv");

    let mut counts = PersistentCompactIntVecBuilder::new(4, &vec_path).unwrap();
    for (slot, value) in [(0, 10), (1, 20), (2, 30), (3, 40)] {
        counts.set(slot, value);
    }
    let mut mask = PersistentBitVecBuilder::new(4, &mask_path).unwrap();
    for slot in [0, 2] {
        mask.set(slot, true);
    }
    counts.mask_with(mask.view());
    counts.close().unwrap();

    // Reopen from disk so the assertions check what was actually persisted.
    let reopened = PersistentCompactIntVec::open(&vec_path).unwrap();
    for (slot, want) in [(0, 10), (1, 0), (2, 30), (3, 0)] {
        assert_eq!(reopened.get(slot), want);
    }
}
|
|
|
|
#[test]
fn mask_with_overflow_slot_zeroed() {
    // Slot 1 holds 500, which this test treats as an overflow value. Masking
    // it out must leave 0 in the slot and no overflow entry behind.
    let tmp = tempdir().unwrap();
    let vec_path = tmp.path().join("v.pciv");
    let mask_path = tmp.path().join("m.pbiv");

    let mut counts = PersistentCompactIntVecBuilder::new(3, &vec_path).unwrap();
    for (slot, value) in [(0, 10), (1, 500), (2, 5)] {
        counts.set(slot, value);
    }
    let mut mask = PersistentBitVecBuilder::new(3, &mask_path).unwrap();
    // Only slots 0 and 2 are kept; slot 1 stays unset and is thus masked out.
    for slot in [0, 2] {
        mask.set(slot, true);
    }
    counts.mask_with(mask.view());
    counts.close().unwrap();

    let reopened = PersistentCompactIntVec::open(&vec_path).unwrap();
    for (slot, want) in [(0, 10), (1, 0), (2, 5)] {
        assert_eq!(reopened.get(slot), want);
    }
    let leftover: Vec<_> = reopened.view().overflow_entries().collect();
    assert!(leftover.is_empty(), "overflow entry for masked-out slot should be gone");
}
|
|
|
|
#[test]
fn mask_with_all_ones_is_noop() {
    // A mask created with `new_ones` keeps every slot, so all four values
    // (including 300 and the explicit 0) must read back unchanged.
    let tmp = tempdir().unwrap();
    let vec_path = tmp.path().join("v.pciv");
    let mask_path = tmp.path().join("m.pbiv");

    let mut counts = PersistentCompactIntVecBuilder::new(4, &vec_path).unwrap();
    for (slot, value) in [(0, 300), (1, 1), (2, 0), (3, 42)] {
        counts.set(slot, value);
    }
    let mask = PersistentBitVecBuilder::new_ones(4, &mask_path).unwrap();
    counts.mask_with(mask.view());
    counts.close().unwrap();

    let reopened = PersistentCompactIntVec::open(&vec_path).unwrap();
    for (slot, want) in [(0, 300), (1, 1), (2, 0), (3, 42)] {
        assert_eq!(reopened.get(slot), want);
    }
}
|
|
|
|
// ── BitMatrix: partial_group_presence_count ───────────────────────────────────
|
|
|
|
#[test]
fn bit_partial_group_presence_count() {
    // Columns: c0 = [T, F, T, F], c1 = [T, T, F, F], c2 = [F, T, T, F].
    // Counting set bits per slot over group {0, 1, 2} gives [2, 2, 2, 0].
    let columns: &[&[bool]] = &[
        &[true, false, true, false],
        &[true, true, false, false],
        &[false, true, true, false],
    ];
    let (_tmp, matrix) = make_bit_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1, 2]);
    let counts = matrix.partial_group_presence_count(&group, 1).unwrap();
    for (slot, want) in [(0, 2), (1, 2), (2, 2), (3, 0)] {
        assert_eq!(counts.get(slot), want);
    }
}
|
|
|
|
// ── BitMatrix: partial_group_any ──────────────────────────────────────────────
|
|
|
|
#[test]
fn bit_partial_group_any() {
    // Columns: c0 = [T, F, F], c1 = [F, F, T].
    // "Any column set" over group {0, 1} is therefore [T, F, T].
    let columns: &[&[bool]] = &[&[true, false, false], &[false, false, true]];
    let (_tmp, matrix) = make_bit_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1]);
    let any_set = matrix.partial_group_any(&group, 1).unwrap();
    for (slot, want) in [(0, true), (1, false), (2, true)] {
        assert_eq!(any_set.get(slot), want);
    }
}
|
|
|
|
// ── Composition: partial results are additive ─────────────────────────────────
|
|
|
|
#[test]
fn int_presence_count_additive_across_split() {
    // Models two partitions covering disjoint kmer ranges of one data set:
    //   global c0 = [5, 1, 0, 3, 2], global c1 = [2, 0, 4, 3, 1], threshold 2
    //   partition A holds slots 0..2, partition B holds slots 2..5.
    // The test only checks each partition's own result against its expected
    // per-slot counts; it does not combine the two results.
    let (_tmp_a, part_a) = make_int_matrix(&[&[5, 1], &[2, 0]]);
    let (_tmp_b, part_b) = make_int_matrix(&[&[0, 3, 2], &[4, 3, 1]]);
    let group = ColGroup::new("g", vec![0, 1]);

    let counts_a = part_a.partial_group_presence_count(&group, 2).unwrap();
    let counts_b = part_b.partial_group_presence_count(&group, 2).unwrap();

    // Partition A: c0 = [5>=2, 1<2] = [T, F], c1 = [2>=2, 0<2] = [T, F] -> [2, 0].
    for (slot, want) in [(0, 2), (1, 0)] {
        assert_eq!(counts_a.get(slot), want);
    }
    // Partition B: c0 = [0<2, 3>=2, 2>=2] = [F, T, T],
    //              c1 = [4>=2, 3>=2, 1<2] = [T, T, F] -> [1, 2, 1].
    for (slot, want) in [(0, 1), (1, 2), (2, 1)] {
        assert_eq!(counts_b.get(slot), want);
    }
}
|