Files
obikmer/src/obicompactvec/src/tests/colgroup.rs
T
Eric Coissac 280ca1f5a3 feat: add optimized new_ones constructor for all-ones bit vectors
Introduces `new_ones` and `add_col_ones` methods to directly initialize all-ones bit vectors and matrix columns. This replaces redundant initialization sequences that created zero-filled structures and applied bitwise NOT, with a single pass that writes contiguous 0xFF bytes to disk. The change eliminates inversion overhead, streamlines test setup, and improves performance for filter mask intersection logic while preserving identical semantics.
2026-06-22 10:00:01 +02:00

224 lines
9.2 KiB
Rust

use tempfile::tempdir;
use crate::{
ColGroup, MatrixGroupOps,
PersistentBitMatrix, PersistentBitMatrixBuilder,
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
};
use crate::{PersistentBitVecBuilder, PersistentCompactIntVec, PersistentCompactIntVecBuilder};
// ── helpers ───────────────────────────────────────────────────────────────────
/// Builds an integer matrix on disk from `cols` (one slice per column) and reopens it.
///
/// The slot count is the length of the first column, or 0 when `cols` is empty.
/// The builder writes under `<tmp>/counts`; the matrix is then opened from `<tmp>`.
/// The `TempDir` is returned with the matrix so the caller decides how long the backing
/// directory lives (a `tempfile::TempDir` removes its directory when dropped).
///
/// Panics if any builder/open step fails — acceptable for a test helper.
fn make_int_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
    let tmp = tempdir().unwrap();
    let n_slots = match cols.first() {
        Some(first) => first.len(),
        None => 0,
    };
    let counts_path = tmp.path().join("counts");
    let mut builder = PersistentCompactIntMatrixBuilder::new(n_slots, &counts_path).unwrap();
    for column in cols.iter() {
        // One column builder per input slice; it is closed before the next one is requested.
        let mut col_builder = builder.add_col().unwrap();
        for (slot, value) in column.iter().copied().enumerate() {
            col_builder.set(slot, value);
        }
        col_builder.close().unwrap();
    }
    builder.close().unwrap();
    let matrix = PersistentCompactIntMatrix::open(tmp.path()).unwrap();
    (tmp, matrix)
}
/// Builds a bit matrix on disk from `cols` (one slice of booleans per column) and reopens it.
///
/// The slot count is the length of the first column, or 0 when `cols` is empty.
/// The builder writes under `<tmp>/presence`; the matrix is then opened from `<tmp>`.
/// The `TempDir` is returned with the matrix so the caller decides how long the backing
/// directory lives (a `tempfile::TempDir` removes its directory when dropped).
///
/// Panics if any builder/open step fails — acceptable for a test helper.
fn make_bit_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
    let tmp = tempdir().unwrap();
    let n_slots = match cols.first() {
        Some(first) => first.len(),
        None => 0,
    };
    let presence_path = tmp.path().join("presence");
    let mut builder = PersistentBitMatrixBuilder::new(n_slots, &presence_path).unwrap();
    for column in cols.iter() {
        // One column builder per input slice; it is closed before the next one is requested.
        let mut col_builder = builder.add_col().unwrap();
        for (slot, bit) in column.iter().copied().enumerate() {
            col_builder.set(slot, bit);
        }
        col_builder.close().unwrap();
    }
    builder.close().unwrap();
    let matrix = PersistentBitMatrix::open(tmp.path()).unwrap();
    (tmp, matrix)
}
// ── IntMatrix: partial_group_sum ──────────────────────────────────────────────
/// `partial_group_sum` over columns {0, 2} adds those two columns slot by slot.
#[test]
fn int_partial_group_sum_basic() {
    // Matrix layout (one row per column):
    //   col0 = [  1,  2,  3]
    //   col1 = [ 10, 20, 30]   <- not part of the group
    //   col2 = [100,  0,  5]
    // Expected sum over {0, 2}: [101, 2, 8]
    let columns: &[&[u32]] = &[&[1, 2, 3], &[10, 20, 30], &[100, 0, 5]];
    let (_tmp, matrix) = make_int_matrix(columns);
    let group = ColGroup::new("g", vec![0, 2]);

    let sums = matrix.partial_group_sum(&group).unwrap();

    let observed = [sums.get(0), sums.get(1), sums.get(2)];
    assert_eq!(observed, [101, 2, 8]);
}
/// `partial_group_sum` stays correct with the larger values this suite calls "overflow".
#[test]
fn int_partial_group_sum_with_overflow() {
    // Matrix layout:
    //   col0 = [300,   0]
    //   col1 = [200, 400]
    // Expected sum over {0, 1}: [500, 400], grand total 900.
    // NOTE(review): presumably 200/300/400 exceed the compact primary storage and go
    // through the overflow path — confirm against the vector implementation.
    let columns: &[&[u32]] = &[&[300, 0], &[200, 400]];
    let (_tmp, matrix) = make_int_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1]);

    let sums = matrix.partial_group_sum(&group).unwrap();

    let observed = [sums.get(0), sums.get(1)];
    assert_eq!(observed, [500, 400]);
    assert_eq!(sums.sum(), 900);
}
// ── IntMatrix: partial_group_presence_count ───────────────────────────────────
/// `partial_group_presence_count` reports, per slot, how many group columns reach the threshold.
#[test]
fn int_partial_group_presence_count() {
    // Matrix layout and presence at threshold = 2 (value >= 2 counts as present):
    //   col0 = [5, 1, 0, 3] -> [T, F, F, T]
    //   col1 = [2, 0, 4, 3] -> [T, F, T, T]
    //   col2 = [0, 3, 1, 0] -> [F, T, F, F]
    // Expected counts over {0, 1, 2}: [2, 1, 1, 2]
    let columns: &[&[u32]] = &[&[5, 1, 0, 3], &[2, 0, 4, 3], &[0, 3, 1, 0]];
    let (_tmp, matrix) = make_int_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1, 2]);

    let counts = matrix.partial_group_presence_count(&group, 2).unwrap();

    let observed = [counts.get(0), counts.get(1), counts.get(2), counts.get(3)];
    assert_eq!(observed, [2, 1, 1, 2]);
}
/// Presence counting also works when some values are in the "overflow" range.
#[test]
fn int_partial_group_presence_count_with_overflow() {
    // Matrix layout and presence at threshold = 5:
    //   col0 = [300,   0, 10] -> [T, F, T]
    //   col1 = [  0, 400, 10] -> [F, T, T]
    //   col2 = [  1,   1, 10] -> [F, F, T]
    // Expected counts over {0, 1, 2}: [1, 1, 3]
    let columns: &[&[u32]] = &[&[300, 0, 10], &[0, 400, 10], &[1, 1, 10]];
    let (_tmp, matrix) = make_int_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1, 2]);

    let counts = matrix.partial_group_presence_count(&group, 5).unwrap();

    let observed = [counts.get(0), counts.get(1), counts.get(2)];
    assert_eq!(observed, [1, 1, 3]);
}
// ── IntMatrix: partial_group_any ──────────────────────────────────────────────
/// `partial_group_any` flags a slot when at least one group column reaches the threshold.
#[test]
fn int_partial_group_any() {
    // Matrix layout and presence at threshold = 2:
    //   col0 = [0, 3, 0, 1] -> [F, T, F, F]
    //   col1 = [2, 0, 0, 0] -> [T, F, F, F]
    //   col2 = [0, 0, 5, 0] -> [F, F, T, F]
    // Expected "any" over {0, 1, 2}: [T, T, T, F]
    let (_d, m) = make_int_matrix(&[&[0, 3, 0, 1], &[2, 0, 0, 0], &[0, 0, 5, 0]]);
    let g = ColGroup::new("g", vec![0, 1, 2]);
    let result = m.partial_group_any(&g, 2).unwrap();

    // Plain `assert!` instead of `assert_eq!(.., true/false)` (Clippy: bool_assert_comparison);
    // the messages identify the failing slot.
    assert!(result.get(0), "slot 0: col1 holds 2, which reaches the threshold");
    assert!(result.get(1), "slot 1: col0 holds 3, which reaches the threshold");
    assert!(result.get(2), "slot 2: col2 holds 5, which reaches the threshold");
    assert!(!result.get(3), "slot 3: the largest value is 1, below the threshold");
}
// ── IntMatrix: mask_with ──────────────────────────────────────────────────────
/// `mask_with` keeps the slots whose mask bit is set and zeroes the others.
#[test]
fn mask_with_zeros_selected_slots() {
    // counts [10, 20, 30, 40] with mask [T, F, T, F] -> [10, 0, 30, 0]
    let tmp = tempdir().unwrap();
    let vec_path = tmp.path().join("v.pciv");
    let mask_path = tmp.path().join("m.pbiv");

    let mut counts = PersistentCompactIntVecBuilder::new(4, &vec_path).unwrap();
    for (slot, value) in [(0, 10), (1, 20), (2, 30), (3, 40)] {
        counts.set(slot, value);
    }

    // Only slots 0 and 2 are switched on in the mask; the other bits are never set here.
    let mut keep = PersistentBitVecBuilder::new(4, &mask_path).unwrap();
    for slot in [0, 2] {
        keep.set(slot, true);
    }

    counts.mask_with(keep.view());
    counts.close().unwrap();

    // Reopen from disk so the assertions see the persisted state.
    let reopened = PersistentCompactIntVec::open(&vec_path).unwrap();
    let observed = [reopened.get(0), reopened.get(1), reopened.get(2), reopened.get(3)];
    assert_eq!(observed, [10, 0, 30, 0]);
}
/// Masking out a slot that holds an "overflow" value reads back as 0 and leaves no
/// overflow entry behind.
#[test]
fn mask_with_overflow_slot_zeroed() {
    // counts [10, 500, 5] with mask [T, F, T]: slot 1 (value 500) is masked out.
    let tmp = tempdir().unwrap();
    let vec_path = tmp.path().join("v.pciv");
    let mask_path = tmp.path().join("m.pbiv");

    let mut counts = PersistentCompactIntVecBuilder::new(3, &vec_path).unwrap();
    for (slot, value) in [(0, 10), (1, 500), (2, 5)] {
        counts.set(slot, value);
    }

    // Slot 1 is deliberately left out of the mask.
    let mut keep = PersistentBitVecBuilder::new(3, &mask_path).unwrap();
    for slot in [0, 2] {
        keep.set(slot, true);
    }

    counts.mask_with(keep.view());
    counts.close().unwrap();

    let reopened = PersistentCompactIntVec::open(&vec_path).unwrap();
    let observed = [reopened.get(0), reopened.get(1), reopened.get(2)];
    assert_eq!(observed, [10, 0, 5]);

    // 500 was the only value expected to need an overflow entry, so none should remain.
    let leftover = reopened.view().overflow_entries().count();
    assert!(leftover == 0, "overflow entry for masked-out slot should be gone");
}
/// Masking with a vector built by `new_ones` leaves every value unchanged.
#[test]
fn mask_with_all_ones_is_noop() {
    // counts [300, 1, 0, 42] with an all-ones mask -> unchanged
    let tmp = tempdir().unwrap();
    let vec_path = tmp.path().join("v.pciv");
    let mask_path = tmp.path().join("m.pbiv");

    let mut counts = PersistentCompactIntVecBuilder::new(4, &vec_path).unwrap();
    for (slot, value) in [(0, 300), (1, 1), (2, 0), (3, 42)] {
        counts.set(slot, value);
    }

    let keep_all = PersistentBitVecBuilder::new_ones(4, &mask_path).unwrap();
    counts.mask_with(keep_all.view());
    counts.close().unwrap();

    let reopened = PersistentCompactIntVec::open(&vec_path).unwrap();
    let observed = [reopened.get(0), reopened.get(1), reopened.get(2), reopened.get(3)];
    assert_eq!(observed, [300, 1, 0, 42]);
}
// ── BitMatrix: partial_group_presence_count ───────────────────────────────────
/// On a bit matrix, `partial_group_presence_count` counts the set bits per slot across the group.
#[test]
fn bit_partial_group_presence_count() {
    // Matrix layout:
    //   col0 = [T, F, T, F]
    //   col1 = [T, T, F, F]
    //   col2 = [F, T, T, F]
    // Expected counts over {0, 1, 2}: [2, 2, 2, 0]
    let columns: &[&[bool]] = &[
        &[true, false, true, false],
        &[true, true, false, false],
        &[false, true, true, false],
    ];
    let (_tmp, matrix) = make_bit_matrix(columns);
    let group = ColGroup::new("g", vec![0, 1, 2]);

    // The second argument is passed as 1, as in the original test.
    let counts = matrix.partial_group_presence_count(&group, 1).unwrap();

    let observed = [counts.get(0), counts.get(1), counts.get(2), counts.get(3)];
    assert_eq!(observed, [2, 2, 2, 0]);
}
// ── BitMatrix: partial_group_any ──────────────────────────────────────────────
/// On a bit matrix, `partial_group_any` flags a slot when any group column has its bit set.
#[test]
fn bit_partial_group_any() {
    // Matrix layout:
    //   col0 = [T, F, F]
    //   col1 = [F, F, T]
    // Expected "any" over {0, 1}: [T, F, T]
    let (_d, m) = make_bit_matrix(&[
        &[true, false, false],
        &[false, false, true],
    ]);
    let g = ColGroup::new("g", vec![0, 1]);
    let result = m.partial_group_any(&g, 1).unwrap();

    // Plain `assert!` instead of `assert_eq!(.., true/false)` (Clippy: bool_assert_comparison);
    // the messages identify the failing slot.
    assert!(result.get(0), "slot 0: col0 has its bit set");
    assert!(!result.get(1), "slot 1: no column in the group has its bit set");
    assert!(result.get(2), "slot 2: col1 has its bit set");
}
// ── Composition: partial results are additive ─────────────────────────────────
/// Presence counts computed on two disjoint slot ranges must match, slot for slot, the
/// counts computed on the unsplit matrix.
#[test]
fn int_presence_count_additive_across_split() {
    // Global data (threshold = 2, value >= 2 counts as present):
    //   col0 = [5, 1, 0, 3, 2]
    //   col1 = [2, 0, 4, 3, 1]
    // Partition A holds slots 0..2, partition B holds slots 2..5.
    let data_a: &[&[u32]] = &[&[5, 1], &[2, 0]];
    let data_b: &[&[u32]] = &[&[0, 3, 2], &[4, 3, 1]];
    let data_all: &[&[u32]] = &[&[5, 1, 0, 3, 2], &[2, 0, 4, 3, 1]];
    let (_da, ma) = make_int_matrix(data_a);
    let (_db, mb) = make_int_matrix(data_b);
    let (_dall, mall) = make_int_matrix(data_all);
    let g = ColGroup::new("g", vec![0, 1]);
    let pa = ma.partial_group_presence_count(&g, 2).unwrap();
    let pb = mb.partial_group_presence_count(&g, 2).unwrap();
    let whole = mall.partial_group_presence_count(&g, 2).unwrap();

    // partition A: col0=[5≥2,1<2]=[T,F], col1=[2≥2,0<2]=[T,F] → [2, 0]
    assert_eq!(pa.get(0), 2);
    assert_eq!(pa.get(1), 0);
    // partition B: col0=[0<2,3≥2,2≥2]=[F,T,T], col1=[4≥2,3≥2,1<2]=[T,T,F] → [1, 2, 1]
    assert_eq!(pb.get(0), 1);
    assert_eq!(pb.get(1), 2);
    assert_eq!(pb.get(2), 1);

    // Composition check: the partitions cover disjoint slot ranges, so laying their
    // results end to end must reproduce the result computed on the full matrix.
    let stitched = [pa.get(0), pa.get(1), pb.get(0), pb.get(1), pb.get(2)];
    let unsplit = [whole.get(0), whole.get(1), whole.get(2), whole.get(3), whole.get(4)];
    assert_eq!(
        stitched, unsplit,
        "per-partition presence counts should match the unsplit matrix"
    );
}