Add `col_view()` to PersistentBitMatrix and PersistentCompactIntMatrix.

`col_view(c)` returns an opaque BitColView / IntColView that implements
BitSlice / IntSlice by dispatching to either the Columnar or the Packed
column representation. PersistentBitMatrix::col_view panics on the Implicit
variant. Both view types are re-exported from lib.rs, and tests compare the
Packed views against the Columnar columns.

NOTE(review): the unchanged `col_persist` context lines below read
`-> io::Result {`; a generic argument appears to have been lost before this
patch was captured and cannot be recovered from the patch itself -- confirm
against the repository before applying.

diff --git a/src/obicompactvec/src/bitmatrix.rs b/src/obicompactvec/src/bitmatrix.rs
index 591e4af..0e70a96 100644
--- a/src/obicompactvec/src/bitmatrix.rs
+++ b/src/obicompactvec/src/bitmatrix.rs
@@ -183,6 +183,26 @@ impl BitSlice for PackedCol<'_> {
     fn words(&self) -> &[u64] { self.words }
 }
 
+// ── BitColView — uniform column access across Columnar and Packed ─────────────
+
+enum BitColViewInner<'a> {
+    Columnar(&'a PersistentBitVec),
+    Packed(PackedCol<'a>),
+}
+
+/// Opaque column view returned by [`PersistentBitMatrix::col_view`].
+/// Implements [`BitSlice`] uniformly for both Columnar and Packed matrix formats.
+pub struct BitColView<'a>(BitColViewInner<'a>);
+
+impl BitSlice for BitColView<'_> {
+    fn len(&self) -> usize {
+        match &self.0 { BitColViewInner::Columnar(c) => c.len(), BitColViewInner::Packed(c) => c.len() }
+    }
+    fn words(&self) -> &[u64] {
+        match &self.0 { BitColViewInner::Columnar(c) => c.words(), BitColViewInner::Packed(c) => c.words() }
+    }
+}
+
 /// Build `presence/matrix.pbmx` from existing `col_*.pbiv` files.
 pub fn pack_bit_matrix(dir: &Path) -> io::Result<()> {
     let packed_path = dir.join("matrix.pbmx");
@@ -298,6 +318,14 @@ impl PersistentBitMatrix {
         }
     }
 
+    pub fn col_view(&self, c: usize) -> BitColView<'_> {
+        match self {
+            Self::Columnar(m) => BitColView(BitColViewInner::Columnar(m.col(c))),
+            Self::Packed(m) => BitColView(BitColViewInner::Packed(m.col_slice(c))),
+            Self::Implicit { .. } => panic!("col_view() not available on Implicit PersistentBitMatrix"),
+        }
+    }
+
     pub fn col_persist(&self, c: usize, path: &Path) -> io::Result {
         match self {
             Self::Columnar(m) => PersistentBitVecBuilder::build_from(m.col(c), path),
diff --git a/src/obicompactvec/src/intmatrix.rs b/src/obicompactvec/src/intmatrix.rs
index 0be16fb..a719e97 100644
--- a/src/obicompactvec/src/intmatrix.rs
+++ b/src/obicompactvec/src/intmatrix.rs
@@ -224,6 +224,43 @@ impl Iterator for PackedIntColIter<'_> {
 
 impl ExactSizeIterator for PackedIntColIter<'_> {}
 
+// ── IntColView — uniform column access across Columnar and Packed ─────────────
+
+enum IntColViewInner<'a> {
+    Columnar(&'a PersistentCompactIntVec),
+    Packed(PackedIntCol<'a>),
+}
+
+/// Opaque column view returned by [`PersistentCompactIntMatrix::col_view`].
+/// Implements [`IntSlice`] uniformly for both Columnar and Packed matrix formats.
+pub struct IntColView<'a>(IntColViewInner<'a>);
+
+impl IntSlice for IntColView<'_> {
+    fn len(&self) -> usize {
+        match &self.0 { IntColViewInner::Columnar(c) => c.len(), IntColViewInner::Packed(c) => c.len() }
+    }
+    fn get(&self, slot: usize) -> u32 {
+        match &self.0 { IntColViewInner::Columnar(c) => c.get(slot), IntColViewInner::Packed(c) => c.get(slot) }
+    }
+    fn primary_bytes(&self) -> &[u8] {
+        match &self.0 { IntColViewInner::Columnar(c) => c.primary_bytes(), IntColViewInner::Packed(c) => c.primary_bytes() }
+    }
+    fn overflow_entries(&self) -> impl Iterator<Item = (usize, u32)> + '_ {
+        // Box<dyn Iterator> implements Iterator, satisfying RPITIT across two distinct types.
+        let it: Box<dyn Iterator<Item = (usize, u32)> + '_> = match &self.0 {
+            IntColViewInner::Columnar(c) => Box::new(c.overflow_entries()),
+            IntColViewInner::Packed(c) => Box::new(c.overflow_entries()),
+        };
+        it
+    }
+    fn sum(&self) -> u64 {
+        match &self.0 { IntColViewInner::Columnar(c) => c.sum(), IntColViewInner::Packed(c) => c.sum() }
+    }
+    fn count_nonzero(&self) -> u64 {
+        match &self.0 { IntColViewInner::Columnar(c) => c.count_nonzero(), IntColViewInner::Packed(c) => c.count_nonzero() }
+    }
+}
+
 // ─────────────────────────────────────────────────────────────────────────────
 
 pub struct PackedCompactIntMatrix {
@@ -481,6 +518,13 @@ impl PersistentCompactIntMatrix {
         }
     }
 
+    pub fn col_view(&self, c: usize) -> IntColView<'_> {
+        match self {
+            Self::Columnar(m) => IntColView(IntColViewInner::Columnar(m.col(c))),
+            Self::Packed(m) => IntColView(IntColViewInner::Packed(m.col_slice(c))),
+        }
+    }
+
     pub fn col_persist(&self, c: usize, path: &Path) -> io::Result {
         match self {
             Self::Columnar(m) => PersistentCompactIntVecBuilder::build_from(m.col(c), path),
diff --git a/src/obicompactvec/src/lib.rs b/src/obicompactvec/src/lib.rs
index 3a5f1c4..2dc1453 100644
--- a/src/obicompactvec/src/lib.rs
+++ b/src/obicompactvec/src/lib.rs
@@ -11,9 +11,9 @@ mod reader;
 pub mod traits;
 
 pub use bitvec::{BitIter, PersistentBitVec, PersistentBitVecBuilder};
-pub use bitmatrix::{PersistentBitMatrix, PersistentBitMatrixBuilder, pack_bit_matrix};
+pub use bitmatrix::{BitColView, PersistentBitMatrix, PersistentBitMatrixBuilder, pack_bit_matrix};
 pub use builder::PersistentCompactIntVecBuilder;
-pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
+pub use intmatrix::{IntColView, PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
 pub use layer_meta::LayerMeta;
 pub use memoryintvec::{MemoryIntIter, MemoryIntVec};
 pub use memoryvec::MemoryBitVec;
diff --git a/src/obicompactvec/src/tests/bitmatrix.rs b/src/obicompactvec/src/tests/bitmatrix.rs
index dced37f..5d93222 100644
--- a/src/obicompactvec/src/tests/bitmatrix.rs
+++ b/src/obicompactvec/src/tests/bitmatrix.rs
@@ -1,6 +1,6 @@
 use tempfile::tempdir;
 
-use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder};
+use crate::{pack_bit_matrix, PersistentBitMatrix, PersistentBitMatrixBuilder};
 use crate::traits::{BitPartials, BitSlice, BitSliceMut};
 
 fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
@@ -203,3 +203,57 @@ fn partial_hamming_matches_hamming() {
     let full = m.hamming_dist_matrix();
     assert_eq!(partial, full);
 }
+
+// ── col_view on Packed ────────────────────────────────────────────────────────
+
+#[test]
+fn col_view_packed_values() {
+    let (dir, _) = make_matrix(&[
+        &[true, false, true, true],
+        &[false, true, false, true],
+    ]);
+    pack_bit_matrix(&dir.path().join("presence")).unwrap();
+    let m = PersistentBitMatrix::open(dir.path()).unwrap();
+
+    // col 0: [T, F, T, T]
+    let v0 = m.col_view(0);
+    assert_eq!(v0.len(), 4);
+    assert_eq!(v0.get(0), true);
+    assert_eq!(v0.get(1), false);
+    assert_eq!(v0.get(2), true);
+    assert_eq!(v0.get(3), true);
+    assert_eq!(v0.count_ones(), 3);
+
+    // col 1: [F, T, F, T]
+    let v1 = m.col_view(1);
+    assert_eq!(v1.get(0), false);
+    assert_eq!(v1.get(1), true);
+    assert_eq!(v1.get(2), false);
+    assert_eq!(v1.get(3), true);
+    assert_eq!(v1.count_ones(), 2);
+}
+
+#[test]
+fn col_view_packed_matches_columnar() {
+    let data: &[&[bool]] = &[
+        &[true, false, true, false, true, true, false, true],
+        &[false, false, true, true, false, true, true, false],
+        &[true, true, true, false, false, false, true, true],
+    ];
+    let (dir_col, m_col) = make_matrix(data);
+    let (dir_pack, _) = make_matrix(data);
+    pack_bit_matrix(&dir_pack.path().join("presence")).unwrap();
+    let m_pack = PersistentBitMatrix::open(dir_pack.path()).unwrap();
+
+    for c in 0..data.len() {
+        let col_ref = m_col.col(c);
+        let col_view = m_pack.col_view(c);
+        assert_eq!(col_view.len(), col_ref.len(), "col={c} len");
+        for s in 0..col_ref.len() {
+            assert_eq!(col_view.get(s), col_ref.get(s), "col={c} slot={s}");
+        }
+        assert_eq!(col_view.count_ones(), col_ref.count_ones(), "col={c} count_ones");
+        assert_eq!(col_view.words(), col_ref.words(), "col={c} words");
+    }
+    drop(dir_col);
+}
diff --git a/src/obicompactvec/src/tests/intmatrix.rs b/src/obicompactvec/src/tests/intmatrix.rs
index c4c0a98..d9869aa 100644
--- a/src/obicompactvec/src/tests/intmatrix.rs
+++ b/src/obicompactvec/src/tests/intmatrix.rs
@@ -1,7 +1,7 @@
 use tempfile::tempdir;
 
-use crate::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder};
-use crate::traits::CountPartials;
+use crate::{pack_compact_int_matrix, PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder};
+use crate::traits::{CountPartials, IntSlice};
 
 fn make_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
     let n = cols.first().map_or(0, |c| c.len());
@@ -243,6 +243,61 @@ fn partial_hellinger_matches_full() {
     }
 }
 
+#[test]
+fn col_view_packed_values() {
+    // Build Columnar with overflow values (≥ 255), pack, reopen as Packed, exercise col_view().
+    let (dir, _col) = make_matrix(&[&[10, 300, 500], &[200, 50, 1000]]);
+    pack_compact_int_matrix(&dir.path().join("counts")).unwrap();
+    let m = PersistentCompactIntMatrix::open(dir.path()).unwrap();
+
+    // col 0: [10, 300, 500] — two overflow slots
+    let v0 = m.col_view(0);
+    assert_eq!(v0.get(0), 10);
+    assert_eq!(v0.get(1), 300);
+    assert_eq!(v0.get(2), 500);
+    assert_eq!(v0.sum(), 810);
+    assert_eq!(v0.count_nonzero(), 3);
+    let mut ov0: Vec<(usize, u32)> = v0.overflow_entries().collect();
+    ov0.sort_unstable_by_key(|&(s, _)| s);
+    assert_eq!(ov0, vec![(1, 300), (2, 500)]);
+
+    // col 1: [200, 50, 1000] — one overflow slot
+    let v1 = m.col_view(1);
+    assert_eq!(v1.get(0), 200);
+    assert_eq!(v1.get(1), 50);
+    assert_eq!(v1.get(2), 1000);
+    let mut ov1: Vec<(usize, u32)> = v1.overflow_entries().collect();
+    ov1.sort_unstable_by_key(|&(s, _)| s);
+    assert_eq!(ov1, vec![(2, 1000)]);
+}
+
+#[test]
+fn col_view_packed_matches_columnar() {
+    // Same data, compare col_view() on Packed against col() on Columnar slot-by-slot.
+    let data: &[&[u32]] = &[&[0, 255, 1, 300, 128], &[500, 3, 0, 700, 42]];
+    let (dir_col, m_col) = make_matrix(data);
+    // Re-build in a separate dir so we can pack without touching m_col's files.
+    let (dir_pack, _) = make_matrix(data);
+    pack_compact_int_matrix(&dir_pack.path().join("counts")).unwrap();
+    let m_pack = PersistentCompactIntMatrix::open(dir_pack.path()).unwrap();
+
+    for c in 0..data.len() {
+        let col_ref = m_col.col(c);
+        let col_view = m_pack.col_view(c);
+        assert_eq!(col_view.len(), col_ref.len());
+        for s in 0..col_ref.len() {
+            assert_eq!(col_view.get(s), col_ref.get(s), "col={c} slot={s}");
+        }
+        assert_eq!(col_view.sum(), col_ref.sum(), "col={c} sum");
+        let mut ov_view: Vec<(usize, u32)> = col_view.overflow_entries().collect();
+        let mut ov_ref: Vec<(usize, u32)> = col_ref.overflow_entries().collect();
+        ov_view.sort_unstable_by_key(|&(s, _)| s);
+        ov_ref.sort_unstable_by_key(|&(s, _)| s);
+        assert_eq!(ov_view, ov_ref, "col={c} overflow_entries");
+    }
+    drop(dir_col);
+}
+
 #[test]
 fn partial_relfreq_bray_additive_across_split() {
     // Split rows [1,2,3,4,5] between two matrices; partial sums should add up.