Push mtzqmmrlmzzx #34
@@ -183,6 +183,26 @@ impl BitSlice for PackedCol<'_> {
|
|||||||
fn words(&self) -> &[u64] { self.words }
|
fn words(&self) -> &[u64] { self.words }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── BitColView — uniform column access across Columnar and Packed ─────────────
|
||||||
|
|
||||||
|
enum BitColViewInner<'a> {
|
||||||
|
Columnar(&'a PersistentBitVec),
|
||||||
|
Packed(PackedCol<'a>),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Opaque column view returned by [`PersistentBitMatrix::col_view`].
|
||||||
|
/// Implements [`BitSlice`] uniformly for both Columnar and Packed matrix formats.
|
||||||
|
pub struct BitColView<'a>(BitColViewInner<'a>);
|
||||||
|
|
||||||
|
impl BitSlice for BitColView<'_> {
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
match &self.0 { BitColViewInner::Columnar(c) => c.len(), BitColViewInner::Packed(c) => c.len() }
|
||||||
|
}
|
||||||
|
fn words(&self) -> &[u64] {
|
||||||
|
match &self.0 { BitColViewInner::Columnar(c) => c.words(), BitColViewInner::Packed(c) => c.words() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Build `presence/matrix.pbmx` from existing `col_*.pbiv` files.
|
/// Build `presence/matrix.pbmx` from existing `col_*.pbiv` files.
|
||||||
pub fn pack_bit_matrix(dir: &Path) -> io::Result<()> {
|
pub fn pack_bit_matrix(dir: &Path) -> io::Result<()> {
|
||||||
let packed_path = dir.join("matrix.pbmx");
|
let packed_path = dir.join("matrix.pbmx");
|
||||||
@@ -298,6 +318,14 @@ impl PersistentBitMatrix {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn col_view(&self, c: usize) -> BitColView<'_> {
|
||||||
|
match self {
|
||||||
|
Self::Columnar(m) => BitColView(BitColViewInner::Columnar(m.col(c))),
|
||||||
|
Self::Packed(m) => BitColView(BitColViewInner::Packed(m.col_slice(c))),
|
||||||
|
Self::Implicit { .. } => panic!("col_view() not available on Implicit PersistentBitMatrix"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentBitVecBuilder> {
|
pub fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentBitVecBuilder> {
|
||||||
match self {
|
match self {
|
||||||
Self::Columnar(m) => PersistentBitVecBuilder::build_from(m.col(c), path),
|
Self::Columnar(m) => PersistentBitVecBuilder::build_from(m.col(c), path),
|
||||||
|
|||||||
@@ -224,6 +224,43 @@ impl Iterator for PackedIntColIter<'_> {
|
|||||||
|
|
||||||
impl ExactSizeIterator for PackedIntColIter<'_> {}
|
impl ExactSizeIterator for PackedIntColIter<'_> {}
|
||||||
|
|
||||||
|
// ── IntColView — uniform column access across Columnar and Packed ─────────────
|
||||||
|
|
||||||
|
enum IntColViewInner<'a> {
|
||||||
|
Columnar(&'a PersistentCompactIntVec),
|
||||||
|
Packed(PackedIntCol<'a>),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Opaque column view returned by [`PersistentCompactIntMatrix::col_view`].
|
||||||
|
/// Implements [`IntSlice`] uniformly for both Columnar and Packed matrix formats.
|
||||||
|
pub struct IntColView<'a>(IntColViewInner<'a>);
|
||||||
|
|
||||||
|
impl IntSlice for IntColView<'_> {
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
match &self.0 { IntColViewInner::Columnar(c) => c.len(), IntColViewInner::Packed(c) => c.len() }
|
||||||
|
}
|
||||||
|
fn get(&self, slot: usize) -> u32 {
|
||||||
|
match &self.0 { IntColViewInner::Columnar(c) => c.get(slot), IntColViewInner::Packed(c) => c.get(slot) }
|
||||||
|
}
|
||||||
|
fn primary_bytes(&self) -> &[u8] {
|
||||||
|
match &self.0 { IntColViewInner::Columnar(c) => c.primary_bytes(), IntColViewInner::Packed(c) => c.primary_bytes() }
|
||||||
|
}
|
||||||
|
fn overflow_entries(&self) -> impl Iterator<Item = (usize, u32)> + '_ {
|
||||||
|
// Box<dyn Iterator> implements Iterator, satisfying RPITIT across two distinct types.
|
||||||
|
let it: Box<dyn Iterator<Item = (usize, u32)> + '_> = match &self.0 {
|
||||||
|
IntColViewInner::Columnar(c) => Box::new(c.overflow_entries()),
|
||||||
|
IntColViewInner::Packed(c) => Box::new(c.overflow_entries()),
|
||||||
|
};
|
||||||
|
it
|
||||||
|
}
|
||||||
|
fn sum(&self) -> u64 {
|
||||||
|
match &self.0 { IntColViewInner::Columnar(c) => c.sum(), IntColViewInner::Packed(c) => c.sum() }
|
||||||
|
}
|
||||||
|
fn count_nonzero(&self) -> u64 {
|
||||||
|
match &self.0 { IntColViewInner::Columnar(c) => c.count_nonzero(), IntColViewInner::Packed(c) => c.count_nonzero() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ─────────────────────────────────────────────────────────────────────────────
|
// ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
pub struct PackedCompactIntMatrix {
|
pub struct PackedCompactIntMatrix {
|
||||||
@@ -481,6 +518,13 @@ impl PersistentCompactIntMatrix {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn col_view(&self, c: usize) -> IntColView<'_> {
|
||||||
|
match self {
|
||||||
|
Self::Columnar(m) => IntColView(IntColViewInner::Columnar(m.col(c))),
|
||||||
|
Self::Packed(m) => IntColView(IntColViewInner::Packed(m.col_slice(c))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
|
pub fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
|
||||||
match self {
|
match self {
|
||||||
Self::Columnar(m) => PersistentCompactIntVecBuilder::build_from(m.col(c), path),
|
Self::Columnar(m) => PersistentCompactIntVecBuilder::build_from(m.col(c), path),
|
||||||
|
|||||||
@@ -11,9 +11,9 @@ mod reader;
|
|||||||
pub mod traits;
|
pub mod traits;
|
||||||
|
|
||||||
pub use bitvec::{BitIter, PersistentBitVec, PersistentBitVecBuilder};
|
pub use bitvec::{BitIter, PersistentBitVec, PersistentBitVecBuilder};
|
||||||
pub use bitmatrix::{PersistentBitMatrix, PersistentBitMatrixBuilder, pack_bit_matrix};
|
pub use bitmatrix::{BitColView, PersistentBitMatrix, PersistentBitMatrixBuilder, pack_bit_matrix};
|
||||||
pub use builder::PersistentCompactIntVecBuilder;
|
pub use builder::PersistentCompactIntVecBuilder;
|
||||||
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
|
pub use intmatrix::{IntColView, PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
|
||||||
pub use layer_meta::LayerMeta;
|
pub use layer_meta::LayerMeta;
|
||||||
pub use memoryintvec::{MemoryIntIter, MemoryIntVec};
|
pub use memoryintvec::{MemoryIntIter, MemoryIntVec};
|
||||||
pub use memoryvec::MemoryBitVec;
|
pub use memoryvec::MemoryBitVec;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder};
|
use crate::{pack_bit_matrix, PersistentBitMatrix, PersistentBitMatrixBuilder};
|
||||||
use crate::traits::{BitPartials, BitSlice, BitSliceMut};
|
use crate::traits::{BitPartials, BitSlice, BitSliceMut};
|
||||||
|
|
||||||
fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
|
fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
|
||||||
@@ -203,3 +203,57 @@ fn partial_hamming_matches_hamming() {
|
|||||||
let full = m.hamming_dist_matrix();
|
let full = m.hamming_dist_matrix();
|
||||||
assert_eq!(partial, full);
|
assert_eq!(partial, full);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── col_view on Packed ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
#[test]
fn col_view_packed_values() {
    // Write a two-column matrix, pack its `presence` directory, then open the
    // matrix again and read it back through `col_view()`.
    let (dir, _) = make_matrix(&[
        &[true, false, true, true],
        &[false, true, false, true],
    ]);
    pack_bit_matrix(&dir.path().join("presence")).unwrap();
    let m = PersistentBitMatrix::open(dir.path()).unwrap();

    // col 0: [T, F, T, T]
    let v0 = m.col_view(0);
    assert_eq!(v0.len(), 4);
    for (slot, &want) in [true, false, true, true].iter().enumerate() {
        assert_eq!(v0.get(slot), want);
    }
    assert_eq!(v0.count_ones(), 3);

    // col 1: [F, T, F, T]
    let v1 = m.col_view(1);
    for (slot, &want) in [false, true, false, true].iter().enumerate() {
        assert_eq!(v1.get(slot), want);
    }
    assert_eq!(v1.count_ones(), 2);
}
|
||||||
|
|
||||||
|
#[test]
fn col_view_packed_matches_columnar() {
    let data: &[&[bool]] = &[
        &[true, false, true, false, true, true, false, true],
        &[false, false, true, true, false, true, true, false],
        &[true, true, true, false, false, false, true, true],
    ];

    // Reference matrix, left unpacked.
    let (dir_col, m_col) = make_matrix(data);

    // Second copy of the same data in its own directory; this one gets packed.
    let (dir_pack, _) = make_matrix(data);
    pack_bit_matrix(&dir_pack.path().join("presence")).unwrap();
    let m_pack = PersistentBitMatrix::open(dir_pack.path()).unwrap();

    (0..data.len()).for_each(|c| {
        let reference = m_col.col(c);
        let view = m_pack.col_view(c);

        assert_eq!(view.len(), reference.len(), "col={c} len");
        (0..reference.len()).for_each(|s| {
            assert_eq!(view.get(s), reference.get(s), "col={c} slot={s}");
        });
        assert_eq!(view.count_ones(), reference.count_ones(), "col={c} count_ones");
        assert_eq!(view.words(), reference.words(), "col={c} words");
    });

    // Explicitly release the reference directory only after every comparison.
    drop(dir_col);
}
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
use crate::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder};
|
use crate::{pack_compact_int_matrix, PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder};
|
||||||
use crate::traits::CountPartials;
|
use crate::traits::{CountPartials, IntSlice};
|
||||||
|
|
||||||
fn make_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
|
fn make_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
|
||||||
let n = cols.first().map_or(0, |c| c.len());
|
let n = cols.first().map_or(0, |c| c.len());
|
||||||
@@ -243,6 +243,61 @@ fn partial_hellinger_matches_full() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn col_view_packed_values() {
    // Build Columnar with overflow values (≥ 255), pack, reopen as Packed, exercise col_view().
    let (dir, _col) = make_matrix(&[&[10, 300, 500], &[200, 50, 1000]]);
    pack_compact_int_matrix(&dir.path().join("counts")).unwrap();
    let m = PersistentCompactIntMatrix::open(dir.path()).unwrap();

    // col 0: [10, 300, 500] — two overflow slots
    let v0 = m.col_view(0);
    for (slot, &want) in [10u32, 300, 500].iter().enumerate() {
        assert_eq!(v0.get(slot), want);
    }
    assert_eq!(v0.sum(), 810);
    assert_eq!(v0.count_nonzero(), 3);
    // Sort by slot so the comparison does not depend on iteration order.
    let mut ov0: Vec<(usize, u32)> = v0.overflow_entries().collect();
    ov0.sort_unstable_by_key(|&(slot, _)| slot);
    assert_eq!(ov0, vec![(1, 300), (2, 500)]);

    // col 1: [200, 50, 1000] — one overflow slot
    let v1 = m.col_view(1);
    for (slot, &want) in [200u32, 50, 1000].iter().enumerate() {
        assert_eq!(v1.get(slot), want);
    }
    let mut ov1: Vec<(usize, u32)> = v1.overflow_entries().collect();
    ov1.sort_unstable_by_key(|&(slot, _)| slot);
    assert_eq!(ov1, vec![(2, 1000)]);
}
|
||||||
|
|
||||||
|
#[test]
fn col_view_packed_matches_columnar() {
    // Same data, compare col_view() on Packed against col() on Columnar slot-by-slot.
    let data: &[&[u32]] = &[&[0, 255, 1, 300, 128], &[500, 3, 0, 700, 42]];
    let (dir_col, m_col) = make_matrix(data);

    // Re-build in a separate dir so we can pack without touching m_col's files.
    let (dir_pack, _) = make_matrix(data);
    pack_compact_int_matrix(&dir_pack.path().join("counts")).unwrap();
    let m_pack = PersistentCompactIntMatrix::open(dir_pack.path()).unwrap();

    (0..data.len()).for_each(|c| {
        let reference = m_col.col(c);
        let view = m_pack.col_view(c);

        assert_eq!(view.len(), reference.len());
        (0..reference.len()).for_each(|s| {
            assert_eq!(view.get(s), reference.get(s), "col={c} slot={s}");
        });
        assert_eq!(view.sum(), reference.sum(), "col={c} sum");

        // Sort both overflow lists by slot before comparing, so iteration
        // order does not matter.
        let mut ov_view: Vec<(usize, u32)> = view.overflow_entries().collect();
        let mut ov_ref: Vec<(usize, u32)> = reference.overflow_entries().collect();
        ov_view.sort_unstable_by_key(|&(slot, _)| slot);
        ov_ref.sort_unstable_by_key(|&(slot, _)| slot);
        assert_eq!(ov_view, ov_ref, "col={c} overflow_entries");
    });

    // Explicitly release the reference directory only after every comparison.
    drop(dir_col);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn partial_relfreq_bray_additive_across_split() {
|
fn partial_relfreq_bray_additive_across_split() {
|
||||||
// Split rows [1,2,3,4,5] between two matrices; partial sums should add up.
|
// Split rows [1,2,3,4,5] between two matrices; partial sums should add up.
|
||||||
|
|||||||
Reference in New Issue
Block a user