feat(matrix): add partial group reductions and column persistence

Expands MatrixGroupOps with partial_group_min/max helpers for bitwise reductions and introduces add_col_from methods to persist external vectors as matrix columns. Refactors column aggregation in the partitioner to leverage these group operations directly, replacing iterative row processing with simplified builder lifecycle management and explicit metadata serialization.
This commit is contained in:
Eric Coissac
2026-06-18 07:34:29 +02:00
parent 7eea71fdcd
commit 4c4524766c
5 changed files with 206 additions and 152 deletions
+40
View File
@@ -402,6 +402,26 @@ impl PersistentBitMatrixBuilder {
PersistentBitVecBuilder::new(self.n, &path)
}
/// Persists `src` as the next column of the matrix being built.
///
/// The destination is the path `col_path` yields for the current column
/// index. The column counter is advanced only after `make_persistent`
/// has succeeded.
///
/// # Errors
/// Propagates any error returned by `make_persistent`.
pub fn add_col_from(&mut self, src: &TempBitVec) -> io::Result<()> {
    let dest = col_path(&self.dir, self.n_cols);
    src.make_persistent(&dest)?;
    self.n_cols += 1;
    Ok(())
}
/// Writes a new bit column derived from the integer vector `src`.
///
/// A slot's bit is set when its primary byte is greater than zero, and
/// also for every slot that appears in the view's overflow entries.
///
/// The column counter is advanced only once the column has been closed
/// successfully, so a failed write does not leave the builder counting a
/// column that was never completed (the same ordering `add_col_from` uses).
///
/// # Errors
/// Propagates errors from creating or closing the column builder.
pub fn add_col_from_int(&mut self, src: &TempCompactIntVec) -> io::Result<()> {
    let path = col_path(&self.dir, self.n_cols);
    let mut b = PersistentBitVecBuilder::new(self.n, &path)?;
    let view = src.view();
    // Fetch the primary bytes once instead of on every iteration.
    let primary = view.primary_bytes();
    for slot in 0..self.n {
        if primary[slot] > 0 { b.set(slot, true); }
    }
    // NOTE(review): presumably overflow entries hold values that do not fit
    // in a primary byte — confirm against the compact-int vector layout.
    for (slot, _) in view.overflow_entries() {
        b.set(slot, true);
    }
    b.close()?;
    // Count the column only now that it is fully written.
    self.n_cols += 1;
    Ok(())
}
/// Consumes the builder and saves the matrix metadata (`n` and `n_cols`)
/// into the matrix directory.
///
/// # Errors
/// Returns whatever error `MatrixMeta::save` reports.
pub fn close(self) -> io::Result<()> {
    let meta = MatrixMeta { n: self.n, n_cols: self.n_cols };
    meta.save(&self.dir)
}
@@ -446,6 +466,26 @@ impl MatrixGroupOps for PersistentBitMatrix {
}
result.freeze()
}
/// Per-slot minimum over the columns of group `g`.
///
/// Values are 0/1, so the minimum is a logical AND: a slot ends up at 1
/// only if ALL columns of the group are 1. An empty group leaves the
/// freshly created builder untouched.
fn partial_group_min(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
    let mut acc = TempCompactIntVecBuilder::new(self.n())?;
    let mut cols = g.indices.iter().copied();
    // Seed the accumulator with the first column, then mask it with each
    // remaining column in turn.
    if let Some(first) = cols.next() {
        acc.inc_present_fast(self.col_view(first));
        for c in cols {
            acc.mask_with(self.col_view(c));
        }
    }
    acc.freeze()
}
/// Per-slot maximum over the columns of group `g`.
///
/// Values are 0/1, so the maximum is a logical OR: 1 if any column is 1.
/// This reuses `partial_group_any` with threshold 1 and feeds the
/// resulting bit vector into an integer-vector builder of the same length.
fn partial_group_max(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
    let present = self.partial_group_any(g, 1)?;
    let mut out = TempCompactIntVecBuilder::new(present.len())?;
    out.inc_present(present.view());
    out.freeze()
}
}
// ── Shared matrix helpers (also used by intmatrix.rs) ─────────────────────────
+38 -7
View File
@@ -1,6 +1,6 @@
use std::io;
use crate::tempbitvec::TempBitVec;
use crate::tempbitvec::{TempBitVec, TempBitVecBuilder};
use crate::tempintvec::TempCompactIntVec;
// ── ColGroup ──────────────────────────────────────────────────────────────────
@@ -23,12 +23,14 @@ impl ColGroup {
// ── MatrixGroupOps ────────────────────────────────────────────────────────────
/// Per-matrix group aggregations that return **additive intermediates**.
/// Per-matrix group aggregations.
///
/// Results must be composed by the caller (concat across partitions, add across
/// layers) before applying final predicates (`geq`, `leq`, …). Non-additive
/// predicates like `group_all` or `group_at_least(k)` are intentionally absent
/// — they are derived at the index level from these intermediates.
/// `partial_group_presence_count`, `partial_group_sum`, `partial_group_any`,
/// `partial_group_min`, `partial_group_max` are the primitives; each impl must
/// provide all five.
///
/// `partial_group_all` and `partial_group_none` have default implementations
/// derived from `partial_group_presence_count` and should rarely need overriding.
pub trait MatrixGroupOps {
/// Per-slot count of group columns whose value ≥ `threshold`.
fn partial_group_presence_count(&self, g: &ColGroup, threshold: u32) -> io::Result<TempCompactIntVec>;
@@ -36,6 +38,35 @@ pub trait MatrixGroupOps {
/// Per-slot sum of values across all group columns.
fn partial_group_sum(&self, g: &ColGroup) -> io::Result<TempCompactIntVec>;
/// Per-slot OR: true if any group column has value ≥ `threshold`.
/// Per-slot OR: 1 if any group column has value ≥ `threshold`.
fn partial_group_any(&self, g: &ColGroup, threshold: u32) -> io::Result<TempBitVec>;
/// Per-slot min value across all group columns (0 if group is empty).
fn partial_group_min(&self, g: &ColGroup) -> io::Result<TempCompactIntVec>;
/// Per-slot max value across all group columns (0 if group is empty).
fn partial_group_max(&self, g: &ColGroup) -> io::Result<TempCompactIntVec>;
/// Per-slot AND: 1 if ALL group columns have value ≥ `threshold`.
///
/// Derived from `partial_group_presence_count`: a slot's bit is set when
/// its count is at least the number of columns in the group.
fn partial_group_all(&self, g: &ColGroup, threshold: u32) -> io::Result<TempBitVec> {
    let counts = self.partial_group_presence_count(g, threshold)?;
    let group_size = g.indices.len() as u32;
    let len = counts.len();
    let mut bits = TempBitVecBuilder::new(len)?;
    (0..len)
        .filter(|&slot| counts.get(slot) >= group_size)
        .for_each(|slot| {
            bits.set(slot, true);
        });
    bits.freeze()
}
/// Per-slot NOR: 1 if NO group column has value ≥ `threshold`.
///
/// Derived from `partial_group_presence_count`: a slot's bit is set when
/// its count is zero.
fn partial_group_none(&self, g: &ColGroup, threshold: u32) -> io::Result<TempBitVec> {
    let counts = self.partial_group_presence_count(g, threshold)?;
    let len = counts.len();
    let mut bits = TempBitVecBuilder::new(len)?;
    let empty_slots = (0..len).filter(|&slot| counts.get(slot) == 0);
    for slot in empty_slots {
        bits.set(slot, true);
    }
    bits.freeze()
}
}
+32
View File
@@ -386,6 +386,21 @@ impl PersistentCompactIntMatrixBuilder {
self.n_cols += 1;
PersistentCompactIntVecBuilder::new(self.n, &path)
}
/// Persists `src` as the next column of the matrix being built.
///
/// The destination is the path `col_path` yields for the current column
/// index. The column counter is advanced only after `make_persistent`
/// has succeeded.
///
/// # Errors
/// Propagates any error returned by `make_persistent`.
pub fn add_col_from(&mut self, src: &TempCompactIntVec) -> io::Result<()> {
    let dest = col_path(&self.dir, self.n_cols);
    src.make_persistent(&dest)?;
    self.n_cols += 1;
    Ok(())
}
/// Writes a new integer column derived from the bit vector `src`.
///
/// A fresh column builder is created at the path for the current column
/// index, `src`'s view is passed to `inc_present`, and the builder is
/// closed.
///
/// The column counter is advanced only once the column has been closed
/// successfully, so a failed write does not leave the builder counting a
/// column that was never completed (the same ordering `add_col_from` uses).
///
/// # Errors
/// Propagates errors from creating or closing the column builder.
pub fn add_col_from_bit(&mut self, src: &TempBitVec) -> io::Result<()> {
    let path = col_path(&self.dir, self.n_cols);
    let mut b = PersistentCompactIntVecBuilder::new(self.n, &path)?;
    b.inc_present(src.view());
    b.close()?;
    // Count the column only now that it is fully written.
    self.n_cols += 1;
    Ok(())
}
/// Consumes the builder and saves the matrix metadata (`n` and `n_cols`)
/// into the matrix directory.
///
/// # Errors
/// Returns whatever error `MatrixMeta::save` reports.
pub fn close(self) -> io::Result<()> {
    let meta = MatrixMeta { n: self.n, n_cols: self.n_cols };
    meta.save(&self.dir)
}
@@ -431,4 +446,21 @@ impl MatrixGroupOps for PersistentCompactIntMatrix {
}
result.freeze()
}
/// Per-slot minimum over the columns of group `g`.
///
/// The accumulator is seeded by adding the first column of the group and
/// then combined with each remaining column through the builder's `min`.
/// An empty group leaves the freshly created builder untouched.
fn partial_group_min(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
    let mut acc = TempCompactIntVecBuilder::new(self.n())?;
    let mut cols = g.indices.iter().copied();
    if let Some(first) = cols.next() {
        acc.add(self.col_view(first));
        for c in cols {
            acc.min(self.col_view(c));
        }
    }
    acc.freeze()
}
/// Per-slot maximum over the columns of group `g`.
///
/// Starts from a freshly created builder and folds every column of the
/// group into it through the builder's `max`. An empty group leaves the
/// builder untouched.
fn partial_group_max(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
    let mut acc = TempCompactIntVecBuilder::new(self.n())?;
    g.indices.iter().for_each(|&c| {
        acc.max(self.col_view(c));
    });
    acc.freeze()
}
}