4c4524766c
Expands MatrixGroupOps with partial_group_min/max helpers for bitwise reductions and introduces add_col_from methods to persist external vectors as matrix columns. Refactors column aggregation in the partitioner to leverage these group operations directly, replacing iterative row processing with simplified builder lifecycle management and explicit metadata serialization.
467 lines
20 KiB
Rust
467 lines
20 KiB
Rust
use std::fs::{self, File};
|
|
use std::io::{self, BufWriter, Write as _};
|
|
use std::path::{Path, PathBuf};
|
|
|
|
use memmap2::Mmap;
|
|
use ndarray::{Array1, Array2};
|
|
use rayon::prelude::*;
|
|
|
|
use crate::bitmatrix::{pairwise_matrix, pairwise2_matrix};
|
|
use crate::builder::PersistentCompactIntVecBuilder;
|
|
use crate::colgroup::{ColGroup, MatrixGroupOps};
|
|
use crate::format::{HEADER_SIZE, OVERFLOW_ENTRY_SIZE};
|
|
use crate::meta::MatrixMeta;
|
|
use crate::reader::PersistentCompactIntVec;
|
|
use crate::tempbitvec::{TempBitVec, TempBitVecBuilder};
|
|
use crate::tempintvec::{TempCompactIntVec, TempCompactIntVecBuilder};
|
|
use crate::views::IntSliceView;
|
|
|
|
/// Returns the path of the file that stores column `col` inside `dir`.
///
/// The file name is `col_`, followed by the column index zero-padded to at
/// least six digits, followed by the `.pciv` extension.
fn col_path(dir: &Path, col: usize) -> PathBuf {
    let file_name = format!("col_{col:06}.pciv");
    let mut path = dir.to_path_buf();
    path.push(file_name);
    path
}
|
|
|
|
// ── ColumnarCompactIntMatrix ──────────────────────────────────────────────────
|
|
|
|
/// Count matrix stored as one [`PersistentCompactIntVec`] file per column.
pub struct ColumnarCompactIntMatrix {
    /// Opened column vectors, indexed by column number.
    cols: Vec<PersistentCompactIntVec>,
    /// Number of slots per column, taken from `MatrixMeta::n` when opening.
    n: usize,
}
|
|
|
|
impl ColumnarCompactIntMatrix {
|
|
pub(crate) fn open(dir: &Path) -> io::Result<Self> {
|
|
let meta = MatrixMeta::load(dir)?;
|
|
let cols = (0..meta.n_cols)
|
|
.map(|c| PersistentCompactIntVec::open(&col_path(dir, c)))
|
|
.collect::<io::Result<Vec<_>>>()?;
|
|
Ok(Self { cols, n: meta.n })
|
|
}
|
|
|
|
pub(crate) fn n(&self) -> usize { self.n }
|
|
pub(crate) fn n_cols(&self) -> usize { self.cols.len() }
|
|
pub(crate) fn col(&self, c: usize) -> &PersistentCompactIntVec { &self.cols[c] }
|
|
|
|
pub(crate) fn row(&self, slot: usize) -> Box<[u32]> {
|
|
self.cols.iter().map(|c| c.get(slot)).collect()
|
|
}
|
|
|
|
pub(crate) fn fill_row(&self, slot: usize, buf: &mut [u32]) {
|
|
for (c, col) in self.cols.iter().enumerate() { buf[c] = col.get(slot); }
|
|
}
|
|
|
|
pub(crate) fn sum(&self) -> Array1<u64> {
|
|
let sums: Vec<u64> = (0..self.n_cols())
|
|
.into_par_iter()
|
|
.map(|c| self.col(c).sum())
|
|
.collect();
|
|
Array1::from_vec(sums)
|
|
}
|
|
|
|
pub(crate) fn count_nonzero(&self) -> Array1<u64> {
|
|
let counts: Vec<u64> = (0..self.n_cols())
|
|
.into_par_iter()
|
|
.map(|c| self.col(c).count_nonzero())
|
|
.collect();
|
|
Array1::from_vec(counts)
|
|
}
|
|
|
|
pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
|
|
pairwise_matrix(self.n_cols(), |i, j| self.col(i).partial_bray_dist(self.col(j)))
|
|
}
|
|
pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
|
pairwise_matrix(self.n_cols(), |i, j| self.col(i).partial_euclidean_dist(self.col(j)))
|
|
}
|
|
pub(crate) fn partial_threshold_jaccard_dist_matrix(&self, threshold: u32) -> (Array2<u64>, Array2<u64>) {
|
|
pairwise2_matrix(self.n_cols(), |i, j| self.col(i).partial_threshold_jaccard_dist(self.col(j), threshold))
|
|
}
|
|
pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
|
pairwise_matrix(self.n_cols(), |i, j| {
|
|
self.col(i).partial_relfreq_bray_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
|
|
})
|
|
}
|
|
pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
|
pairwise_matrix(self.n_cols(), |i, j| {
|
|
self.col(i).partial_relfreq_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
|
|
})
|
|
}
|
|
pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
|
pairwise_matrix(self.n_cols(), |i, j| {
|
|
self.col(i).partial_hellinger_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
|
|
})
|
|
}
|
|
|
|
pub(crate) fn append_column(dir: &Path, value_of: impl Fn(usize) -> u32) -> io::Result<()> {
|
|
let mut meta = MatrixMeta::load(dir)?;
|
|
let mut b = PersistentCompactIntVecBuilder::new(meta.n, &col_path(dir, meta.n_cols))?;
|
|
for slot in 0..meta.n { b.set(slot, value_of(slot)); }
|
|
b.close()?;
|
|
meta.n_cols += 1;
|
|
meta.save(dir)
|
|
}
|
|
}
|
|
|
|
// ── PackedCompactIntMatrix ────────────────────────────────────────────────────
|
|
|
|
/// Magic bytes found at the very start of a packed matrix file.
const PCMX_MAGIC: [u8; 4] = [b'P', b'C', b'M', b'X'];
/// Size in bytes of the fixed file header:
/// magic (4) + padding (4) + `n_rows` (8) + `n_cols` (8).
const PCMX_HEADER: usize = 4 + 4 + 8 + 8;
|
|
|
|
/// Location of one column inside the memory-mapped packed file.
///
/// All offsets are absolute byte positions in the mapping; they are computed
/// by `PackedCompactIntMatrix::open`.
struct ColInfo {
    /// Start of the column's primary bytes (column base + `HEADER_SIZE`).
    primary_start: usize,
    /// Start of the raw overflow entries (`primary_start` plus the length
    /// read from the column header).
    data_offset: usize,
    /// Number of overflow entries, each `OVERFLOW_ENTRY_SIZE` bytes long.
    n_overflow: usize,
}
|
|
|
|
/// Count matrix stored in a single memory-mapped `PCMX` file.
pub struct PackedCompactIntMatrix {
    /// Read-only mapping of the whole packed file.
    mmap: Mmap,
    /// Row count read from the file header.
    n_rows: usize,
    /// Column count read from the file header.
    n_cols: usize,
    /// Per-column layout, indexed by column number.
    columns: Vec<ColInfo>,
}
|
|
|
|
impl PackedCompactIntMatrix {
|
|
pub(crate) fn open(path: &Path) -> io::Result<Self> {
|
|
let mmap = unsafe { Mmap::map(&File::open(path)?)? };
|
|
if mmap.len() < PCMX_HEADER {
|
|
return Err(io::Error::new(io::ErrorKind::InvalidData, "PCMX file too short"));
|
|
}
|
|
if &mmap[0..4] != &PCMX_MAGIC {
|
|
return Err(io::Error::new(io::ErrorKind::InvalidData, "bad PCMX magic"));
|
|
}
|
|
let n_rows = u64::from_le_bytes(mmap[8..16].try_into().unwrap()) as usize;
|
|
let n_cols = u64::from_le_bytes(mmap[16..24].try_into().unwrap()) as usize;
|
|
|
|
let mut columns = Vec::with_capacity(n_cols);
|
|
for c in 0..n_cols {
|
|
let off_pos = PCMX_HEADER + c * 8;
|
|
let col_base = u64::from_le_bytes(mmap[off_pos..off_pos+8].try_into().unwrap()) as usize;
|
|
let n_ov = u64::from_le_bytes(mmap[col_base+16..col_base+24].try_into().unwrap()) as usize;
|
|
let n_pciv = u64::from_le_bytes(mmap[col_base+8..col_base+16].try_into().unwrap()) as usize;
|
|
let primary_start = col_base + HEADER_SIZE;
|
|
let data_offset = primary_start + n_pciv;
|
|
columns.push(ColInfo { primary_start, data_offset, n_overflow: n_ov });
|
|
}
|
|
Ok(Self { mmap, n_rows, n_cols, columns })
|
|
}
|
|
|
|
pub(crate) fn col_view(&self, c: usize) -> IntSliceView<'_> {
|
|
let ci = &self.columns[c];
|
|
let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
|
|
let overflow_raw = &self.mmap[ci.data_offset..ci.data_offset + ci.n_overflow * OVERFLOW_ENTRY_SIZE];
|
|
IntSliceView::new(primary, overflow_raw, ci.n_overflow, self.n_rows)
|
|
}
|
|
|
|
pub(crate) fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
|
|
let view = self.col_view(c);
|
|
let overflow: std::collections::HashMap<usize, u32> = view.overflow_entries().collect();
|
|
PersistentCompactIntVecBuilder::from_raw_primary(view.primary_bytes(), overflow, path)
|
|
}
|
|
|
|
#[inline]
|
|
pub(crate) fn get(&self, col: usize, slot: usize) -> u32 { self.col_view(col).get(slot) }
|
|
|
|
pub(crate) fn fill_row(&self, slot: usize, buf: &mut [u32]) {
|
|
for c in 0..self.n_cols { buf[c] = self.get(c, slot); }
|
|
}
|
|
|
|
pub(crate) fn row(&self, slot: usize) -> Box<[u32]> {
|
|
(0..self.n_cols).map(|c| self.get(c, slot)).collect()
|
|
}
|
|
|
|
pub(crate) fn sum(&self) -> Array1<u64> {
|
|
Array1::from_vec(
|
|
(0..self.n_cols).into_par_iter().map(|c| self.col_view(c).sum()).collect()
|
|
)
|
|
}
|
|
|
|
pub(crate) fn count_nonzero(&self) -> Array1<u64> {
|
|
Array1::from_vec(
|
|
(0..self.n_cols).into_par_iter().map(|c| self.col_view(c).count_nonzero()).collect()
|
|
)
|
|
}
|
|
|
|
fn pair_partial_bray(&self, i: usize, j: usize) -> u64 {
|
|
self.col_view(i).iter().zip(self.col_view(j).iter()).map(|(a, b)| a.min(b) as u64).sum()
|
|
}
|
|
fn pair_partial_euclidean(&self, i: usize, j: usize) -> f64 {
|
|
self.col_view(i).iter().zip(self.col_view(j).iter())
|
|
.map(|(a, b)| { let d = a as f64 - b as f64; d * d }).sum()
|
|
}
|
|
fn pair_partial_threshold_jaccard(&self, i: usize, j: usize, t: u32) -> (u64, u64) {
|
|
self.col_view(i).iter().zip(self.col_view(j).iter())
|
|
.fold((0u64, 0u64), |(inter, uni), (a, b)| {
|
|
let ap = a >= t; let bp = b >= t;
|
|
(inter + (ap & bp) as u64, uni + (ap | bp) as u64)
|
|
})
|
|
}
|
|
fn pair_partial_relfreq_bray(&self, i: usize, j: usize, si: f64, sj: f64) -> f64 {
|
|
if si == 0.0 || sj == 0.0 { return 0.0; }
|
|
self.col_view(i).iter().zip(self.col_view(j).iter())
|
|
.map(|(a, b)| (a as f64 / si).min(b as f64 / sj)).sum()
|
|
}
|
|
fn pair_partial_relfreq_euclidean(&self, i: usize, j: usize, si: f64, sj: f64) -> f64 {
|
|
if si == 0.0 || sj == 0.0 { return 0.0; }
|
|
self.col_view(i).iter().zip(self.col_view(j).iter())
|
|
.map(|(a, b)| { let d = a as f64 / si - b as f64 / sj; d * d }).sum()
|
|
}
|
|
fn pair_partial_hellinger(&self, i: usize, j: usize, si: f64, sj: f64) -> f64 {
|
|
if si == 0.0 || sj == 0.0 { return 0.0; }
|
|
self.col_view(i).iter().zip(self.col_view(j).iter())
|
|
.map(|(a, b)| { let d = (a as f64 / si).sqrt() - (b as f64 / sj).sqrt(); d * d }).sum()
|
|
}
|
|
|
|
pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
|
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_bray(i, j))
|
|
}
|
|
pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_euclidean(i, j))
|
|
}
|
|
pub(crate) fn partial_threshold_jaccard_dist_matrix(&self, t: u32) -> (Array2<u64>, Array2<u64>) {
|
|
pairwise2_matrix(self.n_cols, |i, j| self.pair_partial_threshold_jaccard(i, j, t))
|
|
}
|
|
pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_relfreq_bray(i, j, col_sums[i] as f64, col_sums[j] as f64))
|
|
}
|
|
pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_relfreq_euclidean(i, j, col_sums[i] as f64, col_sums[j] as f64))
|
|
}
|
|
pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_hellinger(i, j, col_sums[i] as f64, col_sums[j] as f64))
|
|
}
|
|
}
|
|
|
|
/// Build `counts/matrix.pcmx` from existing `col_*.pciv` files.
|
|
pub fn pack_compact_int_matrix(dir: &Path) -> io::Result<()> {
|
|
let packed_path = dir.join("matrix.pcmx");
|
|
if packed_path.exists() {
|
|
if let Ok(meta) = MatrixMeta::load(dir) {
|
|
for c in 0..meta.n_cols { let _ = fs::remove_file(col_path(dir, c)); }
|
|
let _ = fs::remove_file(dir.join("meta.json"));
|
|
}
|
|
return Ok(());
|
|
}
|
|
let meta = MatrixMeta::load(dir)?;
|
|
let n_cols = meta.n_cols;
|
|
let col_sizes: Vec<u64> = (0..n_cols)
|
|
.map(|c| fs::metadata(col_path(dir, c)).map(|m| m.len()))
|
|
.collect::<io::Result<_>>()?;
|
|
let header_size = (PCMX_HEADER + n_cols * 8) as u64;
|
|
let mut col_offset = header_size;
|
|
let mut offsets = Vec::with_capacity(n_cols);
|
|
for &size in &col_sizes { offsets.push(col_offset); col_offset += size; }
|
|
let tmp_path = dir.join("matrix.pcmx.tmp");
|
|
let mut out = BufWriter::new(File::create(&tmp_path)?);
|
|
out.write_all(&PCMX_MAGIC)?;
|
|
out.write_all(&[0u8; 4])?;
|
|
out.write_all(&(meta.n as u64).to_le_bytes())?;
|
|
out.write_all(&(n_cols as u64).to_le_bytes())?;
|
|
for &off in &offsets { out.write_all(&off.to_le_bytes())?; }
|
|
for c in 0..n_cols { io::copy(&mut File::open(col_path(dir, c))?, &mut out)?; }
|
|
out.flush()?;
|
|
drop(out);
|
|
fs::rename(&tmp_path, &packed_path)?;
|
|
for c in 0..n_cols { fs::remove_file(col_path(dir, c))?; }
|
|
fs::remove_file(dir.join("meta.json"))?;
|
|
Ok(())
|
|
}
|
|
|
|
// ── PersistentCompactIntMatrix — public enum ──────────────────────────────────
|
|
|
|
/// Persistent count matrix, stored in one of two on-disk layouts.
pub enum PersistentCompactIntMatrix {
    /// One file per column, described by a `MatrixMeta`.
    Columnar(ColumnarCompactIntMatrix),
    /// All columns packed into a single memory-mapped `matrix.pcmx` file.
    Packed(PackedCompactIntMatrix),
}
|
|
|
|
impl PersistentCompactIntMatrix {
|
|
pub fn open(layer_dir: &Path) -> io::Result<Self> {
|
|
let counts_dir = layer_dir.join("counts");
|
|
if counts_dir.join("matrix.pcmx").exists() {
|
|
return Ok(Self::Packed(PackedCompactIntMatrix::open(&counts_dir.join("matrix.pcmx"))?));
|
|
}
|
|
if MatrixMeta::load(&counts_dir).is_ok() {
|
|
return Ok(Self::Columnar(ColumnarCompactIntMatrix::open(&counts_dir)?));
|
|
}
|
|
Err(io::Error::new(
|
|
io::ErrorKind::NotFound,
|
|
format!("no count matrix found in {} — run 'obikmer upgrade'", layer_dir.display()),
|
|
))
|
|
}
|
|
|
|
pub fn n(&self) -> usize {
|
|
match self { Self::Columnar(m) => m.n(), Self::Packed(m) => m.n_rows }
|
|
}
|
|
pub fn n_cols(&self) -> usize {
|
|
match self { Self::Columnar(m) => m.n_cols(), Self::Packed(m) => m.n_cols }
|
|
}
|
|
|
|
pub fn col(&self, c: usize) -> &PersistentCompactIntVec {
|
|
match self {
|
|
Self::Columnar(m) => m.col(c),
|
|
_ => panic!("col() only available on Columnar PersistentCompactIntMatrix"),
|
|
}
|
|
}
|
|
|
|
pub fn col_view(&self, c: usize) -> IntSliceView<'_> {
|
|
match self {
|
|
Self::Columnar(m) => m.col(c).view(),
|
|
Self::Packed(m) => m.col_view(c),
|
|
}
|
|
}
|
|
|
|
pub fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
|
|
match self {
|
|
Self::Columnar(m) => PersistentCompactIntVecBuilder::build_from(m.col(c), path),
|
|
Self::Packed(m) => m.col_persist(c, path),
|
|
}
|
|
}
|
|
|
|
pub fn row(&self, slot: usize) -> Box<[u32]> {
|
|
match self { Self::Columnar(m) => m.row(slot), Self::Packed(m) => m.row(slot) }
|
|
}
|
|
pub fn fill_row(&self, slot: usize, buf: &mut [u32]) {
|
|
match self { Self::Columnar(m) => m.fill_row(slot, buf), Self::Packed(m) => m.fill_row(slot, buf) }
|
|
}
|
|
pub fn sum(&self) -> Array1<u64> {
|
|
match self { Self::Columnar(m) => m.sum(), Self::Packed(m) => m.sum() }
|
|
}
|
|
pub fn count_nonzero(&self) -> Array1<u64> {
|
|
match self { Self::Columnar(m) => m.count_nonzero(), Self::Packed(m) => m.count_nonzero() }
|
|
}
|
|
pub fn partial_bray_dist_matrix(&self) -> Array2<u64> {
|
|
match self { Self::Columnar(m) => m.partial_bray_dist_matrix(), Self::Packed(m) => m.partial_bray_dist_matrix() }
|
|
}
|
|
pub fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
|
match self { Self::Columnar(m) => m.partial_euclidean_dist_matrix(), Self::Packed(m) => m.partial_euclidean_dist_matrix() }
|
|
}
|
|
pub fn partial_threshold_jaccard_dist_matrix(&self, threshold: u32) -> (Array2<u64>, Array2<u64>) {
|
|
match self { Self::Columnar(m) => m.partial_threshold_jaccard_dist_matrix(threshold), Self::Packed(m) => m.partial_threshold_jaccard_dist_matrix(threshold) }
|
|
}
|
|
pub fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
|
match self { Self::Columnar(m) => m.partial_relfreq_bray_dist_matrix(col_sums), Self::Packed(m) => m.partial_relfreq_bray_dist_matrix(col_sums) }
|
|
}
|
|
pub fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
|
match self { Self::Columnar(m) => m.partial_relfreq_euclidean_dist_matrix(col_sums), Self::Packed(m) => m.partial_relfreq_euclidean_dist_matrix(col_sums) }
|
|
}
|
|
pub fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
|
match self { Self::Columnar(m) => m.partial_hellinger_euclidean_dist_matrix(col_sums), Self::Packed(m) => m.partial_hellinger_euclidean_dist_matrix(col_sums) }
|
|
}
|
|
pub fn append_column(dir: &Path, value_of: impl Fn(usize) -> u32) -> io::Result<()> {
|
|
ColumnarCompactIntMatrix::append_column(dir, value_of)
|
|
}
|
|
}
|
|
|
|
// ── Trait impls ───────────────────────────────────────────────────────────────
|
|
|
|
use crate::traits::{ColumnWeights, CountPartials};
|
|
|
|
impl ColumnWeights for PersistentCompactIntMatrix {
|
|
fn col_weights(&self) -> Array1<u64> { self.sum() }
|
|
fn partial_kmer_counts(&self) -> Array1<u64> { self.count_nonzero() }
|
|
}
|
|
|
|
impl CountPartials for PersistentCompactIntMatrix {
|
|
fn partial_bray(&self) -> Array2<u64> { self.partial_bray_dist_matrix() }
|
|
fn partial_euclidean(&self) -> Array2<f64> { self.partial_euclidean_dist_matrix() }
|
|
fn partial_threshold_jaccard(&self, t: u32) -> (Array2<u64>, Array2<u64>) { self.partial_threshold_jaccard_dist_matrix(t) }
|
|
fn partial_relfreq_bray(&self, g: &Array1<u64>) -> Array2<f64> { self.partial_relfreq_bray_dist_matrix(g) }
|
|
fn partial_relfreq_euclidean(&self, g: &Array1<u64>) -> Array2<f64> { self.partial_relfreq_euclidean_dist_matrix(g) }
|
|
fn partial_hellinger(&self, g: &Array1<u64>) -> Array2<f64> { self.partial_hellinger_euclidean_dist_matrix(g) }
|
|
}
|
|
|
|
// ── Builder ───────────────────────────────────────────────────────────────────
|
|
|
|
/// Incrementally creates a columnar matrix inside a directory.
pub struct PersistentCompactIntMatrixBuilder {
    /// Directory that receives the column files and the metadata.
    dir: PathBuf,
    /// Number of slots given to every column builder.
    n: usize,
    /// Number of columns added so far; also the index of the next column.
    n_cols: usize,
}
|
|
|
|
impl PersistentCompactIntMatrixBuilder {
|
|
pub fn new(n: usize, dir: &Path) -> io::Result<Self> {
|
|
fs::create_dir_all(dir)?;
|
|
Ok(Self { dir: dir.to_path_buf(), n, n_cols: 0 })
|
|
}
|
|
pub fn n(&self) -> usize { self.n }
|
|
pub fn n_cols(&self) -> usize { self.n_cols }
|
|
pub fn add_col(&mut self) -> io::Result<PersistentCompactIntVecBuilder> {
|
|
let path = col_path(&self.dir, self.n_cols);
|
|
self.n_cols += 1;
|
|
PersistentCompactIntVecBuilder::new(self.n, &path)
|
|
}
|
|
|
|
pub fn add_col_from(&mut self, src: &TempCompactIntVec) -> io::Result<()> {
|
|
src.make_persistent(&col_path(&self.dir, self.n_cols))?;
|
|
self.n_cols += 1;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn add_col_from_bit(&mut self, src: &TempBitVec) -> io::Result<()> {
|
|
let path = col_path(&self.dir, self.n_cols);
|
|
self.n_cols += 1;
|
|
let mut b = PersistentCompactIntVecBuilder::new(self.n, &path)?;
|
|
b.inc_present(src.view());
|
|
b.close()
|
|
}
|
|
|
|
pub fn close(self) -> io::Result<()> {
|
|
MatrixMeta { n: self.n, n_cols: self.n_cols }.save(&self.dir)
|
|
}
|
|
}
|
|
|
|
// ── MatrixGroupOps ────────────────────────────────────────────────────────────
|
|
|
|
impl MatrixGroupOps for PersistentCompactIntMatrix {
|
|
fn partial_group_presence_count(&self, g: &ColGroup, threshold: u32) -> io::Result<TempCompactIntVec> {
|
|
let n = self.n();
|
|
if g.indices.len() < 255 {
|
|
let mut builder = TempCompactIntVecBuilder::new(n)?;
|
|
for &c in &g.indices {
|
|
builder.inc_predicate_fast(self.col_view(c), |v| v >= threshold);
|
|
}
|
|
builder.freeze()
|
|
} else {
|
|
let mut result = TempCompactIntVecBuilder::new(n)?;
|
|
for chunk in g.indices.chunks(254) {
|
|
let mut chunk_b = TempCompactIntVecBuilder::new(n)?;
|
|
for &c in chunk {
|
|
chunk_b.inc_predicate_fast(self.col_view(c), |v| v >= threshold);
|
|
}
|
|
let frozen = chunk_b.freeze()?;
|
|
result.add(frozen.view());
|
|
}
|
|
result.freeze()
|
|
}
|
|
}
|
|
|
|
fn partial_group_sum(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
|
|
let n = self.n();
|
|
let mut result = TempCompactIntVecBuilder::new(n)?;
|
|
for &c in &g.indices { result.add(self.col_view(c)); }
|
|
result.freeze()
|
|
}
|
|
|
|
fn partial_group_any(&self, g: &ColGroup, threshold: u32) -> io::Result<TempBitVec> {
|
|
let n = self.n();
|
|
let mut result = TempBitVecBuilder::new(n)?;
|
|
for &c in &g.indices {
|
|
result.or_where(self.col_view(c), |v| v >= threshold);
|
|
}
|
|
result.freeze()
|
|
}
|
|
|
|
fn partial_group_min(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
|
|
let n = self.n();
|
|
let mut result = TempCompactIntVecBuilder::new(n)?;
|
|
if let Some((&first, rest)) = g.indices.split_first() {
|
|
result.add(self.col_view(first));
|
|
for &c in rest { result.min(self.col_view(c)); }
|
|
}
|
|
result.freeze()
|
|
}
|
|
|
|
fn partial_group_max(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
|
|
let n = self.n();
|
|
let mut result = TempCompactIntVecBuilder::new(n)?;
|
|
for &c in &g.indices { result.max(self.col_view(c)); }
|
|
result.freeze()
|
|
}
|
|
}
|