perf: optimize aggregation with byte-level helpers and direct mmap
Introduce `byte_sum` and `byte_count_nonzero` to efficiently aggregate compact-int byte slices, bypassing per-element decoding and overflow map lookups. Refactor `sum()` and `count_nonzero()` across the matrix, reader, and traits modules to use direct memory-mapped slice iteration and idiomatic Rust iterators. Additionally, expose `MemoryIntIter` publicly and implement `IntoIterator` and `IntSlice` for `MemoryIntVec` to enable standard iteration and delegate aggregation to the new helpers.
This commit is contained in:
@@ -13,6 +13,26 @@ pub const OVERFLOW_ENTRY_SIZE: usize = 12;
|
|||||||
// Index entry: slot(u64) + pos(u64) = 16 bytes.
|
// Index entry: slot(u64) + pos(u64) = 16 bytes.
|
||||||
pub const INDEX_ENTRY_SIZE: usize = 16;
|
pub const INDEX_ENTRY_SIZE: usize = 16;
|
||||||
|
|
||||||
|
/// Sum all values in a compact-int primary byte slice, correcting for overflow sentinels.
///
/// `primary` is the raw `&[u8]` where 255 is a sentinel for large values.
/// `overflow` yields the true values (≥ 255) for each sentinel, in any order.
///
/// Every sentinel contributes 255 to the raw byte total, so the result is
/// `raw + Σ overflow − 255 × (number of overflow entries)`.
#[inline]
pub(crate) fn byte_sum(primary: &[u8], overflow: impl Iterator<Item = u32>) -> u64 {
    // Raw total of the primary bytes, sentinels included at face value (255).
    let raw: u64 = primary.iter().map(|&b| u64::from(b)).sum();
    // Single pass over the overflow entries: how many there are and what they add up to.
    let (n, ov) = overflow.fold((0u64, 0u64), |(n, s), v| (n + 1, s + u64::from(v)));
    // Add before subtracting: each overflow value is ≥ 255, so `ov >= 255 * n`
    // and the intermediate can never drop below zero, even if `overflow`
    // yields more entries than `primary` has sentinel bytes.
    raw + ov - 255 * n
}
|
||||||
|
|
||||||
|
/// Number of non-zero entries in a compact-int primary byte slice.
///
/// A slot whose value lives in the overflow table stores the sentinel byte
/// 255, which is itself non-zero. Testing each primary byte against zero is
/// therefore enough; the overflow table is never consulted.
#[inline]
pub(crate) fn byte_count_nonzero(primary: &[u8]) -> u64 {
    // Map every byte to a 0/1 indicator and add the indicators up.
    primary.iter().map(|&byte| u64::from(byte != 0)).sum()
}
|
||||||
|
|
||||||
/// Parse a single overflow entry `(slot, value)` from a byte slice.
|
/// Parse a single overflow entry `(slot, value)` from a byte slice.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn parse_overflow_entry(data: &[u8], base: usize, i: usize) -> (usize, u32) {
|
pub fn parse_overflow_entry(data: &[u8], base: usize, i: usize) -> (usize, u32) {
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ use rayon::prelude::*;
|
|||||||
use crate::bitmatrix::{pairwise_matrix, pairwise2_matrix};
|
use crate::bitmatrix::{pairwise_matrix, pairwise2_matrix};
|
||||||
use crate::builder::PersistentCompactIntVecBuilder;
|
use crate::builder::PersistentCompactIntVecBuilder;
|
||||||
use crate::memoryintvec::MemoryIntVec;
|
use crate::memoryintvec::MemoryIntVec;
|
||||||
use crate::format::{HEADER_SIZE, OVERFLOW_ENTRY_SIZE, parse_index_entry, parse_overflow_entry};
|
use crate::format::{byte_count_nonzero, byte_sum, HEADER_SIZE, OVERFLOW_ENTRY_SIZE, parse_index_entry, parse_overflow_entry};
|
||||||
use crate::meta::MatrixMeta;
|
use crate::meta::MatrixMeta;
|
||||||
use crate::reader::PersistentCompactIntVec;
|
use crate::reader::PersistentCompactIntVec;
|
||||||
|
|
||||||
@@ -230,16 +230,24 @@ impl PackedCompactIntMatrix {
|
|||||||
|
|
||||||
pub(crate) fn sum(&self) -> Array1<u64> {
|
pub(crate) fn sum(&self) -> Array1<u64> {
|
||||||
Array1::from_vec(
|
Array1::from_vec(
|
||||||
(0..self.n_cols).into_par_iter()
|
self.columns.par_iter()
|
||||||
.map(|c| (0..self.n_rows).map(|s| self.get(c, s) as u64).sum())
|
.map(|ci| {
|
||||||
|
let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
|
||||||
|
let overflow = (0..ci.n_overflow)
|
||||||
|
.map(|i| parse_overflow_entry(&self.mmap, ci.data_offset, i).1);
|
||||||
|
byte_sum(primary, overflow)
|
||||||
|
})
|
||||||
.collect()
|
.collect()
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn count_nonzero(&self) -> Array1<u64> {
|
pub(crate) fn count_nonzero(&self) -> Array1<u64> {
|
||||||
Array1::from_vec(
|
Array1::from_vec(
|
||||||
(0..self.n_cols).into_par_iter()
|
self.columns.par_iter()
|
||||||
.map(|c| (0..self.n_rows).filter(|&s| self.get(c, s) > 0).count() as u64)
|
.map(|ci| {
|
||||||
|
let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
|
||||||
|
byte_count_nonzero(primary)
|
||||||
|
})
|
||||||
.collect()
|
.collect()
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ pub use bitmatrix::{PersistentBitMatrix, PersistentBitMatrixBuilder, pack_bit_ma
|
|||||||
pub use builder::PersistentCompactIntVecBuilder;
|
pub use builder::PersistentCompactIntVecBuilder;
|
||||||
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
|
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
|
||||||
pub use layer_meta::LayerMeta;
|
pub use layer_meta::LayerMeta;
|
||||||
pub use memoryintvec::MemoryIntVec;
|
pub use memoryintvec::{MemoryIntIter, MemoryIntVec};
|
||||||
pub use memoryvec::{MemoryBitIter, MemoryBitVec};
|
pub use memoryvec::{MemoryBitIter, MemoryBitVec};
|
||||||
pub use reader::PersistentCompactIntVec;
|
pub use reader::PersistentCompactIntVec;
|
||||||
pub use traits::{BitPartials, BitSlice, BitSliceMut, BitToInt, ColumnWeights, CountPartials, IntSlice, IntSliceMut, IntToBit};
|
pub use traits::{BitPartials, BitSlice, BitSliceMut, BitToInt, ColumnWeights, CountPartials, IntSlice, IntSliceMut, IntToBit};
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ use std::ops::{Add, AddAssign, Sub, SubAssign};
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::builder::PersistentCompactIntVecBuilder;
|
use crate::builder::PersistentCompactIntVecBuilder;
|
||||||
|
use crate::format::{byte_count_nonzero, byte_sum};
|
||||||
use crate::traits::{IntSlice, IntSliceMut};
|
use crate::traits::{IntSlice, IntSliceMut};
|
||||||
|
|
||||||
// ── MemoryIntVec ──────────────────────────────────────────────────────────────
|
// ── MemoryIntVec ──────────────────────────────────────────────────────────────
|
||||||
@@ -37,8 +38,31 @@ impl MemoryIntVec {
|
|||||||
pub(crate) fn primary_bytes(&self) -> &[u8] { &self.primary }
|
pub(crate) fn primary_bytes(&self) -> &[u8] { &self.primary }
|
||||||
pub(crate) fn overflow_map(&self) -> &HashMap<usize, u32> { &self.overflow }
|
pub(crate) fn overflow_map(&self) -> &HashMap<usize, u32> { &self.overflow }
|
||||||
|
|
||||||
pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
|
pub fn get(&self, slot: usize) -> u32 {
|
||||||
(0..self.n).map(move |slot| self.get(slot))
|
match self.primary[slot] {
|
||||||
|
255 => *self.overflow.get(&slot).expect("sentinel without overflow entry"),
|
||||||
|
v => v as u32,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn sum(&self) -> u64 {
|
||||||
|
byte_sum(&self.primary, self.overflow.values().copied())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn count_nonzero(&self) -> u64 {
|
||||||
|
byte_count_nonzero(&self.primary)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn filled(n: usize, value: u32) -> Self {
|
||||||
|
if value < 255 {
|
||||||
|
Self { primary: vec![value as u8; n], overflow: HashMap::new(), n }
|
||||||
|
} else {
|
||||||
|
Self { primary: vec![255u8; n], overflow: (0..n).map(|i| (i, value)).collect(), n }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn iter(&self) -> MemoryIntIter<'_> {
|
||||||
|
MemoryIntIter { vec: self, slot: 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Write to disk and return a writable builder at `path`.
|
/// Write to disk and return a writable builder at `path`.
|
||||||
@@ -51,13 +75,9 @@ impl MemoryIntVec {
|
|||||||
|
|
||||||
impl IntSlice for MemoryIntVec {
|
impl IntSlice for MemoryIntVec {
|
||||||
fn len(&self) -> usize { self.n }
|
fn len(&self) -> usize { self.n }
|
||||||
|
fn get(&self, slot: usize) -> u32 { self.get(slot) }
|
||||||
fn get(&self, slot: usize) -> u32 {
|
fn sum(&self) -> u64 { self.sum() }
|
||||||
match self.primary[slot] {
|
fn count_nonzero(&self) -> u64 { self.count_nonzero() }
|
||||||
255 => *self.overflow.get(&slot).expect("sentinel without overflow entry"),
|
|
||||||
v => v as u32,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IntSliceMut for MemoryIntVec {
|
impl IntSliceMut for MemoryIntVec {
|
||||||
@@ -118,3 +138,34 @@ impl<B: IntSlice> AddAssign<&B> for MemoryIntVec {
|
|||||||
impl<B: IntSlice> SubAssign<&B> for MemoryIntVec {
|
impl<B: IntSlice> SubAssign<&B> for MemoryIntVec {
|
||||||
fn sub_assign(&mut self, rhs: &B) { self.diff(rhs); }
|
fn sub_assign(&mut self, rhs: &B) { self.diff(rhs); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Iterator ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
pub struct MemoryIntIter<'a> {
|
||||||
|
vec: &'a MemoryIntVec,
|
||||||
|
slot: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for MemoryIntIter<'_> {
|
||||||
|
type Item = u32;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<u32> {
|
||||||
|
if self.slot >= self.vec.n { return None; }
|
||||||
|
let v = self.vec.get(self.slot);
|
||||||
|
self.slot += 1;
|
||||||
|
Some(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||||
|
let rem = self.vec.n - self.slot;
|
||||||
|
(rem, Some(rem))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ExactSizeIterator for MemoryIntIter<'_> {}
|
||||||
|
|
||||||
|
impl<'a> IntoIterator for &'a MemoryIntVec {
|
||||||
|
type Item = u32;
|
||||||
|
type IntoIter = MemoryIntIter<'a>;
|
||||||
|
fn into_iter(self) -> MemoryIntIter<'a> { self.iter() }
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
|
|||||||
|
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
|
|
||||||
use crate::format::{HEADER_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE, parse_index_entry};
|
use crate::format::{byte_count_nonzero, byte_sum, HEADER_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE, parse_index_entry};
|
||||||
|
|
||||||
pub struct PersistentCompactIntVec {
|
pub struct PersistentCompactIntVec {
|
||||||
mmap: Mmap,
|
mmap: Mmap,
|
||||||
@@ -129,14 +129,14 @@ impl PersistentCompactIntVec {
|
|||||||
u32::from_le_bytes(self.mmap[off..off + 4].try_into().unwrap())
|
u32::from_le_bytes(self.mmap[off..off + 4].try_into().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn sum(&self) -> u64 {
|
pub fn sum(&self) -> u64 {
|
||||||
self.iter().map(|v| v as u64).sum()
|
let primary = &self.mmap[self.primary_offset..self.primary_offset + self.n];
|
||||||
|
byte_sum(primary, (0..self.n_overflow).map(|i| self.data_value(i)))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn count_nonzero(&self) -> u64 {
|
pub fn count_nonzero(&self) -> u64 {
|
||||||
self.iter().filter(|&v| v > 0).count() as u64
|
let primary = &self.mmap[self.primary_offset..self.primary_offset + self.n];
|
||||||
|
byte_count_nonzero(primary)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
@@ -357,8 +357,6 @@ use crate::traits::IntSlice;
|
|||||||
impl IntSlice for PersistentCompactIntVec {
|
impl IntSlice for PersistentCompactIntVec {
|
||||||
fn len(&self) -> usize { self.n }
|
fn len(&self) -> usize { self.n }
|
||||||
fn get(&self, slot: usize) -> u32 { self.get(slot) }
|
fn get(&self, slot: usize) -> u32 { self.get(slot) }
|
||||||
fn sum(&self) -> u64 { self.sum() }
|
|
||||||
fn count_nonzero(&self) -> u64 { self.count_nonzero() }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> IntoIterator for &'a PersistentCompactIntVec {
|
impl<'a> IntoIterator for &'a PersistentCompactIntVec {
|
||||||
|
|||||||
@@ -68,8 +68,8 @@ pub trait IntSlice {
|
|||||||
fn get(&self, slot: usize) -> u32;
|
fn get(&self, slot: usize) -> u32;
|
||||||
fn is_empty(&self) -> bool { self.len() == 0 }
|
fn is_empty(&self) -> bool { self.len() == 0 }
|
||||||
fn iter(&self) -> impl Iterator<Item = u32> + '_ { (0..self.len()).map(|i| self.get(i)) }
|
fn iter(&self) -> impl Iterator<Item = u32> + '_ { (0..self.len()).map(|i| self.get(i)) }
|
||||||
fn sum(&self) -> u64 { (0..self.len()).map(|s| self.get(s) as u64).sum() }
|
fn sum(&self) -> u64 { self.iter().map(|v| v as u64).sum() }
|
||||||
fn count_nonzero(&self) -> u64 { (0..self.len()).filter(|&s| self.get(s) > 0).count() as u64 }
|
fn count_nonzero(&self) -> u64 { self.iter().filter(|v| *v > 0).count() as u64 }
|
||||||
|
|
||||||
fn lt(&self, threshold: u32) -> MemoryBitVec { self.cmp_scalar(|v| v < threshold) }
|
fn lt(&self, threshold: u32) -> MemoryBitVec { self.cmp_scalar(|v| v < threshold) }
|
||||||
fn leq(&self, threshold: u32) -> MemoryBitVec { self.cmp_scalar(|v| v <= threshold) }
|
fn leq(&self, threshold: u32) -> MemoryBitVec { self.cmp_scalar(|v| v <= threshold) }
|
||||||
|
|||||||
Reference in New Issue
Block a user