Push mtzqmmrlmzzx #34

Merged
coissac merged 25 commits from push-mtzqmmrlmzzx into main 2026-06-22 08:47:24 +00:00
6 changed files with 101 additions and 24 deletions
Showing only changes of commit df7b400fda - Show all commits
+20
View File
@@ -13,6 +13,26 @@ pub const OVERFLOW_ENTRY_SIZE: usize = 12;
// Index entry: slot(u64) + pos(u64) = 16 bytes.
pub const INDEX_ENTRY_SIZE: usize = 16;
/// Total of all values encoded in a compact-int primary byte slice.
///
/// Every byte of `primary` is added as-is. A byte equal to 255 is a sentinel
/// that stands in for a larger value, so for each item yielded by `overflow`
/// (the true value, ≥ 255, of one sentinel — order does not matter) the
/// placeholder 255 is subtracted and the true value is added instead.
#[inline]
pub(crate) fn byte_sum(primary: &[u8], overflow: impl Iterator<Item = u32>) -> u64 {
    // Raw byte total, sentinels still counted as 255 each.
    let mut byte_total = 0u64;
    for &byte in primary {
        byte_total += u64::from(byte);
    }

    // One pass over the overflow values: how many there are and what they add up to.
    let mut sentinel_count = 0u64;
    let mut overflow_total = 0u64;
    for value in overflow {
        sentinel_count += 1;
        overflow_total += u64::from(value);
    }

    byte_total - 255 * sentinel_count + overflow_total
}
/// Number of non-zero values in a compact-int primary byte slice.
///
/// The overflow sentinel byte (255) is itself non-zero, so testing each
/// primary byte against zero is enough; the overflow data is never consulted.
#[inline]
pub(crate) fn byte_count_nonzero(primary: &[u8]) -> u64 {
    let mut nonzero = 0u64;
    for &byte in primary {
        if byte != 0 {
            nonzero += 1;
        }
    }
    nonzero
}
/// Parse a single overflow entry `(slot, value)` from a byte slice.
#[inline]
pub fn parse_overflow_entry(data: &[u8], base: usize, i: usize) -> (usize, u32) {
+13 -5
View File
@@ -11,7 +11,7 @@ use rayon::prelude::*;
use crate::bitmatrix::{pairwise_matrix, pairwise2_matrix};
use crate::builder::PersistentCompactIntVecBuilder;
use crate::memoryintvec::MemoryIntVec;
use crate::format::{HEADER_SIZE, OVERFLOW_ENTRY_SIZE, parse_index_entry, parse_overflow_entry};
use crate::format::{byte_count_nonzero, byte_sum, HEADER_SIZE, OVERFLOW_ENTRY_SIZE, parse_index_entry, parse_overflow_entry};
use crate::meta::MatrixMeta;
use crate::reader::PersistentCompactIntVec;
@@ -230,16 +230,24 @@ impl PackedCompactIntMatrix {
/// Per-column totals, collected into an `Array1<u64>` (one entry per column).
///
/// NOTE(review): this span comes from a rendered diff, so two alternative
/// iterator chains appear back to back — presumably the first is the replaced
/// implementation and the second its successor. Confirm against the real file.
pub(crate) fn sum(&self) -> Array1<u64> {
Array1::from_vec(
// Variant 1: for each column index `c`, add up `self.get(c, s)` over all `n_rows` slots.
(0..self.n_cols).into_par_iter()
.map(|c| (0..self.n_rows).map(|s| self.get(c, s) as u64).sum())
// Variant 2: for each entry of `self.columns`, one `byte_sum` call over its raw bytes.
self.columns.par_iter()
.map(|ci| {
// The `n_rows` bytes of the mmap starting at this column's `primary_start`.
let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
// Second tuple field (the value) of each of the column's `n_overflow` parsed entries.
let overflow = (0..ci.n_overflow)
.map(|i| parse_overflow_entry(&self.mmap, ci.data_offset, i).1);
byte_sum(primary, overflow)
})
.collect()
)
}
/// Per-column count of non-zero values, collected into an `Array1<u64>`.
///
/// NOTE(review): this span comes from a rendered diff, so two alternative
/// iterator chains appear back to back — presumably the first is the replaced
/// implementation and the second its successor. Confirm against the real file.
pub(crate) fn count_nonzero(&self) -> Array1<u64> {
Array1::from_vec(
// Variant 1: for each column index `c`, count the slots where `self.get(c, s) > 0`.
(0..self.n_cols).into_par_iter()
.map(|c| (0..self.n_rows).filter(|&s| self.get(c, s) > 0).count() as u64)
// Variant 2: for each entry of `self.columns`, count non-zero bytes in its primary slice.
self.columns.par_iter()
.map(|ci| {
// The `n_rows` bytes of the mmap starting at this column's `primary_start`.
let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
byte_count_nonzero(primary)
})
.collect()
)
}
+1 -1
View File
@@ -15,7 +15,7 @@ pub use bitmatrix::{PersistentBitMatrix, PersistentBitMatrixBuilder, pack_bit_ma
pub use builder::PersistentCompactIntVecBuilder;
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
pub use layer_meta::LayerMeta;
pub use memoryintvec::MemoryIntVec;
pub use memoryintvec::{MemoryIntIter, MemoryIntVec};
pub use memoryvec::{MemoryBitIter, MemoryBitVec};
pub use reader::PersistentCompactIntVec;
pub use traits::{BitPartials, BitSlice, BitSliceMut, BitToInt, ColumnWeights, CountPartials, IntSlice, IntSliceMut, IntToBit};
+60 -9
View File
@@ -4,6 +4,7 @@ use std::ops::{Add, AddAssign, Sub, SubAssign};
use std::path::Path;
use crate::builder::PersistentCompactIntVecBuilder;
use crate::format::{byte_count_nonzero, byte_sum};
use crate::traits::{IntSlice, IntSliceMut};
// ── MemoryIntVec ──────────────────────────────────────────────────────────────
@@ -37,8 +38,31 @@ impl MemoryIntVec {
/// Borrow the primary byte array as a slice.
///
/// Each slot holds one byte; the byte 255 marks a slot whose value is kept
/// in the overflow map instead.
pub(crate) fn primary_bytes(&self) -> &[u8] {
    self.primary.as_slice()
}
/// Borrow the overflow map, which is keyed by slot index and holds the value
/// of every slot whose primary byte is the 255 sentinel.
pub(crate) fn overflow_map(&self) -> &HashMap<usize, u32> {
    &self.overflow
}
pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
(0..self.n).map(move |slot| self.get(slot))
/// Return the value stored at `slot`.
///
/// A primary byte other than 255 is the value itself; 255 is the sentinel
/// meaning the value must be looked up in the overflow map.
///
/// # Panics
/// Panics if `slot` is out of bounds for the primary bytes, or if a sentinel
/// byte has no matching overflow entry.
pub fn get(&self, slot: usize) -> u32 {
    let byte = self.primary[slot];
    if byte == 255 {
        *self.overflow.get(&slot).expect("sentinel without overflow entry")
    } else {
        u32::from(byte)
    }
}
/// Sum of all stored values, computed by `byte_sum` from the primary bytes
/// and the values held in the overflow map.
pub fn sum(&self) -> u64 {
    let overflow_values = self.overflow.values().copied();
    byte_sum(&self.primary, overflow_values)
}
/// Number of non-zero primary bytes, as counted by `byte_count_nonzero`.
///
/// The overflow map is not consulted: a sentinel byte (255) already counts
/// as non-zero.
pub fn count_nonzero(&self) -> u64 {
    let bytes = &self.primary;
    byte_count_nonzero(bytes)
}
/// Build a vector of `n` slots that all hold `value`.
///
/// Values below 255 are stored directly as primary bytes with an empty
/// overflow map. Values of 255 or more put the 255 sentinel in every primary
/// byte and record `value` in the overflow map for each slot `0..n`.
pub fn filled(n: usize, value: u32) -> Self {
    if value >= 255 {
        // Every slot overflows: one map entry per slot index.
        let overflow = (0..n).map(|slot| (slot, value)).collect();
        return Self { primary: vec![255u8; n], overflow, n };
    }
    // `value` fits below the sentinel, so the cast to `u8` is lossless here.
    Self { primary: vec![value as u8; n], overflow: HashMap::new(), n }
}
/// Return a `MemoryIntIter` over this vector, positioned at slot 0.
pub fn iter(&self) -> MemoryIntIter<'_> {
    MemoryIntIter {
        slot: 0,
        vec: self,
    }
}
/// Write to disk and return a writable builder at `path`.
@@ -51,13 +75,9 @@ impl MemoryIntVec {
/// `IntSlice` implementation for `MemoryIntVec`.
///
/// NOTE(review): this span comes from a rendered diff, so two `get`
/// definitions are visible — presumably the expanded one is the replaced
/// version and the forwarding one its successor. Only one can exist in the
/// real file; confirm which.
impl IntSlice for MemoryIntVec {
// Length is the stored slot count `n`.
fn len(&self) -> usize { self.n }
// Expanded form: decode the slot directly from the primary byte.
fn get(&self, slot: usize) -> u32 {
match self.primary[slot] {
// 255 is the sentinel: the value must be present in the overflow map, else panic.
255 => *self.overflow.get(&slot).expect("sentinel without overflow entry"),
// Any other byte is the value itself.
v => v as u32,
}
}
// Forwarding form: method-call syntax prefers inherent methods over trait
// methods, so these calls reach `MemoryIntVec`'s own `get` / `sum` /
// `count_nonzero` rather than recursing into the trait methods. They depend
// on those inherent methods existing.
fn get(&self, slot: usize) -> u32 { self.get(slot) }
fn sum(&self) -> u64 { self.sum() }
fn count_nonzero(&self) -> u64 { self.count_nonzero() }
}
impl IntSliceMut for MemoryIntVec {
@@ -118,3 +138,34 @@ impl<B: IntSlice> AddAssign<&B> for MemoryIntVec {
/// Implements the `-=` operator for any `IntSlice` right-hand side by calling
/// `self.diff(rhs)`.
impl<B: IntSlice> SubAssign<&B> for MemoryIntVec {
    fn sub_assign(&mut self, rhs: &B) {
        self.diff(rhs);
    }
}
// ── Iterator ──────────────────────────────────────────────────────────────────
/// Iterator over the values of a borrowed `MemoryIntVec`, yielding `u32`s.
pub struct MemoryIntIter<'a> {
// The vector being iterated.
vec: &'a MemoryIntVec,
// Index of the next slot to yield; iteration ends once it reaches `vec.n`.
slot: usize,
}
/// Yields the vector's values one slot at a time, in increasing slot order.
impl Iterator for MemoryIntIter<'_> {
    type Item = u32;

    /// Return the value at the current slot and advance, or `None` once every
    /// slot has been visited.
    fn next(&mut self) -> Option<u32> {
        if self.slot < self.vec.n {
            let value = self.vec.get(self.slot);
            self.slot += 1;
            Some(value)
        } else {
            None
        }
    }

    /// Exact number of values still to come: `next` never advances `slot`
    /// past `vec.n`, so the subtraction cannot underflow.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.vec.n - self.slot;
        (remaining, Some(remaining))
    }
}
// `size_hint` returns the same exact count as both lower and upper bound, which is
// what `ExactSizeIterator`'s default `len()` relies on — so an empty impl suffices.
impl ExactSizeIterator for MemoryIntIter<'_> {}
/// Lets `for v in &vec` iterate a borrowed `MemoryIntVec`; equivalent to
/// calling `vec.iter()`.
impl<'a> IntoIterator for &'a MemoryIntVec {
    type Item = u32;
    type IntoIter = MemoryIntIter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}
+5 -7
View File
@@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
use memmap2::Mmap;
use crate::format::{HEADER_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE, parse_index_entry};
use crate::format::{byte_count_nonzero, byte_sum, HEADER_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE, parse_index_entry};
pub struct PersistentCompactIntVec {
mmap: Mmap,
@@ -129,14 +129,14 @@ impl PersistentCompactIntVec {
u32::from_le_bytes(self.mmap[off..off + 4].try_into().unwrap())
}
/// Sum of all stored values as a `u64`.
///
/// NOTE(review): this span comes from a rendered diff, so two alternative
/// bodies appear back to back — presumably the first is the replaced
/// implementation and the second its successor. Confirm against the real file.
#[inline]
pub fn sum(&self) -> u64 {
// Variant 1: widen every value yielded by `self.iter()` to `u64` and add them up.
self.iter().map(|v| v as u64).sum()
// Variant 2: take the `n` bytes of the mmap starting at `primary_offset` and let
// `byte_sum` correct for sentinels using `self.data_value(i)` for `i` in
// `0..n_overflow` (presumably the i-th overflow value — verify `data_value`).
let primary = &self.mmap[self.primary_offset..self.primary_offset + self.n];
byte_sum(primary, (0..self.n_overflow).map(|i| self.data_value(i)))
}
/// Number of non-zero stored values as a `u64`.
///
/// NOTE(review): this span comes from a rendered diff, so two alternative
/// bodies appear back to back — presumably the first is the replaced
/// implementation and the second its successor. Confirm against the real file.
#[inline]
pub fn count_nonzero(&self) -> u64 {
// Variant 1: count the values yielded by `self.iter()` that are greater than zero.
self.iter().filter(|&v| v > 0).count() as u64
// Variant 2: count the non-zero bytes among the `n` bytes of the mmap starting at
// `primary_offset`; the overflow data is not read.
let primary = &self.mmap[self.primary_offset..self.primary_offset + self.n];
byte_count_nonzero(primary)
}
#[inline]
@@ -357,8 +357,6 @@ use crate::traits::IntSlice;
/// `IntSlice` implementation for `PersistentCompactIntVec`.
///
/// Each method forwards with method-call syntax; because inherent methods are
/// preferred over trait methods, the calls reach the type's own `get` / `sum` /
/// `count_nonzero` rather than recursing into the trait methods.
///
/// NOTE(review): this span comes from a rendered diff whose hunk header suggests
/// two lines were removed here — possibly the `sum` and `count_nonzero`
/// forwards. If so, generic `IntSlice` callers would get the trait's default
/// implementations instead of the inherent ones; confirm against the real file.
impl IntSlice for PersistentCompactIntVec {
// Length is the stored slot count `n`.
fn len(&self) -> usize { self.n }
fn get(&self, slot: usize) -> u32 { self.get(slot) }
fn sum(&self) -> u64 { self.sum() }
fn count_nonzero(&self) -> u64 { self.count_nonzero() }
}
impl<'a> IntoIterator for &'a PersistentCompactIntVec {
+2 -2
View File
@@ -68,8 +68,8 @@ pub trait IntSlice {
fn get(&self, slot: usize) -> u32;
fn is_empty(&self) -> bool { self.len() == 0 }
fn iter(&self) -> impl Iterator<Item = u32> + '_ { (0..self.len()).map(|i| self.get(i)) }
fn sum(&self) -> u64 { (0..self.len()).map(|s| self.get(s) as u64).sum() }
fn count_nonzero(&self) -> u64 { (0..self.len()).filter(|&s| self.get(s) > 0).count() as u64 }
fn sum(&self) -> u64 { self.iter().map(|v| v as u64).sum() }
fn count_nonzero(&self) -> u64 { self.iter().filter(|v| *v > 0).count() as u64 }
fn lt(&self, threshold: u32) -> MemoryBitVec { self.cmp_scalar(|v| v < threshold) }
fn leq(&self, threshold: u32) -> MemoryBitVec { self.cmp_scalar(|v| v <= threshold) }