refactor: replace in-memory vectors with temp-file-backed storage
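Introduces `TempCompactIntVec` and `TempBitVec` as temporary, file-backed intermediates that replace the eager in-memory `MemoryIntVec` and `MemoryBitVec` results, enabling OS-level paging under memory pressure. Updates the `MatrixGroupOps` trait methods (`partial_group_presence_count`, `partial_group_sum`, `partial_group_any`) to return `io::Result` types, so errors from creating or finalizing the temp-backed storage propagate to callers. For column groups with 255 or more columns, `partial_group_presence_count` now accumulates in chunks of 254 columns, so that each chunk's per-row count fits in a `u8`, and adds each frozen chunk into the result. Includes builder types finalized with `.freeze()`, automatic `TempDir` cleanup on drop, and the test updates needed to handle the new fallible signatures. Also fixes `Cargo.toml` section ordering.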
This commit is contained in:
@@ -12,7 +12,8 @@ use crate::bitmatrix::{pairwise_matrix, pairwise2_matrix};
|
||||
use crate::builder::PersistentCompactIntVecBuilder;
|
||||
use crate::colgroup::{ColGroup, MatrixGroupOps, inc_primary_bits};
|
||||
use crate::memoryintvec::MemoryIntVec;
|
||||
use crate::memoryvec::MemoryBitVec;
|
||||
use crate::tempbitvec::{TempBitVec, TempBitVecBuilder};
|
||||
use crate::tempintvec::{TempCompactIntVec, TempCompactIntVecBuilder};
|
||||
use crate::format::{byte_count_nonzero, byte_sum, HEADER_SIZE, OVERFLOW_ENTRY_SIZE, parse_index_entry, parse_overflow_entry};
|
||||
use crate::meta::MatrixMeta;
|
||||
use crate::reader::PersistentCompactIntVec;
|
||||
@@ -630,45 +631,55 @@ impl PersistentCompactIntMatrixBuilder {
|
||||
// ── MatrixGroupOps ────────────────────────────────────────────────────────────
|
||||
|
||||
impl MatrixGroupOps for PersistentCompactIntMatrix {
|
||||
fn partial_group_presence_count(&self, g: &ColGroup, threshold: u32) -> MemoryIntVec {
|
||||
fn partial_group_presence_count(&self, g: &ColGroup, threshold: u32) -> io::Result<TempCompactIntVec> {
|
||||
let n = self.n();
|
||||
if g.indices.len() < 255 {
|
||||
// Fast path: counts fit in u8 — accumulate directly into raw bytes,
|
||||
// no overflow map involved.
|
||||
let mut primary = vec![0u8; n];
|
||||
for &c in &g.indices {
|
||||
let mask = self.col_view(c).cmp_scalar(|v| v >= threshold);
|
||||
inc_primary_bits(&mut primary, &mask);
|
||||
// Fast path: counts fit in u8 — accumulate directly into raw bytes.
|
||||
let mut builder = TempCompactIntVecBuilder::new(n)?;
|
||||
{
|
||||
let primary = builder.primary_bytes_mut();
|
||||
for &c in &g.indices {
|
||||
let mask = self.col_view(c).cmp_scalar(|v| v >= threshold);
|
||||
inc_primary_bits(primary, &mask);
|
||||
}
|
||||
}
|
||||
MemoryIntVec::from_primary(primary)
|
||||
builder.freeze()
|
||||
} else {
|
||||
// Slow path (rare): use IntSliceMut::count_bits which handles overflow.
|
||||
let mut result = MemoryIntVec::new(n);
|
||||
for &c in &g.indices {
|
||||
let mask = self.col_view(c).cmp_scalar(|v| v >= threshold);
|
||||
result.count_bits(&mask);
|
||||
// Slow path: process columns in chunks of 254 so each per-chunk count fits in a u8, then add each frozen chunk into the result.
|
||||
let mut result = TempCompactIntVecBuilder::new(n)?;
|
||||
for chunk in g.indices.chunks(254) {
|
||||
let mut chunk_builder = TempCompactIntVecBuilder::new(n)?;
|
||||
{
|
||||
let primary = chunk_builder.primary_bytes_mut();
|
||||
for &c in chunk {
|
||||
let mask = self.col_view(c).cmp_scalar(|v| v >= threshold);
|
||||
inc_primary_bits(primary, &mask);
|
||||
}
|
||||
}
|
||||
let chunk_frozen = chunk_builder.freeze()?;
|
||||
IntSliceMut::add(&mut result, &chunk_frozen);
|
||||
}
|
||||
result
|
||||
result.freeze()
|
||||
}
|
||||
}
|
||||
|
||||
fn partial_group_sum(&self, g: &ColGroup) -> MemoryIntVec {
|
||||
fn partial_group_sum(&self, g: &ColGroup) -> io::Result<TempCompactIntVec> {
|
||||
let n = self.n();
|
||||
let mut result = MemoryIntVec::new(n);
|
||||
let mut result = TempCompactIntVecBuilder::new(n)?;
|
||||
for &c in &g.indices {
|
||||
let view = self.col_view(c);
|
||||
IntSliceMut::add(&mut result, &view);
|
||||
}
|
||||
result
|
||||
result.freeze()
|
||||
}
|
||||
|
||||
fn partial_group_any(&self, g: &ColGroup, threshold: u32) -> MemoryBitVec {
|
||||
fn partial_group_any(&self, g: &ColGroup, threshold: u32) -> io::Result<TempBitVec> {
|
||||
let n = self.n();
|
||||
let mut result = MemoryBitVec::new(n);
|
||||
let mut result = TempBitVecBuilder::new(n)?;
|
||||
for &c in &g.indices {
|
||||
let mask = self.col_view(c).cmp_scalar(|v| v >= threshold);
|
||||
result.or(&mask);
|
||||
}
|
||||
result
|
||||
result.freeze()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user