feat: add memory vectors, slice traits, and column extraction methods
Introduce `MemoryBitVec` and `MemoryIntVec` for efficient in-memory storage with hybrid compression and overflow handling. Implement `BitSlice`, `BitSliceMut`, `IntSlice`, and `IntSliceMut` traits across persistent and memory-backed types to enable generic slice operations and bitwise/arithmetic overloads. Add `col_persist` and `col_as_memory` methods to `BitMatrix` and `IntMatrix` for efficient column extraction. Align with the new single-pass rebuild architecture by supporting fast kmer filtering and matrix rebuilding. Includes comprehensive tests and profiling instrumentation for the packing phase.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{self, BufWriter, Write as _};
|
||||
use std::path::{Path, PathBuf};
|
||||
@@ -8,6 +9,7 @@ use ndarray::{Array1, Array2};
|
||||
use rayon::prelude::*;
|
||||
|
||||
use crate::builder::PersistentCompactIntVecBuilder;
|
||||
use crate::memoryintvec::MemoryIntVec;
|
||||
use crate::format::{HEADER_SIZE, INDEX_ENTRY_SIZE, OVERFLOW_ENTRY_SIZE};
|
||||
use crate::meta::MatrixMeta;
|
||||
use crate::reader::PersistentCompactIntVec;
|
||||
@@ -194,6 +196,32 @@ impl PackedCompactIntMatrix {
|
||||
Ok(Self { mmap, n_rows, n_cols, columns })
|
||||
}
|
||||
|
||||
pub(crate) fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
|
||||
let ci = &self.columns[c];
|
||||
let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
|
||||
let mut overflow = HashMap::with_capacity(ci.n_overflow);
|
||||
for i in 0..ci.n_overflow {
|
||||
let off = ci.data_offset + i * OVERFLOW_ENTRY_SIZE;
|
||||
let slot = u64::from_le_bytes(self.mmap[off..off+8].try_into().unwrap()) as usize;
|
||||
let value = u32::from_le_bytes(self.mmap[off+8..off+12].try_into().unwrap());
|
||||
overflow.insert(slot, value);
|
||||
}
|
||||
PersistentCompactIntVecBuilder::from_raw_primary(primary, overflow, path)
|
||||
}
|
||||
|
||||
pub(crate) fn col_as_memory(&self, c: usize) -> MemoryIntVec {
|
||||
let ci = &self.columns[c];
|
||||
let primary = self.mmap[ci.primary_start..ci.primary_start + self.n_rows].to_vec();
|
||||
let mut overflow = HashMap::with_capacity(ci.n_overflow);
|
||||
for i in 0..ci.n_overflow {
|
||||
let off = ci.data_offset + i * OVERFLOW_ENTRY_SIZE;
|
||||
let slot = u64::from_le_bytes(self.mmap[off..off+8].try_into().unwrap()) as usize;
|
||||
let value = u32::from_le_bytes(self.mmap[off+8..off+12].try_into().unwrap());
|
||||
overflow.insert(slot, value);
|
||||
}
|
||||
MemoryIntVec::from_primary_and_overflow(primary, overflow)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn get(&self, col: usize, slot: usize) -> u32 {
|
||||
let ci = &self.columns[col];
|
||||
@@ -442,6 +470,20 @@ impl PersistentCompactIntMatrix {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
|
||||
match self {
|
||||
Self::Columnar(m) => PersistentCompactIntVecBuilder::build_from(m.col(c), path),
|
||||
Self::Packed(m) => m.col_persist(c, path),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn col_as_memory(&self, c: usize) -> MemoryIntVec {
|
||||
match self {
|
||||
Self::Columnar(m) => MemoryIntVec::from(m.col(c)),
|
||||
Self::Packed(m) => m.col_as_memory(c),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn row(&self, slot: usize) -> Box<[u32]> {
|
||||
match self { Self::Columnar(m) => m.row(slot), Self::Packed(m) => m.row(slot) }
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user