refactor: extract matrix helpers and improve bit iteration ergonomics

Refactor parallel matrix construction by extracting reusable `pairwise_matrix` and `pairwise2_matrix` helpers, and consolidate binary record deserialization into dedicated parsing functions. Add `set` and `iter` methods to `BitSliceMut` and `MemoryBitVec` for ergonomic bit manipulation and iteration. Standardize JSON field extraction via `meta::field`, expose `MemoryBitIter`, and improve test reliability by automatically cleaning up temporary directories.
This commit is contained in:
Eric Coissac
2026-06-16 23:36:25 +02:00
parent cde6457eea
commit d1717688d2
16 changed files with 136 additions and 183 deletions
+32 -44
View File
@@ -56,34 +56,11 @@ impl ColumnarBitMatrix {
} }
pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) { pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
let n = self.n_cols(); pairwise2_matrix(self.n_cols(), |i, j| self.col(i).partial_jaccard_dist(self.col(j)))
let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
.into_par_iter()
.map(|(i, j)| {
let (inter, union) = self.col(i).partial_jaccard_dist(self.col(j));
(i, j, inter, union)
})
.collect();
let mut inter_m = Array2::zeros((n, n));
let mut union_m = Array2::zeros((n, n));
for (i, j, inter, union) in results {
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
union_m[[i, j]] = union; union_m[[j, i]] = union;
}
(inter_m, union_m)
} }
pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> { pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
self.pairwise_u64(|i, j| self.col(i).hamming_dist(self.col(j))) pairwise_matrix(self.n_cols(), |i, j| self.col(i).hamming_dist(self.col(j)))
}
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
let n = self.n_cols();
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
.into_par_iter()
.map(|(i, j)| (i, j, f(i, j)))
.collect();
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
} }
pub(crate) fn append_column(dir: &Path, value_of: impl Fn(usize) -> bool) -> io::Result<()> { pub(crate) fn append_column(dir: &Path, value_of: impl Fn(usize) -> bool) -> io::Result<()> {
@@ -228,27 +205,11 @@ impl PackedBitMatrix {
} }
pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) { pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
let n = self.n_cols; pairwise2_matrix(self.n_cols, |i, j| self.partial_jaccard_col(i, j))
let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
.into_par_iter()
.map(|(i, j)| { let (inter, union) = self.partial_jaccard_col(i, j); (i, j, inter, union) })
.collect();
let mut inter_m = Array2::zeros((n, n));
let mut union_m = Array2::zeros((n, n));
for (i, j, inter, union) in results {
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
union_m[[i, j]] = union; union_m[[j, i]] = union;
}
(inter_m, union_m)
} }
pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> { pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
let n = self.n_cols; pairwise_matrix(self.n_cols, |i, j| self.pair_op(i, j, false))
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
.into_par_iter()
.map(|(i, j)| (i, j, self.pair_op(i, j, false)))
.collect();
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
} }
} }
@@ -488,7 +449,7 @@ impl PersistentBitMatrixBuilder {
} }
} }
// ── Helpers ─────────────────────────────────────────────────────────────────── // ── Shared matrix helpers (also used by intmatrix.rs) ─────────────────────────
fn upper_pairs(n: usize) -> Vec<(usize, usize)> { fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
(0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect() (0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
@@ -500,3 +461,30 @@ where T: Clone + Default {
for (i, j, vij, vji) in vals { m[[i, j]] = vij; m[[j, i]] = vji; } for (i, j, vij, vji) in vals { m[[i, j]] = vij; m[[j, i]] = vji; }
m m
} }
/// Build a symmetric `n×n` matrix by evaluating `f(i, j)` in parallel for
/// every pair produced by `upper_pairs(n)` and mirroring each value across
/// the diagonal.
///
/// `f` is invoked once per pair; the result is stored at both `[i, j]` and
/// `[j, i]`. `T: Copy` lets the mirrored cell reuse the value without an
/// explicit `.clone()`. Cells not covered by `upper_pairs` are left at
/// whatever `fill_symmetric` initialises them to.
pub(crate) fn pairwise_matrix<T>(n: usize, f: impl Fn(usize, usize) -> T + Sync) -> Array2<T>
where
    T: Copy + Default + Send,
{
    // Parallel phase: compute each pair once and duplicate it for the mirror cell.
    let cells: Vec<(usize, usize, T, T)> = upper_pairs(n)
        .into_par_iter()
        .map(|(row, col)| {
            let value = f(row, col);
            (row, col, value, value)
        })
        .collect();
    // Sequential phase: scatter into the dense matrix.
    fill_symmetric(n, cells.into_iter())
}
/// Two-output variant of `pairwise_matrix`: `f(i, j)` returns a pair `(a, b)`
/// and the two components fill two symmetric `n×n` matrices in one pass
/// (e.g. intersection + union for Jaccard).
///
/// `f` is evaluated in parallel for every pair from `upper_pairs(n)`. Both
/// matrices start filled with `T::default()`, so cells that are never
/// written keep that value.
pub(crate) fn pairwise2_matrix<T>(n: usize, f: impl Fn(usize, usize) -> (T, T) + Sync) -> (Array2<T>, Array2<T>)
where
    T: Copy + Default + Send,
{
    let evaluated: Vec<((usize, usize), (T, T))> = upper_pairs(n)
        .into_par_iter()
        .map(|(i, j)| ((i, j), f(i, j)))
        .collect();

    let mut first = Array2::from_elem((n, n), T::default());
    let mut second = Array2::from_elem((n, n), T::default());
    for ((i, j), (a, b)) in evaluated {
        // Write the upper-triangle cell and its mirror in both matrices.
        for cell in [[i, j], [j, i]] {
            first[cell] = a;
            second[cell] = b;
        }
    }
    (first, second)
}
+1 -11
View File
@@ -14,7 +14,7 @@ const MAGIC: [u8; 4] = *b"PBIV";
const HEADER_SIZE: usize = 16; const HEADER_SIZE: usize = 16;
#[inline] #[inline]
fn n_words(n: usize) -> usize { pub(crate) fn n_words(n: usize) -> usize {
n.div_ceil(64) n.div_ceil(64)
} }
@@ -222,16 +222,6 @@ impl PersistentBitVecBuilder {
(self.mmap[HEADER_SIZE + (slot >> 3)] >> (slot & 7)) & 1 != 0 (self.mmap[HEADER_SIZE + (slot >> 3)] >> (slot & 7)) & 1 != 0
} }
/// Write `value` into bit `slot` of the mapped payload.
///
/// Addressing is byte-wise: the target byte is `HEADER_SIZE + (slot >> 3)`
/// and the bit inside it is `slot & 7`.
///
/// # Panics
/// Panics if the target byte lies outside the mapping.
pub fn set(&mut self, slot: usize, value: bool) {
    let mask = 1u8 << (slot & 7);
    let cell = &mut self.mmap[HEADER_SIZE + (slot >> 3)];
    *cell = if value { *cell | mask } else { *cell & !mask };
}
fn data_words(&self) -> &[u64] { fn data_words(&self) -> &[u64] {
let nw = n_words(self.n); let nw = n_words(self.n);
let ptr = self.mmap[HEADER_SIZE..].as_ptr() as *const u64; let ptr = self.mmap[HEADER_SIZE..].as_ptr() as *const u64;
+2 -4
View File
@@ -5,7 +5,7 @@ use std::path::{Path, PathBuf};
use memmap2::MmapMut; use memmap2::MmapMut;
use crate::format::{HEADER_SIZE, OVERFLOW_ENTRY_SIZE, finalize_pciv}; use crate::format::{HEADER_SIZE, finalize_pciv, parse_overflow_entry};
use crate::reader::PersistentCompactIntVec; use crate::reader::PersistentCompactIntVec;
pub struct PersistentCompactIntVecBuilder { pub struct PersistentCompactIntVecBuilder {
@@ -78,9 +78,7 @@ impl PersistentCompactIntVecBuilder {
let mut overflow = HashMap::with_capacity(n_overflow); let mut overflow = HashMap::with_capacity(n_overflow);
for i in 0..n_overflow { for i in 0..n_overflow {
let off = data_offset + i * OVERFLOW_ENTRY_SIZE; let (slot, value) = parse_overflow_entry(&mmap, data_offset, i);
let slot = u64::from_le_bytes(mmap[off..off + 8].try_into().unwrap()) as usize;
let value = u32::from_le_bytes(mmap[off + 8..off + 12].try_into().unwrap());
overflow.insert(slot, value); overflow.insert(slot, value);
} }
+18
View File
@@ -13,6 +13,24 @@ pub const OVERFLOW_ENTRY_SIZE: usize = 12;
// Index entry: slot(u64) + pos(u64) = 16 bytes. // Index entry: slot(u64) + pos(u64) = 16 bytes.
pub const INDEX_ENTRY_SIZE: usize = 16; pub const INDEX_ENTRY_SIZE: usize = 16;
/// Decode the `i`-th overflow record `(slot, value)` from `data`.
///
/// Records are `OVERFLOW_ENTRY_SIZE` bytes apart, the first one starting at
/// byte `base`. Each record is a little-endian `u64` slot followed by a
/// little-endian `u32` value.
///
/// # Panics
/// Panics if the record does not lie entirely inside `data`.
#[inline]
pub fn parse_overflow_entry(data: &[u8], base: usize, i: usize) -> (usize, u32) {
    let start = base + i * OVERFLOW_ENTRY_SIZE;

    let mut slot_le = [0u8; 8];
    slot_le.copy_from_slice(&data[start..start + 8]);

    let mut value_le = [0u8; 4];
    value_le.copy_from_slice(&data[start + 8..start + 12]);

    (u64::from_le_bytes(slot_le) as usize, u32::from_le_bytes(value_le))
}
/// Decode the `i`-th sparse-index record `(slot, pos)` from `data`.
///
/// Records are `INDEX_ENTRY_SIZE` bytes apart, the first one starting at
/// byte `base`. Each record is two consecutive little-endian `u64` values:
/// the slot, then the position.
///
/// # Panics
/// Panics if the record does not lie entirely inside `data`.
#[inline]
pub fn parse_index_entry(data: &[u8], base: usize, i: usize) -> (usize, usize) {
    let start = base + i * INDEX_ENTRY_SIZE;

    let mut slot_le = [0u8; 8];
    slot_le.copy_from_slice(&data[start..start + 8]);

    let mut pos_le = [0u8; 8];
    pos_le.copy_from_slice(&data[start + 8..start + 16]);

    (u64::from_le_bytes(slot_le) as usize, u64::from_le_bytes(pos_le) as usize)
}
// Sparse index target: ≤ 32 KB in L1 cache (16 B per entry → 2048 entries). // Sparse index target: ≤ 32 KB in L1 cache (16 B per entry → 2048 entries).
pub const L1_INDEX_ENTRIES: usize = 2048; pub const L1_INDEX_ENTRIES: usize = 2048;
+27 -100
View File
@@ -8,9 +8,10 @@ use memmap2::Mmap;
use ndarray::{Array1, Array2}; use ndarray::{Array1, Array2};
use rayon::prelude::*; use rayon::prelude::*;
use crate::bitmatrix::{pairwise_matrix, pairwise2_matrix};
use crate::builder::PersistentCompactIntVecBuilder; use crate::builder::PersistentCompactIntVecBuilder;
use crate::memoryintvec::MemoryIntVec; use crate::memoryintvec::MemoryIntVec;
use crate::format::{HEADER_SIZE, INDEX_ENTRY_SIZE, OVERFLOW_ENTRY_SIZE}; use crate::format::{HEADER_SIZE, OVERFLOW_ENTRY_SIZE, parse_index_entry, parse_overflow_entry};
use crate::meta::MatrixMeta; use crate::meta::MatrixMeta;
use crate::reader::PersistentCompactIntVec; use crate::reader::PersistentCompactIntVec;
@@ -65,49 +66,35 @@ impl ColumnarCompactIntMatrix {
} }
pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> { pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
self.pairwise_u64(|i, j| self.col(i).partial_bray_dist(self.col(j))) pairwise_matrix(self.n_cols(), |i, j| self.col(i).partial_bray_dist(self.col(j)))
} }
pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> { pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
self.pairwise(|i, j| self.col(i).partial_euclidean_dist(self.col(j))) pairwise_matrix(self.n_cols(), |i, j| self.col(i).partial_euclidean_dist(self.col(j)))
} }
pub(crate) fn partial_threshold_jaccard_dist_matrix( pub(crate) fn partial_threshold_jaccard_dist_matrix(
&self, threshold: u32, &self, threshold: u32,
) -> (Array2<u64>, Array2<u64>) { ) -> (Array2<u64>, Array2<u64>) {
let n = self.n_cols(); pairwise2_matrix(self.n_cols(), |i, j| {
let pairs = upper_pairs(n); self.col(i).partial_threshold_jaccard_dist(self.col(j), threshold)
let results: Vec<(usize, usize, u64, u64)> = pairs
.into_par_iter()
.map(|(i, j)| {
let (inter, union) =
self.col(i).partial_threshold_jaccard_dist(self.col(j), threshold);
(i, j, inter, union)
}) })
.collect();
let mut inter_m = Array2::zeros((n, n));
let mut union_m = Array2::zeros((n, n));
for (i, j, inter, union) in results {
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
union_m[[i, j]] = union; union_m[[j, i]] = union;
}
(inter_m, union_m)
} }
pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> { pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
self.pairwise(|i, j| { pairwise_matrix(self.n_cols(), |i, j| {
self.col(i).partial_relfreq_bray_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64) self.col(i).partial_relfreq_bray_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
}) })
} }
pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> { pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
self.pairwise(|i, j| { pairwise_matrix(self.n_cols(), |i, j| {
self.col(i).partial_relfreq_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64) self.col(i).partial_relfreq_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
}) })
} }
pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> { pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
self.pairwise(|i, j| { pairwise_matrix(self.n_cols(), |i, j| {
self.col(i).partial_hellinger_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64) self.col(i).partial_hellinger_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
}) })
} }
@@ -121,19 +108,6 @@ impl ColumnarCompactIntMatrix {
meta.save(dir) meta.save(dir)
} }
/// Symmetric `f64` matrix over all column pairs: `f(i, j)` is evaluated in
/// parallel for each pair from `upper_pairs` and mirrored across the diagonal.
fn pairwise(&self, f: impl Fn(usize, usize) -> f64 + Sync) -> Array2<f64> {
    let n = self.n_cols();
    let cells: Vec<(usize, usize, f64, f64)> = upper_pairs(n)
        .into_par_iter()
        .map(|(i, j)| {
            let d = f(i, j);
            (i, j, d, d)
        })
        .collect();
    fill_symmetric(n, cells.into_iter())
}
/// Symmetric `u64` matrix over all column pairs: `f(i, j)` is evaluated in
/// parallel for each pair from `upper_pairs` and mirrored across the diagonal.
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
    let n = self.n_cols();
    let cells: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
        .into_par_iter()
        .map(|(i, j)| {
            let d = f(i, j);
            (i, j, d, d)
        })
        .collect();
    fill_symmetric(n, cells.into_iter())
}
} }
// ── PackedCompactIntMatrix ──────────────────────────────────────────────────── // ── PackedCompactIntMatrix ────────────────────────────────────────────────────
@@ -185,10 +159,7 @@ impl PackedCompactIntMatrix {
let mut index = Vec::with_capacity(n_idx); let mut index = Vec::with_capacity(n_idx);
for i in 0..n_idx { for i in 0..n_idx {
let ioff = index_offset + i * INDEX_ENTRY_SIZE; index.push(parse_index_entry(&mmap, index_offset, i));
let slot = u64::from_le_bytes(mmap[ioff..ioff+8].try_into().unwrap()) as usize;
let pos = u64::from_le_bytes(mmap[ioff+8..ioff+16].try_into().unwrap()) as usize;
index.push((slot, pos));
} }
columns.push(ColInfo { primary_start, data_offset, n_overflow: n_ov, step, index }); columns.push(ColInfo { primary_start, data_offset, n_overflow: n_ov, step, index });
} }
@@ -196,30 +167,25 @@ impl PackedCompactIntMatrix {
Ok(Self { mmap, n_rows, n_cols, columns }) Ok(Self { mmap, n_rows, n_cols, columns })
} }
/// Collect the overflow records of column `ci` into a `slot -> value` map.
///
/// Reads `ci.n_overflow` records starting at `ci.data_offset` in the mapped
/// file. If a slot occurs more than once, the last record wins.
fn col_overflow_map(&self, ci: &ColInfo) -> HashMap<usize, u32> {
    let mut by_slot = HashMap::with_capacity(ci.n_overflow);
    by_slot.extend(
        (0..ci.n_overflow).map(|i| parse_overflow_entry(&self.mmap, ci.data_offset, i)),
    );
    by_slot
}
pub(crate) fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> { pub(crate) fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
let ci = &self.columns[c]; let ci = &self.columns[c];
let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows]; let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
let mut overflow = HashMap::with_capacity(ci.n_overflow); PersistentCompactIntVecBuilder::from_raw_primary(primary, self.col_overflow_map(ci), path)
for i in 0..ci.n_overflow {
let off = ci.data_offset + i * OVERFLOW_ENTRY_SIZE;
let slot = u64::from_le_bytes(self.mmap[off..off+8].try_into().unwrap()) as usize;
let value = u32::from_le_bytes(self.mmap[off+8..off+12].try_into().unwrap());
overflow.insert(slot, value);
}
PersistentCompactIntVecBuilder::from_raw_primary(primary, overflow, path)
} }
pub(crate) fn col_as_memory(&self, c: usize) -> MemoryIntVec { pub(crate) fn col_as_memory(&self, c: usize) -> MemoryIntVec {
let ci = &self.columns[c]; let ci = &self.columns[c];
let primary = self.mmap[ci.primary_start..ci.primary_start + self.n_rows].to_vec(); let primary = self.mmap[ci.primary_start..ci.primary_start + self.n_rows].to_vec();
let mut overflow = HashMap::with_capacity(ci.n_overflow); MemoryIntVec::from_primary_and_overflow(primary, self.col_overflow_map(ci))
for i in 0..ci.n_overflow {
let off = ci.data_offset + i * OVERFLOW_ENTRY_SIZE;
let slot = u64::from_le_bytes(self.mmap[off..off+8].try_into().unwrap()) as usize;
let value = u32::from_le_bytes(self.mmap[off+8..off+12].try_into().unwrap());
overflow.insert(slot, value);
}
MemoryIntVec::from_primary_and_overflow(primary, overflow)
} }
#[inline] #[inline]
@@ -327,55 +293,28 @@ impl PackedCompactIntMatrix {
// ── Matrix methods ──────────────────────────────────────────────────────── // ── Matrix methods ────────────────────────────────────────────────────────
/// Generic symmetric matrix over all column pairs.
///
/// `f(i, j)` is evaluated in parallel for each pair from `upper_pairs`; the
/// mirrored cell receives a clone made sequentially after the parallel phase.
fn pairwise<T>(&self, f: impl Fn(usize, usize) -> T + Sync) -> Array2<T>
where
    T: Clone + Default + Send,
{
    let n = self.n_cols;
    let computed: Vec<(usize, usize, T)> = upper_pairs(n)
        .into_par_iter()
        .map(|(i, j)| (i, j, f(i, j)))
        .collect();
    let mirrored = computed.into_iter().map(|(i, j, upper)| {
        let lower = upper.clone();
        (i, j, upper, lower)
    });
    fill_symmetric(n, mirrored)
}
/// Symmetric `u64` matrix over all column pairs: `f(i, j)` is evaluated in
/// parallel for each pair from `upper_pairs` and mirrored across the diagonal.
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
    let n = self.n_cols;
    let cells: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
        .into_par_iter()
        .map(|(i, j)| {
            let d = f(i, j);
            (i, j, d, d)
        })
        .collect();
    fill_symmetric(n, cells.into_iter())
}
pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> { pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
self.pairwise_u64(|i, j| self.pair_partial_bray(i, j)) pairwise_matrix(self.n_cols, |i, j| self.pair_partial_bray(i, j))
} }
pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> { pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
self.pairwise(|i, j| self.pair_partial_euclidean(i, j)) pairwise_matrix(self.n_cols, |i, j| self.pair_partial_euclidean(i, j))
} }
pub(crate) fn partial_threshold_jaccard_dist_matrix(&self, t: u32) -> (Array2<u64>, Array2<u64>) { pub(crate) fn partial_threshold_jaccard_dist_matrix(&self, t: u32) -> (Array2<u64>, Array2<u64>) {
let n = self.n_cols; pairwise2_matrix(self.n_cols, |i, j| self.pair_partial_threshold_jaccard(i, j, t))
let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
.into_par_iter()
.map(|(i, j)| { let (inter, union) = self.pair_partial_threshold_jaccard(i, j, t); (i, j, inter, union) })
.collect();
let mut inter_m = Array2::zeros((n, n));
let mut union_m = Array2::zeros((n, n));
for (i, j, inter, union) in results {
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
union_m[[i, j]] = union; union_m[[j, i]] = union;
}
(inter_m, union_m)
} }
pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> { pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
self.pairwise(|i, j| self.pair_partial_relfreq_bray(i, j, col_sums[i] as f64, col_sums[j] as f64)) pairwise_matrix(self.n_cols, |i, j| self.pair_partial_relfreq_bray(i, j, col_sums[i] as f64, col_sums[j] as f64))
} }
pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> { pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
self.pairwise(|i, j| self.pair_partial_relfreq_euclidean(i, j, col_sums[i] as f64, col_sums[j] as f64)) pairwise_matrix(self.n_cols, |i, j| self.pair_partial_relfreq_euclidean(i, j, col_sums[i] as f64, col_sums[j] as f64))
} }
pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> { pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
self.pairwise(|i, j| self.pair_partial_hellinger(i, j, col_sums[i] as f64, col_sums[j] as f64)) pairwise_matrix(self.n_cols, |i, j| self.pair_partial_hellinger(i, j, col_sums[i] as f64, col_sums[j] as f64))
} }
} }
@@ -570,15 +509,3 @@ impl PersistentCompactIntMatrixBuilder {
} }
} }
// ── Helpers ───────────────────────────────────────────────────────────────────
/// List every index pair `(i, j)` with `i < j < n`, ordered by `i` and then
/// by `j` (the strict upper triangle of an `n×n` matrix).
fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
    let mut pairs = Vec::new();
    for i in 0..n {
        for j in (i + 1)..n {
            pairs.push((i, j));
        }
    }
    pairs
}
/// Scatter `(i, j, vij, vji)` tuples into a fresh `n×n` matrix.
///
/// The matrix starts filled with `T::default()`; each tuple writes `vij` to
/// `[i, j]` and then `vji` to `[j, i]`. Cells not mentioned by any tuple keep
/// the default value.
fn fill_symmetric<T>(n: usize, vals: impl Iterator<Item = (usize, usize, T, T)>) -> Array2<T>
where
    T: Clone + Default,
{
    let mut matrix = Array2::from_elem((n, n), T::default());
    vals.for_each(|(row, col, upper, lower)| {
        matrix[[row, col]] = upper;
        matrix[[col, row]] = lower;
    });
    matrix
}
+1 -6
View File
@@ -23,11 +23,6 @@ impl LayerMeta {
} }
fn parse(s: &str) -> Option<Self> { fn parse(s: &str) -> Option<Self> {
let key = "\"n\":"; Some(Self { n: crate::meta::field(s, "n")? })
let pos = s.find(key)? + key.len();
let rest = s[pos..].trim_start();
let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
let n = rest[..end].parse().ok()?;
Some(Self { n })
} }
} }
+1 -1
View File
@@ -16,7 +16,7 @@ pub use builder::PersistentCompactIntVecBuilder;
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix}; pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
pub use layer_meta::LayerMeta; pub use layer_meta::LayerMeta;
pub use memoryintvec::MemoryIntVec; pub use memoryintvec::MemoryIntVec;
pub use memoryvec::MemoryBitVec; pub use memoryvec::{MemoryBitIter, MemoryBitVec};
pub use reader::PersistentCompactIntVec; pub use reader::PersistentCompactIntVec;
pub use traits::{BitPartials, BitSlice, BitSliceMut, BitToInt, ColumnWeights, CountPartials, IntSlice, IntSliceMut, IntToBit}; pub use traits::{BitPartials, BitSlice, BitSliceMut, BitToInt, ColumnWeights, CountPartials, IntSlice, IntSliceMut, IntToBit};
+39 -9
View File
@@ -2,12 +2,9 @@ use std::io;
use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not}; use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};
use std::path::Path; use std::path::Path;
use crate::bitvec::PersistentBitVecBuilder; use crate::bitvec::{PersistentBitVecBuilder, n_words};
use crate::traits::{BitSlice, BitSliceMut}; use crate::traits::{BitSlice, BitSliceMut};
/// Number of 64-bit words needed to hold `n` bits (`n / 64`, rounded up).
#[inline]
fn n_words(n: usize) -> usize {
    let full_words = n / 64;
    let has_partial_word = n % 64 != 0;
    full_words + usize::from(has_partial_word)
}
// ── MemoryBitVec ────────────────────────────────────────────────────────────── // ── MemoryBitVec ──────────────────────────────────────────────────────────────
#[derive(Clone)] #[derive(Clone)]
@@ -41,11 +38,6 @@ impl MemoryBitVec {
(self.words[slot >> 6] >> (slot & 63)) & 1 != 0 (self.words[slot >> 6] >> (slot & 63)) & 1 != 0
} }
/// Write `value` into bit `slot`: word `slot >> 6`, bit position `slot & 63`.
///
/// # Panics
/// Panics if `slot >> 6` is not a valid index into `words`.
pub fn set(&mut self, slot: usize, value: bool) {
    let mask = 1u64 << (slot & 63);
    let word = &mut self.words[slot >> 6];
    match value {
        true => *word |= mask,
        false => *word &= !mask,
    }
}
pub fn count_ones(&self) -> u64 { pub fn count_ones(&self) -> u64 {
self.words.iter().map(|w| w.count_ones() as u64).sum() self.words.iter().map(|w| w.count_ones() as u64).sum()
} }
@@ -136,3 +128,41 @@ impl<B: BitSlice> BitOrAssign<&B> for MemoryBitVec {
impl<B: BitSlice> BitXorAssign<&B> for MemoryBitVec { impl<B: BitSlice> BitXorAssign<&B> for MemoryBitVec {
fn bitxor_assign(&mut self, rhs: &B) { self.xor(rhs); } fn bitxor_assign(&mut self, rhs: &B) { self.xor(rhs); }
} }
// ── Iterator ──────────────────────────────────────────────────────────────────
/// Borrowing iterator over the bits of a word-packed bit sequence, in slot order.
///
/// Bit `slot` is read from word `slot >> 6` at bit position `slot & 63`.
/// The iterator yields one `bool` per slot from its current `slot` up to
/// (but excluding) `n`, and reports its exact remaining length.
#[derive(Clone, Debug)]
pub struct MemoryBitIter<'a> {
    /// Backing words the bits are read from.
    words: &'a [u64],
    /// Next slot to yield.
    slot: usize,
    /// One past the last slot to yield.
    n: usize,
}

impl Iterator for MemoryBitIter<'_> {
    type Item = bool;

    /// Yield the bit at the current slot and advance.
    ///
    /// # Panics
    /// Panics if `n` exceeds the number of bits available in `words`.
    fn next(&mut self) -> Option<bool> {
        if self.slot >= self.n {
            return None;
        }
        let bit = (self.words[self.slot >> 6] >> (self.slot & 63)) & 1 != 0;
        self.slot += 1;
        Some(bit)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // `slot` never passes `n` through `next`; saturating keeps this
        // total even for a hand-built iterator with `slot > n`.
        let rem = self.n.saturating_sub(self.slot);
        (rem, Some(rem))
    }
}

impl ExactSizeIterator for MemoryBitIter<'_> {}

// Once `slot` reaches `n`, `next` returns `None` without touching any state,
// so every later call returns `None` as well.
impl std::iter::FusedIterator for MemoryBitIter<'_> {}
impl MemoryBitVec {
    /// Iterate over the vector's bits in slot order, from slot 0 up to `n`.
    pub fn iter(&self) -> MemoryBitIter<'_> {
        let words: &[u64] = &self.words;
        MemoryBitIter { n: self.n, slot: 0, words }
    }
}
impl<'a> IntoIterator for &'a MemoryBitVec {
type Item = bool;
type IntoIter = MemoryBitIter<'a>;
fn into_iter(self) -> MemoryBitIter<'a> { self.iter() }
}
+1 -1
View File
@@ -23,7 +23,7 @@ fn parse(s: &str) -> Option<MatrixMeta> {
Some(MatrixMeta { n: field(s, "n")?, n_cols: field(s, "n_cols")? }) Some(MatrixMeta { n: field(s, "n")?, n_cols: field(s, "n_cols")? })
} }
fn field(s: &str, name: &str) -> Option<usize> { pub(crate) fn field(s: &str, name: &str) -> Option<usize> {
let key = format!("\"{}\":", name); let key = format!("\"{}\":", name);
let pos = s.find(&key)? + key.len(); let pos = s.find(&key)? + key.len();
let rest = s[pos..].trim_start(); let rest = s[pos..].trim_start();
+2 -5
View File
@@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
use memmap2::Mmap; use memmap2::Mmap;
use crate::format::{HEADER_SIZE, INDEX_ENTRY_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE}; use crate::format::{HEADER_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE, parse_index_entry};
pub struct PersistentCompactIntVec { pub struct PersistentCompactIntVec {
mmap: Mmap, mmap: Mmap,
@@ -43,10 +43,7 @@ impl PersistentCompactIntVec {
let mut index = Vec::with_capacity(n_index); let mut index = Vec::with_capacity(n_index);
for i in 0..n_index { for i in 0..n_index {
let off = index_offset + i * INDEX_ENTRY_SIZE; index.push(parse_index_entry(&mmap, index_offset, i));
let slot = u64::from_le_bytes(mmap[off..off + 8].try_into().unwrap()) as usize;
let pos = u64::from_le_bytes(mmap[off + 8..off + 16].try_into().unwrap()) as usize;
index.push((slot, pos));
} }
Ok(Self { Ok(Self {
+1 -1
View File
@@ -1,7 +1,7 @@
use tempfile::tempdir; use tempfile::tempdir;
use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder}; use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder};
use crate::traits::BitPartials; use crate::traits::{BitPartials, BitSliceMut};
fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) { fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
let n = cols.first().map_or(0, |c| c.len()); let n = cols.first().map_or(0, |c| c.len());
+6
View File
@@ -20,6 +20,11 @@ pub trait BitSlice {
pub trait BitSliceMut: BitSlice { pub trait BitSliceMut: BitSlice {
fn words_mut(&mut self) -> &mut [u64]; fn words_mut(&mut self) -> &mut [u64];
/// Write `value` into bit `slot`: word `slot >> 6` of `words_mut()`, bit
/// position `slot & 63`.
///
/// `slot` must be below `len()`. A slot in the unused tail of the last word
/// would otherwise be written silently and be picked up by whole-word
/// operations such as population counts, so debug builds reject it.
///
/// NOTE(review): this addresses bits through native `u64` words. An
/// implementor whose `get` reads the same storage byte-wise only agrees with
/// this on little-endian targets — confirm for the mmap-backed builders.
///
/// # Panics
/// Panics if `slot >> 6` is out of range for `words_mut()`; in debug builds
/// also if `slot >= self.len()`.
fn set(&mut self, slot: usize, value: bool) {
    debug_assert!(slot < self.len(), "bit slot out of range");
    let mask = 1u64 << (slot & 63);
    let word = &mut self.words_mut()[slot >> 6];
    if value {
        *word |= mask;
    } else {
        *word &= !mask;
    }
}
fn copy_from<S: BitSlice>(&mut self, src: &S) -> &mut Self { fn copy_from<S: BitSlice>(&mut self, src: &S) -> &mut Self {
assert_eq!(self.len(), src.len(), "BitSlice length mismatch"); assert_eq!(self.len(), src.len(), "BitSlice length mismatch");
self.words_mut().copy_from_slice(src.words()); self.words_mut().copy_from_slice(src.words());
@@ -62,6 +67,7 @@ pub trait IntSlice {
fn len(&self) -> usize; fn len(&self) -> usize;
fn get(&self, slot: usize) -> u32; fn get(&self, slot: usize) -> u32;
fn is_empty(&self) -> bool { self.len() == 0 } fn is_empty(&self) -> bool { self.len() == 0 }
/// Iterate over every value in slot order, i.e. `get(0)`, `get(1)`, … up to
/// `get(len() - 1)`.
fn iter(&self) -> impl Iterator<Item = u32> + '_ {
    let len = self.len();
    (0..len).map(move |slot| self.get(slot))
}
fn sum(&self) -> u64 { (0..self.len()).map(|s| self.get(s) as u64).sum() } fn sum(&self) -> u64 { (0..self.len()).map(|s| self.get(s) as u64).sum() }
fn count_nonzero(&self) -> u64 { (0..self.len()).filter(|&s| self.get(s) > 0).count() as u64 } fn count_nonzero(&self) -> u64 { (0..self.len()).filter(|&s| self.get(s) > 0).count() as u64 }
+1
View File
@@ -3,6 +3,7 @@ use std::io;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use obicompactvec::{PersistentBitVecBuilder, PersistentCompactIntVecBuilder}; use obicompactvec::{PersistentBitVecBuilder, PersistentCompactIntVecBuilder};
use obicompactvec::traits::BitSliceMut;
use obilayeredmap::meta::PartitionMeta; use obilayeredmap::meta::PartitionMeta;
use obilayeredmap::{IndexMode, OLMError}; use obilayeredmap::{IndexMode, OLMError};
use obiskio::{SKError, SKResult}; use obiskio::{SKError, SKResult};
+1
View File
@@ -6,6 +6,7 @@ use obicompactvec::{
PersistentBitMatrix, PersistentBitMatrixBuilder, PersistentBitVecBuilder, PersistentBitMatrix, PersistentBitMatrixBuilder, PersistentBitVecBuilder,
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, PersistentCompactIntVecBuilder, PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, PersistentCompactIntVecBuilder,
}; };
use obicompactvec::traits::BitSliceMut;
use obilayeredmap::meta::PartitionMeta; use obilayeredmap::meta::PartitionMeta;
use obilayeredmap::OLMError; use obilayeredmap::OLMError;
use obiskio::{SKError, SKResult}; use obiskio::{SKError, SKResult};
+1
View File
@@ -6,6 +6,7 @@ use obicompactvec::{
PersistentBitMatrix, PersistentBitMatrixBuilder, PersistentBitMatrix, PersistentBitMatrixBuilder,
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
}; };
use obicompactvec::traits::BitSliceMut;
use obikseq::CanonicalKmer; use obikseq::CanonicalKmer;
use obiskio::{UnitigFileReader, UnitigFileWriter}; use obiskio::{UnitigFileReader, UnitigFileWriter};
+1
View File
@@ -102,6 +102,7 @@ mod tests {
PersistentBitMatrix, PersistentBitMatrixBuilder, PersistentBitMatrix, PersistentBitMatrixBuilder,
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
}; };
use obicompactvec::traits::BitSliceMut;
use tempfile::tempdir; use tempfile::tempdir;
fn make_int_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) { fn make_int_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {