Push mtzqmmrlmzzx #34
@@ -56,34 +56,11 @@ impl ColumnarBitMatrix {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
|
pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
|
||||||
let n = self.n_cols();
|
pairwise2_matrix(self.n_cols(), |i, j| self.col(i).partial_jaccard_dist(self.col(j)))
|
||||||
let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
|
|
||||||
.into_par_iter()
|
|
||||||
.map(|(i, j)| {
|
|
||||||
let (inter, union) = self.col(i).partial_jaccard_dist(self.col(j));
|
|
||||||
(i, j, inter, union)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
let mut inter_m = Array2::zeros((n, n));
|
|
||||||
let mut union_m = Array2::zeros((n, n));
|
|
||||||
for (i, j, inter, union) in results {
|
|
||||||
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
|
|
||||||
union_m[[i, j]] = union; union_m[[j, i]] = union;
|
|
||||||
}
|
|
||||||
(inter_m, union_m)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
|
pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
|
||||||
self.pairwise_u64(|i, j| self.col(i).hamming_dist(self.col(j)))
|
pairwise_matrix(self.n_cols(), |i, j| self.col(i).hamming_dist(self.col(j)))
|
||||||
}
|
|
||||||
|
|
||||||
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
|
|
||||||
let n = self.n_cols();
|
|
||||||
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
|
|
||||||
.into_par_iter()
|
|
||||||
.map(|(i, j)| (i, j, f(i, j)))
|
|
||||||
.collect();
|
|
||||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn append_column(dir: &Path, value_of: impl Fn(usize) -> bool) -> io::Result<()> {
|
pub(crate) fn append_column(dir: &Path, value_of: impl Fn(usize) -> bool) -> io::Result<()> {
|
||||||
@@ -228,27 +205,11 @@ impl PackedBitMatrix {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
|
pub(crate) fn partial_jaccard_dist_matrix(&self) -> (Array2<u64>, Array2<u64>) {
|
||||||
let n = self.n_cols;
|
pairwise2_matrix(self.n_cols, |i, j| self.partial_jaccard_col(i, j))
|
||||||
let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
|
|
||||||
.into_par_iter()
|
|
||||||
.map(|(i, j)| { let (inter, union) = self.partial_jaccard_col(i, j); (i, j, inter, union) })
|
|
||||||
.collect();
|
|
||||||
let mut inter_m = Array2::zeros((n, n));
|
|
||||||
let mut union_m = Array2::zeros((n, n));
|
|
||||||
for (i, j, inter, union) in results {
|
|
||||||
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
|
|
||||||
union_m[[i, j]] = union; union_m[[j, i]] = union;
|
|
||||||
}
|
|
||||||
(inter_m, union_m)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
|
pub(crate) fn partial_hamming_dist_matrix(&self) -> Array2<u64> {
|
||||||
let n = self.n_cols;
|
pairwise_matrix(self.n_cols, |i, j| self.pair_op(i, j, false))
|
||||||
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
|
|
||||||
.into_par_iter()
|
|
||||||
.map(|(i, j)| (i, j, self.pair_op(i, j, false)))
|
|
||||||
.collect();
|
|
||||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -488,7 +449,7 @@ impl PersistentBitMatrixBuilder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
// ── Shared matrix helpers (also used by intmatrix.rs) ─────────────────────────
|
||||||
|
|
||||||
fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
|
fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
|
||||||
(0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
|
(0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
|
||||||
@@ -500,3 +461,30 @@ where T: Clone + Default {
|
|||||||
for (i, j, vij, vji) in vals { m[[i, j]] = vij; m[[j, i]] = vji; }
|
for (i, j, vij, vji) in vals { m[[i, j]] = vij; m[[j, i]] = vji; }
|
||||||
m
|
m
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Compute a symmetric `n×n` matrix in parallel by evaluating `f(i,j)` for
|
||||||
|
/// all upper-triangle pairs. `T: Copy` avoids the `.clone()` needed for the
|
||||||
|
/// lower-triangle mirror.
|
||||||
|
pub(crate) fn pairwise_matrix<T>(n: usize, f: impl Fn(usize, usize) -> T + Sync) -> Array2<T>
|
||||||
|
where T: Copy + Default + Send {
|
||||||
|
let results: Vec<(usize, usize, T)> = upper_pairs(n)
|
||||||
|
.into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
|
||||||
|
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Same as `pairwise_matrix` but `f` returns two values that fill two
|
||||||
|
/// symmetric matrices simultaneously (e.g. intersection + union for Jaccard).
|
||||||
|
pub(crate) fn pairwise2_matrix<T>(n: usize, f: impl Fn(usize, usize) -> (T, T) + Sync) -> (Array2<T>, Array2<T>)
|
||||||
|
where T: Copy + Default + Send {
|
||||||
|
let results: Vec<(usize, usize, T, T)> = upper_pairs(n)
|
||||||
|
.into_par_iter()
|
||||||
|
.map(|(i, j)| { let (a, b) = f(i, j); (i, j, a, b) })
|
||||||
|
.collect();
|
||||||
|
let mut m0 = Array2::from_elem((n, n), T::default());
|
||||||
|
let mut m1 = Array2::from_elem((n, n), T::default());
|
||||||
|
for (i, j, a, b) in results {
|
||||||
|
m0[[i, j]] = a; m0[[j, i]] = a;
|
||||||
|
m1[[i, j]] = b; m1[[j, i]] = b;
|
||||||
|
}
|
||||||
|
(m0, m1)
|
||||||
|
}
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ const MAGIC: [u8; 4] = *b"PBIV";
|
|||||||
const HEADER_SIZE: usize = 16;
|
const HEADER_SIZE: usize = 16;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn n_words(n: usize) -> usize {
|
pub(crate) fn n_words(n: usize) -> usize {
|
||||||
n.div_ceil(64)
|
n.div_ceil(64)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -222,16 +222,6 @@ impl PersistentBitVecBuilder {
|
|||||||
(self.mmap[HEADER_SIZE + (slot >> 3)] >> (slot & 7)) & 1 != 0
|
(self.mmap[HEADER_SIZE + (slot >> 3)] >> (slot & 7)) & 1 != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set(&mut self, slot: usize, value: bool) {
|
|
||||||
let byte = HEADER_SIZE + (slot >> 3);
|
|
||||||
let bit = 1u8 << (slot & 7);
|
|
||||||
if value {
|
|
||||||
self.mmap[byte] |= bit;
|
|
||||||
} else {
|
|
||||||
self.mmap[byte] &= !bit;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn data_words(&self) -> &[u64] {
|
fn data_words(&self) -> &[u64] {
|
||||||
let nw = n_words(self.n);
|
let nw = n_words(self.n);
|
||||||
let ptr = self.mmap[HEADER_SIZE..].as_ptr() as *const u64;
|
let ptr = self.mmap[HEADER_SIZE..].as_ptr() as *const u64;
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use std::path::{Path, PathBuf};
|
|||||||
|
|
||||||
use memmap2::MmapMut;
|
use memmap2::MmapMut;
|
||||||
|
|
||||||
use crate::format::{HEADER_SIZE, OVERFLOW_ENTRY_SIZE, finalize_pciv};
|
use crate::format::{HEADER_SIZE, finalize_pciv, parse_overflow_entry};
|
||||||
use crate::reader::PersistentCompactIntVec;
|
use crate::reader::PersistentCompactIntVec;
|
||||||
|
|
||||||
pub struct PersistentCompactIntVecBuilder {
|
pub struct PersistentCompactIntVecBuilder {
|
||||||
@@ -78,9 +78,7 @@ impl PersistentCompactIntVecBuilder {
|
|||||||
|
|
||||||
let mut overflow = HashMap::with_capacity(n_overflow);
|
let mut overflow = HashMap::with_capacity(n_overflow);
|
||||||
for i in 0..n_overflow {
|
for i in 0..n_overflow {
|
||||||
let off = data_offset + i * OVERFLOW_ENTRY_SIZE;
|
let (slot, value) = parse_overflow_entry(&mmap, data_offset, i);
|
||||||
let slot = u64::from_le_bytes(mmap[off..off + 8].try_into().unwrap()) as usize;
|
|
||||||
let value = u32::from_le_bytes(mmap[off + 8..off + 12].try_into().unwrap());
|
|
||||||
overflow.insert(slot, value);
|
overflow.insert(slot, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,24 @@ pub const OVERFLOW_ENTRY_SIZE: usize = 12;
|
|||||||
// Index entry: slot(u64) + pos(u64) = 16 bytes.
|
// Index entry: slot(u64) + pos(u64) = 16 bytes.
|
||||||
pub const INDEX_ENTRY_SIZE: usize = 16;
|
pub const INDEX_ENTRY_SIZE: usize = 16;
|
||||||
|
|
||||||
|
/// Parse a single overflow entry `(slot, value)` from a byte slice.
|
||||||
|
#[inline]
|
||||||
|
pub fn parse_overflow_entry(data: &[u8], base: usize, i: usize) -> (usize, u32) {
|
||||||
|
let off = base + i * OVERFLOW_ENTRY_SIZE;
|
||||||
|
let slot = u64::from_le_bytes(data[off..off+8].try_into().unwrap()) as usize;
|
||||||
|
let value = u32::from_le_bytes(data[off+8..off+12].try_into().unwrap());
|
||||||
|
(slot, value)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a single sparse-index entry `(slot, pos)` from a byte slice.
|
||||||
|
#[inline]
|
||||||
|
pub fn parse_index_entry(data: &[u8], base: usize, i: usize) -> (usize, usize) {
|
||||||
|
let off = base + i * INDEX_ENTRY_SIZE;
|
||||||
|
let slot = u64::from_le_bytes(data[off..off+8].try_into().unwrap()) as usize;
|
||||||
|
let pos = u64::from_le_bytes(data[off+8..off+16].try_into().unwrap()) as usize;
|
||||||
|
(slot, pos)
|
||||||
|
}
|
||||||
|
|
||||||
// Sparse index target: ≤ 32 KB in L1 cache (16 B per entry → 2048 entries).
|
// Sparse index target: ≤ 32 KB in L1 cache (16 B per entry → 2048 entries).
|
||||||
pub const L1_INDEX_ENTRIES: usize = 2048;
|
pub const L1_INDEX_ENTRIES: usize = 2048;
|
||||||
|
|
||||||
|
|||||||
@@ -8,9 +8,10 @@ use memmap2::Mmap;
|
|||||||
use ndarray::{Array1, Array2};
|
use ndarray::{Array1, Array2};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
|
||||||
|
use crate::bitmatrix::{pairwise_matrix, pairwise2_matrix};
|
||||||
use crate::builder::PersistentCompactIntVecBuilder;
|
use crate::builder::PersistentCompactIntVecBuilder;
|
||||||
use crate::memoryintvec::MemoryIntVec;
|
use crate::memoryintvec::MemoryIntVec;
|
||||||
use crate::format::{HEADER_SIZE, INDEX_ENTRY_SIZE, OVERFLOW_ENTRY_SIZE};
|
use crate::format::{HEADER_SIZE, OVERFLOW_ENTRY_SIZE, parse_index_entry, parse_overflow_entry};
|
||||||
use crate::meta::MatrixMeta;
|
use crate::meta::MatrixMeta;
|
||||||
use crate::reader::PersistentCompactIntVec;
|
use crate::reader::PersistentCompactIntVec;
|
||||||
|
|
||||||
@@ -65,49 +66,35 @@ impl ColumnarCompactIntMatrix {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
|
pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
|
||||||
self.pairwise_u64(|i, j| self.col(i).partial_bray_dist(self.col(j)))
|
pairwise_matrix(self.n_cols(), |i, j| self.col(i).partial_bray_dist(self.col(j)))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||||
self.pairwise(|i, j| self.col(i).partial_euclidean_dist(self.col(j)))
|
pairwise_matrix(self.n_cols(), |i, j| self.col(i).partial_euclidean_dist(self.col(j)))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_threshold_jaccard_dist_matrix(
|
pub(crate) fn partial_threshold_jaccard_dist_matrix(
|
||||||
&self, threshold: u32,
|
&self, threshold: u32,
|
||||||
) -> (Array2<u64>, Array2<u64>) {
|
) -> (Array2<u64>, Array2<u64>) {
|
||||||
let n = self.n_cols();
|
pairwise2_matrix(self.n_cols(), |i, j| {
|
||||||
let pairs = upper_pairs(n);
|
self.col(i).partial_threshold_jaccard_dist(self.col(j), threshold)
|
||||||
let results: Vec<(usize, usize, u64, u64)> = pairs
|
|
||||||
.into_par_iter()
|
|
||||||
.map(|(i, j)| {
|
|
||||||
let (inter, union) =
|
|
||||||
self.col(i).partial_threshold_jaccard_dist(self.col(j), threshold);
|
|
||||||
(i, j, inter, union)
|
|
||||||
})
|
})
|
||||||
.collect();
|
|
||||||
let mut inter_m = Array2::zeros((n, n));
|
|
||||||
let mut union_m = Array2::zeros((n, n));
|
|
||||||
for (i, j, inter, union) in results {
|
|
||||||
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
|
|
||||||
union_m[[i, j]] = union; union_m[[j, i]] = union;
|
|
||||||
}
|
|
||||||
(inter_m, union_m)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
||||||
self.pairwise(|i, j| {
|
pairwise_matrix(self.n_cols(), |i, j| {
|
||||||
self.col(i).partial_relfreq_bray_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
|
self.col(i).partial_relfreq_bray_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
||||||
self.pairwise(|i, j| {
|
pairwise_matrix(self.n_cols(), |i, j| {
|
||||||
self.col(i).partial_relfreq_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
|
self.col(i).partial_relfreq_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
||||||
self.pairwise(|i, j| {
|
pairwise_matrix(self.n_cols(), |i, j| {
|
||||||
self.col(i).partial_hellinger_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
|
self.col(i).partial_hellinger_euclidean_dist(self.col(j), col_sums[i] as f64, col_sums[j] as f64)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -121,19 +108,6 @@ impl ColumnarCompactIntMatrix {
|
|||||||
meta.save(dir)
|
meta.save(dir)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pairwise(&self, f: impl Fn(usize, usize) -> f64 + Sync) -> Array2<f64> {
|
|
||||||
let n = self.n_cols();
|
|
||||||
let results: Vec<(usize, usize, f64)> = upper_pairs(n)
|
|
||||||
.into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
|
|
||||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
|
|
||||||
let n = self.n_cols();
|
|
||||||
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
|
|
||||||
.into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
|
|
||||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── PackedCompactIntMatrix ────────────────────────────────────────────────────
|
// ── PackedCompactIntMatrix ────────────────────────────────────────────────────
|
||||||
@@ -185,10 +159,7 @@ impl PackedCompactIntMatrix {
|
|||||||
|
|
||||||
let mut index = Vec::with_capacity(n_idx);
|
let mut index = Vec::with_capacity(n_idx);
|
||||||
for i in 0..n_idx {
|
for i in 0..n_idx {
|
||||||
let ioff = index_offset + i * INDEX_ENTRY_SIZE;
|
index.push(parse_index_entry(&mmap, index_offset, i));
|
||||||
let slot = u64::from_le_bytes(mmap[ioff..ioff+8].try_into().unwrap()) as usize;
|
|
||||||
let pos = u64::from_le_bytes(mmap[ioff+8..ioff+16].try_into().unwrap()) as usize;
|
|
||||||
index.push((slot, pos));
|
|
||||||
}
|
}
|
||||||
columns.push(ColInfo { primary_start, data_offset, n_overflow: n_ov, step, index });
|
columns.push(ColInfo { primary_start, data_offset, n_overflow: n_ov, step, index });
|
||||||
}
|
}
|
||||||
@@ -196,30 +167,25 @@ impl PackedCompactIntMatrix {
|
|||||||
Ok(Self { mmap, n_rows, n_cols, columns })
|
Ok(Self { mmap, n_rows, n_cols, columns })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn col_overflow_map(&self, ci: &ColInfo) -> HashMap<usize, u32> {
|
||||||
|
let mut overflow = HashMap::with_capacity(ci.n_overflow);
|
||||||
|
for i in 0..ci.n_overflow {
|
||||||
|
let (slot, value) = parse_overflow_entry(&self.mmap, ci.data_offset, i);
|
||||||
|
overflow.insert(slot, value);
|
||||||
|
}
|
||||||
|
overflow
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
|
pub(crate) fn col_persist(&self, c: usize, path: &Path) -> io::Result<PersistentCompactIntVecBuilder> {
|
||||||
let ci = &self.columns[c];
|
let ci = &self.columns[c];
|
||||||
let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
|
let primary = &self.mmap[ci.primary_start..ci.primary_start + self.n_rows];
|
||||||
let mut overflow = HashMap::with_capacity(ci.n_overflow);
|
PersistentCompactIntVecBuilder::from_raw_primary(primary, self.col_overflow_map(ci), path)
|
||||||
for i in 0..ci.n_overflow {
|
|
||||||
let off = ci.data_offset + i * OVERFLOW_ENTRY_SIZE;
|
|
||||||
let slot = u64::from_le_bytes(self.mmap[off..off+8].try_into().unwrap()) as usize;
|
|
||||||
let value = u32::from_le_bytes(self.mmap[off+8..off+12].try_into().unwrap());
|
|
||||||
overflow.insert(slot, value);
|
|
||||||
}
|
|
||||||
PersistentCompactIntVecBuilder::from_raw_primary(primary, overflow, path)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn col_as_memory(&self, c: usize) -> MemoryIntVec {
|
pub(crate) fn col_as_memory(&self, c: usize) -> MemoryIntVec {
|
||||||
let ci = &self.columns[c];
|
let ci = &self.columns[c];
|
||||||
let primary = self.mmap[ci.primary_start..ci.primary_start + self.n_rows].to_vec();
|
let primary = self.mmap[ci.primary_start..ci.primary_start + self.n_rows].to_vec();
|
||||||
let mut overflow = HashMap::with_capacity(ci.n_overflow);
|
MemoryIntVec::from_primary_and_overflow(primary, self.col_overflow_map(ci))
|
||||||
for i in 0..ci.n_overflow {
|
|
||||||
let off = ci.data_offset + i * OVERFLOW_ENTRY_SIZE;
|
|
||||||
let slot = u64::from_le_bytes(self.mmap[off..off+8].try_into().unwrap()) as usize;
|
|
||||||
let value = u32::from_le_bytes(self.mmap[off+8..off+12].try_into().unwrap());
|
|
||||||
overflow.insert(slot, value);
|
|
||||||
}
|
|
||||||
MemoryIntVec::from_primary_and_overflow(primary, overflow)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
@@ -327,55 +293,28 @@ impl PackedCompactIntMatrix {
|
|||||||
|
|
||||||
// ── Matrix methods ────────────────────────────────────────────────────────
|
// ── Matrix methods ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
fn pairwise<T>(&self, f: impl Fn(usize, usize) -> T + Sync) -> Array2<T>
|
|
||||||
where T: Clone + Default + Send {
|
|
||||||
let n = self.n_cols;
|
|
||||||
let results: Vec<(usize, usize, T)> = upper_pairs(n)
|
|
||||||
.into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
|
|
||||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| { let w = v.clone(); (i, j, v, w) }))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
|
|
||||||
let n = self.n_cols;
|
|
||||||
let results: Vec<(usize, usize, u64)> = upper_pairs(n)
|
|
||||||
.into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
|
|
||||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| (i, j, v, v)))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
|
pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
|
||||||
self.pairwise_u64(|i, j| self.pair_partial_bray(i, j))
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_bray(i, j))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||||
self.pairwise(|i, j| self.pair_partial_euclidean(i, j))
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_euclidean(i, j))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_threshold_jaccard_dist_matrix(&self, t: u32) -> (Array2<u64>, Array2<u64>) {
|
pub(crate) fn partial_threshold_jaccard_dist_matrix(&self, t: u32) -> (Array2<u64>, Array2<u64>) {
|
||||||
let n = self.n_cols;
|
pairwise2_matrix(self.n_cols, |i, j| self.pair_partial_threshold_jaccard(i, j, t))
|
||||||
let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
|
|
||||||
.into_par_iter()
|
|
||||||
.map(|(i, j)| { let (inter, union) = self.pair_partial_threshold_jaccard(i, j, t); (i, j, inter, union) })
|
|
||||||
.collect();
|
|
||||||
let mut inter_m = Array2::zeros((n, n));
|
|
||||||
let mut union_m = Array2::zeros((n, n));
|
|
||||||
for (i, j, inter, union) in results {
|
|
||||||
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
|
|
||||||
union_m[[i, j]] = union; union_m[[j, i]] = union;
|
|
||||||
}
|
|
||||||
(inter_m, union_m)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
||||||
self.pairwise(|i, j| self.pair_partial_relfreq_bray(i, j, col_sums[i] as f64, col_sums[j] as f64))
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_relfreq_bray(i, j, col_sums[i] as f64, col_sums[j] as f64))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
||||||
self.pairwise(|i, j| self.pair_partial_relfreq_euclidean(i, j, col_sums[i] as f64, col_sums[j] as f64))
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_relfreq_euclidean(i, j, col_sums[i] as f64, col_sums[j] as f64))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
||||||
self.pairwise(|i, j| self.pair_partial_hellinger(i, j, col_sums[i] as f64, col_sums[j] as f64))
|
pairwise_matrix(self.n_cols, |i, j| self.pair_partial_hellinger(i, j, col_sums[i] as f64, col_sums[j] as f64))
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -570,15 +509,3 @@ impl PersistentCompactIntMatrixBuilder {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
fn upper_pairs(n: usize) -> Vec<(usize, usize)> {
|
|
||||||
(0..n).flat_map(|i| (i + 1..n).map(move |j| (i, j))).collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn fill_symmetric<T>(n: usize, vals: impl Iterator<Item = (usize, usize, T, T)>) -> Array2<T>
|
|
||||||
where T: Clone + Default {
|
|
||||||
let mut m = Array2::from_elem((n, n), T::default());
|
|
||||||
for (i, j, vij, vji) in vals { m[[i, j]] = vij; m[[j, i]] = vji; }
|
|
||||||
m
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -23,11 +23,6 @@ impl LayerMeta {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn parse(s: &str) -> Option<Self> {
|
fn parse(s: &str) -> Option<Self> {
|
||||||
let key = "\"n\":";
|
Some(Self { n: crate::meta::field(s, "n")? })
|
||||||
let pos = s.find(key)? + key.len();
|
|
||||||
let rest = s[pos..].trim_start();
|
|
||||||
let end = rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len());
|
|
||||||
let n = rest[..end].parse().ok()?;
|
|
||||||
Some(Self { n })
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ pub use builder::PersistentCompactIntVecBuilder;
|
|||||||
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
|
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
|
||||||
pub use layer_meta::LayerMeta;
|
pub use layer_meta::LayerMeta;
|
||||||
pub use memoryintvec::MemoryIntVec;
|
pub use memoryintvec::MemoryIntVec;
|
||||||
pub use memoryvec::MemoryBitVec;
|
pub use memoryvec::{MemoryBitIter, MemoryBitVec};
|
||||||
pub use reader::PersistentCompactIntVec;
|
pub use reader::PersistentCompactIntVec;
|
||||||
pub use traits::{BitPartials, BitSlice, BitSliceMut, BitToInt, ColumnWeights, CountPartials, IntSlice, IntSliceMut, IntToBit};
|
pub use traits::{BitPartials, BitSlice, BitSliceMut, BitToInt, ColumnWeights, CountPartials, IntSlice, IntSliceMut, IntToBit};
|
||||||
|
|
||||||
|
|||||||
@@ -2,12 +2,9 @@ use std::io;
|
|||||||
use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};
|
use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::bitvec::PersistentBitVecBuilder;
|
use crate::bitvec::{PersistentBitVecBuilder, n_words};
|
||||||
use crate::traits::{BitSlice, BitSliceMut};
|
use crate::traits::{BitSlice, BitSliceMut};
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn n_words(n: usize) -> usize { n.div_ceil(64) }
|
|
||||||
|
|
||||||
// ── MemoryBitVec ──────────────────────────────────────────────────────────────
|
// ── MemoryBitVec ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -41,11 +38,6 @@ impl MemoryBitVec {
|
|||||||
(self.words[slot >> 6] >> (slot & 63)) & 1 != 0
|
(self.words[slot >> 6] >> (slot & 63)) & 1 != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set(&mut self, slot: usize, value: bool) {
|
|
||||||
let bit = 1u64 << (slot & 63);
|
|
||||||
if value { self.words[slot >> 6] |= bit; } else { self.words[slot >> 6] &= !bit; }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn count_ones(&self) -> u64 {
|
pub fn count_ones(&self) -> u64 {
|
||||||
self.words.iter().map(|w| w.count_ones() as u64).sum()
|
self.words.iter().map(|w| w.count_ones() as u64).sum()
|
||||||
}
|
}
|
||||||
@@ -136,3 +128,41 @@ impl<B: BitSlice> BitOrAssign<&B> for MemoryBitVec {
|
|||||||
impl<B: BitSlice> BitXorAssign<&B> for MemoryBitVec {
|
impl<B: BitSlice> BitXorAssign<&B> for MemoryBitVec {
|
||||||
fn bitxor_assign(&mut self, rhs: &B) { self.xor(rhs); }
|
fn bitxor_assign(&mut self, rhs: &B) { self.xor(rhs); }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Iterator ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Iterator over the bits of a word-packed bit vector, yielding one `bool` per slot.
///
/// Slot `s` is read from `words[s / 64]` at bit position `s % 64`, counting from
/// the least-significant bit. Iteration runs from the current `slot` up to, but
/// not including, `n`.
#[derive(Clone, Debug)]
pub struct MemoryBitIter<'a> {
    /// Backing 64-bit words holding the packed bits.
    words: &'a [u64],
    /// Next slot to yield.
    slot: usize,
    /// One past the last slot to yield.
    n: usize,
}

impl Iterator for MemoryBitIter<'_> {
    type Item = bool;

    /// Returns the bit at the current slot and advances, or `None` once `n` is reached.
    fn next(&mut self) -> Option<bool> {
        if self.slot >= self.n {
            return None;
        }
        let bit = (self.words[self.slot >> 6] >> (self.slot & 63)) & 1 != 0;
        self.slot += 1;
        Some(bit)
    }

    /// Exact number of remaining bits.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // `next` never advances `slot` past `n`, so this cannot underflow.
        let rem = self.n - self.slot;
        (rem, Some(rem))
    }
}

impl ExactSizeIterator for MemoryBitIter<'_> {}

// Once `slot` reaches `n`, `next` keeps returning `None`, so the iterator is fused.
impl std::iter::FusedIterator for MemoryBitIter<'_> {}
|
||||||
|
|
||||||
|
impl MemoryBitVec {
|
||||||
|
pub fn iter(&self) -> MemoryBitIter<'_> {
|
||||||
|
MemoryBitIter { words: &self.words, slot: 0, n: self.n }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> IntoIterator for &'a MemoryBitVec {
|
||||||
|
type Item = bool;
|
||||||
|
type IntoIter = MemoryBitIter<'a>;
|
||||||
|
fn into_iter(self) -> MemoryBitIter<'a> { self.iter() }
|
||||||
|
}
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ fn parse(s: &str) -> Option<MatrixMeta> {
|
|||||||
Some(MatrixMeta { n: field(s, "n")?, n_cols: field(s, "n_cols")? })
|
Some(MatrixMeta { n: field(s, "n")?, n_cols: field(s, "n_cols")? })
|
||||||
}
|
}
|
||||||
|
|
||||||
fn field(s: &str, name: &str) -> Option<usize> {
|
pub(crate) fn field(s: &str, name: &str) -> Option<usize> {
|
||||||
let key = format!("\"{}\":", name);
|
let key = format!("\"{}\":", name);
|
||||||
let pos = s.find(&key)? + key.len();
|
let pos = s.find(&key)? + key.len();
|
||||||
let rest = s[pos..].trim_start();
|
let rest = s[pos..].trim_start();
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
|
|||||||
|
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
|
|
||||||
use crate::format::{HEADER_SIZE, INDEX_ENTRY_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE};
|
use crate::format::{HEADER_SIZE, MAGIC, OVERFLOW_ENTRY_SIZE, parse_index_entry};
|
||||||
|
|
||||||
pub struct PersistentCompactIntVec {
|
pub struct PersistentCompactIntVec {
|
||||||
mmap: Mmap,
|
mmap: Mmap,
|
||||||
@@ -43,10 +43,7 @@ impl PersistentCompactIntVec {
|
|||||||
|
|
||||||
let mut index = Vec::with_capacity(n_index);
|
let mut index = Vec::with_capacity(n_index);
|
||||||
for i in 0..n_index {
|
for i in 0..n_index {
|
||||||
let off = index_offset + i * INDEX_ENTRY_SIZE;
|
index.push(parse_index_entry(&mmap, index_offset, i));
|
||||||
let slot = u64::from_le_bytes(mmap[off..off + 8].try_into().unwrap()) as usize;
|
|
||||||
let pos = u64::from_le_bytes(mmap[off + 8..off + 16].try_into().unwrap()) as usize;
|
|
||||||
index.push((slot, pos));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder};
|
use crate::{PersistentBitMatrix, PersistentBitMatrixBuilder};
|
||||||
use crate::traits::BitPartials;
|
use crate::traits::{BitPartials, BitSliceMut};
|
||||||
|
|
||||||
fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
|
fn make_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
|
||||||
let n = cols.first().map_or(0, |c| c.len());
|
let n = cols.first().map_or(0, |c| c.len());
|
||||||
|
|||||||
@@ -20,6 +20,11 @@ pub trait BitSlice {
|
|||||||
pub trait BitSliceMut: BitSlice {
|
pub trait BitSliceMut: BitSlice {
|
||||||
fn words_mut(&mut self) -> &mut [u64];
|
fn words_mut(&mut self) -> &mut [u64];
|
||||||
|
|
||||||
|
fn set(&mut self, slot: usize, value: bool) {
|
||||||
|
let bit = 1u64 << (slot & 63);
|
||||||
|
if value { self.words_mut()[slot >> 6] |= bit; } else { self.words_mut()[slot >> 6] &= !bit; }
|
||||||
|
}
|
||||||
|
|
||||||
fn copy_from<S: BitSlice>(&mut self, src: &S) -> &mut Self {
|
fn copy_from<S: BitSlice>(&mut self, src: &S) -> &mut Self {
|
||||||
assert_eq!(self.len(), src.len(), "BitSlice length mismatch");
|
assert_eq!(self.len(), src.len(), "BitSlice length mismatch");
|
||||||
self.words_mut().copy_from_slice(src.words());
|
self.words_mut().copy_from_slice(src.words());
|
||||||
@@ -62,6 +67,7 @@ pub trait IntSlice {
|
|||||||
fn len(&self) -> usize;
|
fn len(&self) -> usize;
|
||||||
fn get(&self, slot: usize) -> u32;
|
fn get(&self, slot: usize) -> u32;
|
||||||
fn is_empty(&self) -> bool { self.len() == 0 }
|
fn is_empty(&self) -> bool { self.len() == 0 }
|
||||||
|
/// Iterates over every slot in order, yielding `get(slot)` for `slot` in `0..len()`.
fn iter(&self) -> impl Iterator<Item = u32> + '_ {
    let slots = 0..self.len();
    slots.map(|slot| self.get(slot))
}
|
||||||
fn sum(&self) -> u64 { (0..self.len()).map(|s| self.get(s) as u64).sum() }
|
fn sum(&self) -> u64 { (0..self.len()).map(|s| self.get(s) as u64).sum() }
|
||||||
fn count_nonzero(&self) -> u64 { (0..self.len()).filter(|&s| self.get(s) > 0).count() as u64 }
|
fn count_nonzero(&self) -> u64 { (0..self.len()).filter(|&s| self.get(s) > 0).count() as u64 }
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ use std::io;
|
|||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use obicompactvec::{PersistentBitVecBuilder, PersistentCompactIntVecBuilder};
|
use obicompactvec::{PersistentBitVecBuilder, PersistentCompactIntVecBuilder};
|
||||||
|
use obicompactvec::traits::BitSliceMut;
|
||||||
use obilayeredmap::meta::PartitionMeta;
|
use obilayeredmap::meta::PartitionMeta;
|
||||||
use obilayeredmap::{IndexMode, OLMError};
|
use obilayeredmap::{IndexMode, OLMError};
|
||||||
use obiskio::{SKError, SKResult};
|
use obiskio::{SKError, SKResult};
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ use obicompactvec::{
|
|||||||
PersistentBitMatrix, PersistentBitMatrixBuilder, PersistentBitVecBuilder,
|
PersistentBitMatrix, PersistentBitMatrixBuilder, PersistentBitVecBuilder,
|
||||||
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, PersistentCompactIntVecBuilder,
|
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, PersistentCompactIntVecBuilder,
|
||||||
};
|
};
|
||||||
|
use obicompactvec::traits::BitSliceMut;
|
||||||
use obilayeredmap::meta::PartitionMeta;
|
use obilayeredmap::meta::PartitionMeta;
|
||||||
use obilayeredmap::OLMError;
|
use obilayeredmap::OLMError;
|
||||||
use obiskio::{SKError, SKResult};
|
use obiskio::{SKError, SKResult};
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ use obicompactvec::{
|
|||||||
PersistentBitMatrix, PersistentBitMatrixBuilder,
|
PersistentBitMatrix, PersistentBitMatrixBuilder,
|
||||||
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
|
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
|
||||||
};
|
};
|
||||||
|
use obicompactvec::traits::BitSliceMut;
|
||||||
use obikseq::CanonicalKmer;
|
use obikseq::CanonicalKmer;
|
||||||
use obiskio::{UnitigFileReader, UnitigFileWriter};
|
use obiskio::{UnitigFileReader, UnitigFileWriter};
|
||||||
|
|
||||||
|
|||||||
@@ -102,6 +102,7 @@ mod tests {
|
|||||||
PersistentBitMatrix, PersistentBitMatrixBuilder,
|
PersistentBitMatrix, PersistentBitMatrixBuilder,
|
||||||
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
|
PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder,
|
||||||
};
|
};
|
||||||
|
use obicompactvec::traits::BitSliceMut;
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
fn make_int_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
|
fn make_int_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
|
||||||
|
|||||||
Reference in New Issue
Block a user