feat: integrate tracing and enhance bit matrix operations
Add the `tracing` crate to `obidebruinj`, `obisys`, and resolve it in `Cargo.lock`. Replace `eprintln!` statements with structured `debug!` and `info!` macros. Introduce a `TracedBar` wrapper for progress bars and enhance the `Stage` lifecycle to emit structured events for timing, memory metrics, and swap warnings. Add a progress spinner for unitig degree computation. Extend `PersistentBitMatrix` with columnar bit-vector operations and parallel distance methods, enabling uniform distance computations across all storage layouts while replacing previous panics with dimension-based fallbacks.
This commit is contained in:
@@ -266,6 +266,161 @@ impl PackedCompactIntMatrix {
|
||||
pub(crate) fn row(&self, slot: usize) -> Box<[u32]> {
|
||||
(0..self.n_cols).map(|c| self.get(c, slot)).collect()
|
||||
}
|
||||
|
||||
pub(crate) fn sum(&self) -> Array1<u64> {
|
||||
Array1::from_vec(
|
||||
(0..self.n_cols).into_par_iter()
|
||||
.map(|c| (0..self.n_rows).map(|s| self.get(c, s) as u64).sum())
|
||||
.collect()
|
||||
)
|
||||
}
|
||||
|
||||
// ── Pair primitives ───────────────────────────────────────────────────────
|
||||
|
||||
fn pair_partial_bray(&self, i: usize, j: usize) -> u64 {
|
||||
(0..self.n_rows).map(|s| self.get(i, s).min(self.get(j, s)) as u64).sum()
|
||||
}
|
||||
|
||||
fn pair_partial_euclidean(&self, i: usize, j: usize) -> f64 {
|
||||
(0..self.n_rows).map(|s| {
|
||||
let d = self.get(i, s) as f64 - self.get(j, s) as f64;
|
||||
d * d
|
||||
}).sum()
|
||||
}
|
||||
|
||||
fn pair_partial_threshold_jaccard(&self, i: usize, j: usize, t: u32) -> (u64, u64) {
|
||||
let (mut inter, mut union) = (0u64, 0u64);
|
||||
for s in 0..self.n_rows {
|
||||
let a = self.get(i, s) >= t;
|
||||
let b = self.get(j, s) >= t;
|
||||
if a && b { inter += 1; }
|
||||
if a || b { union += 1; }
|
||||
}
|
||||
(inter, union)
|
||||
}
|
||||
|
||||
fn pair_partial_relfreq_bray(&self, i: usize, j: usize, si: f64, sj: f64) -> f64 {
|
||||
if si == 0.0 || sj == 0.0 { return 0.0; }
|
||||
(0..self.n_rows).map(|s| {
|
||||
(self.get(i, s) as f64 / si).min(self.get(j, s) as f64 / sj)
|
||||
}).sum()
|
||||
}
|
||||
|
||||
fn pair_partial_relfreq_euclidean(&self, i: usize, j: usize, si: f64, sj: f64) -> f64 {
|
||||
if si == 0.0 || sj == 0.0 { return 0.0; }
|
||||
(0..self.n_rows).map(|s| {
|
||||
let d = self.get(i, s) as f64 / si - self.get(j, s) as f64 / sj;
|
||||
d * d
|
||||
}).sum()
|
||||
}
|
||||
|
||||
fn pair_partial_hellinger(&self, i: usize, j: usize, si: f64, sj: f64) -> f64 {
|
||||
if si == 0.0 || sj == 0.0 { return 0.0; }
|
||||
(0..self.n_rows).map(|s| {
|
||||
let d = (self.get(i, s) as f64 / si).sqrt() - (self.get(j, s) as f64 / sj).sqrt();
|
||||
d * d
|
||||
}).sum()
|
||||
}
|
||||
|
||||
// ── Matrix methods ────────────────────────────────────────────────────────
|
||||
|
||||
fn pairwise<T>(&self, f: impl Fn(usize, usize) -> T + Sync) -> Array2<T>
|
||||
where T: Clone + Default + Send {
|
||||
let n = self.n_cols;
|
||||
let results: Vec<(usize, usize, T)> = upper_pairs(n)
|
||||
.into_par_iter().map(|(i, j)| (i, j, f(i, j))).collect();
|
||||
fill_symmetric(n, results.into_iter().map(|(i, j, v)| { let w = v.clone(); (i, j, v, w) }))
|
||||
}
|
||||
|
||||
/// `u64` flavour of `pairwise`: evaluates `f(i, j)` in parallel for every pair
/// from `upper_pairs(n_cols)` and assembles the results with `fill_symmetric`.
fn pairwise_u64(&self, f: impl Fn(usize, usize) -> u64 + Sync) -> Array2<u64> {
    // `u64` is `Clone + Default + Send`, so the generic driver does exactly
    // the same work (cloning a `u64` is a plain copy).
    self.pairwise::<u64>(f)
}
|
||||
|
||||
/// Pairwise matrix of `pair_partial_bray` values (summed per-row minima)
/// for every pair of columns.
pub(crate) fn partial_bray_dist_matrix(&self) -> Array2<u64> {
    let shared_counts = |i: usize, j: usize| self.pair_partial_bray(i, j);
    self.pairwise_u64(shared_counts)
}
|
||||
|
||||
pub(crate) fn bray_dist_matrix(&self) -> Array2<f64> {
|
||||
let col_sums = self.sum();
|
||||
let sum_min = self.partial_bray_dist_matrix();
|
||||
let n = self.n_cols;
|
||||
let mut m = Array2::zeros((n, n));
|
||||
for i in 0..n { for j in 0..n {
|
||||
if i != j {
|
||||
let denom = col_sums[i] + col_sums[j];
|
||||
m[[i, j]] = if denom == 0 { 0.0 }
|
||||
else { 1.0 - 2.0 * sum_min[[i, j]] as f64 / denom as f64 };
|
||||
}
|
||||
}}
|
||||
m
|
||||
}
|
||||
|
||||
/// Pairwise matrix of summed squared differences between columns
/// (the Euclidean distance before the square root).
pub(crate) fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
    let squared_distance = |i: usize, j: usize| self.pair_partial_euclidean(i, j);
    self.pairwise(squared_distance)
}
|
||||
|
||||
/// Pairwise Euclidean distance between columns: the square root of the
/// summed squared differences.
pub(crate) fn euclidean_dist_matrix(&self) -> Array2<f64> {
    self.pairwise(|i, j| {
        let squared = self.pair_partial_euclidean(i, j);
        squared.sqrt()
    })
}
|
||||
|
||||
pub(crate) fn partial_threshold_jaccard_dist_matrix(&self, t: u32) -> (Array2<u64>, Array2<u64>) {
|
||||
let n = self.n_cols;
|
||||
let results: Vec<(usize, usize, u64, u64)> = upper_pairs(n)
|
||||
.into_par_iter()
|
||||
.map(|(i, j)| { let (inter, union) = self.pair_partial_threshold_jaccard(i, j, t); (i, j, inter, union) })
|
||||
.collect();
|
||||
let mut inter_m = Array2::zeros((n, n));
|
||||
let mut union_m = Array2::zeros((n, n));
|
||||
for (i, j, inter, union) in results {
|
||||
inter_m[[i, j]] = inter; inter_m[[j, i]] = inter;
|
||||
union_m[[i, j]] = union; union_m[[j, i]] = union;
|
||||
}
|
||||
(inter_m, union_m)
|
||||
}
|
||||
|
||||
/// Pairwise Jaccard distance between columns, where a column is present at a
/// row when its value is at least 1.
pub(crate) fn jaccard_dist_matrix(&self) -> Array2<f64> {
    // Same computation as the thresholded variant with the threshold fixed at 1.
    self.threshold_jaccard_dist_matrix(1)
}
|
||||
|
||||
/// Pairwise thresholded Jaccard distance between columns.
///
/// Each entry is `1 - intersection / union`, with presence defined as
/// `value >= t`; an empty union yields `0.0`.
pub(crate) fn threshold_jaccard_dist_matrix(&self, t: u32) -> Array2<f64> {
    self.pairwise(|i, j| match self.pair_partial_threshold_jaccard(i, j, t) {
        (_, 0) => 0.0,
        (both, either) => 1.0 - both as f64 / either as f64,
    })
}
|
||||
|
||||
/// Pairwise matrix of `pair_partial_relfreq_bray` values, using `col_sums[i]`
/// and `col_sums[j]` (converted to `f64`) as the per-column divisors.
pub(crate) fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
    self.pairwise(|i, j| {
        let (total_i, total_j) = (col_sums[i] as f64, col_sums[j] as f64);
        self.pair_partial_relfreq_bray(i, j, total_i, total_j)
    })
}
|
||||
|
||||
/// Computes the column totals and returns the relative-frequency Bray partial
/// matrix built from them.
///
/// NOTE(review): the result is the partial term (summed minima of relative
/// frequencies) returned as-is, with no `1 - x` step — confirm this is what
/// callers expect and that it matches the Columnar layout.
pub(crate) fn relfreq_bray_dist_matrix(&self) -> Array2<f64> {
    let totals = self.sum();
    self.partial_relfreq_bray_dist_matrix(&totals)
}
|
||||
|
||||
/// Pairwise matrix of `pair_partial_relfreq_euclidean` values, using
/// `col_sums[i]` and `col_sums[j]` (converted to `f64`) as the per-column divisors.
pub(crate) fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
    self.pairwise(|i, j| {
        let (total_i, total_j) = (col_sums[i] as f64, col_sums[j] as f64);
        self.pair_partial_relfreq_euclidean(i, j, total_i, total_j)
    })
}
|
||||
|
||||
/// Computes the column totals and returns the relative-frequency Euclidean
/// partial matrix built from them.
///
/// NOTE(review): unlike `euclidean_dist_matrix`, no square root is applied to
/// the partial term here — confirm this is intended and matches the Columnar
/// layout.
pub(crate) fn relfreq_euclidean_dist_matrix(&self) -> Array2<f64> {
    let totals = self.sum();
    self.partial_relfreq_euclidean_dist_matrix(&totals)
}
|
||||
|
||||
/// Pairwise matrix of `pair_partial_hellinger` values, using `col_sums[i]`
/// and `col_sums[j]` (converted to `f64`) as the per-column divisors.
pub(crate) fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
    self.pairwise(|i, j| {
        let (total_i, total_j) = (col_sums[i] as f64, col_sums[j] as f64);
        self.pair_partial_hellinger(i, j, total_i, total_j)
    })
}
|
||||
|
||||
/// Computes the column totals and returns the Hellinger partial matrix built
/// from them.
///
/// NOTE(review): the partial term is returned as-is (no square root or
/// scaling is applied here) — confirm this is intended and matches the
/// Columnar layout.
pub(crate) fn hellinger_dist_matrix(&self) -> Array2<f64> {
    let totals = self.sum();
    self.partial_hellinger_euclidean_dist_matrix(&totals)
}
|
||||
}
|
||||
|
||||
/// Build `counts/matrix.pcmx` from existing `col_*.pciv` files.
|
||||
@@ -350,50 +505,47 @@ impl PersistentCompactIntMatrix {
|
||||
}
|
||||
|
||||
pub fn sum(&self) -> Array1<u64> {
|
||||
match self {
|
||||
Self::Columnar(m) => m.sum(),
|
||||
_ => panic!("sum() only available on Columnar PersistentCompactIntMatrix"),
|
||||
}
|
||||
match self { Self::Columnar(m) => m.sum(), Self::Packed(m) => m.sum() }
|
||||
}
|
||||
|
||||
pub fn bray_dist_matrix(&self) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.bray_dist_matrix(), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.bray_dist_matrix(), Self::Packed(m) => m.bray_dist_matrix() }
|
||||
}
|
||||
pub fn relfreq_bray_dist_matrix(&self) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.relfreq_bray_dist_matrix(), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.relfreq_bray_dist_matrix(), Self::Packed(m) => m.relfreq_bray_dist_matrix() }
|
||||
}
|
||||
pub fn euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.euclidean_dist_matrix(), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.euclidean_dist_matrix(), Self::Packed(m) => m.euclidean_dist_matrix() }
|
||||
}
|
||||
pub fn relfreq_euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.relfreq_euclidean_dist_matrix(), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.relfreq_euclidean_dist_matrix(), Self::Packed(m) => m.relfreq_euclidean_dist_matrix() }
|
||||
}
|
||||
pub fn hellinger_dist_matrix(&self) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.hellinger_dist_matrix(), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.hellinger_dist_matrix(), Self::Packed(m) => m.hellinger_dist_matrix() }
|
||||
}
|
||||
pub fn jaccard_dist_matrix(&self) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.jaccard_dist_matrix(), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.jaccard_dist_matrix(), Self::Packed(m) => m.jaccard_dist_matrix() }
|
||||
}
|
||||
pub fn threshold_jaccard_dist_matrix(&self, threshold: u32) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.threshold_jaccard_dist_matrix(threshold), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.threshold_jaccard_dist_matrix(threshold), Self::Packed(m) => m.threshold_jaccard_dist_matrix(threshold) }
|
||||
}
|
||||
pub fn partial_bray_dist_matrix(&self) -> Array2<u64> {
|
||||
match self { Self::Columnar(m) => m.partial_bray_dist_matrix(), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.partial_bray_dist_matrix(), Self::Packed(m) => m.partial_bray_dist_matrix() }
|
||||
}
|
||||
pub fn partial_euclidean_dist_matrix(&self) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.partial_euclidean_dist_matrix(), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.partial_euclidean_dist_matrix(), Self::Packed(m) => m.partial_euclidean_dist_matrix() }
|
||||
}
|
||||
pub fn partial_threshold_jaccard_dist_matrix(&self, threshold: u32) -> (Array2<u64>, Array2<u64>) {
|
||||
match self { Self::Columnar(m) => m.partial_threshold_jaccard_dist_matrix(threshold), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.partial_threshold_jaccard_dist_matrix(threshold), Self::Packed(m) => m.partial_threshold_jaccard_dist_matrix(threshold) }
|
||||
}
|
||||
pub fn partial_relfreq_bray_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.partial_relfreq_bray_dist_matrix(col_sums), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.partial_relfreq_bray_dist_matrix(col_sums), Self::Packed(m) => m.partial_relfreq_bray_dist_matrix(col_sums) }
|
||||
}
|
||||
pub fn partial_relfreq_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.partial_relfreq_euclidean_dist_matrix(col_sums), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.partial_relfreq_euclidean_dist_matrix(col_sums), Self::Packed(m) => m.partial_relfreq_euclidean_dist_matrix(col_sums) }
|
||||
}
|
||||
pub fn partial_hellinger_euclidean_dist_matrix(&self, col_sums: &Array1<u64>) -> Array2<f64> {
|
||||
match self { Self::Columnar(m) => m.partial_hellinger_euclidean_dist_matrix(col_sums), _ => panic!("Columnar only") }
|
||||
match self { Self::Columnar(m) => m.partial_hellinger_euclidean_dist_matrix(col_sums), Self::Packed(m) => m.partial_hellinger_euclidean_dist_matrix(col_sums) }
|
||||
}
|
||||
|
||||
pub fn append_column(dir: &Path, value_of: impl Fn(usize) -> u32) -> io::Result<()> {
|
||||
|
||||
Reference in New Issue
Block a user