docs: document k-mer index architecture and refactor distance metrics

Add comprehensive documentation for the `obilayeredmap` crate, `PersistentCompactIntVec`, `PersistentBitVec`, and the hierarchical k-mer index architecture, including sidebar navigation updates across all documentation pages. Refactor the Bray-Curtis distance computation in `obicompactvec` to decouple numerator and denominator calculations, replacing direct pairwise calls with explicit loops over precomputed sums. Update tests to verify column sum accuracy and align with the simplified API.
This commit is contained in:
Eric Coissac
2026-05-15 21:07:23 +08:00
parent 8409c852ef
commit 45d49ed501
25 changed files with 8842 additions and 117 deletions
+19 -18
View File
@@ -36,7 +36,20 @@ impl PersistentCompactIntMatrix {
// ── Distance matrices ─────────────────────────────────────────────────────
/// Builds the `n × n` matrix of pairwise Bray-Curtis distances between columns.
///
/// For `i != j` the entry is `1 - 2 * sum_min[i, j] / (col_sums[i] + col_sums[j])`,
/// where `sum_min` comes from `partial_bray_dist_matrix()` and `col_sums` from `sum()`.
/// Diagonal entries keep the zero they were initialised with. A pair whose combined
/// column sum is zero is assigned `0.0` instead of dividing by zero.
pub fn bray_dist_matrix(&self) -> Array2<f64> {
// NOTE(review): the next line looks like the pre-refactor body, i.e. a removed diff
// line whose `-` marker was lost in this rendering. It has no trailing `;` and is
// followed by further statements, so it cannot coexist with the code below —
// confirm against the repository and drop it.
self.pairwise(|i, j| self.col(i).bray_dist(self.col(j)))
// Numerator terms: per-pair sums of slot-wise minima.
let sum_min = self.partial_bray_dist_matrix();
// Denominator terms: one total per column.
let col_sums = self.sum();
let n = self.n_cols();
// Start from all zeros so the diagonal needs no explicit assignment.
let mut m = Array2::zeros((n, n));
for i in 0..n {
for j in 0..n {
if i != j {
let denom = col_sums[i] + col_sums[j];
// Guard the division: two all-zero columns are reported as distance 0.
m[[i, j]] = if denom == 0 { 0.0 }
else { 1.0 - 2.0 * sum_min[[i, j]] as f64 / denom as f64 };
}
}
}
m
}
pub fn relfreq_bray_dist_matrix(&self) -> Array2<f64> {
@@ -74,23 +87,11 @@ impl PersistentCompactIntMatrix {
// ── Partial matrices (additively decomposable across layers) ──────────────
// NOTE(review): from here down to the `(sum_min, Array1::from_vec(col_sums))` line this
// appears to be the pre-refactor version of `partial_bray_dist_matrix` (tuple return),
// kept by a diff rendering that lost its `+`/`-` markers. It has no closing brace and is
// followed directly by the new documentation and signature — confirm against the
// repository before treating any of it as live code.
/// Returns `(sum_min[n×n], col_sums[n])`.
/// `sum_min[i,j]` = Σ_slot min(col_i[slot], col_j[slot]).
/// `col_sums[k]` = Σ_slot col_k[slot].
/// Reduce across layers by element-wise addition before computing the final distance.
pub fn partial_bray_dist_matrix(&self) -> (Array2<u64>, Array1<u64>) {
let n = self.n_cols();
// Per-column totals, computed in parallel over the column indices.
let col_sums: Vec<u64> = (0..n)
.into_par_iter()
.map(|i| self.col(i).sum())
.collect();
// Only the first tuple component of the per-pair result is kept here.
let sum_min = self.pairwise_u64(|i, j| {
self.col(i).partial_bray_dist(self.col(j)).0
});
(sum_min, Array1::from_vec(col_sums))
// NOTE(review): the lines below appear to be the post-refactor replacement, which
// returns only the `sum_min` matrix.
/// Returns `sum_min[n×n]` where `sum_min[i,j]` = Σ_slot min(col_i[slot], col_j[slot]).
/// The denominator `col_sums[i] + col_sums[j]` is obtained from `self.sum()`.
/// Results from separate layers are combined by element-wise addition.
pub fn partial_bray_dist_matrix(&self) -> Array2<u64> {
self.pairwise_u64(|i, j| self.col(i).partial_bray_dist(self.col(j)))
}
/// Returns sum of squared differences `[n×n]`.