feat: introduce packed matrix storage and layer metadata

Unifies bit and integer matrix storage into `PersistentBitMatrix` and `PersistentCompactIntMatrix` enums, supporting both columnar and memory-mapped single-file layouts. Introduces `LayerMeta` to persist layer dimensions as `layer_meta.json`, enabling correct initialization of implicit presence matrices. Adds CLI commands (`pack` and `--upgrade-index`) to convert existing columnar indices to the compact format and backfill missing metadata. Updates partitioner and layered map logic to use the new persistent builders, optimized memory allocation, and auto-detected storage backends.
This commit is contained in:
Eric Coissac
2026-06-03 11:50:39 +02:00
parent de1a41810a
commit 173ac9fb42
20 changed files with 799 additions and 271 deletions
+2 -2
View File
@@ -34,7 +34,7 @@ impl LayerData for () {
/// `LayerData` implementation backed by a `PersistentCompactIntMatrix`.
///
/// Reading a slot yields the matrix row for that slot as a boxed `u32` slice.
impl LayerData for PersistentCompactIntMatrix {
/// Value returned by `read` for a single slot.
type Item = Box<[u32]>;
/// Opens the matrix for `layer_dir`; an error returned by
/// `PersistentCompactIntMatrix::open` is wrapped in `OLMError::Io`.
fn open(layer_dir: &Path) -> OLMResult<Self> {
// NOTE(review): the two statements below look like the removed and added
// sides of a diff hunk whose +/- markers were lost — confirm which is live.
// The first opens `layer_dir` joined with `COUNTS_DIR`; the second passes
// `layer_dir` through unchanged.
PersistentCompactIntMatrix::open(&layer_dir.join(COUNTS_DIR)).map_err(OLMError::Io)
PersistentCompactIntMatrix::open(layer_dir).map_err(OLMError::Io)
}
/// Returns the row at `slot` by delegating to `row`.
fn read(&self, slot: usize) -> Box<[u32]> { self.row(slot) }
}
@@ -42,7 +42,7 @@ impl LayerData for PersistentCompactIntMatrix {
/// `LayerData` implementation backed by a `PersistentBitMatrix`.
///
/// Reading a slot yields the matrix row for that slot as a boxed `bool` slice.
impl LayerData for PersistentBitMatrix {
/// Value returned by `read` for a single slot.
type Item = Box<[bool]>;
/// Opens the matrix for `layer_dir`; an error returned by
/// `PersistentBitMatrix::open` is wrapped in `OLMError::Io`.
fn open(layer_dir: &Path) -> OLMResult<Self> {
// NOTE(review): the two statements below look like the removed and added
// sides of a diff hunk whose +/- markers were lost — confirm which is live.
// The first opens `layer_dir` joined with `PRESENCE_DIR`; the second passes
// `layer_dir` through unchanged.
PersistentBitMatrix::open(&layer_dir.join(PRESENCE_DIR)).map_err(OLMError::Io)
PersistentBitMatrix::open(layer_dir).map_err(OLMError::Io)
}
/// Returns the row at `slot` by delegating to `row`.
fn read(&self, slot: usize) -> Box<[bool]> { self.row(slot) }
}
+2 -2
View File
@@ -107,7 +107,7 @@ mod tests {
fn make_int_matrix(cols: &[&[u32]]) -> (tempfile::TempDir, PersistentCompactIntMatrix) {
let n = cols.first().map_or(0, |c| c.len());
let dir = tempdir().unwrap();
let mut b = PersistentCompactIntMatrixBuilder::new(n, dir.path()).unwrap();
let mut b = PersistentCompactIntMatrixBuilder::new(n, &dir.path().join("counts")).unwrap();
for &col in cols {
let mut cb = b.add_col().unwrap();
for (slot, &v) in col.iter().enumerate() { cb.set(slot, v); }
@@ -121,7 +121,7 @@ mod tests {
fn make_bit_matrix(cols: &[&[bool]]) -> (tempfile::TempDir, PersistentBitMatrix) {
let n = cols.first().map_or(0, |c| c.len());
let dir = tempdir().unwrap();
let mut b = PersistentBitMatrixBuilder::new(n, dir.path()).unwrap();
let mut b = PersistentBitMatrixBuilder::new(n, &dir.path().join("presence")).unwrap();
for &col in cols {
let mut cb = b.add_col().unwrap();
for (slot, &v) in col.iter().enumerate() { cb.set(slot, v); }
+2
View File
@@ -3,6 +3,7 @@ use std::path::{Path, PathBuf};
use cacheline_ef::{CachelineEf, CachelineEfVec};
use epserde::prelude::*;
use obicompactvec::LayerMeta;
use obikseq::CanonicalKmer;
use obiskio::{CanonicalKmerIter, UnitigFileReader, UnitigFileWriter, build_unitig_idx};
use ptr_hash::{PtrHash, PtrHashParams, bucket_fn::CubicEps, hash::Xx64};
@@ -341,6 +342,7 @@ impl MphfLayer {
}
}
LayerMeta::save(dir, n)?;
Ok(n)
}
}