feat: add benchmark pipeline, expose APIs, and enforce strict paths

Introduces a Make-based orchestration for simulating, indexing, merging, filtering, and verifying k-mer counts and presence. Exposes internal builder and iterator APIs publicly, enforces mandatory leading slashes for predicate patterns, registers the `obitaxonomy` crate, and updates tooling configurations alongside documentation.
This commit is contained in:
Eric Coissac
2026-06-19 09:55:41 +02:00
parent 280ca1f5a3
commit c694e1f2b0
42 changed files with 2585 additions and 84 deletions
+4 -4
View File
@@ -88,9 +88,9 @@ impl<'a> IntoIterator for &'a PersistentBitVec {
// ── BitIter ───────────────────────────────────────────────────────────────────
/// Iterator state over a borrowed slice of `u64` words (an `ExactSizeIterator`
/// impl for this type follows the struct).
///
/// NOTE(review): this text is a diff rendering — each field appears twice, once
/// as `pub(crate)` and once private, i.e. the two sides of a visibility change.
pub struct BitIter<'a> {
// Borrowed backing words; the lifetime `'a` ties the iterator to their owner.
pub(crate) words: &'a [u64],
// presumably the index of the next bit slot to yield — TODO confirm against `next()`
pub(crate) slot: usize,
// presumably the total number of bit slots — TODO confirm against `next()`
pub(crate) n: usize,
words: &'a [u64],
slot: usize,
n: usize,
}
// Marker impl with an empty body: it relies on the trait's default methods, which
// derive `len()` from `Iterator::size_hint` — so `BitIter`'s `size_hint` must be exact.
impl ExactSizeIterator for BitIter<'_> {}
@@ -132,7 +132,7 @@ impl PersistentBitVecBuilder {
Ok(Self { mmap, n, path: path.to_path_buf() })
}
pub(crate) fn from_raw_bytes(bytes: &[u8], n: usize, path: &Path) -> io::Result<Self> {
pub fn from_raw_bytes(bytes: &[u8], n: usize, path: &Path) -> io::Result<Self> {
let file_size = HEADER_SIZE + n_bytes_for_words(n);
let file = OpenOptions::new()
.read(true).write(true).create(true).truncate(true)
+4 -4
View File
@@ -18,11 +18,11 @@ pub use builder::PersistentCompactIntVecBuilder;
pub use colgroup::{ColGroup, FilterMask, MatrixGroupOps, eval_filter_mask};
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
pub use layer_meta::LayerMeta;
pub use reader::PersistentCompactIntVec;
pub use tempbitvec::TempBitVec;
pub use tempintvec::TempCompactIntVec;
pub use reader::{PersistentCompactIntVec, Iter as CompactIntVecIter};
pub use tempbitvec::{TempBitVec, TempBitVecBuilder};
pub use tempintvec::{TempCompactIntVec, TempCompactIntVecBuilder};
pub use traits::{BitPartials, ColumnWeights, CountPartials};
pub use views::{BitSliceView, IntSliceView};
pub use views::{BitSliceView, BitSliceIter, IntSliceView, IntSliceViewIter};
#[cfg(test)]
#[path = "tests/mod.rs"]
+12 -11
View File
@@ -43,27 +43,27 @@ impl TempBitVec {
// ── TempBitVecBuilder — mutable, becomes TempBitVec on freeze ────────────────
/// Mutable bit-vector builder that pairs a `PersistentBitVecBuilder` with the
/// `TempDir` in which its backing file path is created.
///
/// NOTE(review): the two `struct` header lines are the two sides of a
/// visibility change (`pub(crate)` vs `pub`) as rendered by this diff.
pub(crate) struct TempBitVecBuilder {
pub struct TempBitVecBuilder {
// Inner builder that the methods of this type delegate to.
builder: PersistentBitVecBuilder,
// Temporary directory that contains the builder's file; owned alongside the builder.
temp: TempDir,
}
impl TempBitVecBuilder {
/// Creates a builder whose backing file lives in a freshly created temporary directory.
///
/// `n` is forwarded unchanged to `PersistentBitVecBuilder::new`
/// (presumably the number of bit slots — confirm there).
///
/// # Errors
/// Propagates the `io::Error` from `TempDir::new` or `PersistentBitVecBuilder::new`.
pub(crate) fn new(n: usize) -> io::Result<Self> {
pub fn new(n: usize) -> io::Result<Self> {
let temp = TempDir::new()?;
// The backing file is always named `data.pbiv` inside the temporary directory.
let path = temp.path().join("data.pbiv");
let builder = PersistentBitVecBuilder::new(n, &path)?;
// `temp` is stored next to the builder rather than dropped here.
Ok(Self { builder, temp })
}
/// Same construction as `new`, but the inner builder comes from
/// `PersistentBitVecBuilder::new_ones` (presumably every bit starts set — confirm there).
///
/// # Errors
/// Propagates the `io::Error` from `TempDir::new` or `PersistentBitVecBuilder::new_ones`.
pub(crate) fn new_ones(n: usize) -> io::Result<Self> {
pub fn new_ones(n: usize) -> io::Result<Self> {
let temp = TempDir::new()?;
// The backing file is always named `data.pbiv` inside the temporary directory.
let path = temp.path().join("data.pbiv");
let builder = PersistentBitVecBuilder::new_ones(n, &path)?;
// `temp` is stored next to the builder rather than dropped here.
Ok(Self { builder, temp })
}
pub(crate) fn freeze(self) -> io::Result<TempBitVec> {
pub fn freeze(self) -> io::Result<TempBitVec> {
let Self { builder, temp } = self;
let vec = builder.finish()?;
Ok(TempBitVec { vec, _temp: temp })
@@ -72,7 +72,8 @@ impl TempBitVecBuilder {
/// Forwards `slot` and `value` to the inner builder's `set`.
pub fn set(&mut self, slot: usize, value: bool) {
self.builder.set(slot, value);
}
/// Returns the inner builder's `BitSliceView`; the view borrows from `self`.
pub(crate) fn view(&self) -> BitSliceView<'_> {
pub fn view(&self) -> BitSliceView<'_> {
self.builder.view()
}
@@ -80,19 +81,19 @@ impl TempBitVecBuilder {
self.builder.or(other);
}
/// Forwards `other` to the inner builder's `and`
/// (presumably an in-place bitwise AND — see `PersistentBitVecBuilder::and`).
pub(crate) fn and(&mut self, other: BitSliceView<'_>) {
pub fn and(&mut self, other: BitSliceView<'_>) {
self.builder.and(other);
}
/// Forwards `other` to the inner builder's `xor`
/// (presumably an in-place bitwise XOR — see `PersistentBitVecBuilder::xor`).
pub(crate) fn xor(&mut self, other: BitSliceView<'_>) {
pub fn xor(&mut self, other: BitSliceView<'_>) {
self.builder.xor(other);
}
/// Calls the inner builder's `not`
/// (presumably inverts every bit in place — see `PersistentBitVecBuilder::not`).
pub(crate) fn not(&mut self) {
pub fn not(&mut self) {
self.builder.not();
}
/// Forwards `src` to the inner builder's `copy_from`
/// (presumably overwrites this builder's bits with those of `src` — confirm there).
pub(crate) fn copy_from(&mut self, src: BitSliceView<'_>) {
pub fn copy_from(&mut self, src: BitSliceView<'_>) {
self.builder.copy_from(src);
}
@@ -100,11 +101,11 @@ impl TempBitVecBuilder {
self.builder.or_where(col, pred);
}
/// Forwards the integer column `col` and the predicate `pred` to the inner
/// builder's `and_where`.
///
/// NOTE(review): presumably ANDs each bit with `pred(col[slot])` — confirm against
/// `PersistentBitVecBuilder::and_where`.
pub(crate) fn and_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
pub fn and_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
self.builder.and_where(col, pred);
}
/// Forwards the integer column `col` and the predicate `pred` to the inner
/// builder's `xor_where`.
///
/// NOTE(review): presumably XORs each bit with `pred(col[slot])` — confirm against
/// `PersistentBitVecBuilder::xor_where`.
pub(crate) fn xor_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
pub fn xor_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
self.builder.xor_where(col, pred);
}
}
+17 -19
View File
@@ -32,60 +32,58 @@ impl TempCompactIntVec {
// ── TempCompactIntVecBuilder — mutable, becomes TempCompactIntVec on freeze ──
/// Mutable integer-vector builder that pairs a `PersistentCompactIntVecBuilder`
/// with the `TempDir` in which its backing file path is created.
///
/// NOTE(review): the two `struct` header lines are the two sides of a
/// visibility change (`pub(crate)` vs `pub`) as rendered by this diff.
pub(crate) struct TempCompactIntVecBuilder {
pub struct TempCompactIntVecBuilder {
// Inner builder that the methods of this type delegate to.
builder: PersistentCompactIntVecBuilder,
// Temporary directory that contains the builder's file; owned alongside the builder.
temp: TempDir,
}
// Construction, freezing, and thin delegation to the inner
// `PersistentCompactIntVecBuilder`.
//
// NOTE(review): this is a diff rendering — wherever a signature appears twice
// (once `pub(crate)`, once `pub`) the two lines are the two sides of a
// visibility change, and only one of them exists in the real file.
impl TempCompactIntVecBuilder {
/// Creates a builder whose backing file lives in a freshly created temporary directory.
///
/// `n` is forwarded unchanged to `PersistentCompactIntVecBuilder::new`
/// (presumably the number of slots — confirm there).
///
/// # Errors
/// Propagates the `io::Error` from `TempDir::new` or the inner builder's `new`.
pub(crate) fn new(n: usize) -> io::Result<Self> {
pub fn new(n: usize) -> io::Result<Self> {
let temp = TempDir::new()?;
// The backing file is always named `data.pciv` inside the temporary directory.
let path = temp.path().join("data.pciv");
let builder = PersistentCompactIntVecBuilder::new(n, &path)?;
Ok(Self { builder, temp })
}
/// Consumes the builder and returns a `TempCompactIntVec` built from the inner
/// builder's `finish()` result.
///
/// # Errors
/// Propagates the `io::Error` returned by `finish()`.
pub(crate) fn freeze(self) -> io::Result<TempCompactIntVec> {
pub fn freeze(self) -> io::Result<TempCompactIntVec> {
let Self { builder, temp } = self;
let vec = builder.finish()?;
// The temporary directory is moved into the result instead of being dropped here.
Ok(TempCompactIntVec { vec, _temp: temp })
}
// ── Delegation methods ────────────────────────────────────────────────────
// `n` reports the inner builder's `len()`; `set` / `get` forward the slot access.
pub fn n(&self) -> usize { self.builder.len() }
pub(crate) fn n(&self) -> usize { self.builder.len() }
pub fn set(&mut self, slot: usize, value: u32) { self.builder.set(slot, value); }
pub fn get(&self, slot: usize) -> u32 { self.builder.get(slot) }
pub(crate) fn set(&mut self, slot: usize, value: u32) { self.builder.set(slot, value); }
pub(crate) fn get(&self, slot: usize) -> u32 { self.builder.get(slot) }
// Shared / exclusive access to the inner builder's `primary_bytes` buffer.
pub fn primary_bytes(&self) -> &[u8] { self.builder.primary_bytes() }
pub fn primary_bytes_mut(&mut self) -> &mut [u8] { self.builder.primary_bytes_mut() }
pub(crate) fn primary_bytes(&self) -> &[u8] { self.builder.primary_bytes() }
pub(crate) fn primary_bytes_mut(&mut self) -> &mut [u8] { self.builder.primary_bytes_mut() }
/// Forwards the bit column `col` to the inner builder's `inc_present`
/// (presumably increments each slot whose bit is set — confirm there).
pub(crate) fn inc_present(&mut self, col: BitSliceView<'_>) {
pub fn inc_present(&mut self, col: BitSliceView<'_>) {
self.builder.inc_present(col);
}
/// Forwards `col` to the inner builder's `inc_present_fast`.
/// NOTE(review): how the `_fast` variant differs from `inc_present` is not visible here.
pub(crate) fn inc_present_fast(&mut self, col: BitSliceView<'_>) {
pub fn inc_present_fast(&mut self, col: BitSliceView<'_>) {
self.builder.inc_present_fast(col);
}
/// Forwards the integer column `col` and predicate `pred` to the inner
/// builder's `inc_predicate`.
pub(crate) fn inc_predicate(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
pub fn inc_predicate(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
self.builder.inc_predicate(col, pred);
}
/// Forwards `col` and `pred` to the inner builder's `inc_predicate_fast`.
/// NOTE(review): how the `_fast` variant differs from `inc_predicate` is not visible here.
pub(crate) fn inc_predicate_fast(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
pub fn inc_predicate_fast(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
self.builder.inc_predicate_fast(col, pred);
}
/// Forwards `other` to the inner builder's `add`.
pub(crate) fn add(&mut self, other: IntSliceView<'_>) {
pub fn add(&mut self, other: IntSliceView<'_>) {
self.builder.add(other);
}
/// Forwards the bit column `mask` to the inner builder's `mask_with`.
pub(crate) fn mask_with(&mut self, mask: BitSliceView<'_>) {
pub fn mask_with(&mut self, mask: BitSliceView<'_>) {
self.builder.mask_with(mask);
}
// `min` / `max` / `diff` each forward `other` to the inner builder's method of the same name.
pub(crate) fn min(&mut self, other: IntSliceView<'_>) { self.builder.min(other); }
pub(crate) fn max(&mut self, other: IntSliceView<'_>) { self.builder.max(other); }
pub(crate) fn diff(&mut self, other: IntSliceView<'_>) { self.builder.diff(other); }
pub fn min(&mut self, other: IntSliceView<'_>) { self.builder.min(other); }
pub fn max(&mut self, other: IntSliceView<'_>) { self.builder.max(other); }
pub fn diff(&mut self, other: IntSliceView<'_>) { self.builder.diff(other); }
}