feat: add benchmark pipeline, expose APIs, and enforce strict paths

Introduces a Make-based benchmark pipeline for simulating, indexing, merging, filtering, and verifying k-mer counts and presence. Makes internal builder and iterator APIs public, requires a leading slash in path-predicate (`Matches`/`NotMatches`) values, registers the `obitaxonomy` crate in the workspace, and updates tooling configuration and documentation.
This commit is contained in:
Eric Coissac
2026-06-19 09:55:41 +02:00
parent 280ca1f5a3
commit c694e1f2b0
42 changed files with 2585 additions and 84 deletions
+4
View File
@@ -1853,6 +1853,10 @@ dependencies = [
"tracing",
]
[[package]]
name = "obitaxonomy"
version = "0.1.0"
[[package]]
name = "object"
version = "0.37.3"
+1 -1
View File
@@ -1,5 +1,5 @@
[workspace]
resolver = "3"
members = ["obikseq", "obiread", "obiskbuilder", "obifastwrite", "obikmer","obikrope","obipipeline", "obikpartitionner","obiskio","obidebruinj","obilayeredmap", "obicompactvec", "obisys", "obikindex"]
members = ["obikseq", "obiread", "obiskbuilder", "obifastwrite", "obikmer","obikrope","obipipeline", "obikpartitionner","obiskio","obidebruinj","obilayeredmap", "obicompactvec", "obisys", "obikindex", "obitaxonomy"]
[profile.release]
debug = 1
+4 -4
View File
@@ -88,9 +88,9 @@ impl<'a> IntoIterator for &'a PersistentBitVec {
// ── BitIter ───────────────────────────────────────────────────────────────────
pub struct BitIter<'a> {
pub(crate) words: &'a [u64],
pub(crate) slot: usize,
pub(crate) n: usize,
words: &'a [u64],
slot: usize,
n: usize,
}
impl ExactSizeIterator for BitIter<'_> {}
@@ -132,7 +132,7 @@ impl PersistentBitVecBuilder {
Ok(Self { mmap, n, path: path.to_path_buf() })
}
pub(crate) fn from_raw_bytes(bytes: &[u8], n: usize, path: &Path) -> io::Result<Self> {
pub fn from_raw_bytes(bytes: &[u8], n: usize, path: &Path) -> io::Result<Self> {
let file_size = HEADER_SIZE + n_bytes_for_words(n);
let file = OpenOptions::new()
.read(true).write(true).create(true).truncate(true)
+4 -4
View File
@@ -18,11 +18,11 @@ pub use builder::PersistentCompactIntVecBuilder;
pub use colgroup::{ColGroup, FilterMask, MatrixGroupOps, eval_filter_mask};
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
pub use layer_meta::LayerMeta;
pub use reader::PersistentCompactIntVec;
pub use tempbitvec::TempBitVec;
pub use tempintvec::TempCompactIntVec;
pub use reader::{PersistentCompactIntVec, Iter as CompactIntVecIter};
pub use tempbitvec::{TempBitVec, TempBitVecBuilder};
pub use tempintvec::{TempCompactIntVec, TempCompactIntVecBuilder};
pub use traits::{BitPartials, ColumnWeights, CountPartials};
pub use views::{BitSliceView, IntSliceView};
pub use views::{BitSliceView, BitSliceIter, IntSliceView, IntSliceViewIter};
#[cfg(test)]
#[path = "tests/mod.rs"]
+12 -11
View File
@@ -43,27 +43,27 @@ impl TempBitVec {
// ── TempBitVecBuilder — mutable, becomes TempBitVec on freeze ────────────────
pub(crate) struct TempBitVecBuilder {
pub struct TempBitVecBuilder {
builder: PersistentBitVecBuilder,
temp: TempDir,
}
impl TempBitVecBuilder {
pub(crate) fn new(n: usize) -> io::Result<Self> {
pub fn new(n: usize) -> io::Result<Self> {
let temp = TempDir::new()?;
let path = temp.path().join("data.pbiv");
let builder = PersistentBitVecBuilder::new(n, &path)?;
Ok(Self { builder, temp })
}
pub(crate) fn new_ones(n: usize) -> io::Result<Self> {
pub fn new_ones(n: usize) -> io::Result<Self> {
let temp = TempDir::new()?;
let path = temp.path().join("data.pbiv");
let builder = PersistentBitVecBuilder::new_ones(n, &path)?;
Ok(Self { builder, temp })
}
pub(crate) fn freeze(self) -> io::Result<TempBitVec> {
pub fn freeze(self) -> io::Result<TempBitVec> {
let Self { builder, temp } = self;
let vec = builder.finish()?;
Ok(TempBitVec { vec, _temp: temp })
@@ -72,7 +72,8 @@ impl TempBitVecBuilder {
pub fn set(&mut self, slot: usize, value: bool) {
self.builder.set(slot, value);
}
pub(crate) fn view(&self) -> BitSliceView<'_> {
pub fn view(&self) -> BitSliceView<'_> {
self.builder.view()
}
@@ -80,19 +81,19 @@ impl TempBitVecBuilder {
self.builder.or(other);
}
pub(crate) fn and(&mut self, other: BitSliceView<'_>) {
pub fn and(&mut self, other: BitSliceView<'_>) {
self.builder.and(other);
}
pub(crate) fn xor(&mut self, other: BitSliceView<'_>) {
pub fn xor(&mut self, other: BitSliceView<'_>) {
self.builder.xor(other);
}
pub(crate) fn not(&mut self) {
pub fn not(&mut self) {
self.builder.not();
}
pub(crate) fn copy_from(&mut self, src: BitSliceView<'_>) {
pub fn copy_from(&mut self, src: BitSliceView<'_>) {
self.builder.copy_from(src);
}
@@ -100,11 +101,11 @@ impl TempBitVecBuilder {
self.builder.or_where(col, pred);
}
pub(crate) fn and_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
pub fn and_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
self.builder.and_where(col, pred);
}
pub(crate) fn xor_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
pub fn xor_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
self.builder.xor_where(col, pred);
}
}
+17 -19
View File
@@ -32,60 +32,58 @@ impl TempCompactIntVec {
// ── TempCompactIntVecBuilder — mutable, becomes TempCompactIntVec on freeze ──
pub(crate) struct TempCompactIntVecBuilder {
pub struct TempCompactIntVecBuilder {
builder: PersistentCompactIntVecBuilder,
temp: TempDir,
}
impl TempCompactIntVecBuilder {
pub(crate) fn new(n: usize) -> io::Result<Self> {
pub fn new(n: usize) -> io::Result<Self> {
let temp = TempDir::new()?;
let path = temp.path().join("data.pciv");
let builder = PersistentCompactIntVecBuilder::new(n, &path)?;
Ok(Self { builder, temp })
}
pub(crate) fn freeze(self) -> io::Result<TempCompactIntVec> {
pub fn freeze(self) -> io::Result<TempCompactIntVec> {
let Self { builder, temp } = self;
let vec = builder.finish()?;
Ok(TempCompactIntVec { vec, _temp: temp })
}
// ── Delegation methods ────────────────────────────────────────────────────
pub fn n(&self) -> usize { self.builder.len() }
pub(crate) fn n(&self) -> usize { self.builder.len() }
pub fn set(&mut self, slot: usize, value: u32) { self.builder.set(slot, value); }
pub fn get(&self, slot: usize) -> u32 { self.builder.get(slot) }
pub(crate) fn set(&mut self, slot: usize, value: u32) { self.builder.set(slot, value); }
pub(crate) fn get(&self, slot: usize) -> u32 { self.builder.get(slot) }
pub fn primary_bytes(&self) -> &[u8] { self.builder.primary_bytes() }
pub fn primary_bytes_mut(&mut self) -> &mut [u8] { self.builder.primary_bytes_mut() }
pub(crate) fn primary_bytes(&self) -> &[u8] { self.builder.primary_bytes() }
pub(crate) fn primary_bytes_mut(&mut self) -> &mut [u8] { self.builder.primary_bytes_mut() }
pub(crate) fn inc_present(&mut self, col: BitSliceView<'_>) {
pub fn inc_present(&mut self, col: BitSliceView<'_>) {
self.builder.inc_present(col);
}
pub(crate) fn inc_present_fast(&mut self, col: BitSliceView<'_>) {
pub fn inc_present_fast(&mut self, col: BitSliceView<'_>) {
self.builder.inc_present_fast(col);
}
pub(crate) fn inc_predicate(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
pub fn inc_predicate(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
self.builder.inc_predicate(col, pred);
}
pub(crate) fn inc_predicate_fast(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
pub fn inc_predicate_fast(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
self.builder.inc_predicate_fast(col, pred);
}
pub(crate) fn add(&mut self, other: IntSliceView<'_>) {
pub fn add(&mut self, other: IntSliceView<'_>) {
self.builder.add(other);
}
pub(crate) fn mask_with(&mut self, mask: BitSliceView<'_>) {
pub fn mask_with(&mut self, mask: BitSliceView<'_>) {
self.builder.mask_with(mask);
}
pub(crate) fn min(&mut self, other: IntSliceView<'_>) { self.builder.min(other); }
pub(crate) fn max(&mut self, other: IntSliceView<'_>) { self.builder.max(other); }
pub(crate) fn diff(&mut self, other: IntSliceView<'_>) { self.builder.diff(other); }
pub fn min(&mut self, other: IntSliceView<'_>) { self.builder.min(other); }
pub fn max(&mut self, other: IntSliceView<'_>) { self.builder.max(other); }
pub fn diff(&mut self, other: IntSliceView<'_>) { self.builder.diff(other); }
}
+1
View File
@@ -3,6 +3,7 @@ use crossbeam_channel;
use hashbrown::HashMap;
use obikseq::k;
use obikseq::{CanonicalKmer, Sequence, Unitig};
#[cfg(not(any(test, feature = "test-utils")))]
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use std::cell::RefCell;
use std::fmt;
+3 -1
View File
@@ -11,7 +11,7 @@ use obilayeredmap::IndexMode;
use crate::error::{OKIError, OKIResult};
use crate::index::KmerIndex;
use crate::meta::{GenomeInfo, IndexMeta};
use crate::state::IndexState;
use crate::state::{IndexState, SENTINEL_INDEXED};
pub use obikpartitionner::MergeMode;
@@ -263,6 +263,8 @@ impl KmerIndex {
rep.push(t.stop());
}
fs::File::create(output.join(SENTINEL_INDEXED)).map_err(OKIError::Io)?;
KmerIndex::open(output)
}
}
+10 -9
View File
@@ -49,6 +49,11 @@ impl MetaPred {
if values.iter().any(|v| v.is_empty()) {
return Err(format!("empty value in predicate: {s}"));
}
if matches!(op, PredOp::Matches | PredOp::NotMatches) {
if let Some(v) = values.iter().find(|v| !v.starts_with('/')) {
return Err(format!("path predicate value must start with '/': {v:?} in predicate: {s}"));
}
}
Ok(Self { key, op, values })
}
@@ -72,16 +77,12 @@ impl MetaPred {
/// True if `value` is equal to `pattern` or is a descendant of it in a `/`-separated hierarchy.
///
/// - Absolute pattern (`/a/b`): `value` must start with `/a/b` at a segment boundary.
/// - Bare segment (`b`): `value` must contain `b` as an exact segment anywhere.
/// Both `value` and `pattern` must start with `/`.
/// `value` matches if it equals `pattern` exactly or starts with `pattern` followed by `/`.
fn path_matches(value: &str, pattern: &str) -> bool {
if pattern.starts_with('/') {
value == pattern
|| (value.starts_with(pattern)
&& value[pattern.len()..].starts_with('/'))
} else {
value.split('/').any(|seg| seg == pattern)
}
value == pattern
|| (value.starts_with(pattern)
&& value[pattern.len()..].starts_with('/'))
}
// ── Three-value group evaluation ──────────────────────────────────────────────