feat: add benchmark pipeline, expose APIs, and enforce strict paths
Introduces a Make-based orchestration for simulating, indexing, merging, filtering, and verifying k-mer counts and presence. Exposes internal builder and iterator APIs publicly, enforces mandatory leading slashes for predicate patterns, registers the `obitaxonomy` crate, and updates tooling configurations alongside documentation.
This commit is contained in:
Generated
+4
@@ -1853,6 +1853,10 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "obitaxonomy"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.37.3"
|
||||
|
||||
+1
-1
@@ -1,5 +1,5 @@
|
||||
[workspace]
|
||||
resolver = "3"
|
||||
members = ["obikseq", "obiread", "obiskbuilder", "obifastwrite", "obikmer","obikrope","obipipeline", "obikpartitionner","obiskio","obidebruinj","obilayeredmap", "obicompactvec", "obisys", "obikindex"]
|
||||
members = ["obikseq", "obiread", "obiskbuilder", "obifastwrite", "obikmer","obikrope","obipipeline", "obikpartitionner","obiskio","obidebruinj","obilayeredmap", "obicompactvec", "obisys", "obikindex", "obitaxonomy"]
|
||||
[profile.release]
|
||||
debug = 1
|
||||
|
||||
@@ -88,9 +88,9 @@ impl<'a> IntoIterator for &'a PersistentBitVec {
|
||||
// ── BitIter ───────────────────────────────────────────────────────────────────
|
||||
|
||||
pub struct BitIter<'a> {
|
||||
pub(crate) words: &'a [u64],
|
||||
pub(crate) slot: usize,
|
||||
pub(crate) n: usize,
|
||||
words: &'a [u64],
|
||||
slot: usize,
|
||||
n: usize,
|
||||
}
|
||||
|
||||
impl ExactSizeIterator for BitIter<'_> {}
|
||||
@@ -132,7 +132,7 @@ impl PersistentBitVecBuilder {
|
||||
Ok(Self { mmap, n, path: path.to_path_buf() })
|
||||
}
|
||||
|
||||
pub(crate) fn from_raw_bytes(bytes: &[u8], n: usize, path: &Path) -> io::Result<Self> {
|
||||
pub fn from_raw_bytes(bytes: &[u8], n: usize, path: &Path) -> io::Result<Self> {
|
||||
let file_size = HEADER_SIZE + n_bytes_for_words(n);
|
||||
let file = OpenOptions::new()
|
||||
.read(true).write(true).create(true).truncate(true)
|
||||
|
||||
@@ -18,11 +18,11 @@ pub use builder::PersistentCompactIntVecBuilder;
|
||||
pub use colgroup::{ColGroup, FilterMask, MatrixGroupOps, eval_filter_mask};
|
||||
pub use intmatrix::{PersistentCompactIntMatrix, PersistentCompactIntMatrixBuilder, pack_compact_int_matrix};
|
||||
pub use layer_meta::LayerMeta;
|
||||
pub use reader::PersistentCompactIntVec;
|
||||
pub use tempbitvec::TempBitVec;
|
||||
pub use tempintvec::TempCompactIntVec;
|
||||
pub use reader::{PersistentCompactIntVec, Iter as CompactIntVecIter};
|
||||
pub use tempbitvec::{TempBitVec, TempBitVecBuilder};
|
||||
pub use tempintvec::{TempCompactIntVec, TempCompactIntVecBuilder};
|
||||
pub use traits::{BitPartials, ColumnWeights, CountPartials};
|
||||
pub use views::{BitSliceView, IntSliceView};
|
||||
pub use views::{BitSliceView, BitSliceIter, IntSliceView, IntSliceViewIter};
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "tests/mod.rs"]
|
||||
|
||||
@@ -43,27 +43,27 @@ impl TempBitVec {
|
||||
|
||||
// ── TempBitVecBuilder — mutable, becomes TempBitVec on freeze ────────────────
|
||||
|
||||
pub(crate) struct TempBitVecBuilder {
|
||||
pub struct TempBitVecBuilder {
|
||||
builder: PersistentBitVecBuilder,
|
||||
temp: TempDir,
|
||||
}
|
||||
|
||||
impl TempBitVecBuilder {
|
||||
pub(crate) fn new(n: usize) -> io::Result<Self> {
|
||||
pub fn new(n: usize) -> io::Result<Self> {
|
||||
let temp = TempDir::new()?;
|
||||
let path = temp.path().join("data.pbiv");
|
||||
let builder = PersistentBitVecBuilder::new(n, &path)?;
|
||||
Ok(Self { builder, temp })
|
||||
}
|
||||
|
||||
pub(crate) fn new_ones(n: usize) -> io::Result<Self> {
|
||||
pub fn new_ones(n: usize) -> io::Result<Self> {
|
||||
let temp = TempDir::new()?;
|
||||
let path = temp.path().join("data.pbiv");
|
||||
let builder = PersistentBitVecBuilder::new_ones(n, &path)?;
|
||||
Ok(Self { builder, temp })
|
||||
}
|
||||
|
||||
pub(crate) fn freeze(self) -> io::Result<TempBitVec> {
|
||||
pub fn freeze(self) -> io::Result<TempBitVec> {
|
||||
let Self { builder, temp } = self;
|
||||
let vec = builder.finish()?;
|
||||
Ok(TempBitVec { vec, _temp: temp })
|
||||
@@ -72,7 +72,8 @@ impl TempBitVecBuilder {
|
||||
pub fn set(&mut self, slot: usize, value: bool) {
|
||||
self.builder.set(slot, value);
|
||||
}
|
||||
pub(crate) fn view(&self) -> BitSliceView<'_> {
|
||||
|
||||
pub fn view(&self) -> BitSliceView<'_> {
|
||||
self.builder.view()
|
||||
}
|
||||
|
||||
@@ -80,19 +81,19 @@ impl TempBitVecBuilder {
|
||||
self.builder.or(other);
|
||||
}
|
||||
|
||||
pub(crate) fn and(&mut self, other: BitSliceView<'_>) {
|
||||
pub fn and(&mut self, other: BitSliceView<'_>) {
|
||||
self.builder.and(other);
|
||||
}
|
||||
|
||||
pub(crate) fn xor(&mut self, other: BitSliceView<'_>) {
|
||||
pub fn xor(&mut self, other: BitSliceView<'_>) {
|
||||
self.builder.xor(other);
|
||||
}
|
||||
|
||||
pub(crate) fn not(&mut self) {
|
||||
pub fn not(&mut self) {
|
||||
self.builder.not();
|
||||
}
|
||||
|
||||
pub(crate) fn copy_from(&mut self, src: BitSliceView<'_>) {
|
||||
pub fn copy_from(&mut self, src: BitSliceView<'_>) {
|
||||
self.builder.copy_from(src);
|
||||
}
|
||||
|
||||
@@ -100,11 +101,11 @@ impl TempBitVecBuilder {
|
||||
self.builder.or_where(col, pred);
|
||||
}
|
||||
|
||||
pub(crate) fn and_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
|
||||
pub fn and_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
|
||||
self.builder.and_where(col, pred);
|
||||
}
|
||||
|
||||
pub(crate) fn xor_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
|
||||
pub fn xor_where(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
|
||||
self.builder.xor_where(col, pred);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,60 +32,58 @@ impl TempCompactIntVec {
|
||||
|
||||
// ── TempCompactIntVecBuilder — mutable, becomes TempCompactIntVec on freeze ──
|
||||
|
||||
pub(crate) struct TempCompactIntVecBuilder {
|
||||
pub struct TempCompactIntVecBuilder {
|
||||
builder: PersistentCompactIntVecBuilder,
|
||||
temp: TempDir,
|
||||
}
|
||||
|
||||
impl TempCompactIntVecBuilder {
|
||||
pub(crate) fn new(n: usize) -> io::Result<Self> {
|
||||
pub fn new(n: usize) -> io::Result<Self> {
|
||||
let temp = TempDir::new()?;
|
||||
let path = temp.path().join("data.pciv");
|
||||
let builder = PersistentCompactIntVecBuilder::new(n, &path)?;
|
||||
Ok(Self { builder, temp })
|
||||
}
|
||||
|
||||
pub(crate) fn freeze(self) -> io::Result<TempCompactIntVec> {
|
||||
pub fn freeze(self) -> io::Result<TempCompactIntVec> {
|
||||
let Self { builder, temp } = self;
|
||||
let vec = builder.finish()?;
|
||||
Ok(TempCompactIntVec { vec, _temp: temp })
|
||||
}
|
||||
|
||||
// ── Delegation methods ────────────────────────────────────────────────────
|
||||
pub fn n(&self) -> usize { self.builder.len() }
|
||||
|
||||
pub(crate) fn n(&self) -> usize { self.builder.len() }
|
||||
pub fn set(&mut self, slot: usize, value: u32) { self.builder.set(slot, value); }
|
||||
pub fn get(&self, slot: usize) -> u32 { self.builder.get(slot) }
|
||||
|
||||
pub(crate) fn set(&mut self, slot: usize, value: u32) { self.builder.set(slot, value); }
|
||||
pub(crate) fn get(&self, slot: usize) -> u32 { self.builder.get(slot) }
|
||||
pub fn primary_bytes(&self) -> &[u8] { self.builder.primary_bytes() }
|
||||
pub fn primary_bytes_mut(&mut self) -> &mut [u8] { self.builder.primary_bytes_mut() }
|
||||
|
||||
pub(crate) fn primary_bytes(&self) -> &[u8] { self.builder.primary_bytes() }
|
||||
pub(crate) fn primary_bytes_mut(&mut self) -> &mut [u8] { self.builder.primary_bytes_mut() }
|
||||
|
||||
pub(crate) fn inc_present(&mut self, col: BitSliceView<'_>) {
|
||||
pub fn inc_present(&mut self, col: BitSliceView<'_>) {
|
||||
self.builder.inc_present(col);
|
||||
}
|
||||
|
||||
pub(crate) fn inc_present_fast(&mut self, col: BitSliceView<'_>) {
|
||||
pub fn inc_present_fast(&mut self, col: BitSliceView<'_>) {
|
||||
self.builder.inc_present_fast(col);
|
||||
}
|
||||
|
||||
pub(crate) fn inc_predicate(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
|
||||
pub fn inc_predicate(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
|
||||
self.builder.inc_predicate(col, pred);
|
||||
}
|
||||
|
||||
pub(crate) fn inc_predicate_fast(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
|
||||
pub fn inc_predicate_fast(&mut self, col: IntSliceView<'_>, pred: impl Fn(u32) -> bool) {
|
||||
self.builder.inc_predicate_fast(col, pred);
|
||||
}
|
||||
|
||||
pub(crate) fn add(&mut self, other: IntSliceView<'_>) {
|
||||
pub fn add(&mut self, other: IntSliceView<'_>) {
|
||||
self.builder.add(other);
|
||||
}
|
||||
|
||||
pub(crate) fn mask_with(&mut self, mask: BitSliceView<'_>) {
|
||||
pub fn mask_with(&mut self, mask: BitSliceView<'_>) {
|
||||
self.builder.mask_with(mask);
|
||||
}
|
||||
|
||||
pub(crate) fn min(&mut self, other: IntSliceView<'_>) { self.builder.min(other); }
|
||||
pub(crate) fn max(&mut self, other: IntSliceView<'_>) { self.builder.max(other); }
|
||||
pub(crate) fn diff(&mut self, other: IntSliceView<'_>) { self.builder.diff(other); }
|
||||
pub fn min(&mut self, other: IntSliceView<'_>) { self.builder.min(other); }
|
||||
pub fn max(&mut self, other: IntSliceView<'_>) { self.builder.max(other); }
|
||||
pub fn diff(&mut self, other: IntSliceView<'_>) { self.builder.diff(other); }
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ use crossbeam_channel;
|
||||
use hashbrown::HashMap;
|
||||
use obikseq::k;
|
||||
use obikseq::{CanonicalKmer, Sequence, Unitig};
|
||||
#[cfg(not(any(test, feature = "test-utils")))]
|
||||
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
||||
use std::cell::RefCell;
|
||||
use std::fmt;
|
||||
|
||||
@@ -11,7 +11,7 @@ use obilayeredmap::IndexMode;
|
||||
use crate::error::{OKIError, OKIResult};
|
||||
use crate::index::KmerIndex;
|
||||
use crate::meta::{GenomeInfo, IndexMeta};
|
||||
use crate::state::IndexState;
|
||||
use crate::state::{IndexState, SENTINEL_INDEXED};
|
||||
|
||||
pub use obikpartitionner::MergeMode;
|
||||
|
||||
@@ -263,6 +263,8 @@ impl KmerIndex {
|
||||
rep.push(t.stop());
|
||||
}
|
||||
|
||||
fs::File::create(output.join(SENTINEL_INDEXED)).map_err(OKIError::Io)?;
|
||||
|
||||
KmerIndex::open(output)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,6 +49,11 @@ impl MetaPred {
|
||||
if values.iter().any(|v| v.is_empty()) {
|
||||
return Err(format!("empty value in predicate: {s}"));
|
||||
}
|
||||
if matches!(op, PredOp::Matches | PredOp::NotMatches) {
|
||||
if let Some(v) = values.iter().find(|v| !v.starts_with('/')) {
|
||||
return Err(format!("path predicate value must start with '/': {v:?} in predicate: {s}"));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self { key, op, values })
|
||||
}
|
||||
@@ -72,16 +77,12 @@ impl MetaPred {
|
||||
|
||||
/// True if `value` is equal to `pattern` or is a descendant of it in a `/`-separated hierarchy.
|
||||
///
|
||||
/// - Absolute pattern (`/a/b`): `value` must start with `/a/b` at a segment boundary.
|
||||
/// - Bare segment (`b`): `value` must contain `b` as an exact segment anywhere.
|
||||
/// Both `value` and `pattern` must start with `/`.
|
||||
/// `value` matches if it equals `pattern` exactly or starts with `pattern` followed by `/`.
|
||||
fn path_matches(value: &str, pattern: &str) -> bool {
|
||||
if pattern.starts_with('/') {
|
||||
value == pattern
|
||||
|| (value.starts_with(pattern)
|
||||
&& value[pattern.len()..].starts_with('/'))
|
||||
} else {
|
||||
value.split('/').any(|seg| seg == pattern)
|
||||
}
|
||||
value == pattern
|
||||
|| (value.starts_with(pattern)
|
||||
&& value[pattern.len()..].starts_with('/'))
|
||||
}
|
||||
|
||||
// ── Three-value group evaluation ──────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user