feat: add benchmark pipeline, expose APIs, and enforce strict paths

Introduces a Make-based orchestration for simulating, indexing, merging, filtering, and verifying k-mer counts and presence. Exposes internal builder and iterator APIs publicly, enforces mandatory leading slashes for predicate patterns, registers the `obitaxonomy` crate, and updates tooling configurations alongside documentation.
This commit is contained in:
Eric Coissac
2026-06-19 09:55:41 +02:00
parent 280ca1f5a3
commit c694e1f2b0
42 changed files with 2585 additions and 84 deletions
+27
View File
@@ -0,0 +1,27 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BINARY="${SCRIPT_DIR}/../src/target/release/obikmer"
INDEX="${SCRIPT_DIR}/global_index_presence"
REF_DIR="${SCRIPT_DIR}/reference_index"
STATS_DIR="${SCRIPT_DIR}/stats/verify_merge_presence"
PYTHON="${SCRIPT_DIR}/../.venv/bin/python3"
VERIFY_PY="${SCRIPT_DIR}/verify_merge_presence.py"
mkdir -p "${STATS_DIR}"
CURRENT="${STATS_DIR}/current.csv"
"${PYTHON}" "${VERIFY_PY}" --header >"${CURRENT}"
"${PYTHON}" "${VERIFY_PY}" \
--obikmer "${BINARY}" \
"${INDEX}" "${REF_DIR}" \
>>"${CURRENT}"
run_n=$(printf '%03d' "$(find "${STATS_DIR}" -maxdepth 1 -name 'presence_*.csv' | wc -l | tr -d ' ')")
ARCHIVE="${STATS_DIR}/presence_${run_n}.csv"
cp "${CURRENT}" "${ARCHIVE}"
echo "Done. Results → ${ARCHIVE}"