feat: add benchmark pipeline, expose APIs, and enforce strict paths
Introduces a Make-based orchestration for simulating, indexing, merging, filtering, and verifying k-mer counts and presence. Exposes internal builder and iterator APIs publicly, enforces mandatory leading slashes for predicate patterns, registers the `obitaxonomy` crate, and updates tooling configurations alongside documentation.
This commit is contained in:
Executable
+48
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env bash
#
# Download a fixed list of genome assemblies with the `datasets` CLI and
# store every extracted *.fna file, compressed, in ./genomes.
#
# For each accession the script:
#   1. downloads "<accession>.zip" (genome sequences only),
#   2. unpacks it into a scratch directory named after the accession,
#   3. runs `obiconvert -Z` on each *.fna file found there and writes the
#      result to genomes/<file name>.gz,
#   4. removes the scratch directory and the zip archive.
#
# Requires on PATH: datasets, unzip, obiconvert.
# Usage: run from the directory in which ./genomes should be created.

set -euo pipefail

# Fail early, with a clear message, if a required tool is missing.
for tool in datasets unzip obiconvert; do
  if ! command -v "${tool}" >/dev/null; then
    printf 'error: required tool not found: %s\n' "${tool}" >&2
    exit 1
  fi
done

# Assembly accessions to fetch (grouping by blank line kept from the
# original list).
assemblies=(
  GCF_000005845.2
  GCF_000010245.2
  GCF_000007445.1
  GCF_000006665.1

  GCF_000006945.2
  GCF_000195995.1
  GCF_000009505.1
  GCF_000026565.1

  GCF_000016305.1
  GCF_000019965.1
  GCF_000240185.1
  GCF_000742135.1

  GCF_000069965.1
  GCF_000022565.1
  GCF_000306885.1
  GCF_003013715.1

  GCF_000009045.1
  GCF_000009825.1
  GCF_000022445.1
  GCF_000834255.1
)

mkdir -p genomes

for acc in "${assemblies[@]}"; do
  echo "Downloading ${acc}"

  datasets download genome accession "${acc}" \
    --include genome \
    --filename "${acc}.zip"

  unzip -q "${acc}.zip" -d "${acc}"

  # NUL-delimited hand-off plus `IFS= read -r` keeps every path intact
  # (spaces, backslashes, newlines). The pipe is kept on purpose so that
  # `pipefail` still aborts the script when `find` itself fails.
  find "${acc}" -name '*.fna' -print0 |
    while IFS= read -r -d '' fasta; do
      # ${fasta##*/} is the file name without its directory part.
      # NOTE(review): -Z presumably makes obiconvert emit compressed
      # output, matching the .gz suffix used here; confirm.
      obiconvert -Z "${fasta}" >"genomes/${fasta##*/}.gz"
    done

  # Remove the per-accession scratch data once it has been converted.
  rm -rf -- "${acc}" "${acc}.zip"
done
|
||||
Reference in New Issue
Block a user