c694e1f2b0
Introduces a Make-based orchestration for simulating, indexing, merging, filtering, and verifying k-mer counts and presence. Exposes internal builder and iterator APIs publicly, enforces mandatory leading slashes for predicate patterns, registers the `obitaxonomy` crate, and updates tooling configurations alongside documentation.
49 lines
871 B
Bash
Executable File
49 lines
871 B
Bash
Executable File
#!/usr/bin/env bash
#
# Download a fixed list of genome assemblies with the `datasets` CLI, unzip
# each package, and write every *.fna file it contains, converted by
# `obiconvert -Z`, into the genomes/ directory.
#
# Tools invoked by this script: datasets, unzip, find, obiconvert.

# Abort on any failing command (-e), on use of an unset variable (-u), and
# when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Genome assembly accessions to download; each one is passed to
# `datasets download genome accession` by the loop further down.
# Blank lines keep the original grouping into five sets of four accessions.
# The list is a constant, so it is declared read-only.
readonly -a assemblies=(
  GCF_000005845.2
  GCF_000010245.2
  GCF_000007445.1
  GCF_000006665.1

  GCF_000006945.2
  GCF_000195995.1
  GCF_000009505.1
  GCF_000026565.1

  GCF_000016305.1
  GCF_000019965.1
  GCF_000240185.1
  GCF_000742135.1

  GCF_000069965.1
  GCF_000022565.1
  GCF_000306885.1
  GCF_003013715.1

  GCF_000009045.1
  GCF_000009825.1
  GCF_000022445.1
  GCF_000834255.1
)

# All converted genomes are written into this directory (created if missing).
mkdir -p genomes

# Process one accession at a time: download, unpack, convert, clean up.
for acc in "${assemblies[@]}"; do
  echo "Downloading ${acc}"

  # Fetch the genome package for this accession and save it as <acc>.zip.
  datasets download genome accession "${acc}" \
    --include genome \
    --filename "${acc}.zip"

  # Unpack quietly into a scratch directory named after the accession.
  unzip -q "${acc}.zip" -d "${acc}"

  # Convert every *.fna file found in the unpacked package. Paths travel
  # NUL-delimited and are quoted at every use, so names containing spaces,
  # backslashes, or glob characters are handled intact. find stays inside
  # the pipeline so that pipefail still reports a find failure.
  find "${acc}" -name "*.fna" -print0 |
    while IFS= read -r -d '' fna; do
      # NOTE(review): -Z presumably makes obiconvert emit gzip-compressed
      # output, matching the .gz suffix of the target file — confirm.
      # ${fna##*/} strips the directory part (same result as basename).
      obiconvert -Z "${fna}" >"genomes/${fna##*/}.gz"
    done

  # Remove the scratch directory and the archive. `--` ends option parsing
  # and `:?` aborts instead of running rm on an empty name.
  rm -rf -- "${acc:?}" "${acc}.zip"
done