Files
obikmer/benchmark/build_reference.sh
T

40 lines
1.2 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
#
# Build one reference index (.npz) per simulated strain.
#
# Walks simulated_data/<species>/<strain>/ next to this script and, for every
# strain directory that holds both reads_R1.fastq.gz and reads_R2.fastq.gz,
# runs build_reference.py to write reference_index/<species>--<strain>.npz.
#
# Environment:
#   KMER_SIZE      value passed to --kmer-size     (default: 31)
#   MIN_ABUNDANCE  value passed to --min-abundance (default: 1)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SIMDATA_DIR="${SCRIPT_DIR}/simulated_data"
REF_DIR="${SCRIPT_DIR}/reference_index"
PYTHON="${SCRIPT_DIR}/../.venv/bin/python3"
BUILD_PY="${SCRIPT_DIR}/build_reference.py"
KMER_SIZE="${KMER_SIZE:-31}"
MIN_ABUNDANCE="${MIN_ABUNDANCE:-1}"

#######################################
# Build a reference index for every strain found under SIMDATA_DIR.
# Globals:   SIMDATA_DIR, REF_DIR, PYTHON, BUILD_PY, KMER_SIZE,
#            MIN_ABUNDANCE (all read)
# Arguments: none
# Outputs:   one progress line per built strain on stdout; SKIP notices and
#            the "no reference index built" warning on stderr
# Returns:   0; with `set -e` active a failing build aborts the script
#######################################
build_all_references() {
  local species_dir strain_dir species strain r1 r2 out
  local built=0

  for species_dir in "${SIMDATA_DIR}"/*/; do
    # An unmatched glob stays literal; the -d test filters that case out.
    [[ -d "${species_dir}" ]] || continue
    # The */ glob leaves a trailing slash; strip it so joined paths do not
    # contain "//".
    species_dir="${species_dir%/}"
    species="${species_dir##*/}"

    for strain_dir in "${species_dir}"/*/; do
      [[ -d "${strain_dir}" ]] || continue
      strain_dir="${strain_dir%/}"
      strain="${strain_dir##*/}"

      r1="${strain_dir}/reads_R1.fastq.gz"
      r2="${strain_dir}/reads_R2.fastq.gz"
      # Both mates are required; a strain missing either file is skipped.
      if [[ ! -f "${r1}" || ! -f "${r2}" ]]; then
        echo "SKIP ${species}--${strain}: reads not found" >&2
        continue
      fi

      out="${REF_DIR}/${species}--${strain}.npz"
      echo "[${species}--${strain}] → ${out}"
      "${PYTHON}" "${BUILD_PY}" \
        --kmer-size "${KMER_SIZE}" \
        --min-abundance "${MIN_ABUNDANCE}" \
        --output "${out}" \
        "${r1}" "${r2}"
      # Plain assignment: (( built++ )) would return 1 on the first
      # increment and trip `set -e`.
      built=$((built + 1))
    done
  done

  # Make a silent no-op visible without changing the exit status that
  # existing callers see.
  if ((built == 0)); then
    echo "WARNING: no reference index built: no strain with paired reads under ${SIMDATA_DIR}" >&2
  fi
}

mkdir -p "${REF_DIR}"
build_all_references