#!/usr/bin/env bash
#
# Build one reference index file per simulated strain.
#
# Walks ${SIMDATA_DIR}/<species>/<strain>/ and, for every strain directory
# that contains both reads_R1.fastq.gz and reads_R2.fastq.gz, runs
# build_reference.py to produce ${REF_DIR}/<species>--<strain>.npz.
# Strain directories missing either read file are reported on stderr and
# skipped.
#
# Environment overrides:
#   KMER_SIZE      value passed as --kmer-size     (default: 31)
#   MIN_ABUNDANCE  value passed as --min-abundance (default: 1)
#
# Exit status: non-zero as soon as any build_reference.py invocation fails
# (set -e); zero otherwise, including when no strain directories exist.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SIMDATA_DIR="${SCRIPT_DIR}/simulated_data"
REF_DIR="${SCRIPT_DIR}/reference_index"
PYTHON="${SCRIPT_DIR}/../.venv/bin/python3"
BUILD_PY="${SCRIPT_DIR}/build_reference.py"

KMER_SIZE="${KMER_SIZE:-31}"
MIN_ABUNDANCE="${MIN_ABUNDANCE:-1}"

#######################################
# Build the index for every species/strain pair under SIMDATA_DIR.
# Globals:   SIMDATA_DIR, REF_DIR, PYTHON, BUILD_PY, KMER_SIZE,
#            MIN_ABUNDANCE (all read)
# Arguments: none
# Outputs:   one progress line per built strain on stdout;
#            one SKIP line per strain without reads on stderr
# Returns:   0 on success; the script aborts (set -e) if a build fails
#######################################
main() {
  local species_dir strain_dir species strain r1 r2 out

  mkdir -p "${REF_DIR}"

  for species_dir in "${SIMDATA_DIR}"/*/; do
    # An unmatched glob stays literal; the -d test filters that case out.
    [[ -d "${species_dir}" ]] || continue
    # The */ glob leaves a trailing slash; drop it so composed paths
    # contain no "//" and the name can be taken by parameter expansion.
    species_dir="${species_dir%/}"
    species="${species_dir##*/}"

    for strain_dir in "${species_dir}"/*/; do
      [[ -d "${strain_dir}" ]] || continue
      strain_dir="${strain_dir%/}"
      strain="${strain_dir##*/}"

      r1="${strain_dir}/reads_R1.fastq.gz"
      r2="${strain_dir}/reads_R2.fastq.gz"

      # Both mates are passed to the builder; skip the strain if either
      # file is absent.
      if [[ ! -f "${r1}" || ! -f "${r2}" ]]; then
        printf '%s\n' "SKIP ${species}--${strain}: reads not found" >&2
        continue
      fi

      out="${REF_DIR}/${species}--${strain}.npz"
      printf '%s\n' "[${species}--${strain}] → ${out}"

      "${PYTHON}" "${BUILD_PY}" \
        --kmer-size "${KMER_SIZE}" \
        --min-abundance "${MIN_ABUNDANCE}" \
        --output "${out}" \
        "${r1}" "${r2}"
    done
  done
}

main "$@"