40 lines
1.2 KiB
Bash
40 lines
1.2 KiB
Bash
|
|
#!/usr/bin/env bash
#
# Build one reference file per simulated strain.
#
# Layout (all paths are relative to the directory containing this script):
#   simulated_data/<species>/<strain>/reads_R1.fastq.gz   input
#   simulated_data/<species>/<strain>/reads_R2.fastq.gz   input
#   reference_index/<species>--<strain>.npz               output
#
# Every strain directory that holds both read files is handed to
# build_reference.py, run with the interpreter at ../.venv/bin/python3.
# Strains missing either read file are reported on stderr and skipped.
#
# Environment:
#   KMER_SIZE      forwarded as --kmer-size      (default: 31)
#   MIN_ABUNDANCE  forwarded as --min-abundance  (default: 1)
set -euo pipefail

# Unmatched globs expand to nothing, so a missing or empty data directory
# simply produces zero loop iterations instead of a literal "*/" entry.
shopt -s nullglob

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SIMDATA_DIR="${SCRIPT_DIR}/simulated_data"
REF_DIR="${SCRIPT_DIR}/reference_index"
PYTHON="${SCRIPT_DIR}/../.venv/bin/python3"
BUILD_PY="${SCRIPT_DIR}/build_reference.py"

KMER_SIZE="${KMER_SIZE:-31}"
MIN_ABUNDANCE="${MIN_ABUNDANCE:-1}"

# Number of strains for which the builder ran to completion.
built_count=0

#######################################
# Build the reference file for a single strain directory.
# Globals:   REF_DIR, PYTHON, BUILD_PY, KMER_SIZE, MIN_ABUNDANCE (read)
#            built_count (incremented after the builder succeeds)
# Arguments: $1 - species name
#            $2 - strain directory (a trailing slash is tolerated)
# Outputs:   progress line on stdout; SKIP notice on stderr
# Returns:   0 when the strain was built or skipped. A failing builder
#            propagates its status, which aborts the script under set -e.
#######################################
build_strain_reference() {
  local species=$1
  # Globs ending in "/" yield paths with a trailing slash; strip it so the
  # read paths handed to the builder do not contain "//".
  local strain_dir=${2%/}
  local strain=${strain_dir##*/}
  local r1="${strain_dir}/reads_R1.fastq.gz"
  local r2="${strain_dir}/reads_R2.fastq.gz"

  if [[ ! -f "${r1}" || ! -f "${r2}" ]]; then
    echo "SKIP ${species}--${strain}: reads not found" >&2
    return 0
  fi

  local out="${REF_DIR}/${species}--${strain}.npz"
  echo "[${species}--${strain}] → ${out}"

  "${PYTHON}" "${BUILD_PY}" \
    --kmer-size "${KMER_SIZE}" \
    --min-abundance "${MIN_ABUNDANCE}" \
    --output "${out}" \
    "${r1}" "${r2}"

  # Plain assignment rather than (( built_count++ )), which returns 1 when
  # the old value is 0 and would trip set -e.
  built_count=$((built_count + 1))
}

mkdir -p "${REF_DIR}"

for species_dir in "${SIMDATA_DIR}"/*/; do
  species_dir=${species_dir%/}
  species=${species_dir##*/}

  for strain_dir in "${species_dir}"/*/; do
    build_strain_reference "${species}" "${strain_dir}"
  done
done

# A run that builds nothing used to finish silently; make it visible while
# keeping the exit status at 0 so existing callers are unaffected.
if ((built_count == 0)); then
  echo "WARNING: no references were built; checked ${SIMDATA_DIR}" >&2
fi