#!/usr/bin/env bash # Usage: simulate_one.sh genome.fna.gz output_dir # Simulates paired-end HiSeq reads for a single genome. set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" ISS="${SCRIPT_DIR}/../.venv/bin/iss" COVERAGE=15 READ_LENGTH=150 CPUS="${CPUS:-$(sysctl -n hw.logicalcpu 2>/dev/null || nproc 2>/dev/null || echo 2)}" genome_file="$1" out_dir="$2" mkdir -p "${out_dir}" tmp_fasta=$(mktemp "${TMPDIR:-/tmp}/obikmer_XXXXXX.fna") trap 'rm -f "${tmp_fasta}"' EXIT gzip -dc "${genome_file}" > "${tmp_fasta}" genome_size=$(grep -v "^>" "${tmp_fasta}" | tr -d '[:space:]' | wc -c | tr -d ' ') n_reads=$(python3 -c "import math; print(math.ceil(${COVERAGE} * ${genome_size} / (2 * ${READ_LENGTH})))") echo "[${out_dir}] genome=${genome_size} bp → ${n_reads} read pairs (${COVERAGE}x HiSeq)" "${ISS}" generate \ --genomes "${tmp_fasta}" \ --model HiSeq \ --n_reads "${n_reads}" \ --cpus "${CPUS}" \ --compress \ --output "${out_dir}/reads"