#!/usr/bin/env bash
# Usage: aggregate_stats.sh TYPE
#   TYPE = indexing_presence | indexing_count | verify_presence | verify_count
#        | specific_kmer_presence | specific_kmer_count
#
# Reads all stats/TYPE/*.stats files (one CSV data row each, no header) found
# next to this script and concatenates them, each row prefixed with the run
# number, under a TYPE-specific header line into stats/TYPE/run_NNN.csv.
#
# A new run CSV is created only if no run CSV exists yet or if any .stats file
# is newer than the most recent run CSV (idempotent when nothing changed).
#
# Exit status:
#   0  a new run CSV was written, or the stats were already up to date
#   1  unknown TYPE, or the stats/TYPE directory does not exist
#   2  usage error (TYPE argument missing)
set -euo pipefail

# Path of the in-progress output file. Empty when there is nothing to clean up.
TMP_CSV=""

# Remove the in-progress output file if the script exits before it has been
# renamed to its final name (rm -f is a no-op once the rename has happened).
cleanup() {
  if [[ -n "${TMP_CSV}" ]]; then
    rm -f -- "${TMP_CSV}"
  fi
}

# Print a short usage message to stderr.
usage() {
  printf 'Usage: %s TYPE\n' "${0##*/}" >&2
  printf '  TYPE = indexing_presence | indexing_count | verify_presence | verify_count\n' >&2
  printf '       | specific_kmer_presence | specific_kmer_count\n' >&2
}

# Print "ERROR: <message>" to stderr and exit with status 1.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print the CSV header line for stats type $1.
# Returns 1 (printing nothing) when the type is not recognised.
header_for() {
  case "$1" in
    indexing_presence | indexing_count)
      printf '%s\n' "run,species,strain,scatter_wall_s,scatter_rss_b,dereplicate_wall_s,dereplicate_rss_b,count_kmer_wall_s,count_kmer_rss_b,index_wall_s,index_rss_b,total_wall_s,total_rss_b"
      ;;
    verify_presence)
      printf '%s\n' "run,species,strain,ref_kmers,idx_kmers,false_neg,false_pos,fn_pct,fp_pct"
      ;;
    verify_count)
      printf '%s\n' "run,species,strain,ref_kmers,idx_kmers,false_neg,false_pos,count_mismatch,fn_pct,fp_pct,cm_pct"
      ;;
    specific_kmer_presence | specific_kmer_count)
      printf '%s\n' "run,species,rebuild_wall_s,rebuild_rss_b,pack_wall_s,pack_rss_b,filter_total_wall_s,filter_total_rss_b,select_wall_s,select_rss_b,select_total_wall_s,select_total_rss_b"
      ;;
    *)
      return 1
      ;;
  esac
}

main() {
  if (($# < 1)); then
    usage
    exit 2
  fi

  local stats_type="$1"
  local header
  # Validate the type before it is used to build any path.
  header=$(header_for "${stats_type}") || die "unknown stats type '${stats_type}'"

  local script_dir
  script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
  local stats_dir="${script_dir}/stats/${stats_type}"
  [[ -d "${stats_dir}" ]] || die "stats directory not found: ${stats_dir}"

  # Find the highest-numbered existing run CSV (latest_csv stays empty if
  # there is none). The number is taken from the file name rather than from a
  # file count so that a gap in the sequence can never cause an existing run
  # to be overwritten.
  local run_re='^run_([0-9]+)\.csv$'
  local latest_csv="" latest_n=-1
  local candidate name n
  for candidate in "${stats_dir}"/run_*.csv; do
    name="${candidate##*/}"
    # Also skips the literal pattern left behind when the glob matches nothing.
    [[ "${name}" =~ ${run_re} ]] || continue
    # 10# forces base 10; zero-padded numbers such as 008 are not valid octal.
    n=$((10#${BASH_REMATCH[1]}))
    if ((n > latest_n)); then
      latest_n=${n}
      latest_csv="${candidate}"
    fi
  done

  # Nothing to do unless some .stats file is newer than the latest run CSV.
  if [[ -n "${latest_csv}" ]]; then
    local newer
    newer=$(find "${stats_dir}" -maxdepth 1 -name '*.stats' -newer "${latest_csv}")
    if [[ -z "${newer}" ]]; then
      printf '%s\n' "[${stats_type}] stats up to date (${latest_csv})"
      exit 0
    fi
  fi

  local run_n
  printf -v run_n '%03d' "$((latest_n + 1))"
  local csv="${stats_dir}/run_${run_n}.csv"

  # Build the CSV under a temporary name and rename it when complete, so an
  # interrupted run cannot leave a truncated run CSV that would later be
  # reported as up to date. The temporary name does not match 'run_*.csv'.
  TMP_CSV="${csv}.tmp.$$"
  trap cleanup EXIT

  local stats_file
  {
    printf '%s\n' "${header}"
    # Sort .stats files by name (byte order, locale-independent) for
    # reproducible row order.
    while IFS= read -r stats_file; do
      # awk terminates every output record with a newline, so a .stats file
      # lacking a trailing newline cannot merge with the next file's row.
      awk -v run="${run_n}" '{ print run "," $0 }' <"${stats_file}"
    done < <(find "${stats_dir}" -maxdepth 1 -name '*.stats' | LC_ALL=C sort)
  } >"${TMP_CSV}"

  mv -f -- "${TMP_CSV}" "${csv}"
  TMP_CSV=""

  printf '%s\n' "[${stats_type}] run ${run_n} → ${csv}"
}

main "$@"