# Requires GNU Make >= 4.3 (grouped targets &:) — use gmake on macOS

# Plain `make` builds `all`. Set before deps.mk is included so that the goal
# does not depend on which rule Make happens to read first; an explicit
# .DEFAULT_GOAL assignment inside deps.mk would still take precedence.
.DEFAULT_GOAL := all

# Tool and input locations, relative to the directory this makefile runs in.
BINARY  := ../src/target/release/obikmer
VENV_PY := ../.venv/bin/python3
GENOMES := $(wildcard genomes/*.fna.gz)

# SPECIMENS, SPECIES, and the full dependency graph are generated by
# make_deps.py from the genome FASTA headers — like .d files in C.
# Make rebuilds deps.mk whenever a genome file or make_deps.py changes,
# then restarts. The leading '-' keeps the very first run (no deps.mk yet)
# from failing.
-include deps.mk

# ── derived file lists ────────────────────────────────────────────────────────
# Everything below is computed from SPECIMENS / SPECIES and is therefore empty
# until deps.mk has been generated.

# One reference k-mer set per specimen.
REF_NPZS := $(SPECIMENS:%=reference_index/%.npz)

# Files produced together by build_reference_dist.py.
REF_DIST_CSVS := $(addprefix reference_dist/, \
    shared_kmers.csv hamming_dist.csv jaccard_dist.csv \
    bray_curtis_dist.csv relfreq_bray_curtis_dist.csv \
    euclidean_dist.csv relfreq_euclidean_dist.csv \
    hellinger_dist.csv hellinger_euclidean_dist.csv)

# Files produced by `obikmer distance` on the merged presence index.
OBIKMER_PRESENCE_DIST := $(addprefix obikmer_dist/presence/, \
    jaccard_dist.csv jaccard_shared.csv jaccard_nj.nwk \
    hamming_dist.csv hamming_nj.nwk)

# Files produced by `obikmer distance` on the merged count index.
OBIKMER_COUNT_DIST := $(addprefix obikmer_dist/count/, \
    jaccard_dist.csv jaccard_shared.csv jaccard_nj.nwk \
    bray_curtis_dist.csv bray_curtis_nj.nwk \
    relfreq_bray_curtis_dist.csv relfreq_bray_curtis_nj.nwk \
    euclidean_dist.csv euclidean_nj.nwk \
    relfreq_euclidean_dist.csv relfreq_euclidean_nj.nwk \
    hellinger_dist.csv hellinger_nj.nwk \
    hellinger_euclidean_dist.csv hellinger_euclidean_nj.nwk)

DIST_COMPARISON := stats/dist_comparison/summary.csv

# Per-specimen index sentinels and their stats files.
PRESENCE_DONE  := $(SPECIMENS:%=specimen_index_presence/%/index.done)
PRESENCE_STATS := $(SPECIMENS:%=stats/indexing_presence/%.stats)
COUNT_DONE     := $(SPECIMENS:%=specimen_index_count/%/index.done)
COUNT_STATS    := $(SPECIMENS:%=stats/indexing_count/%.stats)

# Per-specimen verification stats.
VERIFY_PRESENCE_STATS := $(SPECIMENS:%=stats/verify_presence/%.stats)
VERIFY_COUNT_STATS    := $(SPECIMENS:%=stats/verify_count/%.stats)

# Per-species specific-index sentinels and their stats files.
SPECIFIC_PRESENCE_DONE  := $(SPECIES:%=specific_index_presence/%/index.done)
SPECIFIC_PRESENCE_STATS := $(SPECIES:%=stats/specific_kmer_presence/%.stats)
SPECIFIC_COUNT_DONE     := $(SPECIES:%=specific_index_count/%/index.done)
SPECIFIC_COUNT_STATS    := $(SPECIES:%=stats/specific_kmer_count/%.stats)

# A "--" inside a specimen name becomes a directory separator in the path of
# its simulated reads.
SIMULATED_READS := $(foreach s,$(SPECIMENS),simulated_data/$(subst --,/,$(s))/reads_R1.fastq.gz)

# Run every recipe serially, even under `make -j`.
.NOTPARALLEL:

.PHONY: all simulate reference reference_dist \
        obikmer_dist obikmer_dist_presence obikmer_dist_count \
        dist_comparison \
        index_presence index_count \
        aggregate_index_presence aggregate_index_count \
        merge_presence merge_count \
        verify_presence verify_count \
        aggregate_verify_presence aggregate_verify_count \
        verify_merge_presence verify_merge_count \
        filter_presence filter_count \
        aggregate_filter_presence aggregate_filter_count

# ── top-level goals ───────────────────────────────────────────────────────────
all: aggregate_verify_presence aggregate_verify_count \
     verify_merge_presence verify_merge_count \
     aggregate_filter_presence aggregate_filter_count \
     dist_comparison

verify_merge_presence: stats/verify_merge_presence/current.csv
verify_merge_count:    stats/verify_merge_count/current.csv

# ── dependency file ───────────────────────────────────────────────────────────
# The output goes to a temporary file that is renamed only after make_deps.py
# succeeded: a failed or interrupted run must not leave an empty or truncated
# deps.mk that is newer than the genomes and would be included as if valid.
# The generator script is a prerequisite so that editing it refreshes deps.mk;
# it is passed the genome files only.
deps.mk: $(GENOMES) make_deps.py
	$(VENV_PY) make_deps.py $(GENOMES) > $@.tmp
	mv -f $@.tmp $@

# ── simulation ────────────────────────────────────────────────────────────────
# Prerequisites (genome → reads) are in deps.mk; $< is the genome file.
# The script receives the genome and the directory that will hold the reads.
$(SIMULATED_READS):
	bash simulate_one.sh $< $(dir $@)

simulate: $(SIMULATED_READS)

# ── reference kmer sets ───────────────────────────────────────────────────────
# Prerequisites (reads → npz) are in deps.mk.
# Build the reference k-mer set of one specimen; $* is the specimen name
# matched by %.
reference_index/%.npz:
	bash build_reference.sh $*

reference: $(REF_NPZS)

# ── reference distance matrices ───────────────────────────────────────────────
# Grouped target (&:): a single run of the script produces every CSV listed in
# REF_DIST_CSVS. The script itself is a prerequisite, so editing it triggers a
# rebuild.
$(REF_DIST_CSVS) &: $(REF_NPZS) build_reference_dist.py
	$(VENV_PY) build_reference_dist.py

reference_dist: $(REF_DIST_CSVS)

# ── obikmer distance (presence index) ────────────────────────────────────────
# Grouped target: the two `distance` runs below account for all files in
# OBIKMER_PRESENCE_DIST (jaccard_* and hamming_*).
$(OBIKMER_PRESENCE_DIST) &: global_index_presence/index.done $(BINARY)
	mkdir -p obikmer_dist/presence
	$(BINARY) distance \
	    --output obikmer_dist/presence/jaccard \
	    --metric jaccard --shared-kmers --nj \
	    global_index_presence
	$(BINARY) distance \
	    --output obikmer_dist/presence/hamming \
	    --metric hamming --nj \
	    global_index_presence

obikmer_dist_presence: $(OBIKMER_PRESENCE_DIST)

# ── obikmer distance (count index) ───────────────────────────────────────────
# Grouped target: one `distance` run per metric, all reading the merged count
# index. Only the jaccard run also requests the shared-kmers output.
$(OBIKMER_COUNT_DIST) &: global_index_count/index.done $(BINARY)
	mkdir -p obikmer_dist/count
	$(BINARY) distance \
	    --output obikmer_dist/count/jaccard \
	    --metric jaccard --shared-kmers --nj \
	    global_index_count
	$(BINARY) distance \
	    --output obikmer_dist/count/bray_curtis \
	    --metric bray-curtis --nj \
	    global_index_count
	$(BINARY) distance \
	    --output obikmer_dist/count/relfreq_bray_curtis \
	    --metric relfreq-bray-curtis --nj \
	    global_index_count
	$(BINARY) distance \
	    --output obikmer_dist/count/euclidean \
	    --metric euclidean --nj \
	    global_index_count
	$(BINARY) distance \
	    --output obikmer_dist/count/relfreq_euclidean \
	    --metric relfreq-euclidean --nj \
	    global_index_count
	$(BINARY) distance \
	    --output obikmer_dist/count/hellinger \
	    --metric hellinger --nj \
	    global_index_count
	$(BINARY) distance \
	    --output obikmer_dist/count/hellinger_euclidean \
	    --metric hellinger-euclidean --nj \
	    global_index_count

obikmer_dist_count: $(OBIKMER_COUNT_DIST)

obikmer_dist: obikmer_dist_presence obikmer_dist_count

# ── distance comparison ───────────────────────────────────────────────────────
# Rebuilt when any reference or obikmer distance file, or the comparison
# script itself, changes. $@ is the summary CSV named by DIST_COMPARISON.
$(DIST_COMPARISON): $(REF_DIST_CSVS) $(OBIKMER_PRESENCE_DIST) $(OBIKMER_COUNT_DIST) \
                    compare_all_dist.py
	$(VENV_PY) compare_all_dist.py --out $@

dist_comparison: $(DIST_COMPARISON)

# ── per-specimen indexing ─────────────────────────────────────────────────────
# Prerequisites (reads → index.done + .stats) are in deps.mk.
# Each script run yields both the index sentinel and the stats file of one
# specimen ($*), hence the grouped pattern targets.
specimen_index_presence/%/index.done \
stats/indexing_presence/%.stats &: $(BINARY)
	bash index_one_presence.sh $*

specimen_index_count/%/index.done \
stats/indexing_count/%.stats &: $(BINARY)
	bash index_one_count.sh $*

index_presence: $(PRESENCE_DONE)
index_count:    $(COUNT_DONE)

# ── indexing stats aggregation ────────────────────────────────────────────────
# Phony targets: the aggregation script runs every time they are requested.
aggregate_index_presence: $(PRESENCE_STATS)
	bash aggregate_stats.sh indexing_presence

aggregate_index_count: $(COUNT_STATS)
	bash aggregate_stats.sh indexing_count

# ── global merge ──────────────────────────────────────────────────────────────
# The merged index is redone when any per-specimen index or the binary changes.
global_index_presence/index.done: $(PRESENCE_DONE) $(BINARY)
	bash merge_presence.sh

global_index_count/index.done: $(COUNT_DONE) $(BINARY)
	bash merge_count.sh

merge_presence: global_index_presence/index.done
merge_count:    global_index_count/index.done

# ── per-specimen verification ─────────────────────────────────────────────────
# Prerequisites (index.done + npz → .stats) are in deps.mk.
stats/verify_presence/%.stats:
	bash verify_one_presence.sh $*

stats/verify_count/%.stats:
	bash verify_one_count.sh $*

verify_presence: $(VERIFY_PRESENCE_STATS)
verify_count:    $(VERIFY_COUNT_STATS)

# ── verification stats aggregation ───────────────────────────────────────────
aggregate_verify_presence: $(VERIFY_PRESENCE_STATS)
	bash aggregate_stats.sh verify_presence

aggregate_verify_count: $(VERIFY_COUNT_STATS)
	bash aggregate_stats.sh verify_count

# ── species-specific indexes ──────────────────────────────────────────────────
# Prerequisites (global index → specific index) are in deps.mk.
# Each script run yields both the index sentinel and the stats file of one
# species ($* is the species name matched by %), hence the grouped pattern
# targets. Only the binary is listed here; the remaining prerequisites of
# these targets are declared outside this rule.
specific_index_presence/%/index.done \
stats/specific_kmer_presence/%.stats &: $(BINARY)
	bash filter_one_presence.sh $*

specific_index_count/%/index.done \
stats/specific_kmer_count/%.stats &: $(BINARY)
	bash filter_one_count.sh $*

filter_presence: $(SPECIFIC_PRESENCE_DONE)
filter_count:    $(SPECIFIC_COUNT_DONE)

# Phony targets: the aggregation script runs every time they are requested,
# after all per-species stats files are up to date.
aggregate_filter_presence: $(SPECIFIC_PRESENCE_STATS)
	bash aggregate_stats.sh specific_kmer_presence

aggregate_filter_count: $(SPECIFIC_COUNT_STATS)
	bash aggregate_stats.sh specific_kmer_count

# ── merged index verification ─────────────────────────────────────────────────
# Redone when any reference k-mer set or the corresponding merged index
# changes.
stats/verify_merge_presence/current.csv: $(REF_NPZS) global_index_presence/index.done
	bash verify_merge_presence.sh

stats/verify_merge_count/current.csv: $(REF_NPZS) global_index_count/index.done
	bash verify_merge_count.sh