#!/usr/bin/env python3 """Plot kmer frequency spectrum from kmer_spectrum_raw.json (log-log scale).""" import json import sys import argparse import matplotlib.pyplot as plt def load_spectrum(path: str): with open(path) as f: data = json.load(f) spectrum = data["spectrum"] pairs = sorted((int(k), v) for k, v in spectrum.items() if v > 0) counts = [x for x, _ in pairs] freqs = [y for _, y in pairs] return counts, freqs, data.get("f0"), data.get("f1") def main(): parser = argparse.ArgumentParser(description="Log-log kmer spectrum plot") parser.add_argument("spectrum", nargs="?", default="kmer_spectrum_raw.json", help="Path to kmer_spectrum_raw.json") parser.add_argument("-o", "--output", default=None, help="Save figure to file (PNG/SVG/PDF). Omit to display interactively.") parser.add_argument("--max-count", type=int, default=None, help="Truncate x-axis at this count value") args = parser.parse_args() counts, freqs, f0, f1 = load_spectrum(args.spectrum) if args.max_count: pairs = [(c, f) for c, f in zip(counts, freqs) if c <= args.max_count] counts, freqs = zip(*pairs) if pairs else ([], []) fig, ax = plt.subplots(figsize=(9, 5)) ax.plot(counts, freqs, ".", markersize=2, linewidth=0.6, color="steelblue") ax.set_xscale("log") ax.set_yscale("log") ax.set_xlabel("Repetition degree (count)", fontsize=12) ax.set_ylabel("Number of distinct k-mers", fontsize=12) ax.set_title("K-mer frequency spectrum", fontsize=13) info = [] if f0 is not None: info.append(f"F₀ = {f0:,}") if f1 is not None: info.append(f"F₁ = {f1:,}") if info: ax.text(0.98, 0.97, " ".join(info), transform=ax.transAxes, ha="right", va="top", fontsize=9, color="gray") ax.grid(True, which="both", linestyle="--", linewidth=0.4, alpha=0.6) fig.tight_layout() if args.output: fig.savefig(args.output, dpi=150) print(f"Saved to {args.output}", file=sys.stderr) else: plt.show() if __name__ == "__main__": main()