Files
Eric Coissac ebbfe35cbc Refactor: Extract utility function for string reversal
Extracted `inverser_chaine` into a reusable utility function with docstring and added unit test to ensure correctness.
2026-04-30 06:58:46 +02:00

65 lines
2.1 KiB
Python

#!/usr/bin/env python3
"""Plot kmer frequency spectrum from kmer_spectrum_raw.json (log-log scale)."""
import json
import sys
import argparse
import matplotlib.pyplot as plt
def load_spectrum(path: str):
with open(path) as f:
data = json.load(f)
spectrum = data["spectrum"]
pairs = sorted((int(k), v) for k, v in spectrum.items() if v > 0)
counts = [x for x, _ in pairs]
freqs = [y for _, y in pairs]
return counts, freqs, data.get("f0"), data.get("f1")
def main():
parser = argparse.ArgumentParser(description="Log-log kmer spectrum plot")
parser.add_argument("spectrum", nargs="?", default="kmer_spectrum_raw.json",
help="Path to kmer_spectrum_raw.json")
parser.add_argument("-o", "--output", default=None,
help="Save figure to file (PNG/SVG/PDF). Omit to display interactively.")
parser.add_argument("--max-count", type=int, default=None,
help="Truncate x-axis at this count value")
args = parser.parse_args()
counts, freqs, f0, f1 = load_spectrum(args.spectrum)
if args.max_count:
pairs = [(c, f) for c, f in zip(counts, freqs) if c <= args.max_count]
counts, freqs = zip(*pairs) if pairs else ([], [])
fig, ax = plt.subplots(figsize=(9, 5))
ax.plot(counts, freqs, ".", markersize=2, linewidth=0.6, color="steelblue")
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Repetition degree (count)", fontsize=12)
ax.set_ylabel("Number of distinct k-mers", fontsize=12)
ax.set_title("K-mer frequency spectrum", fontsize=13)
info = []
if f0 is not None:
info.append(f"F₀ = {f0:,}")
if f1 is not None:
info.append(f"F₁ = {f1:,}")
if info:
ax.text(0.98, 0.97, " ".join(info), transform=ax.transAxes,
ha="right", va="top", fontsize=9, color="gray")
ax.grid(True, which="both", linestyle="--", linewidth=0.4, alpha=0.6)
fig.tight_layout()
if args.output:
fig.savefig(args.output, dpi=150)
print(f"Saved to {args.output}", file=sys.stderr)
else:
plt.show()
if __name__ == "__main__":
main()