ebbfe35cbc
Extracted `inverser_chaine` into a reusable utility function with docstring and added unit test to ensure correctness.
65 lines
2.1 KiB
Python
65 lines
2.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Plot kmer frequency spectrum from kmer_spectrum_raw.json (log-log scale)."""
|
|
|
|
import json
|
|
import sys
|
|
import argparse
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
def load_spectrum(path: str):
|
|
with open(path) as f:
|
|
data = json.load(f)
|
|
spectrum = data["spectrum"]
|
|
pairs = sorted((int(k), v) for k, v in spectrum.items() if v > 0)
|
|
counts = [x for x, _ in pairs]
|
|
freqs = [y for _, y in pairs]
|
|
return counts, freqs, data.get("f0"), data.get("f1")
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Log-log kmer spectrum plot")
|
|
parser.add_argument("spectrum", nargs="?", default="kmer_spectrum_raw.json",
|
|
help="Path to kmer_spectrum_raw.json")
|
|
parser.add_argument("-o", "--output", default=None,
|
|
help="Save figure to file (PNG/SVG/PDF). Omit to display interactively.")
|
|
parser.add_argument("--max-count", type=int, default=None,
|
|
help="Truncate x-axis at this count value")
|
|
args = parser.parse_args()
|
|
|
|
counts, freqs, f0, f1 = load_spectrum(args.spectrum)
|
|
|
|
if args.max_count:
|
|
pairs = [(c, f) for c, f in zip(counts, freqs) if c <= args.max_count]
|
|
counts, freqs = zip(*pairs) if pairs else ([], [])
|
|
|
|
fig, ax = plt.subplots(figsize=(9, 5))
|
|
ax.plot(counts, freqs, ".", markersize=2, linewidth=0.6, color="steelblue")
|
|
ax.set_xscale("log")
|
|
ax.set_yscale("log")
|
|
ax.set_xlabel("Repetition degree (count)", fontsize=12)
|
|
ax.set_ylabel("Number of distinct k-mers", fontsize=12)
|
|
ax.set_title("K-mer frequency spectrum", fontsize=13)
|
|
|
|
info = []
|
|
if f0 is not None:
|
|
info.append(f"F₀ = {f0:,}")
|
|
if f1 is not None:
|
|
info.append(f"F₁ = {f1:,}")
|
|
if info:
|
|
ax.text(0.98, 0.97, " ".join(info), transform=ax.transAxes,
|
|
ha="right", va="top", fontsize=9, color="gray")
|
|
|
|
ax.grid(True, which="both", linestyle="--", linewidth=0.4, alpha=0.6)
|
|
fig.tight_layout()
|
|
|
|
if args.output:
|
|
fig.savefig(args.output, dpi=150)
|
|
print(f"Saved to {args.output}", file=sys.stderr)
|
|
else:
|
|
plt.show()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|