#!/usr/bin/env python3
"""Generate lightweight HTML galleries for PDF files within PAGES_DIR.

Every directory below ``PAGES_DIR`` that contains at least one PDF gets a
``documents.html`` page written next to the PDFs. The page lists each PDF
with an embedded first-page preview and a link to the file.
"""

from __future__ import annotations

import html
import os
import re
from pathlib import Path
from typing import Iterable, List, Tuple
from urllib.parse import quote

SCRIPT_DIR = Path(__file__).resolve().parent
PAGES_DIR = (SCRIPT_DIR / ".." / "jupyterhub_volumes" / "web" / "pages").resolve()

# Name of the gallery page written into each directory that holds PDFs.
# Shared by the writer and the user-facing messages so they cannot drift.
OUTPUT_FILENAME = "documents.html"

# Leading digits plus any separators, e.g. "01_", "2 - ", "003".
_NUMERIC_PREFIX_RE = re.compile(r"^\d+[_\-\s]*")
_WHITESPACE_RE = re.compile(r"\s+")

# NOTE(review): the markup of both templates is a minimal reconstruction.
# Only the placeholders and the fallback text were available when this was
# written, not the original tags or styling -- confirm against the intended
# design. The important property is that every field passed by
# build_gallery_html() is actually referenced: str.format() silently ignores
# unused keyword arguments, so a missing placeholder drops content without
# raising any error.
GALLERY_TEMPLATE = """<!DOCTYPE html>
<html lang="fr">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{title}</title>
</head>
<body>
<h1>{title}</h1>
<main class="gallery">
{cards}
</main>
</body>
</html>
"""

ITEM_TEMPLATE = """<figure class="card">
<object class="card-preview" data="{object_src}" type="application/pdf" aria-label="{label}">
<p>Prévisualisation indisponible</p>
</object>
<figcaption><a href="{link_href}">{label}</a></figcaption>
</figure>"""


def clean_label(entry_name: str, *, strip_extension: bool = True) -> str:
    """Turn a file or directory name into a readable label.

    A leading numeric prefix (``01_``, ``2-``, ...) is removed, underscores
    and hyphens become spaces, whitespace runs are collapsed and the result
    is capitalized.

    Args:
        entry_name: File or directory name to clean.
        strip_extension: When true (the default), the final suffix is dropped
            first, which is what file names need. Pass ``False`` for
            directory names so that a dot in the name (``v1.2``) is not
            mistaken for a file extension.

    Returns:
        The cleaned label, or the unmodified name/stem when cleaning would
        leave nothing (for example a purely numeric name such as ``123``).
    """
    entry = Path(entry_name)
    raw = entry.stem if strip_extension else entry.name
    base = _NUMERIC_PREFIX_RE.sub("", raw)
    base = base.replace("_", " ").replace("-", " ")
    base = _WHITESPACE_RE.sub(" ", base).strip()
    return base.capitalize() if base else raw


def should_skip_dir(dirname: str) -> bool:
    """Return True if the directory must be ignored while walking.

    Skipped: names starting with ``.``, names ending in ``libs`` and
    ``__pycache__``.
    """
    return dirname.startswith(".") or dirname.endswith("libs") or dirname == "__pycache__"


def iter_pdf_directories(root: Path) -> Iterable[Tuple[Path, List[str]]]:
    """Yield ``(directory, pdf_names)`` for directories under *root* with PDFs.

    Directories rejected by :func:`should_skip_dir` are pruned, so nothing
    below them is visited. Sub-directories and PDF names are both sorted so
    the traversal order does not depend on the filesystem.
    """
    for current_dir, dirnames, filenames in os.walk(root):
        # In-place assignment is required: os.walk only honours pruning (and
        # ordering) when the very list it handed out is modified.
        dirnames[:] = sorted(name for name in dirnames if not should_skip_dir(name))
        pdfs = sorted(name for name in filenames if name.lower().endswith(".pdf"))
        if pdfs:
            yield Path(current_dir), pdfs


def directory_label(directory: Path) -> str:
    """Return a human-readable label for *directory*.

    The label is built from the path relative to ``PAGES_DIR`` (or from the
    path itself when it is not located under ``PAGES_DIR``), with each
    component cleaned and joined by `` / ``. ``PAGES_DIR`` itself is labelled
    ``"Documents"``.
    """
    try:
        relative = directory.relative_to(PAGES_DIR)
    except ValueError:
        relative = directory
    if not relative.parts:
        return "Documents"
    # Directory names have no extension: keep everything after a dot.
    return " / ".join(
        clean_label(part, strip_extension=False) for part in relative.parts
    )


def build_gallery_html(directory: Path, pdf_files: List[str]) -> str:
    """Create the HTML page listing all PDFs of *directory*.

    Args:
        directory: Directory holding the PDFs; used for the page title.
        pdf_files: PDF file names, relative to *directory*.

    Returns:
        The complete HTML document as a string.
    """
    cards = []
    for filename in pdf_files:
        label = html.escape(clean_label(filename))
        # Percent-encode first (URL layer), then HTML-escape (attribute layer).
        href = quote(filename)
        object_src = f"{href}#page=1&view=Fit"
        cards.append(
            ITEM_TEMPLATE.format(
                link_href=html.escape(href, quote=True),
                object_src=html.escape(object_src, quote=True),
                label=label,
            )
        )
    title = html.escape(directory_label(directory))
    return GALLERY_TEMPLATE.format(title=title, cards="\n".join(cards))


def main() -> None:
    """Write one gallery page per PDF directory found under ``PAGES_DIR``.

    Raises:
        SystemExit: If ``PAGES_DIR`` is not an existing directory.
    """
    # is_dir() rather than exists(): a regular file at that path would make
    # the walk silently yield nothing.
    if not PAGES_DIR.is_dir():
        raise SystemExit(f"❌ PAGES_DIR inexistant: {PAGES_DIR}")

    generated = 0
    for directory, pdf_files in iter_pdf_directories(PAGES_DIR):
        html_content = build_gallery_html(directory, pdf_files)
        output_path = directory / OUTPUT_FILENAME
        output_path.write_text(html_content, encoding="utf-8")
        rel_out = output_path.relative_to(PAGES_DIR)
        print(f"✅ {rel_out} ({len(pdf_files)} PDF)")
        generated += 1

    if not generated:
        print(f"ℹ️ Aucun PDF trouvé, aucun {OUTPUT_FILENAME} généré.")


if __name__ == "__main__":
    main()