From 9484857d9a54b5802b009996929787124dd57796 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 30 Apr 2026 18:24:35 +0200 Subject: [PATCH] :wrench: Add selective image rebuild flags and enhance R dependency scanning - Added --rebuild-builder, --rebuild-student and --rebuild-hub flags to start-jupyterhub.sh for granular Docker rebuilds - Updated check_if_image_needs_rebuild to accept per-image force flag and propagate no-cache option - Added libuv1-dev dependency in Dockerfile.builder (likely for quarto or R runtime) - Rewrote install_quarto_deps.R to: a) Manually parse library()/require() and remotes::install_git/github calls b) Distinguish between quarto-required packages (must reside in persistent target_lib), CRAN packages, and git/github dependencies c) Install with robust error handling, skipping unavailable packages - Removed dependency on attachment package for scanning --- obijupyterhub/Dockerfile.builder | 1 + start-jupyterhub.sh | 41 ++++++---- tools/install_quarto_deps.R | 135 +++++++++++++++++++++++++------ 3 files changed, 137 insertions(+), 40 deletions(-) diff --git a/obijupyterhub/Dockerfile.builder b/obijupyterhub/Dockerfile.builder index 8daf2da..0d820df 100644 --- a/obijupyterhub/Dockerfile.builder +++ b/obijupyterhub/Dockerfile.builder @@ -32,6 +32,7 @@ RUN apt-get update \ libpng-dev \ libtiff5-dev \ libjpeg-dev \ + libuv1-dev \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/start-jupyterhub.sh b/start-jupyterhub.sh index 52f2f05..4b53dee 100755 --- a/start-jupyterhub.sh +++ b/start-jupyterhub.sh @@ -20,6 +20,9 @@ FORCE_REBUILD=false STOP_SERVER=false UPDATE_LECTURES=false BUILD_OBIDOC=false +REBUILD_BUILDER=false +REBUILD_STUDENT=false +REBUILD_HUB=false usage() { cat < 0) args[1] else "."
-# Target library for installing packages (the mounted volume) target_lib <- "/usr/local/lib/R/site-library" cat("Scanning Quarto files in:", quarto_dir, "\n") cat("Target library:", target_lib, "\n") -# Find all .qmd files qmd_files <- list.files( path = quarto_dir, pattern = "\\.qmd$", @@ -26,34 +24,119 @@ if (length(qmd_files) == 0) { cat("Found", length(qmd_files), "Quarto files\n") -# Extract dependencies using attachment -deps <- attachment::att_from_rmds(qmd_files, inline = TRUE) - -if (length(deps) == 0) { - cat("No R package dependencies detected.\n") - quit(status = 0) +# Extract package names from library()/require() calls +extract_cran_packages <- function(files) { + pattern <- "(?:library|require)\\s*\\(\\s*['\"]?([A-Za-z0-9._]+)['\"]?" + pkgs <- character(0) + for (f in files) { + lines <- tryCatch(readLines(f, warn = FALSE), error = function(e) character(0)) + m <- regmatches(lines, gregexpr(pattern, lines, perl = TRUE)) + hits <- unlist(m) + if (length(hits) > 0) { + extracted <- sub( + "(?:library|require)\\s*\\(\\s*['\"]?([A-Za-z0-9._]+)['\"]?.*", + "\\1", hits, perl = TRUE + ) + pkgs <- c(pkgs, extracted) + } + } + unique(pkgs) } -cat("\nDetected R packages:\n") -cat(paste(" -", deps, collapse = "\n"), "\n\n") +# Extract git/github URLs from remotes::install_git/github() calls +extract_git_packages <- function(files) { + # Matches remotes::install_git('url') or remotes::install_github('user/repo') + pattern <- "remotes::install_(git|github)\\s*\\(\\s*['\"]([^'\"]+)['\"]" + result <- list() + for (f in files) { + lines <- tryCatch(readLines(f, warn = FALSE), error = function(e) character(0)) + text <- paste(lines, collapse = "\n") + m <- gregexpr(pattern, text, perl = TRUE) + hits <- regmatches(text, m)[[1]] + for (hit in hits) { + type <- sub("remotes::install_(git|github).*", "\\1", hit, perl = TRUE) + url <- sub("remotes::install_(?:git|github)\\s*\\(\\s*['\"]([^'\"]+)['\"].*", + "\\1", hit, perl = TRUE) + result[[length(result) + 1]] <- 
list(type = type, url = url) + } + } + result +} + +cran_deps <- extract_cran_packages(qmd_files) +git_deps <- extract_git_packages(qmd_files) + +# Quarto's implicit runtime dependencies — must be in target_lib (the persistent +# volume), not just somewhere in libPaths, because Quarto spawns its own R session. +quarto_required <- c("rmarkdown", "knitr") +if (length(git_deps) > 0) quarto_required <- c(quarto_required, "remotes") + +cat("\nDetected CRAN packages:\n") +cat(paste(" -", unique(c(quarto_required, cran_deps)), collapse = "\n"), "\n") + +if (length(git_deps) > 0) { + cat("\nDetected git/github packages:\n") + for (d in git_deps) cat(" -", d$type, ":", d$url, "\n") +} +cat("\n") + +# --- Install CRAN packages --- -# Filter out base R packages that are always available base_pkgs <- rownames(installed.packages(priority = "base")) -deps <- setdiff(deps, base_pkgs) -# Check which packages are not installed -installed <- rownames(installed.packages()) -to_install <- setdiff(deps, installed) +# quarto_required: check only in target_lib so they are guaranteed to be there +installed_in_target <- rownames(installed.packages(lib.loc = target_lib)) +quarto_missing <- setdiff(quarto_required, c(base_pkgs, installed_in_target)) + +# other deps: check anywhere in libPaths (they just need to be loadable) +cran_deps <- setdiff(cran_deps, c(base_pkgs, quarto_required)) +installed <- rownames(installed.packages()) +to_install <- unique(c(quarto_missing, setdiff(cran_deps, installed))) if (length(to_install) == 0) { - cat("All required packages are already installed.\n") + cat("All CRAN packages already installed.\n") } else { - cat("Installing missing packages:", paste(to_install, collapse = ", "), "\n\n") - install.packages( - to_install, - lib = target_lib, - repos = "https://cloud.r-project.org/", - dependencies = TRUE - ) - cat("\nPackage installation complete.\n") + cat("Installing CRAN packages:", paste(to_install, collapse = ", "), "\n\n") + failed <- character(0) + 
for (pkg in to_install) { + result <- tryCatch({ + withCallingHandlers( + install.packages(pkg, lib = target_lib, repos = "https://cloud.r-project.org/", + dependencies = TRUE, quiet = FALSE), + warning = function(w) { + if (grepl("not available", conditionMessage(w))) invokeRestart("muffleWarning") + } + ) + if (!requireNamespace(pkg, quietly = TRUE)) "unavailable" else "ok" + }, error = function(e) "error") + + if (result %in% c("unavailable", "error")) { + cat(" [SKIP]", pkg, "- not available on CRAN\n") + failed <- c(failed, pkg) + } else { + cat(" [OK]", pkg, "\n") + } + } + if (length(failed) > 0) + cat("\nNot installed (not on CRAN):", paste(failed, collapse = ", "), "\n") } + +# --- Install git/github packages --- + +if (length(git_deps) > 0) { + cat("\nInstalling git/github packages...\n") + for (d in git_deps) { + tryCatch({ + if (d$type == "git") { + remotes::install_git(d$url, lib = target_lib, upgrade = "never") + } else { + remotes::install_github(d$url, lib = target_lib, upgrade = "never") + } + cat(" [OK]", d$url, "\n") + }, error = function(e) { + cat(" [FAIL]", d$url, "-", conditionMessage(e), "\n") + }) + } +} + +cat("\nDependency installation complete.\n")