Bump version to 4.4.16

Update version from 4.4.15 to 4.4.16 in version.go and version.txt files.
Fix GenBank parsing and add release notes script
2026-03-26 14:00:51 +00:00 · 2026-02-20 11:40:40 +01:00 · 2026-02-20 11:37:51 +01:00 · 2026-02-11 06:34:05 +01:00 · 2026-02-11 06:31:11 +01:00 · 2026-02-11 06:31:10 +01:00
6 changed files with 215 additions and 34 deletions
--- a/obitests/obitools/obisuperkmer/test.sh
+++ b/obitests/obitools/obisuperkmer/test.sh
@@ -4,8 +4,8 @@
 # Here give the name of the test serie
 #
-TEST_NAME=obisuperkmer
+TEST_NAME=obik-super
-CMD=obisuperkmer
+CMD=obik
 ######
 #
@@ -16,7 +16,7 @@ TEST_DIR="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
 OBITOOLS_DIR="${TEST_DIR/obitest*/}build"
 export PATH="${OBITOOLS_DIR}:${PATH}"
-MCMD="$(echo "${CMD:0:4}" | tr '[:lower:]' '[:upper:]')$(echo "${CMD:4}" | tr '[:upper:]' '[:lower:]')"
+MCMD="OBIk-super"
 TMPDIR="$(mktemp -d)"
 ntest=0
@@ -65,31 +65,10 @@ log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
 ####
 #### Below are the tests
 ####
-#### Before each test :
-####  - increment the variable ntest
-####
-#### Run the command as the condition of an if / then /else
-####  - The command must return 0 on success
-####  - The command must return an exit code different from 0 on failure
-####  - The datafiles are stored in the same directory than the test script
-####  - The test script directory is stored in the TEST_DIR variable
-####  - If result files have to be produced they must be stored
-####    in the temporary directory (TMPDIR variable)
-####
-#### then clause is executed on success of the command
-####  - Write a success message using the log function
-####  - increment the variable success
-####
-#### else clause is executed on failure of the command
-####  - Write a failure message using the log function
-####  - increment the variable failed
-####
 ######################################################################
 ((ntest++))
-if $CMD -h > "${TMPDIR}/help.txt" 2>&1
+if $CMD super -h > "${TMPDIR}/help.txt" 2>&1
 then
    log "$MCMD: printing help OK"
    ((success++))
@@ -100,7 +79,7 @@ fi
 # Test 1: Basic super k-mer extraction with default parameters
 ((ntest++))
-if obisuperkmer "${TEST_DIR}/test_sequences.fasta" \
+if $CMD super "${TEST_DIR}/test_sequences.fasta" \
    > "${TMPDIR}/output_default.fasta" 2>&1
 then
    log "$MCMD: basic extraction with default parameters OK"
@@ -148,7 +127,7 @@ fi
 # Test 5: Extract super k-mers with custom k and m parameters
 ((ntest++))
-if obisuperkmer -k 15 -m 7 "${TEST_DIR}/test_sequences.fasta" \
+if $CMD super -k 15 -m 7 "${TEST_DIR}/test_sequences.fasta" \
    > "${TMPDIR}/output_k15_m7.fasta" 2>&1
 then
    log "$MCMD: extraction with custom k=15, m=7 OK"
@@ -172,7 +151,7 @@ fi
 # Test 7: Test with different output format (FASTA output explicitly)
 ((ntest++))
-if obisuperkmer --fasta-output -k 21 -m 11 \
+if $CMD super --fasta-output -k 21 -m 11 \
    "${TEST_DIR}/test_sequences.fasta" \
    > "${TMPDIR}/output_fasta.fasta" 2>&1
 then
@@ -209,7 +188,7 @@ fi
 # Test 10: Test with output file option
 ((ntest++))
-if obisuperkmer -o "${TMPDIR}/output_file.fasta" \
+if $CMD super -o "${TMPDIR}/output_file.fasta" \
    "${TEST_DIR}/test_sequences.fasta" 2>&1
 then
    log "$MCMD: output to file with -o option OK"
--- a/pkg/obiformats/genbank_read.go
+++ b/pkg/obiformats/genbank_read.go
@@ -162,9 +162,10 @@ func GenbankChunkParser(withFeatureTable, UtoT bool) func(string, io.Reader) (ob
 					// log.Debugf("Chunk %d : Genbank: line %d, state = %d : %s", chunks.order, nl, state, line)
 					sl++
-					parts := strings.SplitN(line[10:], " ", 6)
+					cleanline := strings.TrimSpace(line)
+					parts := strings.SplitN(cleanline, " ", 7)
 					lparts := len(parts)
-					for i := 0; i < lparts; i++ {
+					for i := 1; i < lparts; i++ {
 						if UtoT {
 							parts[i] = strings.ReplaceAll(parts[i], "u", "t")
 						}
--- a/pkg/obioptions/version.go
+++ b/pkg/obioptions/version.go
@@ -3,7 +3,7 @@ package obioptions
 // Version is automatically updated by the Makefile from version.txt
 // The patch number (third digit) is incremented on each push to the repository
-var _Version = "Release 4.4.12"
+var _Version = "Release 4.4.16"
 // Version returns the version of the obitools package.
 //
--- a/pkg/obitools/obiconvert/sequence_reader.go
+++ b/pkg/obitools/obiconvert/sequence_reader.go
@@ -68,6 +68,8 @@ func ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
 						strings.HasSuffix(path, "seq.gz") ||
 						strings.HasSuffix(path, "gb") ||
 						strings.HasSuffix(path, "gb.gz") ||
+						strings.HasSuffix(path, "gbff") ||
+						strings.HasSuffix(path, "gbff.gz") ||
 						strings.HasSuffix(path, "dat") ||
 						strings.HasSuffix(path, "dat.gz") ||
 						strings.HasSuffix(path, "ecopcr") ||
@@ -204,7 +206,7 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
 					iterator = iterator.PairTo(ip)
 				}
 			} else {
-				iterator = obiiter.NilIBioSequence
+				return obiiter.NilIBioSequence, fmt.Errorf("no sequence files found in the provided paths")
 			}
 		}
--- a/release_notes.sh
+++ b/release_notes.sh
@@ -0,0 +1,199 @@
 #!/bin/bash
 # Generate GitHub-compatible release notes for an OBITools4 version.
 #
 # Usage:
 #   ./release_notes.sh                 # latest version
 #   ./release_notes.sh -v 4.4.15       # specific version
 #   ./release_notes.sh -l              # list available versions
 #   ./release_notes.sh -r              # raw commit list (no LLM)
 #   ./release_notes.sh -c -v 4.4.16   # show LLM context for a version
 GITHUB_REPO="metabarcoding/obitools4"
 GITHUB_API="https://api.github.com/repos/${GITHUB_REPO}"
 VERSION=""
 LIST_VERSIONS=false
 RAW_MODE=false
 CONTEXT_MODE=false
 LLM_MODEL="ollama:qwen3-coder-next:latest"
 # ── Helpers ──────────────────────────────────────────────────────────────
 die() { echo "Error: $*" >&2; exit 1; }
 display_help() {
  cat <<EOF
 Usage: $(basename "$0") [OPTIONS]
 Generate GitHub-compatible Markdown release notes for an OBITools4 version.
 Options:
  -v, --version VERSION   Target version (e.g., 4.4.15). Default: latest.
  -l, --list              List all available versions and exit.
  -r, --raw               Output raw commit list without LLM summarization.
  -c, --context           Show the exact context (commits + prompt) sent to the LLM.
  -m, --model MODEL       LLM model for orla (default: $LLM_MODEL).
  -h, --help              Display this help message.
 Examples:
  $(basename "$0")                  # release notes for the latest version
  $(basename "$0") -v 4.4.15       # release notes for a specific version
  $(basename "$0") -l              # list versions
  $(basename "$0") -r -v 4.4.15    # raw commit log for a version
  $(basename "$0") -c -v 4.4.16    # show LLM context for a version
 EOF
 }
 # Fetch all Release tags from GitHub API (sorted newest first)
 fetch_versions() {
  curl -sf "${GITHUB_API}/releases" \
    | grep '"tag_name":' \
    | sed -E 's/.*"tag_name": "Release_([0-9.]+)".*/\1/' \
    | sort -V -r
 }
 # ── Parse arguments ──────────────────────────────────────────────────────
 while [ "$#" -gt 0 ]; do
  case "$1" in
    -v|--version)  VERSION="$2"; shift 2 ;;
    -l|--list)     LIST_VERSIONS=true; shift ;;
    -r|--raw)      RAW_MODE=true; shift ;;
    -c|--context)  CONTEXT_MODE=true; shift ;;
    -m|--model)    LLM_MODEL="$2"; shift 2 ;;
    -h|--help)     display_help; exit 0 ;;
    *)             die "Unsupported option: $1" ;;
  esac
 done
 # ── List mode ────────────────────────────────────────────────────────────
 if [ "$LIST_VERSIONS" = true ]; then
  echo "Available OBITools4 versions:" >&2
  echo "==============================" >&2
  fetch_versions
  exit 0
 fi
 # ── Resolve versions ─────────────────────────────────────────────────────
 all_versions=$(fetch_versions)
 [ -z "$all_versions" ] && die "Could not fetch versions from GitHub"
 if [ -z "$VERSION" ]; then
  VERSION=$(echo "$all_versions" | head -1)
  echo "Using latest version: $VERSION" >&2
 fi
 tag_name="Release_${VERSION}"
 # Verify the requested version exists
 if ! echo "$all_versions" | grep -qx "$VERSION"; then
  die "Version $VERSION not found. Use -l to list available versions."
 fi
 # Find the previous version (the one right after in the sorted-descending list)
 previous_version=$(echo "$all_versions" | grep -A1 -x "$VERSION" | tail -1)
 if [ "$previous_version" = "$VERSION" ] || [ -z "$previous_version" ]; then
  previous_tag=""
  echo "No previous version found -- will include all commits for $tag_name" >&2
 else
  previous_tag="Release_${previous_version}"
  echo "Generating notes: $previous_tag -> $tag_name" >&2
 fi
 # ── Fetch commit messages between tags via GitHub compare API ────────────
 if [ -n "$previous_tag" ]; then
  commits_json=$(curl -sf "${GITHUB_API}/compare/${previous_tag}...${tag_name}")
  if [ -z "$commits_json" ]; then
    die "Could not fetch commit comparison from GitHub"
  fi
  commit_list=$(echo "$commits_json" \
    | jq -r '.commits[] | (.sha[:8] + " " + (.commit.message | split("\n")[0]))' 2>/dev/null)
 else
  # First release: get commits up to this tag
  commits_json=$(curl -sf "${GITHUB_API}/commits?sha=${tag_name}&per_page=50")
  if [ -z "$commits_json" ]; then
    die "Could not fetch commits from GitHub"
  fi
  commit_list=$(echo "$commits_json" \
    | jq -r '.[] | (.sha[:8] + " " + (.commit.message | split("\n")[0]))' 2>/dev/null)
 fi
 if [ -z "$commit_list" ]; then
  die "No commits found between $previous_tag and $tag_name"
 fi
 # ── LLM prompt (shared by context mode and summarization) ────────────────
 LLM_PROMPT="Summarize the following commits into a GitHub release note for version ${VERSION}. \
 Ignore commits related to version bumps, .gitignore changes, or any internal housekeeping \
 that is irrelevant to end users. Describe each user-facing change precisely without exposing \
 code. Eliminate redundancy. Output strictly valid JSON with no surrounding text, using this \
 exact schema: {\"title\": \"<short release title>\", \"body\": \"<detailed markdown release notes>\"}"
 # ── Raw mode: just output the commit list ────────────────────────────────
 if [ "$RAW_MODE" = true ]; then
  echo "# Release ${VERSION}"
  echo ""
  echo "## Commits"
  echo ""
  echo "$commit_list" | while IFS= read -r line; do
    echo "- ${line}"
  done
  exit 0
 fi
 # ── Context mode: show what would be sent to the LLM ────────────────────
 if [ "$CONTEXT_MODE" = true ]; then
  echo "=== LLM Model ==="
  echo "$LLM_MODEL"
  echo ""
  echo "=== Prompt ==="
  echo "$LLM_PROMPT"
  echo ""
  echo "=== Stdin (commit list) ==="
  echo "$commit_list"
  exit 0
 fi
 # ── LLM summarization ───────────────────────────────────────────────────
 if ! command -v orla >/dev/null 2>&1; then
  die "orla is required for LLM summarization. Use -r for raw output."
 fi
 if ! command -v jq >/dev/null 2>&1; then
  die "jq is required for JSON parsing. Use -r for raw output."
 fi
 echo "Summarizing with LLM ($LLM_MODEL)..." >&2
 raw_output=$(echo "$commit_list" | \
  ORLA_MAX_TOOL_CALLS=50 orla agent -m "$LLM_MODEL" \
  "$LLM_PROMPT" \
  2>/dev/null) || true
 if [ -z "$raw_output" ]; then
  echo "Warning: LLM returned empty output, falling back to raw mode" >&2
  exec "$0" -r -v "$VERSION"
 fi
 # Sanitize: extract JSON object, strip control characters
 sanitized=$(echo "$raw_output" | sed -n '/^{/,/^}/p' | tr -d '\000-\011\013-\014\016-\037')
 release_title=$(echo "$sanitized" | jq -r '.title // empty' 2>/dev/null)
 release_body=$(echo "$sanitized" | jq -r '.body // empty' 2>/dev/null)
 if [ -n "$release_title" ] && [ -n "$release_body" ]; then
  echo "# ${release_title}"
  echo ""
  echo "$release_body"
 else
  echo "Warning: JSON parsing failed, falling back to raw mode" >&2
  exec "$0" -r -v "$VERSION"
 fi
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-4.4.12
+4.4.16
Author	SHA1	Message	Date
Eric Coissac	b05404721e	Bump version to 4.4.16 Update version from 4.4.15 to 4.4.16 in version.go and version.txt files.	2026-02-20 11:40:40 +01:00
Eric Coissac	c57e788459	Fix GenBank parsing and add release notes script This commit fixes an issue in the GenBank parser where empty parts were being included in the parsed data. It also introduces a new script `release_notes.sh` to automate the generation of GitHub-compatible release notes for OBITools4 versions, including support for LLM summarization and various output modes.	2026-02-20 11:37:51 +01:00
coissac	1cecf23978	Merge pull request #86 from metabarcoding/push-oulwykrpwxuz Push oulwykrpwxuz	2026-02-11 06:34:05 +01:00
Eric Coissac	4c824ef9b7	Bump version to 4.4.15 Update version from 4.4.14 to 4.4.15 in version.txt and pkg/obioptions/version.go	2026-02-11 06:31:11 +01:00
Eric Coissac	1ce5da9bee	Support new sequence file formats and improve error handling Add support for .gbff and .gbff.gz file extensions in sequence reader. Update the logic to return an error instead of using NilIBioSequence when no sequence files are found, improving the error handling and user feedback.	2026-02-11 06:31:10 +01:00
coissac	dc23d9de9a	Merge pull request #85 from metabarcoding/push-smturnsrozkp Push smturnsrozkp	2026-02-10 22:19:22 +01:00
Eric Coissac	aa9d7bbf72	Bump version to 4.4.14 Update version number from 4.4.13 to 4.4.14 in both version.go and version.txt files.	2026-02-10 22:17:23 +01:00
Eric Coissac	db22d20d0a	Rename obisuperkmer test script to obik-super and update command references Update test script name from obisuperkmer to obik-super and adjust all command references accordingly. - Changed TEST_NAME from 'obisuperkmer' to 'obik-super' - Changed CMD from 'obisuperkmer' to 'obik' - Updated MCMD to 'OBIk-super' - Modified command calls to use '$CMD super' instead of direct command names - Updated help test to use '$CMD super -h' - Updated all test cases to use the new command format	2026-02-10 22:17:22 +01:00
coissac	7c05bdb01c	Merge pull request #84 from metabarcoding/push-uxvowwlxkrlq Push uxvowwlxkrlq	2026-02-10 22:12:18 +01:00
Eric Coissac	b6542c4523	Bump version to 4.4.13 Update version from 4.4.12 to 4.4.13 in version.txt and pkg/obioptions/version.go	2026-02-10 22:10:38 +01:00