diff --git a/scripts/jj_commit_msg.sh b/scripts/jj_commit_msg.sh index 5ef8a51..207e3cc 100755 --- a/scripts/jj_commit_msg.sh +++ b/scripts/jj_commit_msg.sh @@ -5,7 +5,7 @@ # Summarises each changed file's diff individually, then combines all # summaries into a single commit message via aichat. # REV defaults to `@` (current working copy). Accepts any jj revision: -# `@-`, `lk`, a commit ID, a branch name, etc. +# `@-`, `lk`, a commit ID, a branch name, etc. # # Typical use: # jj describe -m "$(jj_commit_msg.sh)" @@ -18,9 +18,24 @@ set -euo pipefail REV="${1:-@}" # Log to stderr so progress doesn't pollute the commit message on stdout -log() { printf '\033[1;34m==>\033[0m %s\n' "$*" >&2; } -info() { printf ' \033[0;37m%s\033[0m\n' "$*" >&2; } -ok() { printf ' \033[0;32m✓\033[0m %s\n' "$*" >&2; } +log() { printf '\033[1;34m==>\033[0m %s\n' "$*" >&2; } +info() { printf ' \033[0;37m%s\033[0m\n' "$*" >&2; } +ok() { printf ' \033[0;32m✓\033[0m %s\n' "$*" >&2; } + +# _strip_think — remove reasoning tags from stdin +# Buffer all input, locate the LAST line, emit only what follows. +# This handles think blocks that themselves contain fragments. +_strip_think() { + awk '{ + lines[NR] = $0 + print $0 > "/dev/stderr" + if (/^<\/think>/) last_end = NR + } + END { + start = (last_end ? last_end + 1 : 1) + for (i = start; i <= NR; i++) print lines[i] + }' +} # _readable_diff # Returns a human-readable diff for . @@ -31,9 +46,9 @@ _readable_diff() { local file="$1" local raw_diff raw_diff=$(jj diff -r "$REV" -- "$file") - [[ -z "$raw_diff" ]] && return 0 + [[ -z "$raw_diff" ]] && return 0 - # Detect pathological diff: any +/- content line longer than 500 chars + # Detect pathological diff: any +/- content line longer than 500 chars local max_len max_len=$(grep '^[+-]' <<< "$raw_diff" | awk '{ if (length > m) m = length } END { print m+0 }') @@ -42,40 +57,40 @@ _readable_diff() { return fi - # Pretty-print strategy per extension + # Pretty-print strategy per extension local ext="${file##*.}" local pretty_old pretty_new case "$ext" in json) pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true) pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true) - ;; + ;; js|mjs|cjs|css|ts) local node_fmt=' const chunks = []; process.stdin.on("data", d => chunks.push(d)); process.stdin.on("end", () => { const src = chunks.join(""); - // Insert newline before { } ( ) ; and after , + // Insert newline before { } ( ) ; and after , const out = src - .replace(/([{(])/g, "$1\n ") - .replace(/([;}])/g, "\n$1\n") - .replace(/,\s*/g, ",\n "); + .replace(/([{(])/g, "$1\n ") + .replace(/([;}])/g, "\n$1\n") + .replace(/,\s*/g, ",\n "); process.stdout.write(out); - });' + });' pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true) pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true) - ;; - *) - # Generic fallback: fold long lines at 120 chars + ;; + *) + # Generic fallback: fold long lines at 120 chars pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | fold -s -w 120 || true) pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | fold -s -w 120 || true) - ;; + ;; esac if [[ -n "$pretty_old" && -n "$pretty_new" ]]; then diff <(printf '%s\n' "$pretty_old") <(printf '%s\n' "$pretty_new") \ - --label "a/${file}" --label "b/${file}" -u || true + --label "a/${file}" --label "b/${file}" -u || true else printf '%s' "$raw_diff" fi @@ -104,9 +119,9 @@ while IFS= read -r file; do n=$((n + 1)) log "[$n/$file_count] Summarising $file …" - summary=$(printf '%s' "$diff" | aichat "In 2-3 lines, summarise what this diff changes in the file '$file'. Be concise and technical.") + summary=$(printf '%s' "$diff" | aichat "In 2-3 lines, summarise what this diff changes in the file '$file'. Be concise and technical." | _strip_think) - # Print the summary indented to stderr + # Print the summary indented to stderr while IFS= read -r line; do info "$line" done <<< "$summary" @@ -123,10 +138,10 @@ if [[ -z "$summaries" ]]; then fi log "Generating commit message from $n summary/summaries …" -result=$(printf '%s' "$summaries" | aichat "From these per-file summaries of a jj diff, write a single conventional commit message in English. First line: short imperative summary (max 72 chars). Then a blank line. Then a short paragraph with more detail if needed. Output only the commit message, nothing else.") +result=$(printf '%s' "$summaries" | aichat "From these per-file summaries of a jj diff, write a single conventional commit message in English. First line: short imperative summary (max 72 chars). Then a blank line. Then a short paragraph with more detail if needed. Output only the commit message, nothing else." | _strip_think) ok "Done" printf '\n' >&2 -# Commit message goes to stdout -printf '%s\n' "$result" +# Commit message goes to stdout (strip leading blank lines so jj sees content) +printf '%s\n' "$result" | sed '/./,$!d' diff --git a/src/obikseq/src/lib.rs b/src/obikseq/src/lib.rs index 992a42a..2219b71 100644 --- a/src/obikseq/src/lib.rs +++ b/src/obikseq/src/lib.rs @@ -12,6 +12,7 @@ pub mod kmer; mod revcomp_lookup; /// Routable super-kmer: canonical sequence paired with its minimizer for scatter routing. pub mod routable; +mod sequence; pub mod superkmer; pub mod unitig; @@ -19,4 +20,5 @@ pub mod unitig; pub use annotations::Annotation; pub use kmer::CanonicalKmer; pub use routable::RoutableSuperKmer; +pub use sequence::Sequence; pub use superkmer::SuperKmer; diff --git a/src/obikseq/src/sequence.rs b/src/obikseq/src/sequence.rs index ec23511..b23711e 100644 --- a/src/obikseq/src/sequence.rs +++ b/src/obikseq/src/sequence.rs @@ -1,5 +1,8 @@ +use crate::Annotation; + pub trait Sequence { - fn sequence(&self) -> &[u8]; - fn canonical(&self) -> Self; + fn sequence(&self) -> Box<[u8]>; + fn canonical(&self) -> &Self; fn seq_hash(&self) -> u64; + fn annotation(&self) -> Annotation; } diff --git a/src/obikseq/src/superkmer.rs b/src/obikseq/src/superkmer.rs index 4606d8e..7f14303 100644 --- a/src/obikseq/src/superkmer.rs +++ b/src/obikseq/src/superkmer.rs @@ -1,13 +1,14 @@ //! Compact 2-bit DNA super-kmer with in-place reverse complement and canonical form. use std::io::{self, Write}; +use bitvec::prelude::*; use serde::Serialize; +use xxhash_rust::xxh3::xxh3_64; +use crate::Sequence; use crate::encoding::{DEC4, encode_base}; use crate::kmer::{CanonicalKmer, Kmer, KmerError}; use crate::revcomp_lookup::REVCOMP4; -use bitvec::prelude::*; -use xxhash_rust::xxh3::xxh3_64; // ── SuperKmerHeader ─────────────────────────────────────────────────────────── @@ -53,7 +54,7 @@ impl SuperKmerHeader { } #[derive(Serialize)] -struct CountAnnotation { +struct SKAnnotation { seq_length: usize, kmer_size: usize, minimizer_size: usize, @@ -90,6 +91,22 @@ impl std::hash::Hash for SuperKmer { } } +impl Sequence for SuperKmer { + fn sequence(&self) -> Box<[u8]> { + self.seq.clone() + } + + fn canonical(&self) -> &Self { + &self + } + + /// Returns the XXH3-64 hash of the packed sequence bytes. + fn seq_hash(&self) -> u64 { + xxh3_64(&self.seq) + } + + fn annotation(&self) -> Annotation {} +} impl SuperKmer { /// `seql` is the raw stored byte: 1–255 for lengths 1–255, 0 for length 256. pub fn new(seql: u8, seq: Box<[u8]>) -> Self { @@ -315,11 +332,6 @@ impl SuperKmer { pub fn iter_canonical_kmers(&self, k: usize) -> impl Iterator + '_ { self.iter_kmers(k).map(move |km| km.canonical(k)) } - - /// Returns the XXH3-64 hash of the packed sequence bytes. - pub fn seq_hash(&self) -> u64 { - xxh3_64(&self.seq) - } } struct SKKmerIter<'a> {