fix: strip AI reasoning blocks from commit messages

Adds a `_strip_think` function using `awk` to buffer stdin and track the last `</think>` tag, emitting only the subsequent content. This utility is now piped after `aichat` calls to remove AI reasoning blocks before commit message generation. Also applies minor whitespace and indentation adjustments throughout the script.
This commit is contained in:
Eric Coissac
2026-05-02 18:01:31 +02:00
parent 0b784242cf
commit 602f414957
4 changed files with 65 additions and 33 deletions
+38 -23
View File
@@ -5,7 +5,7 @@
# Summarises each changed file's diff individually, then combines all # Summarises each changed file's diff individually, then combines all
# summaries into a single commit message via aichat. # summaries into a single commit message via aichat.
# REV defaults to `@` (current working copy). Accepts any jj revision: # REV defaults to `@` (current working copy). Accepts any jj revision:
# `@-`, `lk`, a commit ID, a branch name, etc. # `@-`, `lk`, a commit ID, a branch name, etc.
# #
# Typical use: # Typical use:
# jj describe -m "$(jj_commit_msg.sh)" # jj describe -m "$(jj_commit_msg.sh)"
@@ -18,9 +18,24 @@ set -euo pipefail
REV="${1:-@}" REV="${1:-@}"
# Log to stderr so progress doesn't pollute the commit message on stdout # Log to stderr so progress doesn't pollute the commit message on stdout
log() { printf '\033[1;34m==>\033[0m %s\n' "$*" >&2; } log() { printf '\033[1;34m==>\033[0m %s\n' "$*" >&2; }
info() { printf ' \033[0;37m%s\033[0m\n' "$*" >&2; } info() { printf ' \033[0;37m%s\033[0m\n' "$*" >&2; }
ok() { printf ' \033[0;32m✓\033[0m %s\n' "$*" >&2; } ok() { printf ' \033[0;32m✓\033[0m %s\n' "$*" >&2; }
# _strip_think — filter stdin, dropping a leading reasoning block.
# The whole input is held in memory; output starts on the line after the
# FINAL line that begins with </think>. Using the final match means earlier
# lines beginning with </think> inside the reasoning are dropped as well.
# When no line begins with </think>, the input is passed through unchanged.
# Every input line is additionally copied to stderr as it is read.
_strip_think() {
    awk '
        # Remember the number of the most recent line starting with </think>.
        /^<\/think>/ { cut = NR }
        {
            print > "/dev/stderr"
            buf[NR] = $0
        }
        END {
            # cut is 0 (unset) when no closing tag was seen, so output
            # then begins at line 1.
            n = cut + 1
            while (n <= NR) {
                print buf[n]
                n++
            }
        }'
}
# _readable_diff <file> # _readable_diff <file>
# Returns a human-readable diff for <file>. # Returns a human-readable diff for <file>.
@@ -31,9 +46,9 @@ _readable_diff() {
local file="$1" local file="$1"
local raw_diff local raw_diff
raw_diff=$(jj diff -r "$REV" -- "$file") raw_diff=$(jj diff -r "$REV" -- "$file")
[[ -z "$raw_diff" ]] && return 0 [[ -z "$raw_diff" ]] && return 0
# Detect pathological diff: any +/- content line longer than 500 chars # Detect pathological diff: any +/- content line longer than 500 chars
local max_len local max_len
max_len=$(grep '^[+-]' <<< "$raw_diff" | awk '{ if (length > m) m = length } END { print m+0 }') max_len=$(grep '^[+-]' <<< "$raw_diff" | awk '{ if (length > m) m = length } END { print m+0 }')
@@ -42,40 +57,40 @@ _readable_diff() {
return return
fi fi
# Pretty-print strategy per extension # Pretty-print strategy per extension
local ext="${file##*.}" local ext="${file##*.}"
local pretty_old pretty_new local pretty_old pretty_new
case "$ext" in case "$ext" in
json) json)
pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true) pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true)
pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true) pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true)
;; ;;
js|mjs|cjs|css|ts) js|mjs|cjs|css|ts)
local node_fmt=' local node_fmt='
const chunks = []; const chunks = [];
process.stdin.on("data", d => chunks.push(d)); process.stdin.on("data", d => chunks.push(d));
process.stdin.on("end", () => { process.stdin.on("end", () => {
const src = chunks.join(""); const src = chunks.join("");
// Insert newline before { } ( ) ; and after , // Insert newline before { } ( ) ; and after ,
const out = src const out = src
.replace(/([{(])/g, "$1\n ") .replace(/([{(])/g, "$1\n ")
.replace(/([;}])/g, "\n$1\n") .replace(/([;}])/g, "\n$1\n")
.replace(/,\s*/g, ",\n "); .replace(/,\s*/g, ",\n ");
process.stdout.write(out); process.stdout.write(out);
});' });'
pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true) pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true)
pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true) pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true)
;; ;;
*) *)
# Generic fallback: fold long lines at 120 chars # Generic fallback: fold long lines at 120 chars
pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | fold -s -w 120 || true) pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | fold -s -w 120 || true)
pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | fold -s -w 120 || true) pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | fold -s -w 120 || true)
;; ;;
esac esac
if [[ -n "$pretty_old" && -n "$pretty_new" ]]; then if [[ -n "$pretty_old" && -n "$pretty_new" ]]; then
diff <(printf '%s\n' "$pretty_old") <(printf '%s\n' "$pretty_new") \ diff <(printf '%s\n' "$pretty_old") <(printf '%s\n' "$pretty_new") \
--label "a/${file}" --label "b/${file}" -u || true --label "a/${file}" --label "b/${file}" -u || true
else else
printf '%s' "$raw_diff" printf '%s' "$raw_diff"
fi fi
@@ -104,9 +119,9 @@ while IFS= read -r file; do
n=$((n + 1)) n=$((n + 1))
log "[$n/$file_count] Summarising $file" log "[$n/$file_count] Summarising $file"
summary=$(printf '%s' "$diff" | aichat "In 2-3 lines, summarise what this diff changes in the file '$file'. Be concise and technical.") summary=$(printf '%s' "$diff" | aichat "In 2-3 lines, summarise what this diff changes in the file '$file'. Be concise and technical." | _strip_think)
# Print the summary indented to stderr # Print the summary indented to stderr
while IFS= read -r line; do while IFS= read -r line; do
info "$line" info "$line"
done <<< "$summary" done <<< "$summary"
@@ -123,10 +138,10 @@ if [[ -z "$summaries" ]]; then
fi fi
log "Generating commit message from $n summary/summaries …" log "Generating commit message from $n summary/summaries …"
result=$(printf '%s' "$summaries" | aichat "From these per-file summaries of a jj diff, write a single conventional commit message in English. First line: short imperative summary (max 72 chars). Then a blank line. Then a short paragraph with more detail if needed. Output only the commit message, nothing else.") result=$(printf '%s' "$summaries" | aichat "From these per-file summaries of a jj diff, write a single conventional commit message in English. First line: short imperative summary (max 72 chars). Then a blank line. Then a short paragraph with more detail if needed. Output only the commit message, nothing else." | _strip_think)
ok "Done" ok "Done"
printf '\n' >&2 printf '\n' >&2
# Commit message goes to stdout # Commit message goes to stdout (strip leading blank lines so jj sees content)
printf '%s\n' "$result" printf '%s\n' "$result" | sed '/./,$!d'
+2
View File
@@ -12,6 +12,7 @@ pub mod kmer;
mod revcomp_lookup; mod revcomp_lookup;
/// Routable super-kmer: canonical sequence paired with its minimizer for scatter routing. /// Routable super-kmer: canonical sequence paired with its minimizer for scatter routing.
pub mod routable; pub mod routable;
mod sequence;
pub mod superkmer; pub mod superkmer;
pub mod unitig; pub mod unitig;
@@ -19,4 +20,5 @@ pub mod unitig;
pub use annotations::Annotation; pub use annotations::Annotation;
pub use kmer::CanonicalKmer; pub use kmer::CanonicalKmer;
pub use routable::RoutableSuperKmer; pub use routable::RoutableSuperKmer;
pub use sequence::Sequence;
pub use superkmer::SuperKmer; pub use superkmer::SuperKmer;
+5 -2
View File
@@ -1,5 +1,8 @@
use crate::Annotation;
pub trait Sequence { pub trait Sequence {
fn sequence(&self) -> &[u8]; fn sequence(&self) -> Box<[u8]>;
fn canonical(&self) -> Self; fn canonical(&self) -> &Self;
fn seq_hash(&self) -> u64; fn seq_hash(&self) -> u64;
fn annotation(&self) -> Annotation;
} }
+20 -8
View File
@@ -1,13 +1,14 @@
//! Compact 2-bit DNA super-kmer with in-place reverse complement and canonical form. //! Compact 2-bit DNA super-kmer with in-place reverse complement and canonical form.
use std::io::{self, Write}; use std::io::{self, Write};
use bitvec::prelude::*;
use serde::Serialize; use serde::Serialize;
use xxhash_rust::xxh3::xxh3_64;
use crate::Sequence;
use crate::encoding::{DEC4, encode_base}; use crate::encoding::{DEC4, encode_base};
use crate::kmer::{CanonicalKmer, Kmer, KmerError}; use crate::kmer::{CanonicalKmer, Kmer, KmerError};
use crate::revcomp_lookup::REVCOMP4; use crate::revcomp_lookup::REVCOMP4;
use bitvec::prelude::*;
use xxhash_rust::xxh3::xxh3_64;
// ── SuperKmerHeader ─────────────────────────────────────────────────────────── // ── SuperKmerHeader ───────────────────────────────────────────────────────────
@@ -53,7 +54,7 @@ impl SuperKmerHeader {
} }
#[derive(Serialize)] #[derive(Serialize)]
struct CountAnnotation { struct SKAnnotation {
seq_length: usize, seq_length: usize,
kmer_size: usize, kmer_size: usize,
minimizer_size: usize, minimizer_size: usize,
@@ -90,6 +91,22 @@ impl std::hash::Hash for SuperKmer {
} }
} }
impl Sequence for SuperKmer {
    /// Returns a clone of `self.seq`, i.e. a freshly allocated copy of the stored bytes.
    fn sequence(&self) -> Box<[u8]> {
        self.seq.clone()
    }

    /// Returns `self` unchanged.
    ///
    /// NOTE(review): no canonicalisation happens here — presumably the value is
    /// expected to already be in canonical form; confirm against the constructors.
    fn canonical(&self) -> &Self {
        self
    }

    /// Returns the XXH3-64 hash of the packed sequence bytes.
    fn seq_hash(&self) -> u64 {
        xxh3_64(&self.seq)
    }

    /// Not implemented yet; calling this panics.
    ///
    /// The previous empty body evaluated to `()` and therefore could not
    /// type-check against the `Annotation` return type. The stub is now explicit
    /// so the impl compiles and the missing piece is visible at the call site.
    ///
    /// NOTE(review): `Annotation` is not among the `use` items visible for this
    /// file — confirm it is in scope (e.g. `use crate::Annotation;`).
    fn annotation(&self) -> Annotation {
        todo!("build the Annotation describing this SuperKmer")
    }
}
impl SuperKmer { impl SuperKmer {
/// `seql` is the raw stored byte: 1–255 for lengths 1–255, 0 for length 256. /// `seql` is the raw stored byte: 1–255 for lengths 1–255, 0 for length 256.
pub fn new(seql: u8, seq: Box<[u8]>) -> Self { pub fn new(seql: u8, seq: Box<[u8]>) -> Self {
@@ -315,11 +332,6 @@ impl SuperKmer {
pub fn iter_canonical_kmers(&self, k: usize) -> impl Iterator<Item = CanonicalKmer> + '_ { pub fn iter_canonical_kmers(&self, k: usize) -> impl Iterator<Item = CanonicalKmer> + '_ {
self.iter_kmers(k).map(move |km| km.canonical(k)) self.iter_kmers(k).map(move |km| km.canonical(k))
} }
/// Returns the XXH3-64 hash of the packed sequence bytes.
pub fn seq_hash(&self) -> u64 {
xxh3_64(&self.seq)
}
} }
struct SKKmerIter<'a> { struct SKKmerIter<'a> {