fix: strip AI reasoning blocks from commit messages
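Adds a `_strip_think` function that uses `awk` to buffer stdin, record the last line beginning with `</think>`, and emit only the lines that follow it (or the whole input when no such line is present). Both `aichat` calls — the per-file summaries and the final commit message — are now piped through it so that AI reasoning blocks never reach the generated message. Leading blank lines are also stripped from the final output, and minor whitespace and indentation adjustments are applied throughout the script.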
This commit is contained in:
+38
-23
@@ -5,7 +5,7 @@
|
|||||||
# Summarises each changed file's diff individually, then combines all
|
# Summarises each changed file's diff individually, then combines all
|
||||||
# summaries into a single commit message via aichat.
|
# summaries into a single commit message via aichat.
|
||||||
# REV defaults to `@` (current working copy). Accepts any jj revision:
|
# REV defaults to `@` (current working copy). Accepts any jj revision:
|
||||||
# `@-`, `lk`, a commit ID, a branch name, etc.
|
# `@-`, `lk`, a commit ID, a branch name, etc.
|
||||||
#
|
#
|
||||||
# Typical use:
|
# Typical use:
|
||||||
# jj describe -m "$(jj_commit_msg.sh)"
|
# jj describe -m "$(jj_commit_msg.sh)"
|
||||||
@@ -18,9 +18,24 @@ set -euo pipefail
|
|||||||
REV="${1:-@}"
|
REV="${1:-@}"
|
||||||
|
|
||||||
# Log to stderr so progress doesn't pollute the commit message on stdout
|
# Log to stderr so progress doesn't pollute the commit message on stdout
|
||||||
log() { printf '\033[1;34m==>\033[0m %s\n' "$*" >&2; }
|
log() { printf '\033[1;34m==>\033[0m %s\n' "$*" >&2; }
|
||||||
info() { printf ' \033[0;37m%s\033[0m\n' "$*" >&2; }
|
info() { printf ' \033[0;37m%s\033[0m\n' "$*" >&2; }
|
||||||
ok() { printf ' \033[0;32m✓\033[0m %s\n' "$*" >&2; }
|
ok() { printf ' \033[0;32m✓\033[0m %s\n' "$*" >&2; }
|
||||||
|
|
||||||
|
# _strip_think — remove reasoning tags from stdin
|
||||||
|
# Buffer all input, locate the LAST </think> line, emit only what follows.
|
||||||
|
# This handles think blocks that themselves contain </think> fragments.
|
||||||
|
_strip_think() {
|
||||||
|
awk '{
|
||||||
|
lines[NR] = $0
|
||||||
|
print $0 > "/dev/stderr"
|
||||||
|
if (/^<\/think>/) last_end = NR
|
||||||
|
}
|
||||||
|
END {
|
||||||
|
start = (last_end ? last_end + 1 : 1)
|
||||||
|
for (i = start; i <= NR; i++) print lines[i]
|
||||||
|
}'
|
||||||
|
}
|
||||||
|
|
||||||
# _readable_diff <file>
|
# _readable_diff <file>
|
||||||
# Returns a human-readable diff for <file>.
|
# Returns a human-readable diff for <file>.
|
||||||
@@ -31,9 +46,9 @@ _readable_diff() {
|
|||||||
local file="$1"
|
local file="$1"
|
||||||
local raw_diff
|
local raw_diff
|
||||||
raw_diff=$(jj diff -r "$REV" -- "$file")
|
raw_diff=$(jj diff -r "$REV" -- "$file")
|
||||||
[[ -z "$raw_diff" ]] && return 0
|
[[ -z "$raw_diff" ]] && return 0
|
||||||
|
|
||||||
# Detect pathological diff: any +/- content line longer than 500 chars
|
# Detect pathological diff: any +/- content line longer than 500 chars
|
||||||
local max_len
|
local max_len
|
||||||
max_len=$(grep '^[+-]' <<< "$raw_diff" | awk '{ if (length > m) m = length } END { print m+0 }')
|
max_len=$(grep '^[+-]' <<< "$raw_diff" | awk '{ if (length > m) m = length } END { print m+0 }')
|
||||||
|
|
||||||
@@ -42,40 +57,40 @@ _readable_diff() {
|
|||||||
return
|
return
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Pretty-print strategy per extension
|
# Pretty-print strategy per extension
|
||||||
local ext="${file##*.}"
|
local ext="${file##*.}"
|
||||||
local pretty_old pretty_new
|
local pretty_old pretty_new
|
||||||
case "$ext" in
|
case "$ext" in
|
||||||
json)
|
json)
|
||||||
pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true)
|
pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true)
|
||||||
pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true)
|
pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | python3 -m json.tool 2>/dev/null || true)
|
||||||
;;
|
;;
|
||||||
js|mjs|cjs|css|ts)
|
js|mjs|cjs|css|ts)
|
||||||
local node_fmt='
|
local node_fmt='
|
||||||
const chunks = [];
|
const chunks = [];
|
||||||
process.stdin.on("data", d => chunks.push(d));
|
process.stdin.on("data", d => chunks.push(d));
|
||||||
process.stdin.on("end", () => {
|
process.stdin.on("end", () => {
|
||||||
const src = chunks.join("");
|
const src = chunks.join("");
|
||||||
// Insert newline before { } ( ) ; and after ,
|
// Insert newline before { } ( ) ; and after ,
|
||||||
const out = src
|
const out = src
|
||||||
.replace(/([{(])/g, "$1\n ")
|
.replace(/([{(])/g, "$1\n ")
|
||||||
.replace(/([;}])/g, "\n$1\n")
|
.replace(/([;}])/g, "\n$1\n")
|
||||||
.replace(/,\s*/g, ",\n ");
|
.replace(/,\s*/g, ",\n ");
|
||||||
process.stdout.write(out);
|
process.stdout.write(out);
|
||||||
});'
|
});'
|
||||||
pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true)
|
pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true)
|
||||||
pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true)
|
pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | node -e "$node_fmt" 2>/dev/null || true)
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
# Generic fallback: fold long lines at 120 chars
|
# Generic fallback: fold long lines at 120 chars
|
||||||
pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | fold -s -w 120 || true)
|
pretty_old=$(jj file show -r "$REV@-" -- "$file" 2>/dev/null | fold -s -w 120 || true)
|
||||||
pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | fold -s -w 120 || true)
|
pretty_new=$(jj file show -r "$REV" -- "$file" 2>/dev/null | fold -s -w 120 || true)
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
if [[ -n "$pretty_old" && -n "$pretty_new" ]]; then
|
if [[ -n "$pretty_old" && -n "$pretty_new" ]]; then
|
||||||
diff <(printf '%s\n' "$pretty_old") <(printf '%s\n' "$pretty_new") \
|
diff <(printf '%s\n' "$pretty_old") <(printf '%s\n' "$pretty_new") \
|
||||||
--label "a/${file}" --label "b/${file}" -u || true
|
--label "a/${file}" --label "b/${file}" -u || true
|
||||||
else
|
else
|
||||||
printf '%s' "$raw_diff"
|
printf '%s' "$raw_diff"
|
||||||
fi
|
fi
|
||||||
@@ -104,9 +119,9 @@ while IFS= read -r file; do
|
|||||||
n=$((n + 1))
|
n=$((n + 1))
|
||||||
log "[$n/$file_count] Summarising $file …"
|
log "[$n/$file_count] Summarising $file …"
|
||||||
|
|
||||||
summary=$(printf '%s' "$diff" | aichat "In 2-3 lines, summarise what this diff changes in the file '$file'. Be concise and technical.")
|
summary=$(printf '%s' "$diff" | aichat "In 2-3 lines, summarise what this diff changes in the file '$file'. Be concise and technical." | _strip_think)
|
||||||
|
|
||||||
# Print the summary indented to stderr
|
# Print the summary indented to stderr
|
||||||
while IFS= read -r line; do
|
while IFS= read -r line; do
|
||||||
info "$line"
|
info "$line"
|
||||||
done <<< "$summary"
|
done <<< "$summary"
|
||||||
@@ -123,10 +138,10 @@ if [[ -z "$summaries" ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
log "Generating commit message from $n summary/summaries …"
|
log "Generating commit message from $n summary/summaries …"
|
||||||
result=$(printf '%s' "$summaries" | aichat "From these per-file summaries of a jj diff, write a single conventional commit message in English. First line: short imperative summary (max 72 chars). Then a blank line. Then a short paragraph with more detail if needed. Output only the commit message, nothing else.")
|
result=$(printf '%s' "$summaries" | aichat "From these per-file summaries of a jj diff, write a single conventional commit message in English. First line: short imperative summary (max 72 chars). Then a blank line. Then a short paragraph with more detail if needed. Output only the commit message, nothing else." | _strip_think)
|
||||||
|
|
||||||
ok "Done"
|
ok "Done"
|
||||||
printf '\n' >&2
|
printf '\n' >&2
|
||||||
|
|
||||||
# Commit message goes to stdout
|
# Commit message goes to stdout (strip leading blank lines so jj sees content)
|
||||||
printf '%s\n' "$result"
|
printf '%s\n' "$result" | sed '/./,$!d'
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ pub mod kmer;
|
|||||||
mod revcomp_lookup;
|
mod revcomp_lookup;
|
||||||
/// Routable super-kmer: canonical sequence paired with its minimizer for scatter routing.
|
/// Routable super-kmer: canonical sequence paired with its minimizer for scatter routing.
|
||||||
pub mod routable;
|
pub mod routable;
|
||||||
|
mod sequence;
|
||||||
pub mod superkmer;
|
pub mod superkmer;
|
||||||
|
|
||||||
pub mod unitig;
|
pub mod unitig;
|
||||||
@@ -19,4 +20,5 @@ pub mod unitig;
|
|||||||
pub use annotations::Annotation;
|
pub use annotations::Annotation;
|
||||||
pub use kmer::CanonicalKmer;
|
pub use kmer::CanonicalKmer;
|
||||||
pub use routable::RoutableSuperKmer;
|
pub use routable::RoutableSuperKmer;
|
||||||
|
pub use sequence::Sequence;
|
||||||
pub use superkmer::SuperKmer;
|
pub use superkmer::SuperKmer;
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
|
use crate::Annotation;
|
||||||
|
|
||||||
pub trait Sequence {
|
pub trait Sequence {
|
||||||
fn sequence(&self) -> &[u8];
|
fn sequence(&self) -> Box<[u8]>;
|
||||||
fn canonical(&self) -> Self;
|
fn canonical(&self) -> &Self;
|
||||||
fn seq_hash(&self) -> u64;
|
fn seq_hash(&self) -> u64;
|
||||||
|
fn annotation(&self) -> Annotation;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
//! Compact 2-bit DNA super-kmer with in-place reverse complement and canonical form.
|
//! Compact 2-bit DNA super-kmer with in-place reverse complement and canonical form.
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
|
|
||||||
|
use bitvec::prelude::*;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
use xxhash_rust::xxh3::xxh3_64;
|
||||||
|
|
||||||
|
use crate::Sequence;
|
||||||
use crate::encoding::{DEC4, encode_base};
|
use crate::encoding::{DEC4, encode_base};
|
||||||
use crate::kmer::{CanonicalKmer, Kmer, KmerError};
|
use crate::kmer::{CanonicalKmer, Kmer, KmerError};
|
||||||
use crate::revcomp_lookup::REVCOMP4;
|
use crate::revcomp_lookup::REVCOMP4;
|
||||||
use bitvec::prelude::*;
|
|
||||||
use xxhash_rust::xxh3::xxh3_64;
|
|
||||||
|
|
||||||
// ── SuperKmerHeader ───────────────────────────────────────────────────────────
|
// ── SuperKmerHeader ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -53,7 +54,7 @@ impl SuperKmerHeader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize)]
|
#[derive(Serialize)]
|
||||||
struct CountAnnotation {
|
struct SKAnnotation {
|
||||||
seq_length: usize,
|
seq_length: usize,
|
||||||
kmer_size: usize,
|
kmer_size: usize,
|
||||||
minimizer_size: usize,
|
minimizer_size: usize,
|
||||||
@@ -90,6 +91,22 @@ impl std::hash::Hash for SuperKmer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Sequence for SuperKmer {
|
||||||
|
fn sequence(&self) -> Box<[u8]> {
|
||||||
|
self.seq.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn canonical(&self) -> &Self {
|
||||||
|
&self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the XXH3-64 hash of the packed sequence bytes.
|
||||||
|
fn seq_hash(&self) -> u64 {
|
||||||
|
xxh3_64(&self.seq)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn annotation(&self) -> Annotation {}
|
||||||
|
}
|
||||||
impl SuperKmer {
|
impl SuperKmer {
|
||||||
/// `seql` is the raw stored byte: 1–255 for lengths 1–255, 0 for length 256.
|
/// `seql` is the raw stored byte: 1–255 for lengths 1–255, 0 for length 256.
|
||||||
pub fn new(seql: u8, seq: Box<[u8]>) -> Self {
|
pub fn new(seql: u8, seq: Box<[u8]>) -> Self {
|
||||||
@@ -315,11 +332,6 @@ impl SuperKmer {
|
|||||||
pub fn iter_canonical_kmers(&self, k: usize) -> impl Iterator<Item = CanonicalKmer> + '_ {
|
pub fn iter_canonical_kmers(&self, k: usize) -> impl Iterator<Item = CanonicalKmer> + '_ {
|
||||||
self.iter_kmers(k).map(move |km| km.canonical(k))
|
self.iter_kmers(k).map(move |km| km.canonical(k))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the XXH3-64 hash of the packed sequence bytes.
|
|
||||||
pub fn seq_hash(&self) -> u64 {
|
|
||||||
xxh3_64(&self.seq)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct SKKmerIter<'a> {
|
struct SKKmerIter<'a> {
|
||||||
|
|||||||
Reference in New Issue
Block a user