Release 4.4.44

feat: add which_max and which_min to retrieve extreme element indices
Implement reflection-based WhichMax and WhichMin to dynamically find the index or key of the maximum/minimum element in slices, arrays, or maps. Functions validate orderability, handle empty collections, and dispatch via reflect.Kind. Expose as which_max and which_min GVal functions, with float64 type assertions for compatibility and preserved error handling.
2026-06-24 17:51:00 +00:00 · 2026-06-02 14:40:09 +02:00 · 2026-06-02 14:39:31 +02:00 · 2026-06-02 14:34:00 +02:00 · 2026-06-01 14:14:40 +02:00 · 2026-06-01 13:22:58 +02:00
22 changed files with 926 additions and 114 deletions
@@ -37,6 +37,11 @@ func main() {
 	optionParser(os.Args)
 	if !obipairing.CLIHasPairedFiles() {
 		log.Error("You must provide both a forward file (-F) and a reverse file (-R)")
 		os.Exit(1)
 	}
 	obidefault.SetStrictReadWorker(2)
 	obidefault.SetStrictWriteWorker(2)
 	pairs, err := obipairing.CLIPairedSequence()
@@ -1,6 +1,7 @@
 package main
 import (
 	"fmt"
 	"os"
 	log "github.com/sirupsen/logrus"
@@ -8,6 +9,7 @@ import (
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimultiplex"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitagpcr"
 	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
@@ -39,6 +41,17 @@ func main() {
 		obitagpcr.OptionSet)
 	optionParser(os.Args)
 	if obimultiplex.CLIAskConfigTemplate() {
 		fmt.Print(obimultiplex.CLIConfigTemplate())
 		os.Exit(0)
 	}
 	if !obipairing.CLIHasPairedFiles() {
 		log.Error("You must provide both a forward file (-F) and a reverse file (-R)")
 		os.Exit(1)
 	}
 	pairs, err := obipairing.CLIPairedSequence()
 	if err != nil {
@@ -0,0 +1,24 @@
 >HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"}
 ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca
 gcctgaaactcaaaggacttggcggtgctttacatccct
 >HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"}
 ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac
 agcttaaaactcaaaggacttggcggtgctttataccctt
 >HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"}
 ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat
 agcttaaaactcaaaggacttggcggtgctttataccctt
 >HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"}
 ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata
 gcttaaaactcaaaggacttggcggtgctttatatccct
 >HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"}
 ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca
 gattaaacctcaaaggacttggcagtgctttatacccct
 >HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] {"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
 ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
 agcttaaaactcaaaggacttggcggtgctttataccctt
 >HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] {"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
 ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat
 agcttaaaactcaaaggacttggcggtgctttataccctt
 >HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"}
 ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata
 gcttaaaactcaaaggacttggcggtgctttatatccct
@@ -0,0 +1,48 @@
 taxid,parent,taxonomic_rank,scientific_name
 taxon:1 [root]@no rank,taxon:1 [root]@no rank,no rank,root
 taxon:131567 [cellular organisms]@cellular root,taxon:1 [root]@no rank,cellular root,cellular organisms
 taxon:2759 [Eukaryota]@domain,taxon:131567 [cellular organisms]@cellular root,domain,Eukaryota
 taxon:33154 [Opisthokonta]@clade,taxon:2759 [Eukaryota]@domain,clade,Opisthokonta
 taxon:33208 [Metazoa]@kingdom,taxon:33154 [Opisthokonta]@clade,kingdom,Metazoa
 taxon:6072 [Eumetazoa]@clade,taxon:33208 [Metazoa]@kingdom,clade,Eumetazoa
 taxon:33213 [Bilateria]@clade,taxon:6072 [Eumetazoa]@clade,clade,Bilateria
 taxon:33511 [Deuterostomia]@clade,taxon:33213 [Bilateria]@clade,clade,Deuterostomia
 taxon:7711 [Chordata]@phylum,taxon:33511 [Deuterostomia]@clade,phylum,Chordata
 taxon:89593 [Craniata]@subphylum,taxon:7711 [Chordata]@phylum,subphylum,Craniata
 taxon:7742 [Vertebrata]@clade,taxon:89593 [Craniata]@subphylum,clade,Vertebrata
 taxon:7776 [Gnathostomata]@clade,taxon:7742 [Vertebrata]@clade,clade,Gnathostomata
 taxon:117570 [Teleostomi]@clade,taxon:7776 [Gnathostomata]@clade,clade,Teleostomi
 taxon:117571 [Euteleostomi]@clade,taxon:117570 [Teleostomi]@clade,clade,Euteleostomi
 taxon:8287 [Sarcopterygii]@superclass,taxon:117571 [Euteleostomi]@clade,superclass,Sarcopterygii
 taxon:1338369 [Dipnotetrapodomorpha]@clade,taxon:8287 [Sarcopterygii]@superclass,clade,Dipnotetrapodomorpha
 taxon:32523 [Tetrapoda]@clade,taxon:1338369 [Dipnotetrapodomorpha]@clade,clade,Tetrapoda
 taxon:32524 [Amniota]@clade,taxon:32523 [Tetrapoda]@clade,clade,Amniota
 taxon:40674 [Mammalia]@class,taxon:32524 [Amniota]@clade,class,Mammalia
 taxon:32525 [Theria]@clade,taxon:40674 [Mammalia]@class,clade,Theria
 taxon:9347 [Eutheria]@clade,taxon:32525 [Theria]@clade,clade,Eutheria
 taxon:1437010 [Boreoeutheria]@clade,taxon:9347 [Eutheria]@clade,clade,Boreoeutheria
 taxon:314146 [Euarchontoglires]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Euarchontoglires
 taxon:314145 [Laurasiatheria]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Laurasiatheria
 taxon:33554 [Carnivora]@order,taxon:314145 [Laurasiatheria]@superorder,order,Carnivora
 taxon:91561 [Artiodactyla]@order,taxon:314145 [Laurasiatheria]@superorder,order,Artiodactyla
 taxon:314147 [Glires]@clade,taxon:314146 [Euarchontoglires]@superorder,clade,Glires
 taxon:9845 [Ruminantia]@suborder,taxon:91561 [Artiodactyla]@order,suborder,Ruminantia
 taxon:35500 [Pecora]@infraorder,taxon:9845 [Ruminantia]@suborder,infraorder,Pecora
 taxon:9989 [Rodentia]@order,taxon:314147 [Glires]@clade,order,Rodentia
 taxon:379584 [Caniformia]@suborder,taxon:33554 [Carnivora]@order,suborder,Caniformia
 taxon:9608 [Canidae]@family,taxon:379584 [Caniformia]@suborder,family,Canidae
 taxon:9850 [Cervidae]@family,taxon:35500 [Pecora]@infraorder,family,Cervidae
 taxon:9881 [Odocoileinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Odocoileinae
 taxon:33553 [Sciuromorpha]@suborder,taxon:9989 [Rodentia]@order,suborder,Sciuromorpha
 taxon:55153 [Sciuridae]@family,taxon:33553 [Sciuromorpha]@suborder,family,Sciuridae
 taxon:34878 [Cervinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Cervinae
 taxon:9611 [Canis]@genus,taxon:9608 [Canidae]@family,genus,Canis
 taxon:9857 [Capreolus]@genus,taxon:9881 [Odocoileinae]@subfamily,genus,Capreolus
 taxon:9612 [Canis lupus]@species,taxon:9611 [Canis]@genus,species,Canis lupus
 taxon:337726 [Xerinae]@subfamily,taxon:55153 [Sciuridae]@family,subfamily,Xerinae
 taxon:9859 [Cervus]@genus,taxon:34878 [Cervinae]@subfamily,genus,Cervus
 taxon:337730 [Marmotini]@tribe,taxon:337726 [Xerinae]@subfamily,tribe,Marmotini
 taxon:9992 [Marmota]@genus,taxon:337730 [Marmotini]@tribe,genus,Marmota
 taxon:9860 [Cervus elaphus]@species,taxon:9859 [Cervus]@genus,species,Cervus elaphus
 taxon:9615 [Canis lupus familiaris]@subspecies,taxon:9612 [Canis lupus]@species,subspecies,Canis lupus familiaris
 taxon:9858 [Capreolus capreolus]@species,taxon:9857 [Capreolus]@genus,species,Capreolus capreolus
@@ -44,7 +44,7 @@ cleanup() {
    rm -rf "$TMPDIR"  # Suppress the temporary directory
    if [ $failed -gt 0 ]; then
-       log "$TEST_NAME tests failed" 
+       log "$TEST_NAME tests failed"
        log
        log
       exit 1
@@ -60,10 +60,10 @@ log() {
    echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
 }
-log "Testing $TEST_NAME..." 
+log "Testing $TEST_NAME..."
-log "Test directory is $TEST_DIR" 
+log "Test directory is $TEST_DIR"
-log "obitools directory is $OBITOOLS_DIR" 
+log "obitools directory is $OBITOOLS_DIR"
-log "Temporary directory is $TMPDIR" 
+log "Temporary directory is $TMPDIR"
 log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
 ######################################################################
@@ -94,12 +94,12 @@ log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
 ((ntest++))
-if $CMD -h > "${TMPDIR}/help.txt" 2>&1 
+if $CMD -h > "${TMPDIR}/help.txt" 2>&1
 then
-    log "$MCMD: printing help OK" 
+    log "$MCMD: printing help OK"
    ((success++))
 else
-    log "$MCMD: printing help failed" 
+    log "$MCMD: printing help failed"
    ((failed++))
 fi
@@ -108,15 +108,15 @@ fi
 if obiconvert -Z "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
                 > "${TMPDIR}/xxx.fasta.gz" && \
   zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
-                 "${TMPDIR}/xxx.fasta.gz" 
+                 "${TMPDIR}/xxx.fasta.gz"
 then
-    log "$MCMD: converting large fasta file to fasta OK" 
+    log "$MCMD: converting large fasta file to fasta OK"
    ((success++))
 else
-    log "$MCMD: converting large fasta file to fasta failed" 
+    log "$MCMD: converting large fasta file to fasta failed"
    ((failed++))
 fi
- 
+
 ((ntest++))
 if obiconvert -Z --fastq-output \
              "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
@@ -125,15 +125,139 @@ if obiconvert -Z --fastq-output \
              "${TMPDIR}/xxx.fastq.gz" \
              > "${TMPDIR}/yyy.fasta.gz" && \
   zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
-                 "${TMPDIR}/yyy.fasta.gz" 
+                 "${TMPDIR}/yyy.fasta.gz"
 then
-    log "$MCMD: converting large file between fasta and fastq OK" 
+    log "$MCMD: converting large file between fasta and fastq OK"
    ((success++))
 else
-    log "$MCMD: converting large file between fasta and fastq failed" 
+    log "$MCMD: converting large file between fasta and fastq failed"
    ((failed++))
 fi
 # ------------------------------------------------------------------
 # --raw-taxid tests (no taxonomy loaded)
 # ------------------------------------------------------------------
 # Running test
 ((ntest++))
 if obiconvert --raw-taxid "${TEST_DIR}/out_ecotag.fasta" \
              > "${TMPDIR}/raw_taxid.fasta" 2>/dev/null
 then
    log "$MCMD --raw-taxid: running OK"
    ((success++))
 else
    log "$MCMD --raw-taxid: running failed"
    ((failed++))
 fi
 # Taxids must be bare numbers — no full-format "taxon:ID [Name]@rank" strings
 ((ntest++))
 if grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -qv '"taxid":"[0-9][0-9]*"'
 then
    log "$MCMD --raw-taxid: taxid format check failed (full-format taxid found)"
    ((failed++))
 else
    log "$MCMD --raw-taxid: taxid format OK (all taxids are bare numbers)"
    ((success++))
 fi
 # --raw-taxid is idempotent: piping through a second obiconvert --raw-taxid must
 # produce bit-for-bit identical output.
 ((ntest++))
 if obiconvert --raw-taxid "${TMPDIR}/raw_taxid.fasta" \
              > "${TMPDIR}/raw_taxid2.fasta" 2>/dev/null
 then
    log "$MCMD --raw-taxid piped: running OK"
    ((success++))
 else
    log "$MCMD --raw-taxid piped: running failed"
    ((failed++))
 fi
 ((ntest++))
 if diff "${TMPDIR}/raw_taxid.fasta" \
        "${TMPDIR}/raw_taxid2.fasta" > /dev/null
 then
    log "$MCMD --raw-taxid piped: idempotency OK"
    ((success++))
 else
    log "$MCMD --raw-taxid piped: idempotency failed (outputs differ)"
    ((failed++))
 fi
 # ------------------------------------------------------------------
 # --taxonomy tests (full-format taxid, no --raw-taxid)
 # ------------------------------------------------------------------
 # Running test
 ((ntest++))
 if obiconvert --taxonomy "${TEST_DIR}/taxonomy.csv" \
              "${TEST_DIR}/out_ecotag.fasta" \
              > "${TMPDIR}/taxo.fasta" 2>/dev/null
 then
    log "$MCMD --taxonomy: running OK"
    ((success++))
 else
    log "$MCMD --taxonomy: running failed"
    ((failed++))
 fi
 # Taxids must be in full "taxon:ID [Name]@rank" format
 ((ntest++))
 if grep '"taxid"' "${TMPDIR}/taxo.fasta" | grep -q '"taxid":"taxon:[0-9]'
 then
    log "$MCMD --taxonomy: taxid format OK (full-format taxids present)"
    ((success++))
 else
    log "$MCMD --taxonomy: taxid format check failed (no full-format taxid found)"
    ((failed++))
 fi
 # ------------------------------------------------------------------
 # --raw-taxid --taxonomy tests
 # ------------------------------------------------------------------
 # Running test
 ((ntest++))
 if obiconvert --raw-taxid --taxonomy "${TEST_DIR}/taxonomy.csv" \
              "${TEST_DIR}/out_ecotag.fasta" \
              > "${TMPDIR}/raw_taxid_taxo.fasta" 2>/dev/null
 then
    log "$MCMD --raw-taxid --taxonomy: running OK"
    ((success++))
 else
    log "$MCMD --raw-taxid --taxonomy: running failed"
    ((failed++))
 fi
 # Taxids must be bare numbers even when taxonomy is loaded
 ((ntest++))
 if grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -qv '"taxid":"[0-9][0-9]*"'
 then
    log "$MCMD --raw-taxid --taxonomy: taxid format check failed (full-format taxid found)"
    ((failed++))
 else
    log "$MCMD --raw-taxid --taxonomy: taxid format OK (all taxids are bare numbers)"
    ((success++))
 fi
 # --raw-taxid with or without taxonomy must yield identical taxid values
 ((ntest++))
 if diff <(grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -o '"taxid":"[^"]*"' | sort) \
        <(grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -o '"taxid":"[^"]*"' | sort) \
        > /dev/null
 then
    log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values match OK"
    ((success++))
 else
    log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values differ (unexpected)"
    ((failed++))
 fi
 #########################################
 #
 # At the end of the tests
@@ -0,0 +1,24 @@
 >HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"}
 ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca
 gcctgaaactcaaaggacttggcggtgctttacatccct
 >HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"}
 ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac
 agcttaaaactcaaaggacttggcggtgctttataccctt
 >HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"}
 ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat
 agcttaaaactcaaaggacttggcggtgctttataccctt
 >HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"}
 ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata
 gcttaaaactcaaaggacttggcggtgctttatatccct
 >HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"}
 ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca
 gattaaacctcaaaggacttggcagtgctttatacccct
 >HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] {"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
 ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
 agcttaaaactcaaaggacttggcggtgctttataccctt
 >HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] {"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
 ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat
 agcttaaaactcaaaggacttggcggtgctttataccctt
 >HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"}
 ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata
 gcttaaaactcaaaggacttggcggtgctttatatccct
@@ -146,6 +146,65 @@ func __match__key__(text []byte) []int {
 	return []int{} // Not a key
 }
 func __match__array__(text []byte) []int {
 	state := 0
 	level := 0
 	start := 0
 	instring := byte(0)
 	for i, r := range text {
 		if state == 2 {
 			if r == ';' {
 				return []int{start, i + 1}
 			}
 			if r != ' ' && r != '\t' {
 				return []int{}
 			}
 		}
 		if state == 0 {
 			if r == '[' {
 				level++
 				state++
 				start = i
 				continue
 			}
 			if r != ' ' && r != '\t' {
 				return []int{}
 			}
 			continue
 		}
 		// state == 1: inside the array
 		if instring != 0 {
 			if r == instring {
 				instring = 0
 			}
 			continue
 		}
 		if r == '"' || r == '\'' {
 			instring = r
 			continue
 		}
 		if r == '[' || r == '{' {
 			level++
 			continue
 		}
 		if r == ']' || r == '}' {
 			level--
 			if level == 0 {
 				state++
 			}
 		}
 	}
 	return []int{}
 }
 func __match__general__(text []byte) []int {
 	for i, r := range text {
@@ -242,6 +301,21 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
 					stop = m[1] + 1
 				} else {
 					// array value
 					m = __match__array__(part)
 					if len(m) > 0 {
 						bvalue = bytes.TrimSpace(part[m[0]:(m[1] - 1)])
 						j := bytes.ReplaceAll(bvalue, []byte("'"), []byte(`"`))
 						j = __obi_header_map_int_key__.ReplaceAll(j, []byte(`$1"$2":`))
 						arr, err := _parse_json_array_interface(j)
 						if err != nil {
 							value = string(bvalue)
 						} else {
 							value = arr
 						}
 						stop = m[1] + 1
 					} else {
 					// Generic value
 					// m = __obi_header_value_general_pattern__.FindIndex(part)
@@ -264,6 +338,7 @@ func ParseOBIFeatures(text string, annotations obiseq.Annotation) string {
 						// no value
 						break
 					} // End of No value
 					} // End of not array
 				} // End of not dict
 			} // End of not string
 		} // End of not numeric
@@ -327,19 +402,19 @@ func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
 					buffer.WriteString(fmt.Sprintf("%s=", key))
 					buffer.Write(tv)
 					buffer.WriteString("; ")
 				case map[string]int,
 					map[string]string,
 					map[string]interface{}:
 					tv, err := obiutils.JsonMarshal(t)
 					if err != nil {
 						log.Fatalf("Cannot convert %v value", value)
 					}
 					tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
 					buffer.WriteString(fmt.Sprintf("%s=", key))
 					buffer.Write(tv)
 					buffer.WriteString("; ")
 				default:
-					buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
+					if obiutils.IsAMap(value) || obiutils.IsASlice(value) || obiutils.IsAnArray(value) {
 						tv, err := obiutils.JsonMarshal(t)
 						if err != nil {
 							log.Fatalf("Cannot convert %v value", value)
 						}
 						tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
 						buffer.WriteString(fmt.Sprintf("%s=", key))
 						buffer.Write(tv)
 						buffer.WriteString("; ")
 					} else {
 						buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
 					}
 				}
 			}
 		}
@@ -90,6 +90,9 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, ski
 	log.Debugf("FormatFastaBatch: #%d : %d seqs", batch.Order(), batch.Len())
 	for _, seq := range batch.Slice() {
 		if len(seq.Id()) == 0 {
 			log.Fatalf("Sequence identifier is empty")
 		}
 		if seq.Len() > 0 {
 			// Write header directly into bs — no intermediate string
 			bs.WriteByte('>')
@@ -64,6 +64,9 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch,
 	first := true
 	for _, seq := range batch.Slice() {
 		if len(seq.Id()) == 0 {
 			log.Fatalf("Sequence identifier is empty")
 		}
 		if seq.Len() > 0 {
 			_formatFastq(&bs, seq, formater)
@@ -631,9 +631,9 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
 			return nil, fmt.Errorf("row %d has %d columns, expected %d", len(data), len(fields), len(header))
 		}
-		forward_primer := fields[forward_primerColIndex]
+		forward_primer := strings.TrimSpace(fields[forward_primerColIndex])
-		reverse_primer := fields[reverse_primerColIndex]
+		reverse_primer := strings.TrimSpace(fields[reverse_primerColIndex])
-		tags := _parseMainNGSFilterTags(fields[sample_tagColIndex])
+		tags := _parseMainNGSFilterTags(strings.TrimSpace(fields[sample_tagColIndex]))
 		marker, _ := ngsfilter.GetMarker(forward_primer, reverse_primer)
 		pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)
@@ -644,8 +644,8 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
 					i, tags.Forward, tags.Reverse, forward_primer, reverse_primer)
 		}
-		pcr.Experiment = fields[experimentColIndex]
+		pcr.Experiment = strings.TrimSpace(fields[experimentColIndex])
-		pcr.Sample = fields[sampleColIndex]
+		pcr.Sample = strings.TrimSpace(fields[sampleColIndex])
 		if extraColumns != nil {
 			pcr.Annotations = make(obiseq.Annotation)
@@ -4,22 +4,21 @@ import "math"
 // KmerEntropy computes the entropy of a single encoded k-mer.
 //
-// The algorithm mirrors the lowmask entropy calculation: it decodes the k-mer
+// The algorithm mirrors the Rust obiskbuilder entropy: it decodes the k-mer
 // to a DNA sequence, extracts all sub-words of each size from 1 to levelMax,
 // normalizes them by circular canonical form, counts their frequencies, and
-// computes Shannon entropy normalized by the maximum possible entropy.
+// computes Shannon entropy corrected for class sizes, normalized by the
 // maximum possible entropy over 4^ws raw bins.
 // The returned value is the minimum entropy across all word sizes.
 //
 // Correction for small sequences: the raw entropy H = log(N) - Σ f·log(f)/N
 // under-estimates the true complexity when many raw words collapse to the same
 // canonical form.  Adding Σ f·log(class_size)/N recovers the entropy of the
 // underlying uncollapsed distribution (assuming uniform mixing within each
 // equivalence class).
 //
 // A value close to 0 indicates very low complexity (e.g. "AAAA..."),
 // while a value close to 1 indicates high complexity.
 //
 // Parameters:
 //   - kmer: the encoded k-mer (2 bits per base)
 //   - k: the k-mer size
 //   - levelMax: maximum sub-word size for entropy (typically 6)
 //
 // Returns:
 //   - minimum normalized entropy across all word sizes 1..levelMax
 func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
 	if k < 1 || levelMax < 1 {
 		return 1.0
@@ -35,7 +34,7 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
 	var seqBuf [32]byte
 	seq := DecodeKmer(kmer, k, seqBuf[:])
-	// Pre-compute nLogN lookup (same as lowmask)
+	// Pre-compute nLogN lookup
 	nLogN := make([]float64, k+1)
 	for i := 1; i <= k; i++ {
 		nLogN[i] = float64(i) * math.Log(float64(i))
@@ -51,6 +50,23 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
 		}
 	}
 	// Build ln(class_size) tables: for each canonical form, how many raw
 	// words map to it under circular normalization.
 	classLogSizeTables := make([][]float64, levelMax+1)
 	for ws := 1; ws <= levelMax; ws++ {
 		tableSize := 1 << (ws * 2)
 		classSize := make([]int, tableSize)
 		for code := 0; code < tableSize; code++ {
 			classSize[normTables[ws][code]]++
 		}
 		classLogSizeTables[ws] = make([]float64, tableSize)
 		for j := 0; j < tableSize; j++ {
 			if classSize[j] > 0 {
 				classLogSizeTables[ws][j] = math.Log(float64(classSize[j]))
 			}
 		}
 	}
 	minEntropy := math.MaxFloat64
 	for ws := 1; ws <= levelMax; ws++ {
@@ -75,23 +91,13 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
 			table[normWord]++
 		}
-		// Compute Shannon entropy
+		// Compute emax over 4^ws raw bins (uncollapsed distribution).
 		floatNwords := float64(nwords)
 		logNwords := math.Log(floatNwords)
-
+		na := tableSize // 4^ws
 		var sumNLogN float64
 		for j := 0; j < tableSize; j++ {
 			n := table[j]
 			if n > 0 {
 				sumNLogN += nLogN[n]
 			}
 		}
 		// Compute emax (maximum possible entropy for this word size)
 		na := CanonicalCircularKmerCount(ws)
 		var emax float64
 		if nwords < na {
-			emax = math.Log(float64(nwords))
+			emax = logNwords
 		} else {
 			cov := nwords / na
 			remains := nwords - (na * cov)
@@ -105,7 +111,19 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
 			continue
 		}
-		entropy := (logNwords - sumNLogN/floatNwords) / emax
+		// Accumulate Σ f·log(f) and Σ f·log(class_size) over canonical forms.
 		classLogSize := classLogSizeTables[ws]
 		var sumNLogN, sumClassLogN float64
 		for j := 0; j < tableSize; j++ {
 			n := table[j]
 			if n > 0 {
 				sumNLogN += nLogN[n]
 				sumClassLogN += float64(n) * classLogSize[j]
 			}
 		}
 		// Corrected entropy: H_raw ≈ log(N) + (Σf·log(s) - Σf·log(f)) / N
 		entropy := (logNwords + sumClassLogN/floatNwords - sumNLogN/floatNwords) / emax
 		if entropy < 0 {
 			entropy = 0
 		}
@@ -129,24 +147,20 @@ func KmerEntropy(kmer uint64, k int, levelMax int) float64 {
 // IMPORTANT: a KmerEntropyFilter is NOT safe for concurrent use.
 // Each goroutine must create its own instance via NewKmerEntropyFilter.
 type KmerEntropyFilter struct {
-	k          int
+	k                   int
-	levelMax   int
+	levelMax            int
-	threshold  float64
+	threshold           float64
-	nLogN      []float64
+	nLogN               []float64
-	normTables [][]int
+	normTables          [][]int
-	emaxValues []float64
+	classLogSizeTables  [][]float64
-	logNwords  []float64
+	emaxValues          []float64
 	logNwords           []float64
 	// Pre-allocated frequency tables reused across Entropy() calls.
 	// One per word size (index 0 unused). Reset to zero before each use.
 	freqTables [][]int
 }
 // NewKmerEntropyFilter creates an entropy filter with pre-computed tables.
 //
 // Parameters:
 //   - k: the k-mer size
 //   - levelMax: maximum sub-word size for entropy (typically 6)
 //   - threshold: entropy threshold (k-mers with entropy <= threshold are rejected)
 func NewKmerEntropyFilter(k, levelMax int, threshold float64) *KmerEntropyFilter {
 	if levelMax >= k {
 		levelMax = k - 1
@@ -169,20 +183,38 @@ func NewKmerEntropyFilter(k, levelMax int, threshold float64) *KmerEntropyFilter
 		}
 	}
 	// ln(class_size) for each canonical form under circular normalization.
 	classLogSizeTables := make([][]float64, levelMax+1)
 	for ws := 1; ws <= levelMax; ws++ {
 		tableSize := 1 << (ws * 2)
 		classSize := make([]int, tableSize)
 		for code := 0; code < tableSize; code++ {
 			classSize[normTables[ws][code]]++
 		}
 		classLogSizeTables[ws] = make([]float64, tableSize)
 		for j := 0; j < tableSize; j++ {
 			if classSize[j] > 0 {
 				classLogSizeTables[ws][j] = math.Log(float64(classSize[j]))
 			}
 		}
 	}
 	// Pre-compute emax and logNwords per word size.
 	// emax uses 4^ws raw bins to match the corrected entropy.
 	emaxValues := make([]float64, levelMax+1)
 	logNwords := make([]float64, levelMax+1)
 	for ws := 1; ws <= levelMax; ws++ {
 		nw := k - ws + 1
-		na := CanonicalCircularKmerCount(ws)
+		na := 1 << (ws * 2) // 4^ws raw bins
 		floatNw := float64(nw)
 		logNwords[ws] = math.Log(floatNw)
 		if nw < na {
-			logNwords[ws] = math.Log(float64(nw))
+			emaxValues[ws] = logNwords[ws]
 			emaxValues[ws] = math.Log(float64(nw))
 		} else {
 			cov := nw / na
 			remains := nw - (na * cov)
-			f1 := float64(cov) / float64(nw)
+			f1 := float64(cov) / floatNw
-			f2 := float64(cov+1) / float64(nw)
+			f2 := float64(cov+1) / floatNw
 			logNwords[ws] = math.Log(float64(nw))
 			emaxValues[ws] = -(float64(na-remains)*f1*math.Log(f1) +
 				float64(remains)*f2*math.Log(f2))
 		}
@@ -195,14 +227,15 @@ func NewKmerEntropyFilter(k, levelMax int, threshold float64) *KmerEntropyFilter
 	}
 	return &KmerEntropyFilter{
-		k:          k,
+		k:                  k,
-		levelMax:   levelMax,
+		levelMax:           levelMax,
-		threshold:  threshold,
+		threshold:          threshold,
-		nLogN:      nLogN,
+		nLogN:              nLogN,
-		normTables: normTables,
+		normTables:         normTables,
-		emaxValues: emaxValues,
+		classLogSizeTables: classLogSizeTables,
-		logNwords:  logNwords,
+		emaxValues:         emaxValues,
-		freqTables: freqTables,
+		logNwords:          logNwords,
 		freqTables:         freqTables,
 	}
 }
@@ -236,7 +269,7 @@ func (ef *KmerEntropyFilter) Entropy(kmer uint64) float64 {
 		// Count circular-canonical sub-word frequencies
 		tableSize := 1 << (ws * 2)
 		table := ef.freqTables[ws]
-		clear(table) // reset to zero
+		clear(table)
 		mask := (1 << (ws * 2)) - 1
 		normTable := ef.normTables[ws]
@@ -251,19 +284,21 @@ func (ef *KmerEntropyFilter) Entropy(kmer uint64) float64 {
 			table[normWord]++
 		}
 		// Compute Shannon entropy
 		floatNwords := float64(nwords)
 		logNwords := ef.logNwords[ws]
 		classLogSize := ef.classLogSizeTables[ws]
-		var sumNLogN float64
+		var sumNLogN, sumClassLogN float64
 		for j := 0; j < tableSize; j++ {
 			n := table[j]
 			if n > 0 {
 				sumNLogN += ef.nLogN[n]
 				sumClassLogN += float64(n) * classLogSize[j]
 			}
 		}
-		entropy := (logNwords - sumNLogN/floatNwords) / emax
+		// Corrected entropy: H_raw ≈ log(N) + (Σf·log(s) - Σf·log(f)) / N
 		entropy := (logNwords + sumClassLogN/floatNwords - sumNLogN/floatNwords) / emax
 		if entropy < 0 {
 			entropy = 0
 		}
@@ -3,7 +3,7 @@ package obioptions
 // Version is automatically updated by the Makefile from version.txt
 // The patch number (third digit) is incremented on each push to the repository
-var _Version = "Release 4.4.41"
+var _Version = "Release 4.4.44"
 // Version returns the version of the obitools package.
 //
@@ -364,6 +364,24 @@ func (s *BioSequence) GetIntSlice(key string) ([]int, bool) {
 	return val, ok
 }
 func (s *BioSequence) GetMapOfIntSlice(key string) (map[string][]int, bool) {
 	v, ok := s.GetAttribute(key)
 	if !ok {
 		return nil, false
 	}
 	val, err := obiutils.InterfaceToMapOfIntSlice(v)
 	return val, err == nil
 }
 func (s *BioSequence) GetMapOfStringSlice(key string) (map[string][]string, bool) {
 	v, ok := s.GetAttribute(key)
 	if !ok {
 		return nil, false
 	}
 	val, err := obiutils.InterfaceToMapOfStringSlice(v)
 	return val, err == nil
 }
 // Count returns the value of the "count" attribute of the BioSequence.
 //
 // The count of a sequence is the number of times it has been observed in the dataset.
@@ -103,7 +103,7 @@ func TestNewBioSequence(t *testing.T) {
 // Return type: None.
 func TestNewBioSequenceWithQualities(t *testing.T) {
 	id := "123"
-	sequence := []byte("ATGC")
+	sequence := []byte("atgc")
 	definition := "DNA sequence"
 	qualities := []byte("1234")
@@ -141,6 +141,33 @@ var OBILang = gval.NewLanguage(
 	gval.Function("max", func(args ...interface{}) (interface{}, error) {
 		return obiutils.Max(args[0])
 	}),
 	gval.Function("which_max", func(args ...interface{}) (interface{}, error) {
 		result, err := obiutils.WhichMax(args[0])
 		if idx, ok := result.(int); ok {
 			return float64(idx), nil
 		}
 		return result, err
 	}),
 	gval.Function("which_min", func(args ...interface{}) (interface{}, error) {
 		result, err := obiutils.WhichMin(args[0])
 		if idx, ok := result.(int); ok {
 			return float64(idx), nil
 		}
 		return result, err
 	}),
 	gval.Function("filtermin", func(args ...interface{}) (interface{}, error) {
 		return obiutils.FilterMin(args[0], args[1])
 	}),
 	gval.Function("filtermax", func(args ...interface{}) (interface{}, error) {
 		return obiutils.FilterMax(args[0], args[1])
 	}),
 	gval.Function("saturatingsub", func(args ...interface{}) (interface{}, error) {
 		return obiutils.SaturatingSub(args[0], args[1])
 	}),
 	gval.Function("contains", func(args ...interface{}) (interface{}, error) {
 		if obiutils.IsAMap(args[0]) {
 			val := reflect.ValueOf(args[0]).MapIndex(reflect.ValueOf(args[1]))
@@ -70,6 +70,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
 				}
 			}
 		} else if obidefault.UseRawTaxids() {
 			// Without a loaded taxonomy, extract the bare ID from full-format strings
 			// like "code:12345 [Name]@rank" so that --raw-taxid is honoured everywhere.
 			if _, rawID, _, _, parseErr := obitax.ParseTaxonString(taxid); parseErr == nil {
 				taxid = rawID
 			}
 		}
 	}
@@ -177,7 +183,7 @@ func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
 	lpath := path.Len() - 1
 	for i := lpath; i >= 0; i-- {
-		spath[lpath-i] = path.Get(i).String(taxonomy.Code())
+		spath[lpath-i] = path.Get(i).FullString(taxonomy.Code())
 	}
 	sequence.SetAttribute("taxonomic_path", spath)
@@ -29,6 +29,24 @@ type TaxNode struct {
 	alternatenames *map[*string]*string
 }
 // FullString returns the full string representation of the TaxNode in the form
 // "taxonomyCode:id [scientificName]@rank", regardless of the UseRawTaxids setting.
 // This is used internally when a parseable format is required (e.g. taxonomic_path).
 func (node *TaxNode) FullString(taxonomyCode string) string {
 	if node.HasScientificName() {
 		return fmt.Sprintf("%s:%v [%s]@%s",
 			taxonomyCode,
 			*node.id,
 			node.ScientificName(),
 			node.Rank(),
 		)
 	}
 	return fmt.Sprintf("%s:%v",
 		taxonomyCode,
 		*node.id)
 }
 // String returns a string representation of the TaxNode, including the taxonomy code,
 // the node ID, and the scientific name. The output format is "taxonomyCode:id [scientificName]".
 //
@@ -42,19 +60,7 @@ func (node *TaxNode) String(taxonomyCode string) string {
 		return *node.id
 	}
-	if node.HasScientificName() {
+	return node.FullString(taxonomyCode)
 		return fmt.Sprintf("%s:%v [%s]@%s",
 			taxonomyCode,
 			*node.id,
 			node.ScientificName(),
 			node.Rank(),
 		)
 	}
 	return fmt.Sprintf("%s:%v",
 		taxonomyCode,
 		*node.id)
 }
 // Id returns the unique identifier of the TaxNode.
@@ -21,12 +21,10 @@ func PairingOptionSet(options *getoptions.GetOpt) {
 	options.StringVar(&_ForwardFile, "forward-reads", "",
 		options.Alias("F"),
 		options.ArgName("FILENAME_F"),
 		options.Required("You must provide at a forward file"),
 		options.Description("The file names containing the forward reads"))
 	options.StringVar(&_ReverseFile, "reverse-reads", "",
 		options.Alias("R"),
 		options.ArgName("FILENAME_R"),
 		options.Required("You must provide a reverse file"),
 		options.Description("The file names containing the reverse reads"))
 	options.IntVar(&_Delta, "delta", _Delta,
 		options.Alias("D"),
@@ -72,6 +70,10 @@ func CLIPairedSequence() (obiiter.IBioSequence, error) {
 	return paired, nil
 }
 func CLIHasPairedFiles() bool {
 	return _ForwardFile != "" && _ReverseFile != ""
 }
 func CLIDelta() int {
 	return _Delta
 }
@@ -114,10 +114,10 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
 					aanot["obimultiplex_direction"] = direction
 					aanot["obimultiplex_forward_match"] = forward_match
-					aanot["obimultiplex_forward_mismatches"] = forward_mismatches
+					aanot["obimultiplex_forward_error"] = forward_mismatches
 					aanot["obimultiplex_reverse_match"] = reverse_match
-					aanot["obimultiplex_reverse_mismatches"] = reverse_mismatches
+					aanot["obimultiplex_reverse_error"] = reverse_mismatches
 					aanot["sample"] = sample
 					aanot["experiment"] = experiment
@@ -125,10 +125,10 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
 					banot["obimultiplex_direction"] = direction
 					banot["obimultiplex_forward_match"] = forward_match
-					banot["obimultiplex_forward_mismatches"] = forward_mismatches
+					banot["obimultiplex_forward_error"] = forward_mismatches
 					banot["obimultiplex_reverse_match"] = reverse_match
-					banot["obimultiplex_reverse_mismatches"] = reverse_mismatches
+					banot["obimultiplex_reverse_error"] = reverse_mismatches
 					banot["sample"] = sample
 					banot["experiment"] = experiment
@@ -276,6 +276,44 @@ func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
 	return
 }
 func InterfaceToMapOfIntSlice(i interface{}) (val map[string][]int, err error) {
 	err = nil
 	switch m := i.(type) {
 	case map[string][]int:
 		val = m
 	case map[string]interface{}:
 		val = make(map[string][]int, len(m))
 		for k, v := range m {
 			val[k], err = InterfaceToIntSlice(v)
 			if err != nil {
 				return
 			}
 		}
 	default:
 		err = &NotAMapInt{"value attribute cannot be casted to a map[string][]int"}
 	}
 	return
 }
 func InterfaceToMapOfStringSlice(i interface{}) (val map[string][]string, err error) {
 	err = nil
 	switch m := i.(type) {
 	case map[string][]string:
 		val = m
 	case map[string]interface{}:
 		val = make(map[string][]string, len(m))
 		for k, v := range m {
 			val[k], err = InterfaceToStringSlice(v)
 			if err != nil {
 				return
 			}
 		}
 	default:
 		err = &NotAMapInt{"value attribute cannot be casted to a map[string][]string"}
 	}
 	return
 }
 func InterfaceToStringSlice(i interface{}) (val []string, err error) {
 	err = nil
@@ -34,6 +34,26 @@ func MinMaxSlice[T constraints.Ordered](vec []T) (min, max T) {
 	return
 }
 func FilterMinSlice[T constraints.Ordered](vec []T, minimum T) []T {
 	result := make([]T, 0, len(vec))
 	for _, v := range vec {
 		if v >= minimum {
 			result = append(result, v)
 		}
 	}
 	return result
 }
 func FilterMaxSlice[T constraints.Ordered](vec []T, maximum T) []T {
 	result := make([]T, 0, len(vec))
 	for _, v := range vec {
 		if v <= maximum {
 			result = append(result, v)
 		}
 	}
 	return result
 }
 func MaxMap[K comparable, T constraints.Ordered](values map[K]T) (K, T, error) {
 	var maxKey K
 	var maxValue T
@@ -73,6 +93,46 @@ func MinMap[K comparable, T constraints.Ordered](values map[K]T) (K, T, error) {
 	return minKey, minValue, nil
 }
 func FilterMinMap[K comparable, T constraints.Ordered](values map[K]T, minimum T) map[K]T {
 	result := make(map[K]T)
 	for k, v := range values {
 		if v >= minimum {
 			result[k] = v
 		}
 	}
 	return result
 }
 func FilterMaxMap[K comparable, T constraints.Ordered](values map[K]T, maximum T) map[K]T {
 	result := make(map[K]T)
 	for k, v := range values {
 		if v <= maximum {
 			result[k] = v
 		}
 	}
 	return result
 }
 func SaturatingSubSlice[T Numeric](vec []T, sub T) []T {
 	result := make([]T, len(vec))
 	for i, v := range vec {
 		if v > sub {
 			result[i] = v - sub
 		}
 	}
 	return result
 }
 func SaturatingSubMap[K comparable, T Numeric](values map[K]T, sub T) map[K]T {
 	result := make(map[K]T)
 	for k, v := range values {
 		if v > sub {
 			result[k] = v - sub
 		}
 	}
 	return result
 }
 // Min returns the smallest element in a slice/array or map,
 // or the value itself if data is a single comparable value.
 // Returns an error if the container is empty or the type is unsupported.
@@ -135,11 +195,121 @@ func Max(data interface{}) (interface{}, error) {
 	}
 }
 func FilterMin(data interface{}, minimum interface{}) (interface{}, error) {
 	v := reflect.ValueOf(data)
 	switch v.Kind() {
 	case reflect.Slice, reflect.Array:
 		if v.Len() == 0 {
 			return nil, errors.New("empty slice or array")
 		}
 		return filterMinFromIterable(v, minimum)
 	case reflect.Map:
 		if v.Len() == 0 {
 			return nil, errors.New("empty map")
 		}
 		return filterMinFromMap(v, minimum)
 	default:
 		if !isOrderedKind(v.Kind()) {
 			return nil, fmt.Errorf("unsupported type: %s", v.Kind())
 		}
 		return data, nil
 	}
 }
 func FilterMax(data interface{}, maximum interface{}) (interface{}, error) {
 	v := reflect.ValueOf(data)
 	switch v.Kind() {
 	case reflect.Slice, reflect.Array:
 		if v.Len() == 0 {
 			return nil, errors.New("empty slice or array")
 		}
 		return filterMaxFromIterable(v, maximum)
 	case reflect.Map:
 		if v.Len() == 0 {
 			return nil, errors.New("empty map")
 		}
 		return filterMaxFromMap(v, maximum)
 	default:
 		if !isOrderedKind(v.Kind()) {
 			return nil, fmt.Errorf("unsupported type: %s", v.Kind())
 		}
 		return data, nil
 	}
 }
 func SaturatingSub(data interface{}, sub interface{}) (interface{}, error) {
 	v := reflect.ValueOf(data)
 	switch v.Kind() {
 	case reflect.Slice, reflect.Array:
 		return saturatingSubFromIterable(v, sub)
 	case reflect.Map:
 		return saturatingSubFromMap(v, sub)
 	default:
 		if !isNumericKind(v.Kind()) {
 			return nil, fmt.Errorf("unsupported type: %s", v.Kind())
 		}
 		r, err := saturatingSubValues(v, reflect.ValueOf(sub))
 		if err != nil {
 			return nil, err
 		}
 		return r.Interface(), nil
 	}
 }
 func saturatingSubFromIterable(v reflect.Value, sub interface{}) (interface{}, error) {
 	subVal := reflect.ValueOf(sub)
 	result := reflect.MakeSlice(v.Type(), v.Len(), v.Len())
 	for i := 0; i < v.Len(); i++ {
 		r, err := saturatingSubValues(v.Index(i), subVal)
 		if err != nil {
 			return nil, err
 		}
 		result.Index(i).Set(r)
 	}
 	return result.Interface(), nil
 }
 func saturatingSubFromMap(v reflect.Value, sub interface{}) (interface{}, error) {
 	subVal := reflect.ValueOf(sub)
 	result := reflect.MakeMap(v.Type())
 	for _, key := range v.MapKeys() {
 		r, err := saturatingSubValues(v.MapIndex(key), subVal)
 		if err != nil {
 			return nil, err
 		}
 		if !r.IsZero() {
 			result.SetMapIndex(key, r)
 		}
 	}
 	return result.Interface(), nil
 }
 func saturatingSubValues(a, b reflect.Value) (reflect.Value, error) {
 	result := reflect.New(a.Type()).Elem()
 	switch a.Kind() {
 	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
 		if av, bv := a.Int(), b.Int(); av > bv {
 			result.SetInt(av - bv)
 		}
 	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
 		if av, bv := a.Uint(), b.Uint(); av > bv {
 			result.SetUint(av - bv)
 		}
 	case reflect.Float32, reflect.Float64:
 		if av, bv := a.Float(), b.Float(); av > bv {
 			result.SetFloat(av - bv)
 		}
 	default:
 		return reflect.Value{}, fmt.Errorf("unsupported type for saturating subtraction: %s", a.Kind())
 	}
 	return result, nil
 }
 // maxFromIterable scans a slice/array to find the maximum.
 func maxFromIterable(v reflect.Value) (interface{}, error) {
 	var best reflect.Value
 	for i := 0; i < v.Len(); i++ {
-		elem := v.Index(i)
+		elem := unwrapInterface(v.Index(i))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
@@ -154,7 +324,7 @@ func maxFromIterable(v reflect.Value) (interface{}, error) {
 func minFromIterable(v reflect.Value) (interface{}, error) {
 	var minVal reflect.Value
 	for i := 0; i < v.Len(); i++ {
-		elem := v.Index(i)
+		elem := unwrapInterface(v.Index(i))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
@@ -165,12 +335,182 @@ func minFromIterable(v reflect.Value) (interface{}, error) {
 	return minVal.Interface(), nil
 }
 func filterMinFromIterable(v reflect.Value, minimum interface{}) (interface{}, error) {
 	minVal := reflect.ValueOf(minimum)
 	result := reflect.MakeSlice(v.Type(), 0, v.Len())
 	for i := 0; i < v.Len(); i++ {
 		elem := unwrapInterface(v.Index(i))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
 		if !less(elem, minVal) { // elem >= minimum
 			result = reflect.Append(result, elem)
 		}
 	}
 	return result.Interface(), nil
 }
 func filterMaxFromIterable(v reflect.Value, maximum interface{}) (interface{}, error) {
 	maxVal := reflect.ValueOf(maximum)
 	result := reflect.MakeSlice(v.Type(), 0, v.Len())
 	for i := 0; i < v.Len(); i++ {
 		elem := unwrapInterface(v.Index(i))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
 		if !greater(elem, maxVal) { // elem <= maximum
 			result = reflect.Append(result, elem)
 		}
 	}
 	return result.Interface(), nil
 }
 // whichMaxFromIterable returns the index of the maximum element in a slice/array.
 func whichMaxFromIterable(v reflect.Value) (int, error) {
 	var best reflect.Value
 	bestIdx := 0
 	for i := 0; i < v.Len(); i++ {
 		elem := unwrapInterface(v.Index(i))
 		if !isOrderedKind(elem.Kind()) {
 			return 0, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
 		if i == 0 || greater(elem, best) {
 			best = elem
 			bestIdx = i
 		}
 	}
 	return bestIdx, nil
 }
 // whichMinFromIterable returns the index of the minimum element in a slice/array.
 func whichMinFromIterable(v reflect.Value) (int, error) {
 	var minVal reflect.Value
 	minIdx := 0
 	for i := 0; i < v.Len(); i++ {
 		elem := unwrapInterface(v.Index(i))
 		if !isOrderedKind(elem.Kind()) {
 			return 0, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
 		if i == 0 || less(elem, minVal) {
 			minVal = elem
 			minIdx = i
 		}
 	}
 	return minIdx, nil
 }
 // whichMaxFromMap returns the key associated with the maximum value in a map.
 func whichMaxFromMap(v reflect.Value) (interface{}, error) {
 	var best reflect.Value
 	var bestKey reflect.Value
 	first := true
 	for _, key := range v.MapKeys() {
 		elem := unwrapInterface(v.MapIndex(key))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
 		if first || greater(elem, best) {
 			best = elem
 			bestKey = key
 			first = false
 		}
 	}
 	return bestKey.Interface(), nil
 }
 // whichMinFromMap returns the key associated with the minimum value in a map.
 func whichMinFromMap(v reflect.Value) (interface{}, error) {
 	var minVal reflect.Value
 	var minKey reflect.Value
 	first := true
 	for _, key := range v.MapKeys() {
 		elem := unwrapInterface(v.MapIndex(key))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
 		if first || less(elem, minVal) {
 			minVal = elem
 			minKey = key
 			first = false
 		}
 	}
 	return minKey.Interface(), nil
 }
 // WhichMax returns the key (for a map) or index (for a slice/array) of the maximum value.
 func WhichMax(data interface{}) (interface{}, error) {
 	v := reflect.ValueOf(data)
 	switch v.Kind() {
 	case reflect.Slice, reflect.Array:
 		if v.Len() == 0 {
 			return nil, errors.New("empty slice or array")
 		}
 		return whichMaxFromIterable(v)
 	case reflect.Map:
 		if v.Len() == 0 {
 			return nil, errors.New("empty map")
 		}
 		return whichMaxFromMap(v)
 	default:
 		return nil, fmt.Errorf("unsupported type: %s", v.Kind())
 	}
 }
 // WhichMin returns the key (for a map) or index (for a slice/array) of the minimum value.
 func WhichMin(data interface{}) (interface{}, error) {
 	v := reflect.ValueOf(data)
 	switch v.Kind() {
 	case reflect.Slice, reflect.Array:
 		if v.Len() == 0 {
 			return nil, errors.New("empty slice or array")
 		}
 		return whichMinFromIterable(v)
 	case reflect.Map:
 		if v.Len() == 0 {
 			return nil, errors.New("empty map")
 		}
 		return whichMinFromMap(v)
 	default:
 		return nil, fmt.Errorf("unsupported type: %s", v.Kind())
 	}
 }
 func filterMinFromMap(v reflect.Value, minimum interface{}) (interface{}, error) {
 	minVal := reflect.ValueOf(minimum)
 	result := reflect.MakeMap(v.Type())
 	for _, key := range v.MapKeys() {
 		elem := unwrapInterface(v.MapIndex(key))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
 		if !less(elem, minVal) { // elem >= minimum
 			result.SetMapIndex(key, elem)
 		}
 	}
 	return result.Interface(), nil
 }
 func filterMaxFromMap(v reflect.Value, maximum interface{}) (interface{}, error) {
 	maxVal := reflect.ValueOf(maximum)
 	result := reflect.MakeMap(v.Type())
 	for _, key := range v.MapKeys() {
 		elem := unwrapInterface(v.MapIndex(key))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
 		if !greater(elem, maxVal) { // elem <= maximum
 			result.SetMapIndex(key, elem)
 		}
 	}
 	return result.Interface(), nil
 }
 // maxFromMap scans map values to find the maximum.
 func maxFromMap(v reflect.Value) (interface{}, error) {
 	var best reflect.Value
 	first := true
 	for _, key := range v.MapKeys() {
-		elem := v.MapIndex(key)
+		elem := unwrapInterface(v.MapIndex(key))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
@@ -187,7 +527,7 @@ func minFromMap(v reflect.Value) (interface{}, error) {
 	var minVal reflect.Value
 	first := true
 	for _, key := range v.MapKeys() {
-		elem := v.MapIndex(key)
+		elem := unwrapInterface(v.MapIndex(key))
 		if !isOrderedKind(elem.Kind()) {
 			return nil, fmt.Errorf("unsupported element type: %s", elem.Kind())
 		}
@@ -199,6 +539,27 @@ func minFromMap(v reflect.Value) (interface{}, error) {
 	return minVal.Interface(), nil
 }
 func isNumericKind(k reflect.Kind) bool {
 	switch k {
 	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
 		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
 		reflect.Float32, reflect.Float64:
 		return true
 	default:
 		return false
 	}
 }
 // unwrapInterface returns v.Elem() when v holds an interface value, otherwise v unchanged.
 // This is necessary when iterating map[string]interface{} or []interface{} via reflection:
 // the element Kind is reflect.Interface, not the underlying concrete type.
 func unwrapInterface(v reflect.Value) reflect.Value {
 	if v.Kind() == reflect.Interface {
 		return v.Elem()
 	}
 	return v
 }
 // isOrderedKind reports whether k supports comparison ordering.
 func isOrderedKind(k reflect.Kind) bool {
 	switch k {
@@ -1 +1 @@
-4.4.41
+4.4.44
Author	SHA1	Message	Date
Eric Coissac	e9210e28a3	Release 4.4.44	2026-06-02 14:40:09 +02:00
Eric Coissac	13a93fce11	feat: add which_max and which_min to retrieve extreme element indices Implement reflection-based WhichMax and WhichMin to dynamically find the index or key of the maximum/minimum element in slices, arrays, or maps. Functions validate orderability, handle empty collections, and dispatch via reflect.Kind. Expose as which_max and which_min GVal functions, with float64 type assertions for compatibility and preserved error handling.	2026-06-02 14:39:31 +02:00
Eric Coissac	14064c919e	fix(obiutils): correctly unwrap interface values in min/max Introduces an `unwrapInterface` reflection helper to dereference `interface{}`-wrapped values before type validation. Updates slice and map iteration loops in min/max functions to apply this helper, ensuring `isOrderedKind` accurately identifies underlying concrete types instead of incorrectly rejecting `reflect.Interface` elements.	2026-06-02 14:34:00 +02:00
coissac	1dfd68aa6d	Merge pull request #117 from metabarcoding/push-wvlmzvomslzv Release 4.4.43	2026-06-01 14:14:40 +02:00
Eric Coissac	930fe5f1ba	Release 4.4.43	2026-06-01 13:22:58 +02:00
Eric Coissac	dcdaf9e372	feat: support map and slice types in OBI attributes Extends OBI header parsing to recognize and deserialize JSON-like arrays and objects. Introduces safe conversion utilities in `obiutils` to cast generic interface values into typed maps, and exposes them via new `BioSequence` methods. Header values are now marshaled, quote-normalized, and formatted for map and slice types.	2026-06-01 13:21:11 +02:00
Eric Coissac	af7ae3d60c	Correct Shannon entropy bias for canonical k-mers Multiple raw k-mers collapsing into identical circular canonical forms introduce bias into complexity estimates. This change pre-computes `log(class_size)` tables and per-word-size maximum entropy bounds. The `KmerEntropy` function and `KmerEntropyFilter` are updated to apply the corrected formula `(log(N) + Σf·log(s) - Σf·log(f))/N / emax`, ensuring accurate sequence complexity estimation.	2026-05-17 14:54:57 +08:00
Eric Coissac	cecf90fa40	feat: add min/max filtering and saturating subtraction utilities Introduce generic and reflection-based utilities for filtering slices and maps by minimum/maximum thresholds, along with saturating subtraction. The `obiutils` package provides type-safe generic implementations alongside dynamic reflection dispatchers to handle arbitrary ordered and numeric types. These are exposed as GVAL expression functions in `obiseq`, extending the language's built-in filtering and numeric capabilities.	2026-05-14 20:58:24 +08:00
Eric Coissac	a186bd1c92	fix: validate non-empty sequence IDs in FASTA and FASTQ writers Adds a pre-processing guard that checks for empty sequence identifiers before formatting. This prevents malformed FASTA output and stops downstream processing of invalid FASTQ data by terminating early. The check is placed before existing sequence-length validations to enforce non-empty IDs during batch processing.	2026-05-05 18:07:58 +02:00
coissac	46d60c1a44	Merge pull request #115 from metabarcoding/push-lkzqoskvyqtr [4.4.2] Enhanced taxonomy handling, input robustness & PCR tag validation	2026-04-30 16:59:49 +02:00
Eric Coissac	6c4a6c697c	[4.4.2] Enhanced taxonomy handling, input robustness & PCR tag validation - obiconvert: Added `--raw-taxid` mode to output numeric taxIDs without formatting (e.g., "12345" instead of ":tax:NCBI_0987@species"). Introduced `TaxNode.FullString()` to reliably return full formatted strings regardless of global settings, and improved fallback behavior when taxonomy DB is unavailable. - ngsfilter: Input fields (primers, sample tags/IDs) are now automatically trimmed of leading/trailing whitespace to prevent parsing failures from inconsistent formatting. - obitools (pcrtag): Mismatch-related fields (`forward_mismatches`, `reverse_mishaps`) renamed to "error" for consistency across annotation dictionaries. - obipairing & obtagpcr: Enforced mandatory paired-end file input (`--forward` and `reverse`) in obipairing; added CLI support for generating config templates via AskConfigTemplate(); removed redundant `Required()` constraints and introduced helper function CLIHasPairedFiles().	2026-04-30 16:57:45 +02:00
Eric Coissac	60b3753673	feat(obiconvert): add --raw-taxid option and refactor taxID formatting - Add new `--tax-id` mode (`obiconvert --raw-taxid`) to output bare numeric taxIDs instead of full-format strings. - Introduce `TaxNode.FullString()` to always return the complete "code:id [name]@rank" format, regardless of global `UseRawTaxids()` setting. - Update `.String(taxonomyCode)` to respect the global flag, returning bare ID when `--raw-taxid` is active. - Extract raw taxID from full-format strings in taxonomy methods when needed (e.g., fallback without loaded DB). - Add comprehensive test suite covering: a) `--raw-taxid` execution and idempotency b) full-format taxID output with `--taxonomy` c interaction of both flags d format validation - Add test data: new reference files `out_ecotag.fasta`, taxonomy.csv, and updated shell script.	2026-04-30 16:57:38 +02:00
Eric Coissac	14e2840a2d	[ngsfilter] Trim whitespace from primer and sample fields Trim leading/trailing whitespaces in forward/reverse primers, tags (via sample_tag), experiment andsample fields to prevent parsing errors due to formatting inconsistencies in input data.	2026-04-30 08:14:39 +02:00
Eric Coissac	42910c7db9	🔧 Rename mismatch fields to error in pcrtag.go - Renamed `obimultiplex_forward_mismatches` to "error" for consistency - Similarly renamed `obimultiplex_reverse_mismatches` to "error" - Applied changes in both annotation dictionaries (aanot, banot)	2026-04-29 15:29:25 +02:00
Eric Coissac	8b4cf677c6	[obitools] Add validation for paired files and config template support - Enforce requirement of both forward (-F) and reverse files in obipairing/main.go - Add config template support to obtagpcr via CLIAskConfigTemplate() - Remove redundant Required() constraints in options.go - Introduce new helper CLIHasPairedFiles()	2026-04-29 15:01:37 +02:00
coissac	02765f154f	Merge pull request #113 from metabarcoding/push-oxowomxlnlnx Push oxowomxlnlnx	2026-04-16 15:04:55 +02:00