From 8b4cf677c6bbf5bdbb8541e8be15685ed9375d61 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 29 Apr 2026 15:01:09 +0200 Subject: [PATCH 1/5] [obitools] Add validation for paired files and config template support - Enforce requirement of both forward (-F) and reverse (-R) files in obipairing/main.go and obitagpcr/main.go - Add config template support to obitagpcr via CLIAskConfigTemplate() - Remove redundant Required() constraints in options.go - Introduce new helper CLIHasPairedFiles() --- cmd/obitools/obipairing/main.go | 5 +++++ cmd/obitools/obitagpcr/main.go | 13 +++++++++++++ pkg/obitools/obipairing/options.go | 6 ++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/cmd/obitools/obipairing/main.go b/cmd/obitools/obipairing/main.go index 8f9efd5..098d47d 100644 --- a/cmd/obitools/obipairing/main.go +++ b/cmd/obitools/obipairing/main.go @@ -37,6 +37,11 @@ func main() { optionParser(os.Args) + if !obipairing.CLIHasPairedFiles() { + log.Error("You must provide both a forward file (-F) and a reverse file (-R)") + os.Exit(1) + } + obidefault.SetStrictReadWorker(2) obidefault.SetStrictWriteWorker(2) pairs, err := obipairing.CLIPairedSequence() diff --git a/cmd/obitools/obitagpcr/main.go b/cmd/obitools/obitagpcr/main.go index d63d557..a62f673 100644 --- a/cmd/obitools/obitagpcr/main.go +++ b/cmd/obitools/obitagpcr/main.go @@ -1,6 +1,7 @@ package main import ( + "fmt" "os" log "github.com/sirupsen/logrus" @@ -8,6 +9,7 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimultiplex" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitagpcr" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -39,6 +41,17 @@ func main() {
obitagpcr.OptionSet) optionParser(os.Args) + + if obimultiplex.CLIAskConfigTemplate() { + fmt.Print(obimultiplex.CLIConfigTemplate()) + os.Exit(0) + } + + if !obipairing.CLIHasPairedFiles() { + log.Error("You must provide both a forward file (-F) and a reverse file (-R)") + os.Exit(1) + } + pairs, err := obipairing.CLIPairedSequence() if err != nil { diff --git a/pkg/obitools/obipairing/options.go b/pkg/obitools/obipairing/options.go index 0c524fd..3336042 100644 --- a/pkg/obitools/obipairing/options.go +++ b/pkg/obitools/obipairing/options.go @@ -21,12 +21,10 @@ func PairingOptionSet(options *getoptions.GetOpt) { options.StringVar(&_ForwardFile, "forward-reads", "", options.Alias("F"), options.ArgName("FILENAME_F"), - options.Required("You must provide at a forward file"), options.Description("The file names containing the forward reads")) options.StringVar(&_ReverseFile, "reverse-reads", "", options.Alias("R"), options.ArgName("FILENAME_R"), - options.Required("You must provide a reverse file"), options.Description("The file names containing the reverse reads")) options.IntVar(&_Delta, "delta", _Delta, options.Alias("D"), @@ -72,6 +70,10 @@ func CLIPairedSequence() (obiiter.IBioSequence, error) { return paired, nil } +func CLIHasPairedFiles() bool { + return _ForwardFile != "" && _ReverseFile != "" +} + func CLIDelta() int { return _Delta } From 42910c7db90cff1fe830c016faf59576f06c2b23 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 29 Apr 2026 15:29:21 +0200 Subject: [PATCH 2/5] :wrench: Rename mismatch fields to error in pcrtag.go - Renamed `obimultiplex_forward_mismatches` to `obimultiplex_forward_error` for consistency - Similarly renamed `obimultiplex_reverse_mismatches` to `obimultiplex_reverse_error` - Applied changes in both annotation dictionaries (aanot, banot) --- pkg/obitools/obitagpcr/pcrtag.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/obitools/obitagpcr/pcrtag.go b/pkg/obitools/obitagpcr/pcrtag.go index 6215c2a..bd32402 100644 ---
a/pkg/obitools/obitagpcr/pcrtag.go +++ b/pkg/obitools/obitagpcr/pcrtag.go @@ -114,10 +114,10 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence, aanot["obimultiplex_direction"] = direction aanot["obimultiplex_forward_match"] = forward_match - aanot["obimultiplex_forward_mismatches"] = forward_mismatches + aanot["obimultiplex_forward_error"] = forward_mismatches aanot["obimultiplex_reverse_match"] = reverse_match - aanot["obimultiplex_reverse_mismatches"] = reverse_mismatches + aanot["obimultiplex_reverse_error"] = reverse_mismatches aanot["sample"] = sample aanot["experiment"] = experiment @@ -125,10 +125,10 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence, banot["obimultiplex_direction"] = direction banot["obimultiplex_forward_match"] = forward_match - banot["obimultiplex_forward_mismatches"] = forward_mismatches + banot["obimultiplex_forward_error"] = forward_mismatches banot["obimultiplex_reverse_match"] = reverse_match - banot["obimultiplex_reverse_mismatches"] = reverse_mismatches + banot["obimultiplex_reverse_error"] = reverse_mismatches banot["sample"] = sample banot["experiment"] = experiment From 14e2840a2df289bde07439e5adbe89ec3ebf5d00 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 30 Apr 2026 08:14:10 +0200 Subject: [PATCH 3/5] [ngsfilter] Trim whitespace from primer and sample fields Trim leading/trailing whitespace in forward/reverse primers, tags (via sample_tag), experiment and sample fields to prevent parsing errors due to formatting inconsistencies in input data.
--- pkg/obiformats/ngsfilter_read.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/obiformats/ngsfilter_read.go b/pkg/obiformats/ngsfilter_read.go index 57075c0..1755fda 100644 --- a/pkg/obiformats/ngsfilter_read.go +++ b/pkg/obiformats/ngsfilter_read.go @@ -631,9 +631,9 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) { return nil, fmt.Errorf("row %d has %d columns, expected %d", len(data), len(fields), len(header)) } - forward_primer := fields[forward_primerColIndex] - reverse_primer := fields[reverse_primerColIndex] - tags := _parseMainNGSFilterTags(fields[sample_tagColIndex]) + forward_primer := strings.TrimSpace(fields[forward_primerColIndex]) + reverse_primer := strings.TrimSpace(fields[reverse_primerColIndex]) + tags := _parseMainNGSFilterTags(strings.TrimSpace(fields[sample_tagColIndex])) marker, _ := ngsfilter.GetMarker(forward_primer, reverse_primer) pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse) @@ -644,8 +644,8 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) { i, tags.Forward, tags.Reverse, forward_primer, reverse_primer) } - pcr.Experiment = fields[experimentColIndex] - pcr.Sample = fields[sampleColIndex] + pcr.Experiment = strings.TrimSpace(fields[experimentColIndex]) + pcr.Sample = strings.TrimSpace(fields[sampleColIndex]) if extraColumns != nil { pcr.Annotations = make(obiseq.Annotation) From 60b37536736ad78a6ccbdf68cbe86a3cb090a441 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 30 Apr 2026 16:44:28 +0200 Subject: [PATCH 4/5] feat(obiconvert): add --raw-taxid option and refactor taxID formatting - Add new `--raw-taxid` option (`obiconvert --raw-taxid`) to output bare numeric taxIDs instead of full-format strings. - Introduce `TaxNode.FullString()` to always return the complete "code:id [name]@rank" format, regardless of global `UseRawTaxids()` setting.
- Update `.String(taxonomyCode)` to respect the global flag, returning bare ID when `--raw-taxid` is active. - Extract raw taxID from full-format strings in taxonomy methods when needed (e.g., fallback without loaded DB). - Add comprehensive test suite covering: a) `--raw-taxid` execution and idempotency b) full-format taxID output with `--taxonomy` c) interaction of both flags d) format validation - Add test data: new reference files `out_ecotag.fasta` and `taxonomy.csv`, and updated shell script. --- obitests/obitools/obiconvert/out_ecotag.fasta | 24 +++ obitests/obitools/obiconvert/taxonomy.csv | 48 ++++++ obitests/obitools/obiconvert/test.sh | 154 ++++++++++++++++-- obitests/obitools/obiconvert/xxx | 24 +++ pkg/obiseq/taxonomy_methods.go | 8 +- pkg/obitax/taxonnode.go | 32 ++-- 6 files changed, 261 insertions(+), 29 deletions(-) create mode 100644 obitests/obitools/obiconvert/out_ecotag.fasta create mode 100644 obitests/obitools/obiconvert/taxonomy.csv create mode 100644 obitests/obitools/obiconvert/xxx diff --git a/obitests/obitools/obiconvert/out_ecotag.fasta b/obitests/obitools/obiconvert/out_ecotag.fasta new file mode 100644 index 0000000..330e57a --- /dev/null +++ b/obitests/obitools/obiconvert/out_ecotag.fasta @@ -0,0 +1,24 @@ +>HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"} +ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca +gcctgaaactcaaaggacttggcggtgctttacatccct +>HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"}
+ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac +agcttaaaactcaaaggacttggcggtgctttataccctt +>HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"} +ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat +agcttaaaactcaaaggacttggcggtgctttataccctt +>HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"} +ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata +gcttaaaactcaaaggacttggcggtgctttatatccct +>HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"} +ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca +gattaaacctcaaaggacttggcagtgctttatacccct +>HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] {"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"} +ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat +agcttaaaactcaaaggacttggcggtgctttataccctt +>HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] 
{"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"} +ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat +agcttaaaactcaaaggacttggcggtgctttataccctt +>HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"} +ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata +gcttaaaactcaaaggacttggcggtgctttatatccct diff --git a/obitests/obitools/obiconvert/taxonomy.csv b/obitests/obitools/obiconvert/taxonomy.csv new file mode 100644 index 0000000..e3ebe26 --- /dev/null +++ b/obitests/obitools/obiconvert/taxonomy.csv @@ -0,0 +1,48 @@ +taxid,parent,taxonomic_rank,scientific_name +taxon:1 [root]@no rank,taxon:1 [root]@no rank,no rank,root +taxon:131567 [cellular organisms]@cellular root,taxon:1 [root]@no rank,cellular root,cellular organisms +taxon:2759 [Eukaryota]@domain,taxon:131567 [cellular organisms]@cellular root,domain,Eukaryota +taxon:33154 [Opisthokonta]@clade,taxon:2759 [Eukaryota]@domain,clade,Opisthokonta +taxon:33208 [Metazoa]@kingdom,taxon:33154 [Opisthokonta]@clade,kingdom,Metazoa +taxon:6072 [Eumetazoa]@clade,taxon:33208 [Metazoa]@kingdom,clade,Eumetazoa +taxon:33213 [Bilateria]@clade,taxon:6072 [Eumetazoa]@clade,clade,Bilateria +taxon:33511 [Deuterostomia]@clade,taxon:33213 [Bilateria]@clade,clade,Deuterostomia +taxon:7711 [Chordata]@phylum,taxon:33511 [Deuterostomia]@clade,phylum,Chordata +taxon:89593 [Craniata]@subphylum,taxon:7711 [Chordata]@phylum,subphylum,Craniata +taxon:7742 [Vertebrata]@clade,taxon:89593 [Craniata]@subphylum,clade,Vertebrata +taxon:7776 [Gnathostomata]@clade,taxon:7742 
[Vertebrata]@clade,clade,Gnathostomata +taxon:117570 [Teleostomi]@clade,taxon:7776 [Gnathostomata]@clade,clade,Teleostomi +taxon:117571 [Euteleostomi]@clade,taxon:117570 [Teleostomi]@clade,clade,Euteleostomi +taxon:8287 [Sarcopterygii]@superclass,taxon:117571 [Euteleostomi]@clade,superclass,Sarcopterygii +taxon:1338369 [Dipnotetrapodomorpha]@clade,taxon:8287 [Sarcopterygii]@superclass,clade,Dipnotetrapodomorpha +taxon:32523 [Tetrapoda]@clade,taxon:1338369 [Dipnotetrapodomorpha]@clade,clade,Tetrapoda +taxon:32524 [Amniota]@clade,taxon:32523 [Tetrapoda]@clade,clade,Amniota +taxon:40674 [Mammalia]@class,taxon:32524 [Amniota]@clade,class,Mammalia +taxon:32525 [Theria]@clade,taxon:40674 [Mammalia]@class,clade,Theria +taxon:9347 [Eutheria]@clade,taxon:32525 [Theria]@clade,clade,Eutheria +taxon:1437010 [Boreoeutheria]@clade,taxon:9347 [Eutheria]@clade,clade,Boreoeutheria +taxon:314146 [Euarchontoglires]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Euarchontoglires +taxon:314145 [Laurasiatheria]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Laurasiatheria +taxon:33554 [Carnivora]@order,taxon:314145 [Laurasiatheria]@superorder,order,Carnivora +taxon:91561 [Artiodactyla]@order,taxon:314145 [Laurasiatheria]@superorder,order,Artiodactyla +taxon:314147 [Glires]@clade,taxon:314146 [Euarchontoglires]@superorder,clade,Glires +taxon:9845 [Ruminantia]@suborder,taxon:91561 [Artiodactyla]@order,suborder,Ruminantia +taxon:35500 [Pecora]@infraorder,taxon:9845 [Ruminantia]@suborder,infraorder,Pecora +taxon:9989 [Rodentia]@order,taxon:314147 [Glires]@clade,order,Rodentia +taxon:379584 [Caniformia]@suborder,taxon:33554 [Carnivora]@order,suborder,Caniformia +taxon:9608 [Canidae]@family,taxon:379584 [Caniformia]@suborder,family,Canidae +taxon:9850 [Cervidae]@family,taxon:35500 [Pecora]@infraorder,family,Cervidae +taxon:9881 [Odocoileinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Odocoileinae +taxon:33553 [Sciuromorpha]@suborder,taxon:9989 
[Rodentia]@order,suborder,Sciuromorpha +taxon:55153 [Sciuridae]@family,taxon:33553 [Sciuromorpha]@suborder,family,Sciuridae +taxon:34878 [Cervinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Cervinae +taxon:9611 [Canis]@genus,taxon:9608 [Canidae]@family,genus,Canis +taxon:9857 [Capreolus]@genus,taxon:9881 [Odocoileinae]@subfamily,genus,Capreolus +taxon:9612 [Canis lupus]@species,taxon:9611 [Canis]@genus,species,Canis lupus +taxon:337726 [Xerinae]@subfamily,taxon:55153 [Sciuridae]@family,subfamily,Xerinae +taxon:9859 [Cervus]@genus,taxon:34878 [Cervinae]@subfamily,genus,Cervus +taxon:337730 [Marmotini]@tribe,taxon:337726 [Xerinae]@subfamily,tribe,Marmotini +taxon:9992 [Marmota]@genus,taxon:337730 [Marmotini]@tribe,genus,Marmota +taxon:9860 [Cervus elaphus]@species,taxon:9859 [Cervus]@genus,species,Cervus elaphus +taxon:9615 [Canis lupus familiaris]@subspecies,taxon:9612 [Canis lupus]@species,subspecies,Canis lupus familiaris +taxon:9858 [Capreolus capreolus]@species,taxon:9857 [Capreolus]@genus,species,Capreolus capreolus diff --git a/obitests/obitools/obiconvert/test.sh b/obitests/obitools/obiconvert/test.sh index 7fd72c1..a55cdec 100755 --- a/obitests/obitools/obiconvert/test.sh +++ b/obitests/obitools/obiconvert/test.sh @@ -44,7 +44,7 @@ cleanup() { rm -rf "$TMPDIR" # Suppress the temporary directory if [ $failed -gt 0 ]; then - log "$TEST_NAME tests failed" + log "$TEST_NAME tests failed" log log exit 1 @@ -60,10 +60,10 @@ log() { echo -e "[$TEST_NAME @ $(date)] $*" 1>&2 } -log "Testing $TEST_NAME..." -log "Test directory is $TEST_DIR" -log "obitools directory is $OBITOOLS_DIR" -log "Temporary directory is $TMPDIR" +log "Testing $TEST_NAME..." 
+log "Test directory is $TEST_DIR" +log "obitools directory is $OBITOOLS_DIR" +log "Temporary directory is $TMPDIR" log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)" ###################################################################### @@ -94,12 +94,12 @@ log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)" ((ntest++)) -if $CMD -h > "${TMPDIR}/help.txt" 2>&1 +if $CMD -h > "${TMPDIR}/help.txt" 2>&1 then - log "$MCMD: printing help OK" + log "$MCMD: printing help OK" ((success++)) else - log "$MCMD: printing help failed" + log "$MCMD: printing help failed" ((failed++)) fi @@ -108,15 +108,15 @@ fi if obiconvert -Z "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \ > "${TMPDIR}/xxx.fasta.gz" && \ zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \ - "${TMPDIR}/xxx.fasta.gz" + "${TMPDIR}/xxx.fasta.gz" then - log "$MCMD: converting large fasta file to fasta OK" + log "$MCMD: converting large fasta file to fasta OK" ((success++)) else - log "$MCMD: converting large fasta file to fasta failed" + log "$MCMD: converting large fasta file to fasta failed" ((failed++)) fi - + ((ntest++)) if obiconvert -Z --fastq-output \ "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \ @@ -125,15 +125,139 @@ if obiconvert -Z --fastq-output \ "${TMPDIR}/xxx.fastq.gz" \ > "${TMPDIR}/yyy.fasta.gz" && \ zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \ - "${TMPDIR}/yyy.fasta.gz" + "${TMPDIR}/yyy.fasta.gz" then - log "$MCMD: converting large file between fasta and fastq OK" + log "$MCMD: converting large file between fasta and fastq OK" ((success++)) else - log "$MCMD: converting large file between fasta and fastq failed" + log "$MCMD: converting large file between fasta and fastq failed" ((failed++)) fi + +# ------------------------------------------------------------------ +# --raw-taxid tests (no taxonomy loaded) +# ------------------------------------------------------------------ + +# Running test +((ntest++)) +if obiconvert --raw-taxid "${TEST_DIR}/out_ecotag.fasta" \ + > 
"${TMPDIR}/raw_taxid.fasta" 2>/dev/null +then + log "$MCMD --raw-taxid: running OK" + ((success++)) +else + log "$MCMD --raw-taxid: running failed" + ((failed++)) +fi + +# Taxids must be bare numbers — no full-format "taxon:ID [Name]@rank" strings +((ntest++)) +if grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -qv '"taxid":"[0-9][0-9]*"' +then + log "$MCMD --raw-taxid: taxid format check failed (full-format taxid found)" + ((failed++)) +else + log "$MCMD --raw-taxid: taxid format OK (all taxids are bare numbers)" + ((success++)) +fi + +# --raw-taxid is idempotent: piping through a second obiconvert --raw-taxid must +# produce bit-for-bit identical output. +((ntest++)) +if obiconvert --raw-taxid "${TMPDIR}/raw_taxid.fasta" \ + > "${TMPDIR}/raw_taxid2.fasta" 2>/dev/null +then + log "$MCMD --raw-taxid piped: running OK" + ((success++)) +else + log "$MCMD --raw-taxid piped: running failed" + ((failed++)) +fi + +((ntest++)) +if diff "${TMPDIR}/raw_taxid.fasta" \ + "${TMPDIR}/raw_taxid2.fasta" > /dev/null +then + log "$MCMD --raw-taxid piped: idempotency OK" + ((success++)) +else + log "$MCMD --raw-taxid piped: idempotency failed (outputs differ)" + ((failed++)) +fi + + +# ------------------------------------------------------------------ +# --taxonomy tests (full-format taxid, no --raw-taxid) +# ------------------------------------------------------------------ + +# Running test +((ntest++)) +if obiconvert --taxonomy "${TEST_DIR}/taxonomy.csv" \ + "${TEST_DIR}/out_ecotag.fasta" \ + > "${TMPDIR}/taxo.fasta" 2>/dev/null +then + log "$MCMD --taxonomy: running OK" + ((success++)) +else + log "$MCMD --taxonomy: running failed" + ((failed++)) +fi + +# Taxids must be in full "taxon:ID [Name]@rank" format +((ntest++)) +if grep '"taxid"' "${TMPDIR}/taxo.fasta" | grep -q '"taxid":"taxon:[0-9]' +then + log "$MCMD --taxonomy: taxid format OK (full-format taxids present)" + ((success++)) +else + log "$MCMD --taxonomy: taxid format check failed (no full-format taxid found)" + 
((failed++)) +fi + + +# ------------------------------------------------------------------ +# --raw-taxid --taxonomy tests +# ------------------------------------------------------------------ + +# Running test +((ntest++)) +if obiconvert --raw-taxid --taxonomy "${TEST_DIR}/taxonomy.csv" \ + "${TEST_DIR}/out_ecotag.fasta" \ + > "${TMPDIR}/raw_taxid_taxo.fasta" 2>/dev/null +then + log "$MCMD --raw-taxid --taxonomy: running OK" + ((success++)) +else + log "$MCMD --raw-taxid --taxonomy: running failed" + ((failed++)) +fi + +# Taxids must be bare numbers even when taxonomy is loaded +((ntest++)) +if grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -qv '"taxid":"[0-9][0-9]*"' +then + log "$MCMD --raw-taxid --taxonomy: taxid format check failed (full-format taxid found)" + ((failed++)) +else + log "$MCMD --raw-taxid --taxonomy: taxid format OK (all taxids are bare numbers)" + ((success++)) +fi + +# --raw-taxid with or without taxonomy must yield identical taxid values +((ntest++)) +if diff <(grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -o '"taxid":"[^"]*"' | sort) \ + <(grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -o '"taxid":"[^"]*"' | sort) \ + > /dev/null +then + log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values match OK" + ((success++)) +else + log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values differ (unexpected)" + ((failed++)) +fi + + ######################################### # # At the end of the tests diff --git a/obitests/obitools/obiconvert/xxx b/obitests/obitools/obiconvert/xxx new file mode 100644 index 0000000..330e57a --- /dev/null +++ b/obitests/obitools/obiconvert/xxx @@ -0,0 +1,24 @@ +>HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"} 
+ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca +gcctgaaactcaaaggacttggcggtgctttacatccct +>HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"} +ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac +agcttaaaactcaaaggacttggcggtgctttataccctt +>HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"} +ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat +agcttaaaactcaaaggacttggcggtgctttataccctt +>HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"} +ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata +gcttaaaactcaaaggacttggcggtgctttatatccct +>HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"} +ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca +gattaaacctcaaaggacttggcagtgctttatacccct +>HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] 
{"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"} +ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat +agcttaaaactcaaaggacttggcggtgctttataccctt +>HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] {"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"} +ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat +agcttaaaactcaaaggacttggcggtgctttataccctt +>HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"} +ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata +gcttaaaactcaaaggacttggcggtgctttatatccct diff --git a/pkg/obiseq/taxonomy_methods.go b/pkg/obiseq/taxonomy_methods.go index ef1b0d9..e11356d 100644 --- a/pkg/obiseq/taxonomy_methods.go +++ b/pkg/obiseq/taxonomy_methods.go @@ -70,6 +70,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) { } } + } else if obidefault.UseRawTaxids() { + // Without a loaded taxonomy, extract the bare ID from full-format strings + // like "code:12345 [Name]@rank" so that --raw-taxid is honoured everywhere. 
+ if _, rawID, _, _, parseErr := obitax.ParseTaxonString(taxid); parseErr == nil { + taxid = rawID + } } } @@ -177,7 +183,7 @@ func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string { lpath := path.Len() - 1 for i := lpath; i >= 0; i-- { - spath[lpath-i] = path.Get(i).String(taxonomy.Code()) + spath[lpath-i] = path.Get(i).FullString(taxonomy.Code()) } sequence.SetAttribute("taxonomic_path", spath) diff --git a/pkg/obitax/taxonnode.go b/pkg/obitax/taxonnode.go index e38566c..72bb7e1 100644 --- a/pkg/obitax/taxonnode.go +++ b/pkg/obitax/taxonnode.go @@ -29,6 +29,24 @@ type TaxNode struct { alternatenames *map[*string]*string } +// FullString returns the full string representation of the TaxNode in the form +// "taxonomyCode:id [scientificName]@rank", regardless of the UseRawTaxids setting. +// This is used internally when a parseable format is required (e.g. taxonomic_path). +func (node *TaxNode) FullString(taxonomyCode string) string { + if node.HasScientificName() { + return fmt.Sprintf("%s:%v [%s]@%s", + taxonomyCode, + *node.id, + node.ScientificName(), + node.Rank(), + ) + } + + return fmt.Sprintf("%s:%v", + taxonomyCode, + *node.id) +} + // String returns a string representation of the TaxNode, including the taxonomy code, // the node ID, and the scientific name. The output format is "taxonomyCode:id [scientificName]". // @@ -42,19 +60,7 @@ func (node *TaxNode) String(taxonomyCode string) string { return *node.id } - if node.HasScientificName() { - return fmt.Sprintf("%s:%v [%s]@%s", - taxonomyCode, - *node.id, - node.ScientificName(), - node.Rank(), - ) - } - - return fmt.Sprintf("%s:%v", - taxonomyCode, - *node.id) - + return node.FullString(taxonomyCode) } // Id returns the unique identifier of the TaxNode. 
From 6c4a6c697c6f460acff64c2e6de5f7b8733f6871 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 30 Apr 2026 16:57:45 +0200 Subject: [PATCH 5/5] [4.4.2] Enhanced taxonomy handling, input robustness & PCR tag validation - **obiconvert**: Added `--raw-taxid` mode to output numeric taxIDs without formatting (e.g., "9992" instead of "taxon:9992 [Marmota]@genus"). Introduced `TaxNode.FullString()` to reliably return full formatted strings regardless of global settings, and improved fallback behavior when taxonomy DB is unavailable. - **ngsfilter**: Input fields (primers, sample tags/IDs) are now automatically trimmed of leading/trailing whitespace to prevent parsing failures from inconsistent formatting. - **obitagpcr (pcrtag)**: Mismatch-related annotation keys (`obimultiplex_forward_mismatches`, `obimultiplex_reverse_mismatches`) renamed to `obimultiplex_forward_error` and `obimultiplex_reverse_error` for consistency across annotation dictionaries. - **obipairing & obitagpcr**: Enforced mandatory paired-end file input (`--forward-reads` and `--reverse-reads`) in both tools; added CLI support for generating config templates in obitagpcr via `CLIAskConfigTemplate()`; removed redundant `Required()` constraints and introduced helper function `CLIHasPairedFiles()`. --- pkg/obioptions/version.go | 2 +- version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 3a47141..ef7733c 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -3,7 +3,7 @@ package obioptions // Version is automatically updated by the Makefile from version.txt // The patch number (third digit) is incremented on each push to the repository -var _Version = "Release 4.4.41" +var _Version = "Release 4.4.42" // Version returns the version of the obitools package. // diff --git a/version.txt b/version.txt index 8be79b9..cdc0aa1 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -4.4.41 +4.4.42