Merge pull request #115 from metabarcoding/push-lkzqoskvyqtr

[4.4.2] Enhanced taxonomy handling, input robustness & PCR tag validation
This commit is contained in:
coissac
2026-04-30 16:59:49 +02:00
committed by GitHub
13 changed files with 294 additions and 42 deletions
+5
View File
@@ -37,6 +37,11 @@ func main() {
optionParser(os.Args)
if !obipairing.CLIHasPairedFiles() {
log.Error("You must provide both a forward file (-F) and a reverse file (-R)")
os.Exit(1)
}
obidefault.SetStrictReadWorker(2)
obidefault.SetStrictWriteWorker(2)
pairs, err := obipairing.CLIPairedSequence()
+13
View File
@@ -1,6 +1,7 @@
package main
import (
"fmt"
"os"
log "github.com/sirupsen/logrus"
@@ -8,6 +9,7 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimultiplex"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitagpcr"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
@@ -39,6 +41,17 @@ func main() {
obitagpcr.OptionSet)
optionParser(os.Args)
if obimultiplex.CLIAskConfigTemplate() {
fmt.Print(obimultiplex.CLIConfigTemplate())
os.Exit(0)
}
if !obipairing.CLIHasPairedFiles() {
log.Error("You must provide both a forward file (-F) and a reverse file (-R)")
os.Exit(1)
}
pairs, err := obipairing.CLIPairedSequence()
if err != nil {
@@ -0,0 +1,24 @@
>HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"}
ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca
gcctgaaactcaaaggacttggcggtgctttacatccct
>HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"}
ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac
agcttaaaactcaaaggacttggcggtgctttataccctt
>HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"}
ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat
agcttaaaactcaaaggacttggcggtgctttataccctt
>HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"}
ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata
gcttaaaactcaaaggacttggcggtgctttatatccct
>HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"}
ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca
gattaaacctcaaaggacttggcagtgctttatacccct
>HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] {"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
agcttaaaactcaaaggacttggcggtgctttataccctt
>HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] {"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat
agcttaaaactcaaaggacttggcggtgctttataccctt
>HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"}
ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata
gcttaaaactcaaaggacttggcggtgctttatatccct
+48
View File
@@ -0,0 +1,48 @@
taxid,parent,taxonomic_rank,scientific_name
taxon:1 [root]@no rank,taxon:1 [root]@no rank,no rank,root
taxon:131567 [cellular organisms]@cellular root,taxon:1 [root]@no rank,cellular root,cellular organisms
taxon:2759 [Eukaryota]@domain,taxon:131567 [cellular organisms]@cellular root,domain,Eukaryota
taxon:33154 [Opisthokonta]@clade,taxon:2759 [Eukaryota]@domain,clade,Opisthokonta
taxon:33208 [Metazoa]@kingdom,taxon:33154 [Opisthokonta]@clade,kingdom,Metazoa
taxon:6072 [Eumetazoa]@clade,taxon:33208 [Metazoa]@kingdom,clade,Eumetazoa
taxon:33213 [Bilateria]@clade,taxon:6072 [Eumetazoa]@clade,clade,Bilateria
taxon:33511 [Deuterostomia]@clade,taxon:33213 [Bilateria]@clade,clade,Deuterostomia
taxon:7711 [Chordata]@phylum,taxon:33511 [Deuterostomia]@clade,phylum,Chordata
taxon:89593 [Craniata]@subphylum,taxon:7711 [Chordata]@phylum,subphylum,Craniata
taxon:7742 [Vertebrata]@clade,taxon:89593 [Craniata]@subphylum,clade,Vertebrata
taxon:7776 [Gnathostomata]@clade,taxon:7742 [Vertebrata]@clade,clade,Gnathostomata
taxon:117570 [Teleostomi]@clade,taxon:7776 [Gnathostomata]@clade,clade,Teleostomi
taxon:117571 [Euteleostomi]@clade,taxon:117570 [Teleostomi]@clade,clade,Euteleostomi
taxon:8287 [Sarcopterygii]@superclass,taxon:117571 [Euteleostomi]@clade,superclass,Sarcopterygii
taxon:1338369 [Dipnotetrapodomorpha]@clade,taxon:8287 [Sarcopterygii]@superclass,clade,Dipnotetrapodomorpha
taxon:32523 [Tetrapoda]@clade,taxon:1338369 [Dipnotetrapodomorpha]@clade,clade,Tetrapoda
taxon:32524 [Amniota]@clade,taxon:32523 [Tetrapoda]@clade,clade,Amniota
taxon:40674 [Mammalia]@class,taxon:32524 [Amniota]@clade,class,Mammalia
taxon:32525 [Theria]@clade,taxon:40674 [Mammalia]@class,clade,Theria
taxon:9347 [Eutheria]@clade,taxon:32525 [Theria]@clade,clade,Eutheria
taxon:1437010 [Boreoeutheria]@clade,taxon:9347 [Eutheria]@clade,clade,Boreoeutheria
taxon:314146 [Euarchontoglires]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Euarchontoglires
taxon:314145 [Laurasiatheria]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Laurasiatheria
taxon:33554 [Carnivora]@order,taxon:314145 [Laurasiatheria]@superorder,order,Carnivora
taxon:91561 [Artiodactyla]@order,taxon:314145 [Laurasiatheria]@superorder,order,Artiodactyla
taxon:314147 [Glires]@clade,taxon:314146 [Euarchontoglires]@superorder,clade,Glires
taxon:9845 [Ruminantia]@suborder,taxon:91561 [Artiodactyla]@order,suborder,Ruminantia
taxon:35500 [Pecora]@infraorder,taxon:9845 [Ruminantia]@suborder,infraorder,Pecora
taxon:9989 [Rodentia]@order,taxon:314147 [Glires]@clade,order,Rodentia
taxon:379584 [Caniformia]@suborder,taxon:33554 [Carnivora]@order,suborder,Caniformia
taxon:9608 [Canidae]@family,taxon:379584 [Caniformia]@suborder,family,Canidae
taxon:9850 [Cervidae]@family,taxon:35500 [Pecora]@infraorder,family,Cervidae
taxon:9881 [Odocoileinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Odocoileinae
taxon:33553 [Sciuromorpha]@suborder,taxon:9989 [Rodentia]@order,suborder,Sciuromorpha
taxon:55153 [Sciuridae]@family,taxon:33553 [Sciuromorpha]@suborder,family,Sciuridae
taxon:34878 [Cervinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Cervinae
taxon:9611 [Canis]@genus,taxon:9608 [Canidae]@family,genus,Canis
taxon:9857 [Capreolus]@genus,taxon:9881 [Odocoileinae]@subfamily,genus,Capreolus
taxon:9612 [Canis lupus]@species,taxon:9611 [Canis]@genus,species,Canis lupus
taxon:337726 [Xerinae]@subfamily,taxon:55153 [Sciuridae]@family,subfamily,Xerinae
taxon:9859 [Cervus]@genus,taxon:34878 [Cervinae]@subfamily,genus,Cervus
taxon:337730 [Marmotini]@tribe,taxon:337726 [Xerinae]@subfamily,tribe,Marmotini
taxon:9992 [Marmota]@genus,taxon:337730 [Marmotini]@tribe,genus,Marmota
taxon:9860 [Cervus elaphus]@species,taxon:9859 [Cervus]@genus,species,Cervus elaphus
taxon:9615 [Canis lupus familiaris]@subspecies,taxon:9612 [Canis lupus]@species,subspecies,Canis lupus familiaris
taxon:9858 [Capreolus capreolus]@species,taxon:9857 [Capreolus]@genus,species,Capreolus capreolus
1 taxid parent taxonomic_rank scientific_name
2 taxon:1 [root]@no rank taxon:1 [root]@no rank no rank root
3 taxon:131567 [cellular organisms]@cellular root taxon:1 [root]@no rank cellular root cellular organisms
4 taxon:2759 [Eukaryota]@domain taxon:131567 [cellular organisms]@cellular root domain Eukaryota
5 taxon:33154 [Opisthokonta]@clade taxon:2759 [Eukaryota]@domain clade Opisthokonta
6 taxon:33208 [Metazoa]@kingdom taxon:33154 [Opisthokonta]@clade kingdom Metazoa
7 taxon:6072 [Eumetazoa]@clade taxon:33208 [Metazoa]@kingdom clade Eumetazoa
8 taxon:33213 [Bilateria]@clade taxon:6072 [Eumetazoa]@clade clade Bilateria
9 taxon:33511 [Deuterostomia]@clade taxon:33213 [Bilateria]@clade clade Deuterostomia
10 taxon:7711 [Chordata]@phylum taxon:33511 [Deuterostomia]@clade phylum Chordata
11 taxon:89593 [Craniata]@subphylum taxon:7711 [Chordata]@phylum subphylum Craniata
12 taxon:7742 [Vertebrata]@clade taxon:89593 [Craniata]@subphylum clade Vertebrata
13 taxon:7776 [Gnathostomata]@clade taxon:7742 [Vertebrata]@clade clade Gnathostomata
14 taxon:117570 [Teleostomi]@clade taxon:7776 [Gnathostomata]@clade clade Teleostomi
15 taxon:117571 [Euteleostomi]@clade taxon:117570 [Teleostomi]@clade clade Euteleostomi
16 taxon:8287 [Sarcopterygii]@superclass taxon:117571 [Euteleostomi]@clade superclass Sarcopterygii
17 taxon:1338369 [Dipnotetrapodomorpha]@clade taxon:8287 [Sarcopterygii]@superclass clade Dipnotetrapodomorpha
18 taxon:32523 [Tetrapoda]@clade taxon:1338369 [Dipnotetrapodomorpha]@clade clade Tetrapoda
19 taxon:32524 [Amniota]@clade taxon:32523 [Tetrapoda]@clade clade Amniota
20 taxon:40674 [Mammalia]@class taxon:32524 [Amniota]@clade class Mammalia
21 taxon:32525 [Theria]@clade taxon:40674 [Mammalia]@class clade Theria
22 taxon:9347 [Eutheria]@clade taxon:32525 [Theria]@clade clade Eutheria
23 taxon:1437010 [Boreoeutheria]@clade taxon:9347 [Eutheria]@clade clade Boreoeutheria
24 taxon:314146 [Euarchontoglires]@superorder taxon:1437010 [Boreoeutheria]@clade superorder Euarchontoglires
25 taxon:314145 [Laurasiatheria]@superorder taxon:1437010 [Boreoeutheria]@clade superorder Laurasiatheria
26 taxon:33554 [Carnivora]@order taxon:314145 [Laurasiatheria]@superorder order Carnivora
27 taxon:91561 [Artiodactyla]@order taxon:314145 [Laurasiatheria]@superorder order Artiodactyla
28 taxon:314147 [Glires]@clade taxon:314146 [Euarchontoglires]@superorder clade Glires
29 taxon:9845 [Ruminantia]@suborder taxon:91561 [Artiodactyla]@order suborder Ruminantia
30 taxon:35500 [Pecora]@infraorder taxon:9845 [Ruminantia]@suborder infraorder Pecora
31 taxon:9989 [Rodentia]@order taxon:314147 [Glires]@clade order Rodentia
32 taxon:379584 [Caniformia]@suborder taxon:33554 [Carnivora]@order suborder Caniformia
33 taxon:9608 [Canidae]@family taxon:379584 [Caniformia]@suborder family Canidae
34 taxon:9850 [Cervidae]@family taxon:35500 [Pecora]@infraorder family Cervidae
35 taxon:9881 [Odocoileinae]@subfamily taxon:9850 [Cervidae]@family subfamily Odocoileinae
36 taxon:33553 [Sciuromorpha]@suborder taxon:9989 [Rodentia]@order suborder Sciuromorpha
37 taxon:55153 [Sciuridae]@family taxon:33553 [Sciuromorpha]@suborder family Sciuridae
38 taxon:34878 [Cervinae]@subfamily taxon:9850 [Cervidae]@family subfamily Cervinae
39 taxon:9611 [Canis]@genus taxon:9608 [Canidae]@family genus Canis
40 taxon:9857 [Capreolus]@genus taxon:9881 [Odocoileinae]@subfamily genus Capreolus
41 taxon:9612 [Canis lupus]@species taxon:9611 [Canis]@genus species Canis lupus
42 taxon:337726 [Xerinae]@subfamily taxon:55153 [Sciuridae]@family subfamily Xerinae
43 taxon:9859 [Cervus]@genus taxon:34878 [Cervinae]@subfamily genus Cervus
44 taxon:337730 [Marmotini]@tribe taxon:337726 [Xerinae]@subfamily tribe Marmotini
45 taxon:9992 [Marmota]@genus taxon:337730 [Marmotini]@tribe genus Marmota
46 taxon:9860 [Cervus elaphus]@species taxon:9859 [Cervus]@genus species Cervus elaphus
47 taxon:9615 [Canis lupus familiaris]@subspecies taxon:9612 [Canis lupus]@species subspecies Canis lupus familiaris
48 taxon:9858 [Capreolus capreolus]@species taxon:9857 [Capreolus]@genus species Capreolus capreolus
+139 -15
View File
@@ -44,7 +44,7 @@ cleanup() {
rm -rf "$TMPDIR" # Suppress the temporary directory
if [ $failed -gt 0 ]; then
log "$TEST_NAME tests failed"
log "$TEST_NAME tests failed"
log
log
exit 1
@@ -60,10 +60,10 @@ log() {
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
}
log "Testing $TEST_NAME..."
log "Test directory is $TEST_DIR"
log "obitools directory is $OBITOOLS_DIR"
log "Temporary directory is $TMPDIR"
log "Testing $TEST_NAME..."
log "Test directory is $TEST_DIR"
log "obitools directory is $OBITOOLS_DIR"
log "Temporary directory is $TMPDIR"
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
######################################################################
@@ -94,12 +94,12 @@ log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
((ntest++))
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
then
log "$MCMD: printing help OK"
log "$MCMD: printing help OK"
((success++))
else
log "$MCMD: printing help failed"
log "$MCMD: printing help failed"
((failed++))
fi
@@ -108,15 +108,15 @@ fi
if obiconvert -Z "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
> "${TMPDIR}/xxx.fasta.gz" && \
zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
"${TMPDIR}/xxx.fasta.gz"
"${TMPDIR}/xxx.fasta.gz"
then
log "$MCMD: converting large fasta file to fasta OK"
log "$MCMD: converting large fasta file to fasta OK"
((success++))
else
log "$MCMD: converting large fasta file to fasta failed"
log "$MCMD: converting large fasta file to fasta failed"
((failed++))
fi
((ntest++))
if obiconvert -Z --fastq-output \
"${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
@@ -125,15 +125,139 @@ if obiconvert -Z --fastq-output \
"${TMPDIR}/xxx.fastq.gz" \
> "${TMPDIR}/yyy.fasta.gz" && \
zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
"${TMPDIR}/yyy.fasta.gz"
"${TMPDIR}/yyy.fasta.gz"
then
log "$MCMD: converting large file between fasta and fastq OK"
log "$MCMD: converting large file between fasta and fastq OK"
((success++))
else
log "$MCMD: converting large file between fasta and fastq failed"
log "$MCMD: converting large file between fasta and fastq failed"
((failed++))
fi
# ------------------------------------------------------------------
# --raw-taxid tests (no taxonomy loaded)
# ------------------------------------------------------------------
# Running test
((ntest++))
if obiconvert --raw-taxid "${TEST_DIR}/out_ecotag.fasta" \
> "${TMPDIR}/raw_taxid.fasta" 2>/dev/null
then
log "$MCMD --raw-taxid: running OK"
((success++))
else
log "$MCMD --raw-taxid: running failed"
((failed++))
fi
# Taxids must be bare numbers — no full-format "taxon:ID [Name]@rank" strings
((ntest++))
if grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -qv '"taxid":"[0-9][0-9]*"'
then
log "$MCMD --raw-taxid: taxid format check failed (full-format taxid found)"
((failed++))
else
log "$MCMD --raw-taxid: taxid format OK (all taxids are bare numbers)"
((success++))
fi
# --raw-taxid is idempotent: piping through a second obiconvert --raw-taxid must
# produce bit-for-bit identical output.
((ntest++))
if obiconvert --raw-taxid "${TMPDIR}/raw_taxid.fasta" \
> "${TMPDIR}/raw_taxid2.fasta" 2>/dev/null
then
log "$MCMD --raw-taxid piped: running OK"
((success++))
else
log "$MCMD --raw-taxid piped: running failed"
((failed++))
fi
((ntest++))
if diff "${TMPDIR}/raw_taxid.fasta" \
"${TMPDIR}/raw_taxid2.fasta" > /dev/null
then
log "$MCMD --raw-taxid piped: idempotency OK"
((success++))
else
log "$MCMD --raw-taxid piped: idempotency failed (outputs differ)"
((failed++))
fi
# ------------------------------------------------------------------
# --taxonomy tests (full-format taxid, no --raw-taxid)
# ------------------------------------------------------------------
# Running test
((ntest++))
if obiconvert --taxonomy "${TEST_DIR}/taxonomy.csv" \
"${TEST_DIR}/out_ecotag.fasta" \
> "${TMPDIR}/taxo.fasta" 2>/dev/null
then
log "$MCMD --taxonomy: running OK"
((success++))
else
log "$MCMD --taxonomy: running failed"
((failed++))
fi
# Taxids must be in full "taxon:ID [Name]@rank" format
((ntest++))
if grep '"taxid"' "${TMPDIR}/taxo.fasta" | grep -q '"taxid":"taxon:[0-9]'
then
log "$MCMD --taxonomy: taxid format OK (full-format taxids present)"
((success++))
else
log "$MCMD --taxonomy: taxid format check failed (no full-format taxid found)"
((failed++))
fi
# ------------------------------------------------------------------
# --raw-taxid --taxonomy tests
# ------------------------------------------------------------------
# Running test
((ntest++))
if obiconvert --raw-taxid --taxonomy "${TEST_DIR}/taxonomy.csv" \
"${TEST_DIR}/out_ecotag.fasta" \
> "${TMPDIR}/raw_taxid_taxo.fasta" 2>/dev/null
then
log "$MCMD --raw-taxid --taxonomy: running OK"
((success++))
else
log "$MCMD --raw-taxid --taxonomy: running failed"
((failed++))
fi
# Taxids must be bare numbers even when taxonomy is loaded
((ntest++))
if grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -qv '"taxid":"[0-9][0-9]*"'
then
log "$MCMD --raw-taxid --taxonomy: taxid format check failed (full-format taxid found)"
((failed++))
else
log "$MCMD --raw-taxid --taxonomy: taxid format OK (all taxids are bare numbers)"
((success++))
fi
# --raw-taxid with or without taxonomy must yield identical taxid values
((ntest++))
if diff <(grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -o '"taxid":"[^"]*"' | sort) \
<(grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -o '"taxid":"[^"]*"' | sort) \
> /dev/null
then
log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values match OK"
((success++))
else
log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values differ (unexpected)"
((failed++))
fi
#########################################
#
# At the end of the tests
+24
View File
@@ -0,0 +1,24 @@
>HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"}
ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca
gcctgaaactcaaaggacttggcggtgctttacatccct
>HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"}
ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac
agcttaaaactcaaaggacttggcggtgctttataccctt
>HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"}
ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat
agcttaaaactcaaaggacttggcggtgctttataccctt
>HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"}
ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata
gcttaaaactcaaaggacttggcggtgctttatatccct
>HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"}
ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca
gattaaacctcaaaggacttggcagtgctttatacccct
>HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] {"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
agcttaaaactcaaaggacttggcggtgctttataccctt
>HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] {"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat
agcttaaaactcaaaggacttggcggtgctttataccctt
>HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"}
ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata
gcttaaaactcaaaggacttggcggtgctttatatccct
+5 -5
View File
@@ -631,9 +631,9 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
return nil, fmt.Errorf("row %d has %d columns, expected %d", len(data), len(fields), len(header))
}
forward_primer := fields[forward_primerColIndex]
reverse_primer := fields[reverse_primerColIndex]
tags := _parseMainNGSFilterTags(fields[sample_tagColIndex])
forward_primer := strings.TrimSpace(fields[forward_primerColIndex])
reverse_primer := strings.TrimSpace(fields[reverse_primerColIndex])
tags := _parseMainNGSFilterTags(strings.TrimSpace(fields[sample_tagColIndex]))
marker, _ := ngsfilter.GetMarker(forward_primer, reverse_primer)
pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)
@@ -644,8 +644,8 @@ func ReadCSVNGSFilter(reader io.Reader) (*obingslibrary.NGSLibrary, error) {
i, tags.Forward, tags.Reverse, forward_primer, reverse_primer)
}
pcr.Experiment = fields[experimentColIndex]
pcr.Sample = fields[sampleColIndex]
pcr.Experiment = strings.TrimSpace(fields[experimentColIndex])
pcr.Sample = strings.TrimSpace(fields[sampleColIndex])
if extraColumns != nil {
pcr.Annotations = make(obiseq.Annotation)
+1 -1
View File
@@ -3,7 +3,7 @@ package obioptions
// Version is automatically updated by the Makefile from version.txt
// The patch number (third digit) is incremented on each push to the repository
var _Version = "Release 4.4.41"
var _Version = "Release 4.4.42"
// Version returns the version of the obitools package.
//
+7 -1
View File
@@ -70,6 +70,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
}
}
} else if obidefault.UseRawTaxids() {
// Without a loaded taxonomy, extract the bare ID from full-format strings
// like "code:12345 [Name]@rank" so that --raw-taxid is honoured everywhere.
if _, rawID, _, _, parseErr := obitax.ParseTaxonString(taxid); parseErr == nil {
taxid = rawID
}
}
}
@@ -177,7 +183,7 @@ func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
lpath := path.Len() - 1
for i := lpath; i >= 0; i-- {
spath[lpath-i] = path.Get(i).String(taxonomy.Code())
spath[lpath-i] = path.Get(i).FullString(taxonomy.Code())
}
sequence.SetAttribute("taxonomic_path", spath)
+19 -13
View File
@@ -29,6 +29,24 @@ type TaxNode struct {
alternatenames *map[*string]*string
}
// FullString renders the node in its complete, parseable textual form. It does
// not consult the raw-taxid setting, so the output shape depends only on the
// node itself:
//
//   - "taxonomyCode:id [scientificName]@rank" when the node has a scientific name;
//   - "taxonomyCode:id" otherwise.
//
// taxonomyCode is the prefix written before the colon.
func (node *TaxNode) FullString(taxonomyCode string) string {
	// Nodes without a scientific name only get the short "code:id" form.
	if !node.HasScientificName() {
		return fmt.Sprintf("%s:%v", taxonomyCode, *node.id)
	}

	return fmt.Sprintf("%s:%v [%s]@%s", taxonomyCode, *node.id, node.ScientificName(), node.Rank())
}
// String returns a string representation of the TaxNode, including the taxonomy code,
// the node ID, the scientific name, and the rank. The full output format is
// "taxonomyCode:id [scientificName]@rank".
//
@@ -42,19 +60,7 @@ func (node *TaxNode) String(taxonomyCode string) string {
return *node.id
}
if node.HasScientificName() {
return fmt.Sprintf("%s:%v [%s]@%s",
taxonomyCode,
*node.id,
node.ScientificName(),
node.Rank(),
)
}
return fmt.Sprintf("%s:%v",
taxonomyCode,
*node.id)
return node.FullString(taxonomyCode)
}
// Id returns the unique identifier of the TaxNode.
+4 -2
View File
@@ -21,12 +21,10 @@ func PairingOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_ForwardFile, "forward-reads", "",
options.Alias("F"),
options.ArgName("FILENAME_F"),
options.Required("You must provide at a forward file"),
options.Description("The file names containing the forward reads"))
options.StringVar(&_ReverseFile, "reverse-reads", "",
options.Alias("R"),
options.ArgName("FILENAME_R"),
options.Required("You must provide a reverse file"),
options.Description("The file names containing the reverse reads"))
options.IntVar(&_Delta, "delta", _Delta,
options.Alias("D"),
@@ -72,6 +70,10 @@ func CLIPairedSequence() (obiiter.IBioSequence, error) {
return paired, nil
}
// CLIHasPairedFiles reports whether both the forward and the reverse read file
// names are set, i.e. neither _ForwardFile nor _ReverseFile is the empty string.
func CLIHasPairedFiles() bool {
	if _ForwardFile == "" {
		return false
	}

	return _ReverseFile != ""
}
// CLIDelta returns the current value of the package-level _Delta variable,
// which the "delta" (-D) command-line option is bound to.
func CLIDelta() int {
return _Delta
}
+4 -4
View File
@@ -114,10 +114,10 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
aanot["obimultiplex_direction"] = direction
aanot["obimultiplex_forward_match"] = forward_match
aanot["obimultiplex_forward_mismatches"] = forward_mismatches
aanot["obimultiplex_forward_error"] = forward_mismatches
aanot["obimultiplex_reverse_match"] = reverse_match
aanot["obimultiplex_reverse_mismatches"] = reverse_mismatches
aanot["obimultiplex_reverse_error"] = reverse_mismatches
aanot["sample"] = sample
aanot["experiment"] = experiment
@@ -125,10 +125,10 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
banot["obimultiplex_direction"] = direction
banot["obimultiplex_forward_match"] = forward_match
banot["obimultiplex_forward_mismatches"] = forward_mismatches
banot["obimultiplex_forward_error"] = forward_mismatches
banot["obimultiplex_reverse_match"] = reverse_match
banot["obimultiplex_reverse_mismatches"] = reverse_mismatches
banot["obimultiplex_reverse_error"] = reverse_mismatches
banot["sample"] = sample
banot["experiment"] = experiment
+1 -1
View File
@@ -1 +1 @@
4.4.41
4.4.42