mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-05-01 04:20:40 +00:00
feat(obiconvert): add --raw-taxid option and refactor taxID formatting
- Add a new `--raw-taxid` option (`obiconvert --raw-taxid`) to output bare numeric taxIDs instead of full-format strings. - Introduce `TaxNode.FullString()` to always return the complete "code:id [name]@rank" format, regardless of the global `UseRawTaxids()` setting. - Update `.String(taxonomyCode)` to respect the global flag, returning the bare ID when `--raw-taxid` is active. - Extract the raw taxID from full-format strings in taxonomy methods when needed (e.g., as a fallback when no taxonomy DB is loaded). - Add a comprehensive test suite covering: a) `--raw-taxid` execution and idempotency, b) full-format taxID output with `--taxonomy`, c) interaction of both flags, d) format validation. - Add test data: new reference files `out_ecotag.fasta` and `taxonomy.csv`, and an updated shell script.
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
>HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"}
|
||||
ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca
|
||||
gcctgaaactcaaaggacttggcggtgctttacatccct
|
||||
>HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"}
|
||||
ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac
|
||||
agcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
>HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"}
|
||||
ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat
|
||||
agcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
>HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"}
|
||||
ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata
|
||||
gcttaaaactcaaaggacttggcggtgctttatatccct
|
||||
>HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"}
|
||||
ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca
|
||||
gattaaacctcaaaggacttggcagtgctttatacccct
|
||||
>HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] {"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
|
||||
ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
|
||||
agcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
>HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] {"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
|
||||
ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat
|
||||
agcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
>HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"}
|
||||
ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata
|
||||
gcttaaaactcaaaggacttggcggtgctttatatccct
|
||||
@@ -0,0 +1,48 @@
|
||||
taxid,parent,taxonomic_rank,scientific_name
|
||||
taxon:1 [root]@no rank,taxon:1 [root]@no rank,no rank,root
|
||||
taxon:131567 [cellular organisms]@cellular root,taxon:1 [root]@no rank,cellular root,cellular organisms
|
||||
taxon:2759 [Eukaryota]@domain,taxon:131567 [cellular organisms]@cellular root,domain,Eukaryota
|
||||
taxon:33154 [Opisthokonta]@clade,taxon:2759 [Eukaryota]@domain,clade,Opisthokonta
|
||||
taxon:33208 [Metazoa]@kingdom,taxon:33154 [Opisthokonta]@clade,kingdom,Metazoa
|
||||
taxon:6072 [Eumetazoa]@clade,taxon:33208 [Metazoa]@kingdom,clade,Eumetazoa
|
||||
taxon:33213 [Bilateria]@clade,taxon:6072 [Eumetazoa]@clade,clade,Bilateria
|
||||
taxon:33511 [Deuterostomia]@clade,taxon:33213 [Bilateria]@clade,clade,Deuterostomia
|
||||
taxon:7711 [Chordata]@phylum,taxon:33511 [Deuterostomia]@clade,phylum,Chordata
|
||||
taxon:89593 [Craniata]@subphylum,taxon:7711 [Chordata]@phylum,subphylum,Craniata
|
||||
taxon:7742 [Vertebrata]@clade,taxon:89593 [Craniata]@subphylum,clade,Vertebrata
|
||||
taxon:7776 [Gnathostomata]@clade,taxon:7742 [Vertebrata]@clade,clade,Gnathostomata
|
||||
taxon:117570 [Teleostomi]@clade,taxon:7776 [Gnathostomata]@clade,clade,Teleostomi
|
||||
taxon:117571 [Euteleostomi]@clade,taxon:117570 [Teleostomi]@clade,clade,Euteleostomi
|
||||
taxon:8287 [Sarcopterygii]@superclass,taxon:117571 [Euteleostomi]@clade,superclass,Sarcopterygii
|
||||
taxon:1338369 [Dipnotetrapodomorpha]@clade,taxon:8287 [Sarcopterygii]@superclass,clade,Dipnotetrapodomorpha
|
||||
taxon:32523 [Tetrapoda]@clade,taxon:1338369 [Dipnotetrapodomorpha]@clade,clade,Tetrapoda
|
||||
taxon:32524 [Amniota]@clade,taxon:32523 [Tetrapoda]@clade,clade,Amniota
|
||||
taxon:40674 [Mammalia]@class,taxon:32524 [Amniota]@clade,class,Mammalia
|
||||
taxon:32525 [Theria]@clade,taxon:40674 [Mammalia]@class,clade,Theria
|
||||
taxon:9347 [Eutheria]@clade,taxon:32525 [Theria]@clade,clade,Eutheria
|
||||
taxon:1437010 [Boreoeutheria]@clade,taxon:9347 [Eutheria]@clade,clade,Boreoeutheria
|
||||
taxon:314146 [Euarchontoglires]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Euarchontoglires
|
||||
taxon:314145 [Laurasiatheria]@superorder,taxon:1437010 [Boreoeutheria]@clade,superorder,Laurasiatheria
|
||||
taxon:33554 [Carnivora]@order,taxon:314145 [Laurasiatheria]@superorder,order,Carnivora
|
||||
taxon:91561 [Artiodactyla]@order,taxon:314145 [Laurasiatheria]@superorder,order,Artiodactyla
|
||||
taxon:314147 [Glires]@clade,taxon:314146 [Euarchontoglires]@superorder,clade,Glires
|
||||
taxon:9845 [Ruminantia]@suborder,taxon:91561 [Artiodactyla]@order,suborder,Ruminantia
|
||||
taxon:35500 [Pecora]@infraorder,taxon:9845 [Ruminantia]@suborder,infraorder,Pecora
|
||||
taxon:9989 [Rodentia]@order,taxon:314147 [Glires]@clade,order,Rodentia
|
||||
taxon:379584 [Caniformia]@suborder,taxon:33554 [Carnivora]@order,suborder,Caniformia
|
||||
taxon:9608 [Canidae]@family,taxon:379584 [Caniformia]@suborder,family,Canidae
|
||||
taxon:9850 [Cervidae]@family,taxon:35500 [Pecora]@infraorder,family,Cervidae
|
||||
taxon:9881 [Odocoileinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Odocoileinae
|
||||
taxon:33553 [Sciuromorpha]@suborder,taxon:9989 [Rodentia]@order,suborder,Sciuromorpha
|
||||
taxon:55153 [Sciuridae]@family,taxon:33553 [Sciuromorpha]@suborder,family,Sciuridae
|
||||
taxon:34878 [Cervinae]@subfamily,taxon:9850 [Cervidae]@family,subfamily,Cervinae
|
||||
taxon:9611 [Canis]@genus,taxon:9608 [Canidae]@family,genus,Canis
|
||||
taxon:9857 [Capreolus]@genus,taxon:9881 [Odocoileinae]@subfamily,genus,Capreolus
|
||||
taxon:9612 [Canis lupus]@species,taxon:9611 [Canis]@genus,species,Canis lupus
|
||||
taxon:337726 [Xerinae]@subfamily,taxon:55153 [Sciuridae]@family,subfamily,Xerinae
|
||||
taxon:9859 [Cervus]@genus,taxon:34878 [Cervinae]@subfamily,genus,Cervus
|
||||
taxon:337730 [Marmotini]@tribe,taxon:337726 [Xerinae]@subfamily,tribe,Marmotini
|
||||
taxon:9992 [Marmota]@genus,taxon:337730 [Marmotini]@tribe,genus,Marmota
|
||||
taxon:9860 [Cervus elaphus]@species,taxon:9859 [Cervus]@genus,species,Cervus elaphus
|
||||
taxon:9615 [Canis lupus familiaris]@subspecies,taxon:9612 [Canis lupus]@species,subspecies,Canis lupus familiaris
|
||||
taxon:9858 [Capreolus capreolus]@species,taxon:9857 [Capreolus]@genus,species,Capreolus capreolus
|
||||
|
@@ -44,7 +44,7 @@ cleanup() {
|
||||
rm -rf "$TMPDIR" # Suppress the temporary directory
|
||||
|
||||
if [ $failed -gt 0 ]; then
|
||||
log "$TEST_NAME tests failed"
|
||||
log "$TEST_NAME tests failed"
|
||||
log
|
||||
log
|
||||
exit 1
|
||||
@@ -60,10 +60,10 @@ log() {
|
||||
echo -e "[$TEST_NAME @ $(date)] $*" 1>&2
|
||||
}
|
||||
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "Testing $TEST_NAME..."
|
||||
log "Test directory is $TEST_DIR"
|
||||
log "obitools directory is $OBITOOLS_DIR"
|
||||
log "Temporary directory is $TMPDIR"
|
||||
log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
######################################################################
|
||||
@@ -94,12 +94,12 @@ log "files: $(find $TEST_DIR | awk -F'/' '{print $NF}' | tail -n +2)"
|
||||
|
||||
|
||||
((ntest++))
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
if $CMD -h > "${TMPDIR}/help.txt" 2>&1
|
||||
then
|
||||
log "$MCMD: printing help OK"
|
||||
log "$MCMD: printing help OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: printing help failed"
|
||||
log "$MCMD: printing help failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
@@ -108,15 +108,15 @@ fi
|
||||
if obiconvert -Z "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
|
||||
> "${TMPDIR}/xxx.fasta.gz" && \
|
||||
zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
|
||||
"${TMPDIR}/xxx.fasta.gz"
|
||||
"${TMPDIR}/xxx.fasta.gz"
|
||||
then
|
||||
log "$MCMD: converting large fasta file to fasta OK"
|
||||
log "$MCMD: converting large fasta file to fasta OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: converting large fasta file to fasta failed"
|
||||
log "$MCMD: converting large fasta file to fasta failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
((ntest++))
|
||||
if obiconvert -Z --fastq-output \
|
||||
"${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
|
||||
@@ -125,15 +125,139 @@ if obiconvert -Z --fastq-output \
|
||||
"${TMPDIR}/xxx.fastq.gz" \
|
||||
> "${TMPDIR}/yyy.fasta.gz" && \
|
||||
zdiff "${TEST_DIR}/gbpln1088.4Mb.fasta.gz" \
|
||||
"${TMPDIR}/yyy.fasta.gz"
|
||||
"${TMPDIR}/yyy.fasta.gz"
|
||||
then
|
||||
log "$MCMD: converting large file between fasta and fastq OK"
|
||||
log "$MCMD: converting large file between fasta and fastq OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD: converting large file between fasta and fastq failed"
|
||||
log "$MCMD: converting large file between fasta and fastq failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# --raw-taxid tests (no taxonomy loaded)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# Running test
|
||||
((ntest++))
|
||||
if obiconvert --raw-taxid "${TEST_DIR}/out_ecotag.fasta" \
|
||||
> "${TMPDIR}/raw_taxid.fasta" 2>/dev/null
|
||||
then
|
||||
log "$MCMD --raw-taxid: running OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD --raw-taxid: running failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
# Taxids must be bare numbers — no full-format "taxon:ID [Name]@rank" strings
|
||||
((ntest++))
|
||||
if grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -qv '"taxid":"[0-9][0-9]*"'
|
||||
then
|
||||
log "$MCMD --raw-taxid: taxid format check failed (full-format taxid found)"
|
||||
((failed++))
|
||||
else
|
||||
log "$MCMD --raw-taxid: taxid format OK (all taxids are bare numbers)"
|
||||
((success++))
|
||||
fi
|
||||
|
||||
# --raw-taxid is idempotent: piping through a second obiconvert --raw-taxid must
|
||||
# produce bit-for-bit identical output.
|
||||
((ntest++))
|
||||
if obiconvert --raw-taxid "${TMPDIR}/raw_taxid.fasta" \
|
||||
> "${TMPDIR}/raw_taxid2.fasta" 2>/dev/null
|
||||
then
|
||||
log "$MCMD --raw-taxid piped: running OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD --raw-taxid piped: running failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
((ntest++))
|
||||
if diff "${TMPDIR}/raw_taxid.fasta" \
|
||||
"${TMPDIR}/raw_taxid2.fasta" > /dev/null
|
||||
then
|
||||
log "$MCMD --raw-taxid piped: idempotency OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD --raw-taxid piped: idempotency failed (outputs differ)"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# --taxonomy tests (full-format taxid, no --raw-taxid)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# Running test
|
||||
((ntest++))
|
||||
if obiconvert --taxonomy "${TEST_DIR}/taxonomy.csv" \
|
||||
"${TEST_DIR}/out_ecotag.fasta" \
|
||||
> "${TMPDIR}/taxo.fasta" 2>/dev/null
|
||||
then
|
||||
log "$MCMD --taxonomy: running OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD --taxonomy: running failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
# Taxids must be in full "taxon:ID [Name]@rank" format
|
||||
((ntest++))
|
||||
if grep '"taxid"' "${TMPDIR}/taxo.fasta" | grep -q '"taxid":"taxon:[0-9]'
|
||||
then
|
||||
log "$MCMD --taxonomy: taxid format OK (full-format taxids present)"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD --taxonomy: taxid format check failed (no full-format taxid found)"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# --raw-taxid --taxonomy tests
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# Running test
|
||||
((ntest++))
|
||||
if obiconvert --raw-taxid --taxonomy "${TEST_DIR}/taxonomy.csv" \
|
||||
"${TEST_DIR}/out_ecotag.fasta" \
|
||||
> "${TMPDIR}/raw_taxid_taxo.fasta" 2>/dev/null
|
||||
then
|
||||
log "$MCMD --raw-taxid --taxonomy: running OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD --raw-taxid --taxonomy: running failed"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
# Taxids must be bare numbers even when taxonomy is loaded
|
||||
((ntest++))
|
||||
if grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -qv '"taxid":"[0-9][0-9]*"'
|
||||
then
|
||||
log "$MCMD --raw-taxid --taxonomy: taxid format check failed (full-format taxid found)"
|
||||
((failed++))
|
||||
else
|
||||
log "$MCMD --raw-taxid --taxonomy: taxid format OK (all taxids are bare numbers)"
|
||||
((success++))
|
||||
fi
|
||||
|
||||
# --raw-taxid with or without taxonomy must yield identical taxid values
|
||||
((ntest++))
|
||||
if diff <(grep '"taxid"' "${TMPDIR}/raw_taxid.fasta" | grep -o '"taxid":"[^"]*"' | sort) \
|
||||
<(grep '"taxid"' "${TMPDIR}/raw_taxid_taxo.fasta" | grep -o '"taxid":"[^"]*"' | sort) \
|
||||
> /dev/null
|
||||
then
|
||||
log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values match OK"
|
||||
((success++))
|
||||
else
|
||||
log "$MCMD --raw-taxid vs --raw-taxid --taxonomy: taxid values differ (unexpected)"
|
||||
((failed++))
|
||||
fi
|
||||
|
||||
|
||||
#########################################
|
||||
#
|
||||
# At the end of the tests
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
>HELIUM_000100422_612GNAAXX:7:118:3572:14633#0/1_sub[28..126] {"count":10172,"merged_sample":{"26a_F040644":10172},"obitag_bestid":0.9797979797979798,"obitag_bestmatch":"AY227529","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9992 [Marmota]@genus"}
|
||||
ttagccctaaacataaacattcaataaacaagaatgttcgccagagtactactagcaaca
|
||||
gcctgaaactcaaaggacttggcggtgctttacatccct
|
||||
>HELIUM_000100422_612GNAAXX:7:99:9351:13090#0/1_sub[28..127] {"count":260,"merged_sample":{"29a_F260619":260},"obitag_bestid":0.9405940594059405,"obitag_bestmatch":"AF154263","obitag_match_count":9,"obitag_rank":"infraorder","obitag_similarity_method":"lcs","taxid":"taxon:35500 [Pecora]@infraorder"}
|
||||
ttagccctaaacacaaataattacacaaacaaaattgttcaccagagtactagcggcaac
|
||||
agcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
>HELIUM_000100422_612GNAAXX:7:108:10111:9078#0/1_sub[28..127] {"count":7146,"merged_sample":{"13a_F730603":7146},"obitag_bestid":1,"obitag_bestmatch":"AB245427","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9860 [Cervus elaphus]@species"}
|
||||
ctagccttaaacacaaatagttatgcaaacaaaactattcgccagagtactaccggcaat
|
||||
agcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
>HELIUM_000100422_612GNAAXX:7:38:14204:12725#0/1_sub[28..126] {"count":87,"merged_sample":{"26a_F040644":87},"obitag_bestid":0.9494949494949495,"obitag_bestmatch":"AY227530","obitag_match_count":2,"obitag_rank":"tribe","obitag_similarity_method":"lcs","taxid":"taxon:337730 [Marmotini]@tribe"}
|
||||
ttagccctaaacataaacattcaataaacaagaatgttcgccagaggactactagcaata
|
||||
gcttaaaactcaaaggacttggcggtgctttatatccct
|
||||
>HELIUM_000100422_612GNAAXX:7:30:9942:4495#0/1_sub[28..126] {"count":95,"merged_sample":{"26a_F040644":11,"29a_F260619":84},"obitag_bestid":0.9595959595959596,"obitag_bestmatch":"AC187326","obitag_match_count":1,"obitag_rank":"subspecies","obitag_similarity_method":"lcs","taxid":"taxon:9615 [Canis lupus familiaris]@subspecies"}
|
||||
ttagccctaaacataagctattccataacaaaataattcgccagagaactactagcaaca
|
||||
gattaaacctcaaaggacttggcagtgctttatacccct
|
||||
>HELIUM_000100422_612GNAAXX:7:51:16702:19393#0/1_sub[28..127] {"count":12004,"merged_sample":{"15a_F730814":7465,"29a_F260619":4539},"obitag_bestid":1,"obitag_bestmatch":"AJ885202","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
|
||||
ttagccctaaacacaagtaattaatataacaaaattattcgccagagtactaccggcaat
|
||||
agcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
>HELIUM_000100422_612GNAAXX:7:84:14502:1617#0/1_sub[28..127] {"count":319,"merged_sample":{"29a_F260619":319},"obitag_bestid":1,"obitag_bestmatch":"AJ972683","obitag_match_count":1,"obitag_rank":"species","obitag_similarity_method":"lcs","taxid":"taxon:9858 [Capreolus capreolus]@species"}
|
||||
ttagccctaaacacaagtaattattataacaaaattattcgccagagtactaccggcaat
|
||||
agcttaaaactcaaaggacttggcggtgctttataccctt
|
||||
>HELIUM_000100422_612GNAAXX:7:50:10637:6527#0/1_sub[28..126] {"count":366,"merged_sample":{"13a_F730603":13,"15a_F730814":5,"26a_F040644":347,"29a_F260619":1},"obitag_bestid":1,"obitag_bestmatch":"AB048590","obitag_match_count":1,"obitag_rank":"genus","obitag_similarity_method":"lcs","taxid":"taxon:9611 [Canis]@genus"}
|
||||
ttagccctaaacatagataattttacaacaaaataattcgccagaggactactagcaata
|
||||
gcttaaaactcaaaggacttggcggtgctttatatccct
|
||||
@@ -70,6 +70,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||
}
|
||||
}
|
||||
|
||||
} else if obidefault.UseRawTaxids() {
|
||||
// Without a loaded taxonomy, extract the bare ID from full-format strings
|
||||
// like "code:12345 [Name]@rank" so that --raw-taxid is honoured everywhere.
|
||||
if _, rawID, _, _, parseErr := obitax.ParseTaxonString(taxid); parseErr == nil {
|
||||
taxid = rawID
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,7 +183,7 @@ func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
|
||||
lpath := path.Len() - 1
|
||||
|
||||
for i := lpath; i >= 0; i-- {
|
||||
spath[lpath-i] = path.Get(i).String(taxonomy.Code())
|
||||
spath[lpath-i] = path.Get(i).FullString(taxonomy.Code())
|
||||
}
|
||||
|
||||
sequence.SetAttribute("taxonomic_path", spath)
|
||||
|
||||
+19
-13
@@ -29,6 +29,24 @@ type TaxNode struct {
|
||||
alternatenames *map[*string]*string
|
||||
}
|
||||
|
||||
// FullString returns the full string representation of the TaxNode in the form
|
||||
// "taxonomyCode:id [scientificName]@rank", regardless of the UseRawTaxids setting.
|
||||
// This is used internally when a parseable format is required (e.g. taxonomic_path).
|
||||
func (node *TaxNode) FullString(taxonomyCode string) string {
|
||||
if node.HasScientificName() {
|
||||
return fmt.Sprintf("%s:%v [%s]@%s",
|
||||
taxonomyCode,
|
||||
*node.id,
|
||||
node.ScientificName(),
|
||||
node.Rank(),
|
||||
)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s:%v",
|
||||
taxonomyCode,
|
||||
*node.id)
|
||||
}
|
||||
|
||||
// String returns a string representation of the TaxNode, including the taxonomy code,
|
||||
// the node ID, the scientific name, and the rank. The output format is "taxonomyCode:id [scientificName]@rank".
|
||||
//
|
||||
@@ -42,19 +60,7 @@ func (node *TaxNode) String(taxonomyCode string) string {
|
||||
return *node.id
|
||||
}
|
||||
|
||||
if node.HasScientificName() {
|
||||
return fmt.Sprintf("%s:%v [%s]@%s",
|
||||
taxonomyCode,
|
||||
*node.id,
|
||||
node.ScientificName(),
|
||||
node.Rank(),
|
||||
)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s:%v",
|
||||
taxonomyCode,
|
||||
*node.id)
|
||||
|
||||
return node.FullString(taxonomyCode)
|
||||
}
|
||||
|
||||
// Id returns the unique identifier of the TaxNode.
|
||||
|
||||
Reference in New Issue
Block a user