Add the --taxonomic-path option to obiannotate

Former-commit-id: 385309a1c4bc5ed33aeaafc63eedb9fc552f78a6
This commit is contained in:
2024-02-27 20:10:26 +01:00
parent 4127ddb26f
commit 4a0b20484f
5 changed files with 72 additions and 0 deletions

View File

@ -2,6 +2,7 @@ package obitax
import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
log "github.com/sirupsen/logrus"
)
// Setting the taxon at a given rank for a given sequence.
@ -46,3 +47,22 @@ func (taxonomy *Taxonomy) SetGenus(sequence *obiseq.BioSequence) *TaxNode {
// SetFamily delegates to SetTaxonAtRank with the rank name "family" and
// returns whatever node that call yields for the given sequence.
func (taxonomy *Taxonomy) SetFamily(sequence *obiseq.BioSequence) *TaxNode {
	const rank = "family"
	return taxonomy.SetTaxonAtRank(sequence, rank)
}
// SetPath resolves the taxon matching the sequence's taxid in this taxonomy,
// renders that taxon's path as a string, stores the string on the sequence
// under the "taxonomic_path" attribute and returns it.
//
// Both failure cases (taxid lookup and path retrieval) are reported through
// log.Fatalf rather than being returned to the caller.
func (taxonomy *Taxonomy) SetPath(sequence *obiseq.BioSequence) string {
	id := sequence.Taxid()

	taxon, err := taxonomy.Taxon(id)
	if err != nil {
		log.Fatalf("Taxid %d not defined in the current taxonomy", id)
	}

	lineage, err := taxon.Path()
	if err != nil {
		log.Fatalf("Taxonomy index error: %v", err)
	}

	rendered := lineage.String()
	sequence.SetAttribute("taxonomic_path", rendered)

	return rendered
}

View File

@ -51,3 +51,14 @@ func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiseq.SeqWorker {
return w
}
// MakeSetPathWorker builds a sequence worker bound to this taxonomy. The
// worker calls SetPath on each sequence it receives, discards the returned
// string, and hands the same sequence pointer back.
func (taxonomy *Taxonomy) MakeSetPathWorker() obiseq.SeqWorker {
	return func(seq *obiseq.BioSequence) *obiseq.BioSequence {
		taxonomy.SetPath(seq)
		return seq
	}
}

View File

@ -1,5 +1,10 @@
package obitax
import (
"bytes"
"fmt"
)
// TaxonSlice is a slice of pointers to TaxNode.
type TaxonSlice []*TaxNode
func (set *TaxonSlice) Get(i int) *TaxNode {
@ -9,3 +14,25 @@ func (set *TaxonSlice) Get(i int) *TaxNode {
// Len returns the number of elements held by the slice.
func (set *TaxonSlice) Len() int {
	nodes := *set
	return len(nodes)
}
// String renders the slice as a single line, walking it from the last
// element down to the first. Each element is written as
// "taxid@scientific name@rank" and consecutive elements are separated by
// "|". An empty slice yields the empty string.
func (path *TaxonSlice) String() string {
	const (
		firstFormat = "%d@%s@%s"
		nextFormat  = "|%d@%s@%s"
	)

	var out bytes.Buffer

	nodes := *path
	last := len(nodes) - 1

	for i := last; i >= 0; i-- {
		// Only the first element written (the last of the slice) goes
		// without the leading separator.
		format := nextFormat
		if i == last {
			format = firstFormat
		}

		node := nodes[i]
		fmt.Fprintf(&out, format,
			node.Taxid(),
			node.ScientificName(),
			node.Rank())
	}

	return out.String()
}

View File

@ -256,6 +256,12 @@ func CLIAnnotationWorker() obiseq.SeqWorker {
annotator = annotator.ChainWorkers(w)
}
if CLISetTaxonomicPath() {
taxo := obigrep.CLILoadSelectedTaxonomy()
w := taxo.MakeSetPathWorker()
annotator = annotator.ChainWorkers(w)
}
if CLIHasAddLCA() {
taxo := obigrep.CLILoadSelectedTaxonomy()
w := obitax.AddLCAWorker(taxo, CLILCASlotName(), CLILCAThreshold())

View File

@ -31,6 +31,7 @@ var _lcaSlot = ""
// Package-level settings and their initial values.
var (
	_lcaError = 0.0
	_setId    = ""
	_cut      = ""

	// _taxonomicPath is the variable bound to the "taxonomic-path" boolean
	// option.
	_taxonomicPath = false
)
func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
// options.BoolVar(&_addRank, "seq-rank", _addRank,
@ -113,6 +114,9 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
options.ArgName("RANK_NAME"),
options.Description("Adds taxonomic annotation at taxonomic rank <RANK_NAME>."))
options.BoolVar(&_taxonomicPath, "taxonomic-path", _taxonomicPath,
options.Description("Annotate the sequence with its taxonomic path"))
// options.StringVar(&_tagList, "tag-list", _tagList,
// options.ArgName("FILENAME"),
// options.Description("<FILENAME> points to a file containing attribute names"+
@ -299,3 +303,7 @@ func CLIPatternError() int {
// CLIPatternInDels returns the current value of the package-level
// _pattern_indel setting.
func CLIPatternInDels() bool {
	indels := _pattern_indel
	return indels
}
// CLISetTaxonomicPath returns the current value of the package-level
// _taxonomicPath setting.
func CLISetTaxonomicPath() bool {
	enabled := _taxonomicPath
	return enabled
}