mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Adds the possibility to extract a taxonomy from the taxonomic paths included in sequence files
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
package obiseq
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/exp/slices"
|
||||
@ -179,3 +180,18 @@ func (s *BioSequenceSlice) SortOnLength(reverse bool) {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// ExtractTaxonomy feeds the taxonomic path of every sequence of the slice
// into the given taxonomy and returns the resulting taxonomy.
//
// For each sequence, the value returned by its Path method is passed to
// taxonomy.InsertPathString, and the taxonomy returned by that call replaces
// the current one for the following iterations.
//
// Parameters:
//   - taxonomy: the taxonomy the paths are inserted into.
//     NOTE(review): whether a nil taxonomy is accepted depends on
//     InsertPathString, which is not visible here — confirm.
//
// Returns:
//   - the taxonomy obtained after the last insertion and a nil error, or
//   - nil and the error reported by InsertPathString on the first failing
//     sequence; the remaining sequences are not processed in that case.
func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy) (*obitax.Taxonomy, error) {
|
||||
var err error
|
||||
|
||||
// The loop variable s shadows the receiver s inside the loop body: from
// here on, s designates one sequence of the slice, not the slice itself.
for _, s := range *s {
|
||||
// Path returns nil when the sequence has no "taxonomic_path" attribute.
// NOTE(review): how InsertPathString reacts to a nil path is not visible
// here — confirm that sequences without a path are handled as intended.
taxonomy, err = taxonomy.InsertPathString(s.Path())
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return taxonomy, nil
|
||||
}
|
||||
|
@ -15,13 +15,15 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
|
||||
taxonomy = taxonomy.OrDefault(true)
|
||||
|
||||
for taxid, v := range taxids {
|
||||
t := taxonomy.Taxon(taxid)
|
||||
if t == nil {
|
||||
t, err := taxonomy.Taxon(taxid)
|
||||
if err != nil {
|
||||
log.Fatalf(
|
||||
"On sequence %s taxid %s is not defined in taxonomy: %s",
|
||||
"On sequence %s taxid %s is not defined in taxonomy: %s (%v)",
|
||||
sequence.Id(),
|
||||
taxid,
|
||||
taxonomy.Name())
|
||||
taxonomy.Name(),
|
||||
err,
|
||||
)
|
||||
}
|
||||
taxons[t.Node] = v
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||
@ -14,7 +15,10 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||
if taxid == "NA" {
|
||||
return nil
|
||||
}
|
||||
return taxonomy.Taxon(taxid)
|
||||
|
||||
taxon, _ := taxonomy.Taxon(taxid)
|
||||
|
||||
return taxon
|
||||
}
|
||||
|
||||
// SetTaxid sets the taxid for the BioSequence.
|
||||
@ -23,6 +27,7 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||
//
|
||||
// taxid - the taxid to set.
|
||||
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||
var err error
|
||||
if taxid == "" {
|
||||
taxid = "NA"
|
||||
} else {
|
||||
@ -30,7 +35,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||
taxon := (*obitax.Taxon)(nil)
|
||||
|
||||
if taxonomy != nil {
|
||||
taxon = taxonomy.Taxon(taxid)
|
||||
taxon, err = taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("%s: Taxid: %v is unknown from taxonomy (%v)",
|
||||
s.Id(), taxid, err)
|
||||
}
|
||||
}
|
||||
|
||||
if taxon != nil {
|
||||
@ -135,14 +145,35 @@ func (sequence *BioSequence) SetFamily(taxonomy *obitax.Taxonomy) *obitax.Taxon
|
||||
return sequence.SetTaxonAtRank(taxonomy, "family")
|
||||
}
|
||||
|
||||
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) string {
|
||||
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
|
||||
taxon := sequence.Taxon(taxonomy)
|
||||
path := taxon.Path()
|
||||
spath := make([]string, path.Len())
|
||||
lpath := path.Len() - 1
|
||||
|
||||
tpath := path.String()
|
||||
sequence.SetAttribute("taxonomic_path", tpath)
|
||||
for i := lpath; i >= 0; i-- {
|
||||
spath[lpath-i] = path.Get(i).String(taxonomy.Code())
|
||||
}
|
||||
|
||||
return tpath
|
||||
sequence.SetAttribute("taxonomic_path", spath)
|
||||
|
||||
return spath
|
||||
}
|
||||
|
||||
// Path returns the content of the "taxonomic_path" attribute of the sequence
// as a slice of strings.
//
// Returns:
//   - nil when the sequence has no "taxonomic_path" attribute;
//   - otherwise the attribute value converted by
//     obiutils.InterfaceToStringSlice.
//
// If the conversion fails, the error is reported through log.Fatalf together
// with the sequence id (with the logrus logger imported as log, Fatalf is
// expected to terminate the program — confirm against the logger setup).
func (sequence *BioSequence) Path() []string {
|
||||
path, ok := sequence.GetAttribute("taxonomic_path")
|
||||
|
||||
// No attribute stored on this sequence: there is no path to return.
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
// The attribute is stored as a generic value; convert it to []string.
slice, err := obiutils.InterfaceToStringSlice(path)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%s: taxonomic_path has the wrong type (%v)", sequence.Id(), err)
|
||||
}
|
||||
|
||||
return slice
|
||||
}
|
||||
|
||||
func (sequence *BioSequence) SetScientificName(taxonomy *obitax.Taxonomy) string {
|
||||
|
@ -63,7 +63,12 @@ func IsSubCladeOfSlot(taxonomy *obitax.Taxonomy, key string) SequencePredicate {
|
||||
val, ok := sequence.GetStringAttribute(key)
|
||||
|
||||
if ok {
|
||||
parent := taxonomy.Taxon(val)
|
||||
parent, err := taxonomy.Taxon(val)
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("%s: %s is unkown from the taxonomy (%v)", sequence.Id(), val, err)
|
||||
}
|
||||
|
||||
taxon := sequence.Taxon(taxonomy)
|
||||
return parent != nil && taxon != nil && taxon.IsSubCladeOf(parent)
|
||||
}
|
||||
|
Reference in New Issue
Block a user