Adds the possibility to extract a taxonomy from the taxonomic paths included in sequence files

This commit is contained in:
Eric Coissac
2025-01-30 11:18:21 +01:00
parent 2452aef7a9
commit 0df082da06
20 changed files with 460 additions and 173 deletions

View File

@ -1,6 +1,7 @@
package obiseq
import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
log "github.com/sirupsen/logrus"
"golang.org/x/exp/slices"
@ -179,3 +180,18 @@ func (s *BioSequenceSlice) SortOnLength(reverse bool) {
}
})
}
// ExtractTaxonomy feeds the taxonomic path of every sequence of the slice,
// as returned by its Path method, to taxonomy.InsertPathString.
//
// The taxonomy returned by each insertion is the one used for the next
// sequence, and the one returned by the last insertion is the result.
//
// Parameters:
//   - taxonomy: the taxonomy the paths are inserted into.
//
// Returns the taxonomy produced by the last insertion and a nil error, or a
// nil taxonomy and the error of the first insertion that fails (the
// remaining sequences are then not processed).
func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy) (*obitax.Taxonomy, error) {
	for _, sequence := range *s {
		updated, err := taxonomy.InsertPathString(sequence.Path())
		if err != nil {
			return nil, err
		}

		// Carry the taxonomy returned by the insertion over to the next sequence.
		taxonomy = updated
	}

	return taxonomy, nil
}

View File

@ -15,13 +15,15 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
taxonomy = taxonomy.OrDefault(true)
for taxid, v := range taxids {
t := taxonomy.Taxon(taxid)
if t == nil {
t, err := taxonomy.Taxon(taxid)
if err != nil {
log.Fatalf(
"On sequence %s taxid %s is not defined in taxonomy: %s",
"On sequence %s taxid %s is not defined in taxonomy: %s (%v)",
sequence.Id(),
taxid,
taxonomy.Name())
taxonomy.Name(),
err,
)
}
taxons[t.Node] = v
}

View File

@ -6,6 +6,7 @@ import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
@ -14,7 +15,10 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
if taxid == "NA" {
return nil
}
return taxonomy.Taxon(taxid)
taxon, _ := taxonomy.Taxon(taxid)
return taxon
}
// SetTaxid sets the taxid for the BioSequence.
@ -23,6 +27,7 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
//
// taxid - the taxid to set.
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
var err error
if taxid == "" {
taxid = "NA"
} else {
@ -30,7 +35,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
taxon := (*obitax.Taxon)(nil)
if taxonomy != nil {
taxon = taxonomy.Taxon(taxid)
taxon, err = taxonomy.Taxon(taxid)
if err != nil {
log.Warnf("%s: Taxid: %v is unknown from taxonomy (%v)",
s.Id(), taxid, err)
}
}
if taxon != nil {
@ -135,14 +145,35 @@ func (sequence *BioSequence) SetFamily(taxonomy *obitax.Taxonomy) *obitax.Taxon
return sequence.SetTaxonAtRank(taxonomy, "family")
}
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) string {
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
taxon := sequence.Taxon(taxonomy)
path := taxon.Path()
spath := make([]string, path.Len())
lpath := path.Len() - 1
tpath := path.String()
sequence.SetAttribute("taxonomic_path", tpath)
for i := lpath; i >= 0; i-- {
spath[lpath-i] = path.Get(i).String(taxonomy.Code())
}
return tpath
sequence.SetAttribute("taxonomic_path", spath)
return spath
}
// Path returns the content of the "taxonomic_path" attribute of the sequence
// as a slice of strings.
//
// It returns nil when the sequence has no "taxonomic_path" attribute.
// When the attribute exists but obiutils.InterfaceToStringSlice cannot
// convert it, the failure is reported through log.Fatalf together with the
// sequence identifier.
func (sequence *BioSequence) Path() []string {
	raw, found := sequence.GetAttribute("taxonomic_path")

	if found {
		names, convErr := obiutils.InterfaceToStringSlice(raw)

		if convErr != nil {
			log.Fatalf("%s: taxonomic_path has the wrong type (%v)", sequence.Id(), convErr)
		}

		return names
	}

	return nil
}
func (sequence *BioSequence) SetScientificName(taxonomy *obitax.Taxonomy) string {

View File

@ -63,7 +63,12 @@ func IsSubCladeOfSlot(taxonomy *obitax.Taxonomy, key string) SequencePredicate {
val, ok := sequence.GetStringAttribute(key)
if ok {
parent := taxonomy.Taxon(val)
parent, err := taxonomy.Taxon(val)
if err != nil {
log.Warnf("%s: %s is unkown from the taxonomy (%v)", sequence.Id(), val, err)
}
taxon := sequence.Taxon(taxonomy)
return parent != nil && taxon != nil && taxon.IsSubCladeOf(parent)
}