mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
First step in the obitax rewriting
This commit is contained in:
@ -1,64 +0,0 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// TaxonomyClassifier is a function that creates a new instance of the BioSequenceClassifier
|
||||
// for taxonomic classification based on a given taxonomic rank, taxonomy, and abort flag.
|
||||
//
|
||||
// Parameters:
|
||||
// - taxonomicRank: the taxonomic rank to classify the sequences at.
|
||||
// - taxonomy: the taxonomy object used for classification.
|
||||
// - abortOnMissing: a flag indicating whether to abort if a taxon is missing in the taxonomy.
|
||||
//
|
||||
// Return:
|
||||
// - *obiseq.BioSequenceClassifier: the new instance of the BioSequenceClassifier.
|
||||
func TaxonomyClassifier(taxonomicRank string,
|
||||
taxonomy *Taxonomy,
|
||||
abortOnMissing bool) *obiseq.BioSequenceClassifier {
|
||||
|
||||
code := func(sequence *obiseq.BioSequence) int {
|
||||
taxid := sequence.Taxid()
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
if err == nil {
|
||||
taxon = taxon.TaxonAtRank(taxonomicRank)
|
||||
} else {
|
||||
taxon = nil
|
||||
}
|
||||
if taxon == nil {
|
||||
if abortOnMissing {
|
||||
if err != nil {
|
||||
log.Fatalf("Taxid %d not found in taxonomy", taxid)
|
||||
} else {
|
||||
log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %d", taxonomicRank, taxid)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
return taxon.Taxid()
|
||||
}
|
||||
|
||||
value := func(k int) string {
|
||||
taxon, _ := taxonomy.Taxon(k)
|
||||
return taxon.ScientificName()
|
||||
}
|
||||
|
||||
reset := func() {
|
||||
}
|
||||
|
||||
clone := func() *obiseq.BioSequenceClassifier {
|
||||
return TaxonomyClassifier(taxonomicRank, taxonomy, abortOnMissing)
|
||||
}
|
||||
|
||||
c := obiseq.BioSequenceClassifier{
|
||||
Code: code,
|
||||
Value: value,
|
||||
Reset: reset,
|
||||
Clone: clone,
|
||||
Type: "TaxonomyClassifier"}
|
||||
return &c
|
||||
}
|
49
pkg/obitax/inner.go
Normal file
49
pkg/obitax/inner.go
Normal file
@ -0,0 +1,49 @@
|
||||
package obitax
|
||||
|
||||
import "sync"
|
||||
|
||||
// InnerString is a string interning table that can be shared between
// goroutines. Every distinct string is stored once in the index map, keyed by
// itself, and all accesses to the map are guarded by the lock.
type InnerString struct {
	index map[string]string
	lock  sync.RWMutex
}

// NewInnerString creates a new, empty instance of InnerString.
// The lock is left at its zero value, which is an unlocked mutex.
func NewInnerString() *InnerString {
	return &InnerString{
		index: make(map[string]string),
	}
}

// Innerize stores the given value in the index map if it is not already present.
// It returns the value associated with the key, which is either the newly stored value
// or the existing value if it was already present in the map.
//
// Parameters:
//   - value: The string value to be stored in the index map.
//
// Returns:
//   - The string value associated with the key.
func (i *InnerString) Innerize(value string) string {
	// A write lock is required because a miss inserts into the map.
	i.lock.Lock()
	defer i.lock.Unlock()

	s, ok := i.index[value]
	if !ok {
		i.index[value] = value
		s = value
	}

	return s
}

// Slice returns a snapshot of all the strings currently stored in the index.
// The order of the returned strings is unspecified (map iteration order).
//
// Returns:
//   - A newly allocated slice holding every stored string once.
func (i *InnerString) Slice() []string {
	// Iterating the map while Innerize inserts into it is a data race,
	// so the snapshot is taken under the read lock.
	i.lock.RLock()
	defer i.lock.RUnlock()

	rep := make([]string, 0, len(i.index))
	for _, v := range i.index {
		rep = append(rep, v)
	}
	return rep
}
|
@ -1,21 +1,22 @@
|
||||
package obitax
|
||||
|
||||
func (taxon *TaxNode) IsSubCladeOf(parent *TaxNode) bool {
|
||||
import "log"
|
||||
|
||||
for taxon.taxid != parent.taxid && taxon.parent != taxon.taxid {
|
||||
taxon = taxon.pparent
|
||||
func (taxon *Taxon) IsSubCladeOf(parent *Taxon) bool {
|
||||
|
||||
if taxon.Taxonomy != parent.Taxonomy {
|
||||
log.Fatalf(
|
||||
"Both taxa %s and %s must belong to the same taxonomy",
|
||||
taxon.String(),
|
||||
parent.String(),
|
||||
)
|
||||
}
|
||||
|
||||
return taxon.taxid == parent.taxid
|
||||
}
|
||||
|
||||
func (taxon *TaxNode) IsBelongingSubclades(clades *TaxonSet) bool {
|
||||
_, ok := (*clades)[taxon.taxid]
|
||||
|
||||
for !ok && taxon.parent != taxon.taxid {
|
||||
taxon = taxon.pparent
|
||||
_, ok = (*clades)[taxon.taxid]
|
||||
for t := range taxon.IPath() {
|
||||
if t.Node.Id() == parent.Node.Id() {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return ok
|
||||
return false
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ func (set *TaxonSet) Iterator() *ITaxonSet {
|
||||
i := NewITaxonSet()
|
||||
|
||||
go func() {
|
||||
for _, t := range *set {
|
||||
for _, t := range set.set {
|
||||
i.source <- t
|
||||
}
|
||||
close(i.source)
|
||||
@ -30,7 +30,7 @@ func (set *TaxonSlice) Iterator() *ITaxonSet {
|
||||
i := NewITaxonSet()
|
||||
|
||||
go func() {
|
||||
for _, t := range *set {
|
||||
for _, t := range set.slice {
|
||||
i.source <- t
|
||||
}
|
||||
close(i.source)
|
||||
@ -83,7 +83,7 @@ func (iterator *ITaxonSet) TaxonSet() *TaxonSet {
|
||||
|
||||
for iterator.Next() {
|
||||
taxon := iterator.Get()
|
||||
set[taxon.taxid] = taxon
|
||||
set[taxon.id] = taxon
|
||||
}
|
||||
return &set
|
||||
}
|
||||
|
@ -1,12 +1,6 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@ -41,121 +35,3 @@ func (t1 *TaxNode) LCA(t2 *TaxNode) (*TaxNode, error) {
|
||||
|
||||
return (*p1)[i1+1], nil
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) TaxonomicDistribution(sequence *obiseq.BioSequence) map[*TaxNode]int {
|
||||
taxids := sequence.StatsOn(obiseq.MakeStatsOnDescription("taxid"), "na")
|
||||
taxons := make(map[*TaxNode]int, len(taxids))
|
||||
|
||||
for k, v := range taxids {
|
||||
taxid, _ := strconv.Atoi(k)
|
||||
|
||||
t, et := taxonomy.Taxon(taxid)
|
||||
if et != nil {
|
||||
log.Panicf("Taxid %d not defined in taxonomy : %v", taxid, et)
|
||||
}
|
||||
taxons[t] = v
|
||||
}
|
||||
return taxons
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) LCA(sequence *obiseq.BioSequence, threshold float64) (*TaxNode, float64, int) {
|
||||
taxons := taxonomy.TaxonomicDistribution(sequence)
|
||||
paths := make(map[*TaxNode]*TaxonSlice, len(taxons))
|
||||
answer := (*TaxNode)(nil)
|
||||
rans := 1.0
|
||||
granTotal := 0
|
||||
|
||||
for t, w := range taxons {
|
||||
p, ep := t.Path()
|
||||
if ep != nil {
|
||||
log.Panicf("Taxonomic path cannot be retreived from Taxid %d : %v", t.Taxid(), ep)
|
||||
}
|
||||
|
||||
obiutils.Reverse(*p, true)
|
||||
paths[t] = p
|
||||
answer = (*p)[0]
|
||||
granTotal += w
|
||||
}
|
||||
|
||||
rmax := 1.0
|
||||
levels := make(map[*TaxNode]int, len(paths))
|
||||
taxonMax := answer
|
||||
|
||||
for i := 0; rmax >= threshold; i++ {
|
||||
answer = taxonMax
|
||||
rans = rmax
|
||||
taxonMax = nil
|
||||
total := 0
|
||||
for taxon, weight := range taxons {
|
||||
path := paths[taxon]
|
||||
if len(*path) > i {
|
||||
levels[(*path)[i]] += weight
|
||||
}
|
||||
total += weight
|
||||
}
|
||||
weighMax := 0
|
||||
for taxon, weight := range levels {
|
||||
if weight > weighMax {
|
||||
weighMax = weight
|
||||
taxonMax = taxon
|
||||
}
|
||||
}
|
||||
|
||||
if total > 0 {
|
||||
rmax *= float64(weighMax) / float64(total)
|
||||
} else {
|
||||
rmax = 0.0
|
||||
}
|
||||
|
||||
for taxon := range levels {
|
||||
delete(levels, taxon)
|
||||
}
|
||||
for taxon := range taxons {
|
||||
path := paths[taxon]
|
||||
if i < len(*path) {
|
||||
if (*path)[i] != taxonMax {
|
||||
delete(paths, taxon)
|
||||
delete(taxons, taxon)
|
||||
}
|
||||
}
|
||||
}
|
||||
// if taxonMax != nil {
|
||||
// log.Println("@@@>", i, taxonMax.ScientificName(), taxonMax.Taxid(), rans, weighMax, total, rmax)
|
||||
// } else {
|
||||
// log.Println("@@@>", "--", 0, rmax)
|
||||
// }
|
||||
}
|
||||
// log.Println("###>", answer.ScientificName(), answer.Taxid(), rans)
|
||||
// log.Print("========================================")
|
||||
return answer, rans, granTotal
|
||||
|
||||
}
|
||||
|
||||
func AddLCAWorker(taxonomy *Taxonomy, slot_name string, threshold float64) obiseq.SeqWorker {
|
||||
|
||||
if !strings.HasSuffix(slot_name, "taxid") {
|
||||
slot_name = slot_name + "_taxid"
|
||||
}
|
||||
|
||||
lca_error := strings.Replace(slot_name, "taxid", "error", 1)
|
||||
if lca_error == "error" {
|
||||
lca_error = "lca_error"
|
||||
}
|
||||
|
||||
lca_name := strings.Replace(slot_name, "taxid", "name", 1)
|
||||
if lca_name == "name" {
|
||||
lca_name = "scientific_name"
|
||||
}
|
||||
|
||||
f := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
lca, rans, _ := taxonomy.LCA(sequence, threshold)
|
||||
|
||||
sequence.SetAttribute(slot_name, lca.Taxid())
|
||||
sequence.SetAttribute(lca_name, lca.ScientificName())
|
||||
sequence.SetAttribute(lca_error, math.Round((1-rans)*1000)/1000)
|
||||
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
@ -1,87 +0,0 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Path generates the lineage path from the current taxon up to the root.
|
||||
//
|
||||
// This method does not take parameters as it is called on a TaxNode receiver.
|
||||
// It returns a pointer to a TaxonSlice containing the path and an error if
|
||||
// the taxonomy needs reindexing.
|
||||
func (taxon *TaxNode) Path() (*TaxonSlice, error) {
|
||||
|
||||
path := make(TaxonSlice, 0, 30)
|
||||
path = append(path, taxon)
|
||||
|
||||
for taxon != taxon.pparent {
|
||||
taxon = taxon.pparent
|
||||
|
||||
if taxon == nil {
|
||||
return nil, fmt.Errorf("Taxonomy must be reindexed")
|
||||
}
|
||||
|
||||
path = append(path, taxon)
|
||||
}
|
||||
|
||||
return &path, nil
|
||||
}
|
||||
|
||||
// TaxonAtRank traverses up the taxonomy tree starting from the current
|
||||
// node until it finds a node that matches the specified rank.
|
||||
//
|
||||
// If a node with the given rank is not found in the path to the root,
|
||||
// or if the taxonomy tree is not properly indexed (i.e., a node's parent
|
||||
// is itself), the function will return nil. In case the taxonomy needs
|
||||
// reindexing, the function will panic.
|
||||
//
|
||||
// rank: the taxonomic rank to search for (e.g., "species", "genus").
|
||||
//
|
||||
// Returns a pointer to a TaxNode representing the node at the
|
||||
// specified rank, or nil if no such node exists in the path.
|
||||
func (taxon *TaxNode) TaxonAtRank(rank string) *TaxNode {
|
||||
for taxon.rank != rank && taxon != taxon.pparent {
|
||||
taxon = taxon.pparent
|
||||
|
||||
if taxon == nil {
|
||||
log.Panicln("Taxonomy must be reindexed")
|
||||
}
|
||||
}
|
||||
|
||||
if taxon == taxon.pparent && taxon.rank != rank {
|
||||
taxon = nil
|
||||
}
|
||||
|
||||
return taxon
|
||||
}
|
||||
|
||||
// Species retrieves the TaxNode corresponding to the species rank.
|
||||
//
|
||||
// This method does not take any parameters. It is a convenience
|
||||
// wrapper around the TaxonAtRank method, specifically retrieving
|
||||
// the species-level taxonomic classification for the calling TaxNode.
|
||||
//
|
||||
// Returns a pointer to the TaxNode representing the species.
|
||||
func (taxon *TaxNode) Species() *TaxNode {
|
||||
return taxon.TaxonAtRank("species")
|
||||
}
|
||||
|
||||
func (taxon *TaxNode) Genus() *TaxNode {
|
||||
return taxon.TaxonAtRank("genus")
|
||||
}
|
||||
|
||||
func (taxon *TaxNode) Family() *TaxNode {
|
||||
return taxon.TaxonAtRank("family")
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) Path(taxid int) (*TaxonSlice, error) {
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return taxon.Path()
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
package obitax
|
||||
|
||||
func (taxonomy *Taxonomy) RankList() []string {
|
||||
ranks := make([]string, 0, 30)
|
||||
mranks := make(map[string]bool)
|
||||
|
||||
for _, t := range *taxonomy.nodes {
|
||||
mranks[t.rank] = true
|
||||
}
|
||||
|
||||
for r := range mranks {
|
||||
ranks = append(ranks, r)
|
||||
}
|
||||
|
||||
return ranks
|
||||
}
|
@ -1,92 +0,0 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Setting the taxon at a given rank for a given sequence.
|
||||
//
|
||||
// Two attributes are added to the sequence. One named by the rank name stores
|
||||
// the taxid, a second named by the rank name suffixed with '_name' contains the
|
||||
// Scientific name of the genus.
|
||||
// If the taxon at the given rank doesn't exist for the taxonomy annotation
|
||||
// of the sequence, nothing happens.
|
||||
func (taxonomy *Taxonomy) SetTaxonAtRank(sequence *obiseq.BioSequence, rank string) *TaxNode {
|
||||
var taxonAtRank *TaxNode
|
||||
|
||||
taxid := sequence.Taxid()
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
taxonAtRank = nil
|
||||
if err == nil {
|
||||
taxonAtRank = taxon.TaxonAtRank(rank)
|
||||
if taxonAtRank != nil {
|
||||
// log.Printf("Taxid: %d Rank: %s --> proposed : %d (%s)", taxid, rank, taxonAtRank.taxid, *(taxonAtRank.scientificname))
|
||||
sequence.SetAttribute(rank+"_taxid", taxonAtRank.taxid)
|
||||
sequence.SetAttribute(rank+"_name", *taxonAtRank.scientificname)
|
||||
} else {
|
||||
sequence.SetAttribute(rank+"_taxid", -1)
|
||||
sequence.SetAttribute(rank+"_name", "NA")
|
||||
}
|
||||
}
|
||||
|
||||
return taxonAtRank
|
||||
}
|
||||
|
||||
// Setting the species of a sequence.
|
||||
func (taxonomy *Taxonomy) SetSpecies(sequence *obiseq.BioSequence) *TaxNode {
|
||||
return taxonomy.SetTaxonAtRank(sequence, "species")
|
||||
}
|
||||
|
||||
// Setting the genus of a sequence.
|
||||
func (taxonomy *Taxonomy) SetGenus(sequence *obiseq.BioSequence) *TaxNode {
|
||||
return taxonomy.SetTaxonAtRank(sequence, "genus")
|
||||
}
|
||||
|
||||
// Setting the family of a sequence.
|
||||
func (taxonomy *Taxonomy) SetFamily(sequence *obiseq.BioSequence) *TaxNode {
|
||||
return taxonomy.SetTaxonAtRank(sequence, "family")
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) SetPath(sequence *obiseq.BioSequence) string {
|
||||
taxid, err := taxonomy.Taxon(sequence.Taxid())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Taxid %d not defined in the current taxonomy", sequence.Taxid())
|
||||
}
|
||||
|
||||
path, err := taxid.Path()
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Taxonomy index error: %v", err)
|
||||
}
|
||||
|
||||
tpath := path.String()
|
||||
sequence.SetAttribute("taxonomic_path", tpath)
|
||||
|
||||
return tpath
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) SetScientificName(sequence *obiseq.BioSequence) string {
|
||||
taxid, err := taxonomy.Taxon(sequence.Taxid())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Taxid %d not defined in the current taxonomy", sequence.Taxid())
|
||||
}
|
||||
|
||||
sequence.SetAttribute("scienctific_name", taxid.ScientificName())
|
||||
|
||||
return taxid.ScientificName()
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) SetTaxonomicRank(sequence *obiseq.BioSequence) string {
|
||||
taxid, err := taxonomy.Taxon(sequence.Taxid())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Taxid %d not defined in the current taxonomy", sequence.Taxid())
|
||||
}
|
||||
|
||||
sequence.SetAttribute("taxonomic_rank", taxid.Rank())
|
||||
|
||||
return taxid.Rank()
|
||||
}
|
@ -1,91 +0,0 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
func (taxonomy *Taxonomy) IsAValidTaxon(withAutoCorrection ...bool) obiseq.SequencePredicate {
|
||||
deprecatedTaxidsWarning := make(map[int]bool)
|
||||
|
||||
autocorrection := false
|
||||
if len(withAutoCorrection) > 0 {
|
||||
autocorrection = withAutoCorrection[0]
|
||||
}
|
||||
|
||||
f := func(sequence *obiseq.BioSequence) bool {
|
||||
taxid := sequence.Taxid()
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err == nil && taxon.taxid != taxid {
|
||||
if autocorrection {
|
||||
sequence.SetTaxid(taxon.taxid)
|
||||
log.Printf("Sequence %s : Taxid %d updated with %d",
|
||||
sequence.Id(),
|
||||
taxid,
|
||||
taxon.taxid)
|
||||
} else {
|
||||
if _, ok := deprecatedTaxidsWarning[taxid]; !ok {
|
||||
deprecatedTaxidsWarning[taxid] = true
|
||||
log.Printf("Taxid %d is deprecated and must be replaced by %d", taxid, taxon.taxid)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return err == nil
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
// A function that takes a taxonomy and a taxid as arguments and returns a function that takes a
|
||||
// pointer to a BioSequence as an argument and returns a boolean.
|
||||
func (taxonomy *Taxonomy) IsSubCladeOf(taxid int) obiseq.SequencePredicate {
|
||||
parent, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot find taxon : %d (%v)", taxid, err)
|
||||
}
|
||||
|
||||
f := func(sequence *obiseq.BioSequence) bool {
|
||||
taxon, err := taxonomy.Taxon(sequence.Taxid())
|
||||
return err == nil && taxon.IsSubCladeOf(parent)
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) IsSubCladeOfSlot(key string) obiseq.SequencePredicate {
|
||||
|
||||
f := func(sequence *obiseq.BioSequence) bool {
|
||||
val, ok := sequence.GetStringAttribute(key)
|
||||
|
||||
if ok {
|
||||
parent, err1 := taxonomy.Taxon(val)
|
||||
taxon, err2 := taxonomy.Taxon(sequence.Taxid())
|
||||
return err1 == nil && err2 == nil && taxon.IsSubCladeOf(parent)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) HasRequiredRank(rank string) obiseq.SequencePredicate {
|
||||
|
||||
if !obiutils.Contains(taxonomy.RankList(), rank) {
|
||||
log.Fatalf("%s is not a valid rank (allowed ranks are %v)",
|
||||
rank,
|
||||
taxonomy.RankList())
|
||||
}
|
||||
|
||||
f := func(sequence *obiseq.BioSequence) bool {
|
||||
taxon, err := taxonomy.Taxon(sequence.Taxid())
|
||||
return err == nil && taxon.HasRankDefined(rank)
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
@ -1,64 +0,0 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiseq.SeqWorker {
|
||||
|
||||
if !obiutils.Contains(taxonomy.RankList(), rank) {
|
||||
log.Fatalf("%s is not a valid rank (allowed ranks are %v)",
|
||||
rank,
|
||||
taxonomy.RankList())
|
||||
}
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetTaxonAtRank(sequence, rank)
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetSpecies(sequence)
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetGenusWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetGenus(sequence)
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetFamily(sequence)
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetPathWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetPath(sequence)
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
|
||||
}
|
@ -1,75 +1,194 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"iter"
|
||||
"regexp"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type TaxNode struct {
|
||||
taxid int
|
||||
parent int
|
||||
pparent *TaxNode
|
||||
rank string
|
||||
scientificname *string
|
||||
alternatenames *map[string]*string
|
||||
// Taxon represents a taxon within a taxonomy, encapsulating both the taxonomy
|
||||
// it belongs to and the specific taxon node information.
|
||||
//
|
||||
// Fields:
|
||||
// - Taxonomy: A pointer to the Taxonomy[T] instance that this taxon is part of.
|
||||
// - Node: A pointer to the TaxNode[T] instance representing the specific taxon.
|
||||
type Taxon struct {
|
||||
Taxonomy *Taxonomy
|
||||
Node *TaxNode
|
||||
}
|
||||
|
||||
func NewTaxNode(taxid int, parent int, rank string) *TaxNode {
|
||||
n := TaxNode{taxid, parent, nil, rank, nil, nil}
|
||||
return &n
|
||||
// String returns a string representation of the Taxon.
|
||||
// It formats the output to include the taxonomy code, the taxon ID, and the scientific name.
|
||||
//
|
||||
// Returns:
|
||||
// - A formatted string representing the Taxon in the form "taxonomy_code:taxon_id [scientific_name]".
|
||||
func (taxon *Taxon) String() string {
|
||||
return taxon.Node.String(taxon.Taxonomy.code)
|
||||
}
|
||||
|
||||
func (node *TaxNode) ScientificName() string {
|
||||
n := node.scientificname
|
||||
if n == nil {
|
||||
return ""
|
||||
// ScientificName returns the scientific name of the Taxon.
|
||||
// It retrieves the scientific name from the underlying TaxNode associated with the taxon.
|
||||
//
|
||||
// Returns:
|
||||
// - The scientific name of the taxon as a string.
|
||||
func (taxon *Taxon) ScientificName() string {
|
||||
return taxon.Node.ScientificName()
|
||||
}
|
||||
|
||||
func (taxon *Taxon) Name(class string) string {
|
||||
return taxon.Node.Name(class)
|
||||
}
|
||||
|
||||
func (taxon *Taxon) IsNameEqual(name string) bool {
|
||||
return taxon.Node.IsNameEqual(name)
|
||||
}
|
||||
|
||||
func (taxon *Taxon) IsNameMatching(pattern *regexp.Regexp) bool {
|
||||
return taxon.Node.IsNameMatching(pattern)
|
||||
}
|
||||
|
||||
func (taxon *Taxon) SetName(name, class string) {
|
||||
class = taxon.Taxonomy.nameclasses.Innerize(class)
|
||||
taxon.Node.SetName(name, class)
|
||||
}
|
||||
|
||||
// Rank returns the rank of the Taxon.
|
||||
// It retrieves the rank from the underlying TaxNode associated with the taxon.
|
||||
//
|
||||
// Returns:
|
||||
// - The rank of the taxon as a string (e.g., species, genus, family).
|
||||
func (taxon *Taxon) Rank() string {
|
||||
return taxon.Node.Rank()
|
||||
}
|
||||
|
||||
// Parent returns a pointer to the parent Taxon of the current Taxon.
|
||||
// It retrieves the parent identifier from the underlying TaxNode and uses it
|
||||
// to create a new Taxon instance representing the parent taxon.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the parent Taxon[T]. If the parent does not exist, it returns
|
||||
// a Taxon with a nil Node.
|
||||
func (taxon *Taxon) Parent() *Taxon {
|
||||
pid := taxon.Node.ParentId()
|
||||
return &Taxon{taxon.Taxonomy,
|
||||
taxon.Taxonomy.nodes.Get(pid)}
|
||||
}
|
||||
|
||||
// IPath returns an iterator that yields the path from the current Taxon to the root Taxon
|
||||
// in the associated Taxonomy. It traverses up the taxonomy hierarchy until it reaches the root.
|
||||
//
|
||||
// Returns:
|
||||
// - An iterator function that takes a yield function as an argument. The yield function
|
||||
// is called with each Taxon in the path from the current taxon to the root. If the
|
||||
// taxonomy has no root node, the method logs a fatal error and terminates the program.
|
||||
func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
|
||||
if taxon.Taxonomy.root == nil {
|
||||
log.Fatalf("Taxon[%v].IPath(): Taxonomy has no root node", taxon.Taxonomy.name)
|
||||
}
|
||||
|
||||
return *n
|
||||
}
|
||||
|
||||
func (node *TaxNode) Rank() string {
|
||||
return node.rank
|
||||
}
|
||||
|
||||
func (node *TaxNode) Taxid() int {
|
||||
return node.taxid
|
||||
}
|
||||
|
||||
func (node *TaxNode) Parent() *TaxNode {
|
||||
return node.pparent
|
||||
}
|
||||
|
||||
func (node *TaxNode) IsNameEqual(name string) bool {
|
||||
if *(node.scientificname) == name {
|
||||
return true
|
||||
}
|
||||
if node.alternatenames != nil {
|
||||
_, ok := (*node.alternatenames)[name]
|
||||
return ok
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (node *TaxNode) IsNameMatching(pattern *regexp.Regexp) bool {
|
||||
if pattern.MatchString(*(node.scientificname)) {
|
||||
return true
|
||||
}
|
||||
if node.alternatenames != nil {
|
||||
for n := range *node.alternatenames {
|
||||
if pattern.MatchString(n) {
|
||||
return true
|
||||
return func(yield func(*Taxon) bool) {
|
||||
for taxon.Node.parent != taxon.Taxonomy.root.id {
|
||||
if !yield(taxon) {
|
||||
return
|
||||
}
|
||||
|
||||
taxon = taxon.Parent()
|
||||
}
|
||||
|
||||
yield(taxon)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Path returns a slice of TaxNode[T] representing the path from the current Taxon
|
||||
// to the root Taxon in the associated Taxonomy. It collects all the nodes in the path
|
||||
// using the IPath method and returns them as a TaxonSlice.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to a TaxonSlice[T] containing the TaxNode[T] instances in the path
|
||||
// from the current taxon to the root.
|
||||
func (taxon *Taxon) Path() *TaxonSlice {
|
||||
s := make([]*TaxNode, 0, 10)
|
||||
|
||||
for t := range taxon.IPath() {
|
||||
s = append(s, t.Node)
|
||||
}
|
||||
|
||||
return &TaxonSlice{
|
||||
slice: s,
|
||||
taxonomy: taxon.Taxonomy,
|
||||
}
|
||||
}
|
||||
|
||||
// HasRankDefined checks if any taxon in the path from the current Taxon to the root
|
||||
// has the specified rank defined. It iterates through the path using the IPath method
|
||||
// and returns true if a match is found; otherwise, it returns false.
|
||||
//
|
||||
// Parameters:
|
||||
// - rank: A string representing the rank to check for (e.g., "species", "genus").
|
||||
//
|
||||
// Returns:
|
||||
// - A boolean indicating whether any taxon in the path has the specified rank defined.
|
||||
func (taxon *Taxon) HasRankDefined(rank string) bool {
|
||||
for t := range taxon.IPath() {
|
||||
if t.Node.Rank() == rank {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (node *TaxNode) HasRankDefined(rank string) bool {
|
||||
|
||||
for node.rank != rank && node.parent != node.taxid {
|
||||
node = node.pparent
|
||||
// TaxonAtRank returns the first Taxon in the path from the current Taxon to the root
|
||||
// that has the specified rank defined. It iterates through the path using the IPath method
|
||||
// and returns the matching Taxon if found; otherwise, it returns nil.
|
||||
//
|
||||
// Parameters:
|
||||
// - rank: A string representing the rank to search for (e.g., "species", "genus").
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the Taxon[T] that matches the specified rank, or nil if no such taxon exists
|
||||
// in the path to the root.
|
||||
func (taxon *Taxon) TaxonAtRank(rank string) *Taxon {
|
||||
for t := range taxon.IPath() {
|
||||
if t.Node.Rank() == rank {
|
||||
return t
|
||||
}
|
||||
}
|
||||
return node.rank == rank
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Species returns the first Taxon in the path from the current Taxon to the root
|
||||
// that has the rank "species" defined. It utilizes the TaxonAtRank method to find
|
||||
// the matching Taxon.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the Taxon[T] that matches the "species" rank, or nil if no such taxon
|
||||
// exists in the path to the root.
|
||||
func (taxon *Taxon) Species() *Taxon {
|
||||
return taxon.TaxonAtRank("species")
|
||||
}
|
||||
|
||||
// Genus returns the first Taxon in the path from the current Taxon to the root
|
||||
// that has the rank "genus" defined. It utilizes the TaxonAtRank method to find
|
||||
// the matching Taxon.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the Taxon[T] that matches the "genus" rank, or nil if no such taxon
|
||||
// exists in the path to the root.
|
||||
func (taxon *Taxon) Genus() *Taxon {
|
||||
return taxon.TaxonAtRank("genus")
|
||||
}
|
||||
|
||||
// Family returns the first Taxon in the path from the current Taxon to the root
|
||||
// that has the rank "family" defined. It utilizes the TaxonAtRank method to find
|
||||
// the matching Taxon.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the Taxon[T] that matches the "family" rank, or nil if no such taxon
|
||||
// exists in the path to the root.
|
||||
func (taxon *Taxon) Family() *Taxon {
|
||||
return taxon.TaxonAtRank("family")
|
||||
}
|
||||
|
169
pkg/obitax/taxonnode.go
Normal file
169
pkg/obitax/taxonnode.go
Normal file
@ -0,0 +1,169 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// TaxNode represents a single taxon in a taxonomy.
// It holds information about the taxon's identifier, parent taxon, rank,
// scientific name, and alternate names.
//
// Fields:
//   - id: The unique identifier of the taxon.
//   - parent: The identifier of the parent taxon.
//   - rank: The rank of the taxon (e.g., species, genus).
//   - scientificname: A pointer to a string representing the scientific name of the taxon;
//     nil as long as no scientific name has been set.
//   - alternatenames: A pointer to a map of alternate names for the taxon, where the key is
//     a string representing the class name and the value is a pointer to a string
//     representing the name; nil as long as no alternate name has been set.
type TaxNode struct {
	id             string
	parent         string
	rank           string
	scientificname *string
	alternatenames *map[string]*string
}

// String returns a string representation of the TaxNode, including the taxonomy code,
// the node ID, and the scientific name. The output format is "taxonomyCode:id [scientificName]".
//
// Parameters:
//   - taxonomyCode: A string representing the code of the taxonomy to which the node belongs.
//
// Returns:
//   - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]".
func (node *TaxNode) String(taxonomyCode string) string {
	return fmt.Sprintf("%s:%v [%s]",
		taxonomyCode,
		node.id,
		node.ScientificName())
}

// Id returns the unique identifier of the TaxNode.
//
// Returns:
//   - The unique identifier of the taxon node.
func (node *TaxNode) Id() string {
	return node.id
}

// ParentId returns the identifier of the parent taxon of the TaxNode.
//
// Returns:
//   - The identifier of the parent taxon.
func (node *TaxNode) ParentId() string {
	return node.parent
}

// ScientificName returns the scientific name of the TaxNode.
//
// Returns:
//   - The scientific name of the taxon as a string, or an empty string when
//     no scientific name has been set on the node.
func (node *TaxNode) ScientificName() string {
	if node.scientificname == nil {
		return ""
	}
	return *node.scientificname
}

// Name retrieves the name of the TaxNode based on the specified class.
// If the class is "scientificname", it returns the scientific name of the taxon.
// If the class corresponds to an alternate name, it retrieves that name from the alternatenames map.
// If the class is not recognized or if no alternate names exist, it returns an empty string.
//
// Parameters:
//   - class: A string representing the class of name to retrieve (e.g., "scientificname" or an alternate name class).
//
// Returns:
//   - The name of the taxon as a string. If the class is not recognized or if no name is available,
//     an empty string is returned.
func (node *TaxNode) Name(class string) string {
	if class == "scientificname" {
		// Goes through the accessor so that an unset name yields "".
		return node.ScientificName()
	}

	if node.alternatenames == nil {
		return ""
	}

	if val, ok := (*node.alternatenames)[class]; ok && val != nil {
		return *val
	}

	return ""
}

// SetName records a name for the TaxNode under the specified class.
// The class "scientificname" sets the scientific name of the taxon; any other
// class stores the name in the alternatenames map, which is created on first
// use, replacing the name previously stored for that class.
//
// Parameters:
//   - name: The name to record.
//   - class: The class of the name (e.g., "scientificname" or an alternate name class).
func (node *TaxNode) SetName(name, class string) {
	if class == "scientificname" {
		node.scientificname = &name
		return
	}

	if node.alternatenames == nil {
		node.alternatenames = &map[string]*string{}
	}

	(*node.alternatenames)[class] = &name
}
|
||||
|
||||
// Rank returns the rank of the TaxNode.
|
||||
// It retrieves the rank associated with the taxon node, which indicates its level in the taxonomy hierarchy.
|
||||
//
|
||||
// Returns:
|
||||
// - The rank of the taxon as a string (e.g., species, genus, family).
|
||||
func (node *TaxNode) Rank() string {
|
||||
return node.rank
|
||||
}
|
||||
|
||||
// IsNameEqual checks if the provided name matches the scientific name or any alternate names
|
||||
// associated with the TaxNode. It returns true if there is a match; otherwise, it returns false.
|
||||
//
|
||||
// Parameters:
|
||||
// - name: A string representing the name to compare against the scientific name and alternate names.
|
||||
//
|
||||
// Returns:
|
||||
// - A boolean indicating whether the provided name is equal to the scientific name or exists
|
||||
// as an alternate name for the taxon.
|
||||
func (node *TaxNode) IsNameEqual(name string) bool {
|
||||
if *(node.scientificname) == name {
|
||||
return true
|
||||
}
|
||||
if node.alternatenames != nil {
|
||||
for _, n := range *node.alternatenames {
|
||||
if n != nil && *n == name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// IsNameMatching checks if the scientific name or any alternate names of the TaxNode match
|
||||
// the provided regular expression pattern. It returns true if there is a match; otherwise, it returns false.
|
||||
//
|
||||
// Parameters:
|
||||
// - pattern: A pointer to a regexp.Regexp object representing the pattern to match against
|
||||
// the scientific name and alternate names.
|
||||
//
|
||||
// Returns:
|
||||
// - A boolean indicating whether the scientific name or any alternate names match the
|
||||
// provided regular expression pattern.
|
||||
func (node *TaxNode) IsNameMatching(pattern *regexp.Regexp) bool {
|
||||
if pattern.MatchString(*(node.scientificname)) {
|
||||
return true
|
||||
}
|
||||
if node.alternatenames != nil {
|
||||
for _, n := range *node.alternatenames {
|
||||
if n != nil && pattern.MatchString(*n) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
@ -3,57 +3,222 @@ package obitax
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// TaxName pairs a name with the class it belongs to. Both values are held
// through pointers; the field order matters because the type is also built
// with positional composite literals.
type TaxName struct {
	name, nameclass *string
}
|
||||
|
||||
// Taxonomy represents a hierarchical classification of taxa.
|
||||
// It holds information about the taxonomy's name, code, ranks, nodes, root node, aliases, and an index.
|
||||
// The generic type T is used to specify the type of taxon identifiers.
|
||||
//
|
||||
// Fields:
|
||||
// - name: The name of the taxonomy.
|
||||
// - code: A unique code representing the taxonomy.
|
||||
// - ranks: A pointer to an InnerString instance that holds the ranks of the taxa.
|
||||
// - nodes: A pointer to a TaxonSet containing all the nodes (taxa) in the taxonomy.
|
||||
// - root: A pointer to the root TaxNode of the taxonomy.
|
||||
// - index: A map that indexes taxa by their string representation for quick access.
|
||||
type Taxonomy struct {
|
||||
nodes *TaxonSet
|
||||
alias map[int]*TaxNode
|
||||
index map[string]*TaxonSet
|
||||
name string
|
||||
code string
|
||||
ranks *InnerString
|
||||
nameclasses *InnerString
|
||||
nodes *TaxonSet
|
||||
root *TaxNode
|
||||
matcher *regexp.Regexp
|
||||
index map[string]*TaxonSet
|
||||
}
|
||||
|
||||
func NewTaxonomy() *Taxonomy {
|
||||
set := make(TaxonSet)
|
||||
taxonomy := Taxonomy{
|
||||
nodes: &set,
|
||||
alias: make(TaxonSet),
|
||||
index: make(map[string]*TaxonSet)}
|
||||
return &taxonomy
|
||||
// NewTaxonomy creates and initializes a new Taxonomy instance with the specified name and code.
|
||||
// It sets up the necessary internal structures, including ranks, nodes, aliases, and an index.
|
||||
//
|
||||
// Parameters:
|
||||
// - name: The name of the taxonomy to be created.
|
||||
// - code: A unique code representing the taxonomy.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the newly created Taxonomy instance.
|
||||
func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
|
||||
set := make(map[string]*TaxNode)
|
||||
|
||||
// codeCharacters := "[[:alnum:]]" // [[:digit:]]
|
||||
|
||||
matcher := regexp.MustCompile(fmt.Sprintf("^[[:blank:]]*(%s:)?(%s+)", code, codeCharacters))
|
||||
|
||||
taxonomy := &Taxonomy{
|
||||
name: name,
|
||||
code: code,
|
||||
ranks: NewInnerString(),
|
||||
nameclasses: NewInnerString(),
|
||||
nodes: &TaxonSet{set: set},
|
||||
root: nil,
|
||||
matcher: matcher,
|
||||
index: make(map[string]*TaxonSet),
|
||||
}
|
||||
|
||||
taxonomy.nodes.taxonomy = taxonomy
|
||||
|
||||
return taxonomy
|
||||
}
|
||||
|
||||
// Id converts a given taxid string into the corresponding taxon identifier of type T.
|
||||
// It uses a regular expression to validate and extract the taxid. If the taxid is invalid,
|
||||
// the method returns an error along with a zero value of type T.
|
||||
//
|
||||
// Parameters:
|
||||
// - taxid: A string representation of the taxon identifier to be converted.
|
||||
//
|
||||
// Returns:
|
||||
// - The taxon identifier of type T corresponding to the provided taxid.
|
||||
// - An error if the taxid is not valid or cannot be converted.
|
||||
func (taxonomy *Taxonomy) Id(taxid string) (string, error) {
|
||||
matches := taxonomy.matcher.FindStringSubmatch(taxid)
|
||||
|
||||
if matches == nil {
|
||||
return "", fmt.Errorf("Taxid %s is not a valid taxid", taxid)
|
||||
}
|
||||
|
||||
return matches[2], nil
|
||||
}
|
||||
|
||||
// TaxidSting retrieves the string representation of a taxon node identified by the given ID.
|
||||
// It looks up the node in the taxonomy and returns its formatted string representation
|
||||
// along with the taxonomy code. If the node does not exist, it returns an error.
|
||||
//
|
||||
// Parameters:
|
||||
// - id: The identifier of the taxon node to retrieve.
|
||||
//
|
||||
// Returns:
|
||||
// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]",
|
||||
// or an error if the taxon node with the specified ID does not exist in the taxonomy.
|
||||
func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
|
||||
node := taxonomy.nodes.Get(id)
|
||||
if node == nil {
|
||||
return "", fmt.Errorf("Taxid %d is part of the taxonomy", id)
|
||||
}
|
||||
return node.String(taxonomy.code), nil
|
||||
}
|
||||
|
||||
// Taxon retrieves the Taxon associated with the given taxid string.
|
||||
// It first converts the taxid to its corresponding identifier using the Id method.
|
||||
// If the taxon is not found, it logs a fatal error and terminates the program.
|
||||
//
|
||||
// Parameters:
|
||||
// - taxid: A string representation of the taxon identifier to be retrieved.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the Taxon[T] instance associated with the provided taxid.
|
||||
// - If the taxid is unknown, the method will log a fatal error.
|
||||
func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
|
||||
id, err := taxonomy.Id(taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Taxid %s is not a valid taxid", taxid)
|
||||
}
|
||||
|
||||
node := taxonomy.nodes.Get(id)
|
||||
|
||||
if node == nil {
|
||||
log.Fatalf("Taxid %s is an unknown taxid", taxid)
|
||||
}
|
||||
|
||||
return &Taxon{
|
||||
Taxonomy: taxonomy,
|
||||
Node: node,
|
||||
}
|
||||
}
|
||||
|
||||
// TaxonSet returns the set of taxon nodes contained within the Taxonomy.
|
||||
// It provides access to the underlying collection of taxon nodes for further operations.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the TaxonSet[T] representing the collection of taxon nodes in the taxonomy.
|
||||
func (taxonomy *Taxonomy) TaxonSet() *TaxonSet {
|
||||
return taxonomy.nodes
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) Alias() *map[int]*TaxNode {
|
||||
return &(taxonomy.alias)
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) Index() *map[string]*TaxonSet {
|
||||
return &(taxonomy.index)
|
||||
}
|
||||
|
||||
// Len returns the number of taxa in the Taxonomy.
|
||||
// It delegates the call to the Len method of the underlying nodes set.
|
||||
//
|
||||
// Returns:
|
||||
// - An integer representing the total count of taxa in the taxonomy.
|
||||
func (taxonomy *Taxonomy) Len() int {
|
||||
return len(*taxonomy.nodes)
|
||||
return taxonomy.nodes.Len()
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) AddNewTaxa(taxid, parent int, rank string, replace bool, init bool) (*TaxNode, error) {
|
||||
if !replace {
|
||||
_, ok := (*taxonomy.nodes)[taxid]
|
||||
if ok {
|
||||
return nil, fmt.Errorf("trying to add taxoon %d already present in the taxonomy", taxid)
|
||||
}
|
||||
// AddTaxon adds a new taxon to the taxonomy with the specified parameters.
|
||||
// It checks if the taxon already exists and can replace it if specified.
|
||||
//
|
||||
// Parameters:
|
||||
// - taxid: The identifier of the taxon to be added.
|
||||
// - parent: The identifier of the parent taxon.
|
||||
// - rank: The rank of the taxon (e.g., species, genus).
|
||||
// - isRoot: A boolean indicating if this taxon is the root of the taxonomy.
|
||||
// - replace: A boolean indicating whether to replace an existing taxon with the same taxid.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the newly created Taxon[T] instance.
|
||||
// - An error if the taxon cannot be added (e.g., it already exists and replace is false).
|
||||
func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) {
|
||||
if !replace && taxonomy.nodes.Contains(taxid) {
|
||||
return nil, fmt.Errorf("trying to add taxon %d already present in the taxonomy", taxid)
|
||||
}
|
||||
|
||||
n := NewTaxNode(taxid, parent, rank)
|
||||
(*taxonomy.nodes)[taxid] = n
|
||||
rank = taxonomy.ranks.Innerize(rank)
|
||||
|
||||
return n, nil
|
||||
n := &TaxNode{taxid, parent, rank, nil, nil}
|
||||
|
||||
taxonomy.nodes.Insert(n)
|
||||
|
||||
if isRoot {
|
||||
n.parent = n.id
|
||||
taxonomy.root = n
|
||||
}
|
||||
|
||||
return &Taxon{
|
||||
Taxonomy: taxonomy,
|
||||
Node: n,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Taxon, error) {
|
||||
newid, err := taxonomy.Id(newtaxid)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
oldid, err := taxonomy.Id(oldtaxid)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !replace && taxonomy.nodes.Contains(newid) {
|
||||
return nil, fmt.Errorf("trying to add alias %s already present in the taxonomy", newtaxid)
|
||||
}
|
||||
|
||||
n := taxonomy.nodes.Get(oldid)
|
||||
|
||||
if n == nil {
|
||||
return nil, fmt.Errorf("trying to add alias %s to a taxon that does not exist", oldtaxid)
|
||||
}
|
||||
|
||||
taxonomy.nodes.Alias(newid, n)
|
||||
|
||||
return &Taxon{
|
||||
Taxonomy: taxonomy,
|
||||
Node: n,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// RankList returns a slice of strings representing the ranks of the taxa
|
||||
// in the taxonomy. It retrieves the ranks from the InnerString instance
|
||||
// associated with the taxonomy.
|
||||
//
|
||||
// Returns:
|
||||
// - A slice of strings containing the ranks of the taxa.
|
||||
func (taxonomy *Taxonomy) RankList() []string {
|
||||
return taxonomy.ranks.Slice()
|
||||
}
|
||||
|
||||
// func (taxonomy *Taxonomy) Taxon(taxid int) (*TaxNode, error) {
|
||||
@ -69,93 +234,6 @@ func (taxonomy *Taxonomy) AddNewTaxa(taxid, parent int, rank string, replace boo
|
||||
// return t, nil
|
||||
// }
|
||||
|
||||
func (taxonomy *Taxonomy) Taxon(taxid interface{}) (*TaxNode, error) {
|
||||
var itaxid int
|
||||
var err error
|
||||
|
||||
switch v := taxid.(type) {
|
||||
case int:
|
||||
itaxid = v
|
||||
case string:
|
||||
itaxid, err = strconv.Atoi(v)
|
||||
|
||||
if err != nil {
|
||||
re := regexp.MustCompile(`TX:(\d+)`)
|
||||
parts := re.FindStringSubmatch(v)
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("I cannot parse taxid from %s", v)
|
||||
}
|
||||
itaxid, _ = strconv.Atoi(parts[1])
|
||||
}
|
||||
}
|
||||
|
||||
t, ok := (*taxonomy.nodes)[itaxid]
|
||||
|
||||
if !ok {
|
||||
a, aok := taxonomy.alias[itaxid]
|
||||
if !aok {
|
||||
return nil, fmt.Errorf("Taxid %d is not part of the taxonomy", taxid)
|
||||
}
|
||||
t = a
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
func (taxonomy *Taxonomy) AddNewName(taxid int, name, nameclass *string) error {
|
||||
node, node_err := taxonomy.Taxon(taxid)
|
||||
if node_err != nil {
|
||||
return node_err
|
||||
}
|
||||
|
||||
if *nameclass == "scientific name" {
|
||||
node.scientificname = name
|
||||
} else {
|
||||
names := node.alternatenames
|
||||
if names == nil {
|
||||
n := make(map[string]*string)
|
||||
names = &n
|
||||
node.alternatenames = names
|
||||
} else {
|
||||
(*names)[*name] = nameclass
|
||||
}
|
||||
}
|
||||
|
||||
i, ok := taxonomy.index[*name]
|
||||
if !ok {
|
||||
tnm := make(TaxonSet)
|
||||
i = &tnm
|
||||
taxonomy.index[*name] = i
|
||||
}
|
||||
(*i)[taxid] = node
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) ReindexParent() error {
|
||||
var ok bool
|
||||
for _, taxon := range *taxonomy.nodes {
|
||||
taxon.pparent, ok = (*taxonomy.nodes)[taxon.parent]
|
||||
if !ok {
|
||||
return fmt.Errorf("Parent %d of taxon %d is not defined in taxonomy",
|
||||
taxon.taxid,
|
||||
taxon.parent)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func MakeTaxName(name, nameclass *string) *TaxName {
|
||||
tn := TaxName{name, nameclass}
|
||||
return &tn
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) AddNewAlias(newtaxid, oldtaxid int) error {
|
||||
n, node_err := taxonomy.Taxon(newtaxid)
|
||||
if node_err != nil {
|
||||
return node_err
|
||||
}
|
||||
|
||||
taxonomy.alias[oldtaxid] = n
|
||||
|
||||
return nil
|
||||
func (taxonomy *Taxonomy) Index() *map[string]*TaxonSet {
|
||||
return &(taxonomy.index)
|
||||
}
|
||||
|
@ -1,15 +1,126 @@
|
||||
package obitax
|
||||
|
||||
type TaxonSet map[int]*TaxNode
|
||||
import log "github.com/sirupsen/logrus"
|
||||
|
||||
func (set *TaxonSet) Get(i int) *TaxNode {
|
||||
return (*set)[i]
|
||||
// TaxonSet represents a collection of taxa within a taxonomy.
|
||||
// It holds a mapping of taxon identifiers to their corresponding TaxNode instances,
|
||||
// as well as a reference to the associated Taxonomy.
|
||||
//
|
||||
// Fields:
|
||||
// - set: A map that associates taxon identifiers of type T with their corresponding TaxNode[T] instances.
|
||||
// - taxonomy: A pointer to the Taxonomy[T] instance that this TaxonSet belongs to.
|
||||
type TaxonSet struct {
|
||||
set map[string]*TaxNode
|
||||
nalias int
|
||||
taxonomy *Taxonomy
|
||||
}
|
||||
|
||||
// Get retrieves the TaxNode[T] associated with the specified taxon identifier.
|
||||
// It returns the TaxNode if it exists in the TaxonSet; otherwise, it returns nil.
|
||||
//
|
||||
// Parameters:
|
||||
// - i: The taxon identifier of type T for which the TaxNode is to be retrieved.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the TaxNode[T] associated with the provided identifier, or nil
|
||||
// if no such taxon exists in the set.
|
||||
func (set *TaxonSet) Get(i string) *TaxNode {
|
||||
return set.set[i]
|
||||
}
|
||||
|
||||
// Len returns the number of unique taxa in the TaxonSet.
|
||||
// It calculates the count by subtracting the number of aliases from the total
|
||||
// number of entries in the set.
|
||||
//
|
||||
// Returns:
|
||||
// - An integer representing the count of unique taxa in the TaxonSet.
|
||||
func (set *TaxonSet) Len() int {
|
||||
return len(*set)
|
||||
return len(set.set) - set.nalias
|
||||
}
|
||||
|
||||
func (set *TaxonSet) Inserts(taxon *TaxNode) {
|
||||
(*set)[taxon.taxid] = taxon
|
||||
// Insert adds a TaxNode[T] to the TaxonSet. If a taxon with the same identifier
|
||||
// already exists in the set, it updates the reference. If the existing taxon was
|
||||
// an alias, its alias count is decremented.
|
||||
//
|
||||
// Parameters:
|
||||
// - taxon: A pointer to the TaxNode[T] instance to be added to the TaxonSet.
|
||||
//
|
||||
// Behavior:
|
||||
// - If a taxon with the same identifier already exists and is different from the
|
||||
// new taxon, the alias count is decremented.
|
||||
func (set *TaxonSet) Insert(taxon *TaxNode) {
|
||||
if old := set.set[taxon.id]; old != nil && old.id != taxon.id {
|
||||
set.nalias--
|
||||
}
|
||||
set.set[taxon.id] = taxon
|
||||
}
|
||||
|
||||
// Taxonomy returns a pointer to the Taxonomy[T] instance that this TaxonSet belongs to.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the Taxonomy[T] instance that this TaxonSet belongs to
|
||||
func (set *TaxonSet) Taxonomy() *Taxonomy {
|
||||
return set.taxonomy
|
||||
}
|
||||
|
||||
// Alias associates a given alias string with a specified TaxNode in the TaxonSet.
|
||||
// It first converts the alias to its corresponding identifier using the Id method.
|
||||
// If the original taxon is not part of the taxon set, it logs a fatal error and terminates the program.
|
||||
//
|
||||
// Parameters:
|
||||
// - alias: A string representing the alias to be associated with the taxon node.
|
||||
// - node: A pointer to the TaxNode[T] instance that the alias will refer to.
|
||||
//
|
||||
// Behavior:
|
||||
// - If the original taxon corresponding to the alias is not part of the taxon set,
|
||||
// the method will log a fatal error and terminate the program.
|
||||
func (set *TaxonSet) Alias(id string, node *TaxNode) {
|
||||
original := set.Get(node.id)
|
||||
if original != nil {
|
||||
log.Fatalf("Original taxon %v is not part of taxon set", id)
|
||||
}
|
||||
set.set[id] = node
|
||||
set.nalias++
|
||||
}
|
||||
|
||||
// IsAlias checks if the given identifier corresponds to an alias in the TaxonSet.
|
||||
// It retrieves the TaxNode associated with the identifier and returns true if the
|
||||
// node exists and its identifier is different from the provided identifier; otherwise, it returns false.
|
||||
//
|
||||
// Parameters:
|
||||
// - id: The identifier of type T to be checked for alias status.
|
||||
//
|
||||
// Returns:
|
||||
// - A boolean indicating whether the identifier corresponds to an alias in the set.
|
||||
func (set *TaxonSet) IsAlias(id string) bool {
|
||||
node := set.Get(id)
|
||||
return node != nil && node.id != id
|
||||
}
|
||||
|
||||
// IsATaxon checks if the given ID corresponds to a valid taxon node in the TaxonSet.
|
||||
// It returns true if the node exists and its ID matches the provided ID; otherwise, it returns false.
|
||||
// id corresponding to alias returns false.
|
||||
//
|
||||
// Parameters:
|
||||
// - id: The identifier of the taxon to check.
|
||||
//
|
||||
// Returns:
|
||||
// - A boolean indicating whether the specified ID corresponds to a valid taxon node.
|
||||
func (set *TaxonSet) IsATaxon(id string) bool {
|
||||
node := set.Get(id)
|
||||
return node != nil && node.id == id
|
||||
}
|
||||
|
||||
// Contains checks if the TaxonSet contains a taxon node with the specified ID.
|
||||
// It returns true if the node exists in the set; otherwise, it returns false.
|
||||
// id corresponding to alias or true taxa returns true.
|
||||
//
|
||||
// Parameters:
|
||||
// - id: The identifier of the taxon to check for presence in the set.
|
||||
//
|
||||
// Returns:
|
||||
// - A boolean indicating whether the TaxonSet contains a taxon node with the specified ID.
|
||||
func (set *TaxonSet) Contains(id string) bool {
|
||||
node := set.Get(id)
|
||||
return node != nil
|
||||
}
|
||||
|
@ -5,30 +5,59 @@ import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type TaxonSlice []*TaxNode
|
||||
|
||||
func (set *TaxonSlice) Get(i int) *TaxNode {
|
||||
return (*set)[i]
|
||||
// TaxonSlice represents a slice of TaxNode[T] instances within a taxonomy.
|
||||
// It encapsulates a collection of taxon nodes and the taxonomy they belong to.
|
||||
//
|
||||
// Fields:
|
||||
// - slice: A slice of pointers to TaxNode[T] representing the taxon nodes.
|
||||
// - taxonomy: A pointer to the Taxonomy[T] instance that these taxon nodes are part of.
|
||||
type TaxonSlice struct {
|
||||
slice []*TaxNode
|
||||
taxonomy *Taxonomy
|
||||
}
|
||||
|
||||
func (set *TaxonSlice) Len() int {
|
||||
return len(*set)
|
||||
// Get retrieves the TaxNode[T] at the specified index from the TaxonSlice.
|
||||
// It returns the taxon node corresponding to the provided index.
|
||||
//
|
||||
// Parameters:
|
||||
// - i: An integer representing the index of the taxon node to retrieve.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the TaxNode[T] at the specified index in the slice.
|
||||
func (slice *TaxonSlice) Get(i int) *TaxNode {
|
||||
return slice.slice[i]
|
||||
}
|
||||
|
||||
// Len returns the number of TaxNode[T] instances in the TaxonSlice.
|
||||
// It provides the count of taxon nodes contained within the slice.
|
||||
//
|
||||
// Returns:
|
||||
// - An integer representing the total number of taxon nodes in the TaxonSlice.
|
||||
func (slice *TaxonSlice) Len() int {
|
||||
return len(slice.slice)
|
||||
}
|
||||
|
||||
// String returns a string representation of the TaxonSlice.
|
||||
// It formats the output to include the IDs, scientific names, and ranks of the taxon nodes
|
||||
// in the slice, concatenated in reverse order, separated by vertical bars.
|
||||
//
|
||||
// Returns:
|
||||
// - A formatted string representing the TaxonSlice, with each taxon in the format
|
||||
// "id@scientific_name@rank". If the slice is empty, it returns an empty string.
|
||||
func (path *TaxonSlice) String() string {
|
||||
var buffer bytes.Buffer
|
||||
|
||||
if len(*path) > 0 {
|
||||
taxon := (*path)[len(*path)-1]
|
||||
fmt.Fprintf(&buffer, "%d@%s@%s",
|
||||
taxon.Taxid(),
|
||||
if path.Len() > 0 {
|
||||
taxon := path.slice[path.Len()-1]
|
||||
fmt.Fprintf(&buffer, "%v@%s@%s",
|
||||
taxon.Id(),
|
||||
taxon.ScientificName(),
|
||||
taxon.Rank())
|
||||
|
||||
for i := len(*path) - 2; i >= 0; i-- {
|
||||
taxon := (*path)[i]
|
||||
fmt.Fprintf(&buffer, "|%d@%s@%s",
|
||||
taxon.Taxid(),
|
||||
for i := path.Len() - 2; i >= 0; i-- {
|
||||
taxon := path.slice[i]
|
||||
fmt.Fprintf(&buffer, "|%v@%s@%s",
|
||||
taxon.Id(),
|
||||
taxon.ScientificName(),
|
||||
taxon.Rank())
|
||||
}
|
||||
|
@ -396,28 +396,55 @@ func JsonMarshal(i interface{}) ([]byte, error) {
|
||||
|
||||
// IsAMap checks if the given value is a map.
// A nil interface value is not a map and yields false.
//
// Parameters:
//   - value: The value to be checked.
//
// Returns:
//   - A boolean indicating if the value is a map.
func IsAMap(value interface{}) bool {
	// reflect.ValueOf(nil) is the zero Value, whose Kind is reflect.Invalid;
	// reflect.TypeOf(nil) would be a nil Type, and calling Kind on it panics.
	return reflect.ValueOf(value).Kind() == reflect.Map
}
|
||||
|
||||
// IsAnArray checks if the given value is an array.
// A nil interface value is not an array and yields false.
//
// Parameters:
//   - value: The value to be checked.
//
// Returns:
//   - A boolean indicating if the value is an array.
func IsAnArray(value interface{}) bool {
	// reflect.ValueOf(nil) is the zero Value, whose Kind is reflect.Invalid;
	// reflect.TypeOf(nil) would be a nil Type, and calling Kind on it panics.
	return reflect.ValueOf(value).Kind() == reflect.Array
}
|
||||
|
||||
// IsASlice determines if the given value is a slice.
// A nil interface value is not a slice and yields false.
//
// Parameters:
//   - value: The value to check.
//
// Returns:
//   - A boolean indicating if the value is a slice.
func IsASlice(value interface{}) bool {
	// reflect.ValueOf(nil) is the zero Value, whose Kind is reflect.Invalid;
	// reflect.TypeOf(nil) would be a nil Type, and calling Kind on it panics.
	return reflect.ValueOf(value).Kind() == reflect.Slice
}
|
||||
|
||||
// IsAContainer checks if the given value is a map, array, or slice.
// A nil interface value is not a container and yields false.
//
// Parameters:
//   - value: The value to check.
//
// Returns:
//   - A boolean indicating if the value is a container (map, array, or slice).
func IsAContainer(value interface{}) bool {
	// A single reflection covers the three kinds; the zero Value obtained
	// for a nil interface has Kind reflect.Invalid and falls to the default.
	switch reflect.ValueOf(value).Kind() {
	case reflect.Map, reflect.Array, reflect.Slice:
		return true
	default:
		return false
	}
}
|
||||
|
||||
// IsIntegral checks if the given float64 value is an integral number.
// The value is converted to int and back; it is integral when the round
// trip gives back the same float64.
//
// Parameters:
//   - val: The float64 value to check.
//
// Returns:
//   - A boolean indicating if the value is integral (no fractional part).
func IsIntegral(val float64) bool {
	truncated := int(val)
	return float64(truncated) == val
}
|
||||
|
Reference in New Issue
Block a user