diff --git a/pkg/obitax/bioseq_classifier.go b/pkg/obitax/bioseq_classifier.go deleted file mode 100644 index fd786bd..0000000 --- a/pkg/obitax/bioseq_classifier.go +++ /dev/null @@ -1,64 +0,0 @@ -package obitax - -import ( - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" - log "github.com/sirupsen/logrus" -) - -// TaxonomyClassifier is a function that creates a new instance of the BioSequenceClassifier -// for taxonomic classification based on a given taxonomic rank, taxonomy, and abort flag. -// -// Parameters: -// - taxonomicRank: the taxonomic rank to classify the sequences at. -// - taxonomy: the taxonomy object used for classification. -// - abortOnMissing: a flag indicating whether to abort if a taxon is missing in the taxonomy. -// -// Return: -// - *obiseq.BioSequenceClassifier: the new instance of the BioSequenceClassifier. -func TaxonomyClassifier(taxonomicRank string, - taxonomy *Taxonomy, - abortOnMissing bool) *obiseq.BioSequenceClassifier { - - code := func(sequence *obiseq.BioSequence) int { - taxid := sequence.Taxid() - taxon, err := taxonomy.Taxon(taxid) - if err == nil { - taxon = taxon.TaxonAtRank(taxonomicRank) - } else { - taxon = nil - } - if taxon == nil { - if abortOnMissing { - if err != nil { - log.Fatalf("Taxid %d not found in taxonomy", taxid) - } else { - log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %d", taxonomicRank, taxid) - } - - } - - return 0 - } - return taxon.Taxid() - } - - value := func(k int) string { - taxon, _ := taxonomy.Taxon(k) - return taxon.ScientificName() - } - - reset := func() { - } - - clone := func() *obiseq.BioSequenceClassifier { - return TaxonomyClassifier(taxonomicRank, taxonomy, abortOnMissing) - } - - c := obiseq.BioSequenceClassifier{ - Code: code, - Value: value, - Reset: reset, - Clone: clone, - Type: "TaxonomyClassifier"} - return &c -} diff --git a/pkg/obitax/inner.go b/pkg/obitax/inner.go new file mode 100644 index 0000000..3a248f2 --- /dev/null +++ 
b/pkg/obitax/inner.go @@ -0,0 +1,49 @@ +package obitax + +import "sync" + +// InnerString is a struct that holds a map of strings and a read-write lock for concurrent access. +// The index map is used to store key-value pairs of strings. +type InnerString struct { + index map[string]string + lock sync.RWMutex +} + +// NewInnerString creates a new instance of InnerString. +// The lock is set to false. +func NewInnerString() *InnerString { + return &InnerString{ + index: make(map[string]string), + } +} + +// Innerize stores the given value in the index map if it is not already present. +// It returns the value associated with the key, which is either the newly stored value +// or the existing value if it was already present in the map. +// +// Parameters: +// - value: The string value to be stored in the index map. +// +// Returns: +// - The string value associated with the key. +func (i *InnerString) Innerize(value string) string { + i.lock.Lock() + defer i.lock.Unlock() + s, ok := i.index[value] + if !ok { + i.index[value] = value + s = value + } + + return s +} + +func (i *InnerString) Slice() []string { + rep := make([]string, len(i.index)) + j := 0 + for _, v := range i.index { + rep[j] = v + j++ + } + return rep +} diff --git a/pkg/obitax/issuubcladeof.go b/pkg/obitax/issuubcladeof.go index 94f0c03..82a08ae 100644 --- a/pkg/obitax/issuubcladeof.go +++ b/pkg/obitax/issuubcladeof.go @@ -1,21 +1,22 @@ package obitax -func (taxon *TaxNode) IsSubCladeOf(parent *TaxNode) bool { +import "log" - for taxon.taxid != parent.taxid && taxon.parent != taxon.taxid { - taxon = taxon.pparent +func (taxon *Taxon) IsSubCladeOf(parent *Taxon) bool { + + if taxon.Taxonomy != parent.Taxonomy { + log.Fatalf( + "Both taxa %s and %s must belong to the same taxonomy", + taxon.String(), + parent.String(), + ) } - return taxon.taxid == parent.taxid -} - -func (taxon *TaxNode) IsBelongingSubclades(clades *TaxonSet) bool { - _, ok := (*clades)[taxon.taxid] - - for !ok && taxon.parent != 
taxon.taxid { - taxon = taxon.pparent - _, ok = (*clades)[taxon.taxid] + for t := range taxon.IPath() { + if t.Node.Id() == parent.Node.Id() { + return true + } } - return ok + return false } diff --git a/pkg/obitax/iterator.go b/pkg/obitax/iterator.go index b1f4fbf..5c29834 100644 --- a/pkg/obitax/iterator.go +++ b/pkg/obitax/iterator.go @@ -17,7 +17,7 @@ func (set *TaxonSet) Iterator() *ITaxonSet { i := NewITaxonSet() go func() { - for _, t := range *set { + for _, t := range set.set { i.source <- t } close(i.source) @@ -30,7 +30,7 @@ func (set *TaxonSlice) Iterator() *ITaxonSet { i := NewITaxonSet() go func() { - for _, t := range *set { + for _, t := range set.slice { i.source <- t } close(i.source) @@ -83,7 +83,7 @@ func (iterator *ITaxonSet) TaxonSet() *TaxonSet { for iterator.Next() { taxon := iterator.Get() - set[taxon.taxid] = taxon + set[taxon.id] = taxon } return &set } diff --git a/pkg/obitax/lca.go b/pkg/obitax/lca.go index 84d6cea..2b080ad 100644 --- a/pkg/obitax/lca.go +++ b/pkg/obitax/lca.go @@ -1,12 +1,6 @@ package obitax import ( - "math" - "strconv" - "strings" - - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" log "github.com/sirupsen/logrus" ) @@ -41,121 +35,3 @@ func (t1 *TaxNode) LCA(t2 *TaxNode) (*TaxNode, error) { return (*p1)[i1+1], nil } - -func (taxonomy *Taxonomy) TaxonomicDistribution(sequence *obiseq.BioSequence) map[*TaxNode]int { - taxids := sequence.StatsOn(obiseq.MakeStatsOnDescription("taxid"), "na") - taxons := make(map[*TaxNode]int, len(taxids)) - - for k, v := range taxids { - taxid, _ := strconv.Atoi(k) - - t, et := taxonomy.Taxon(taxid) - if et != nil { - log.Panicf("Taxid %d not defined in taxonomy : %v", taxid, et) - } - taxons[t] = v - } - return taxons -} - -func (taxonomy *Taxonomy) LCA(sequence *obiseq.BioSequence, threshold float64) (*TaxNode, float64, int) { - taxons := taxonomy.TaxonomicDistribution(sequence) - paths := 
make(map[*TaxNode]*TaxonSlice, len(taxons)) - answer := (*TaxNode)(nil) - rans := 1.0 - granTotal := 0 - - for t, w := range taxons { - p, ep := t.Path() - if ep != nil { - log.Panicf("Taxonomic path cannot be retreived from Taxid %d : %v", t.Taxid(), ep) - } - - obiutils.Reverse(*p, true) - paths[t] = p - answer = (*p)[0] - granTotal += w - } - - rmax := 1.0 - levels := make(map[*TaxNode]int, len(paths)) - taxonMax := answer - - for i := 0; rmax >= threshold; i++ { - answer = taxonMax - rans = rmax - taxonMax = nil - total := 0 - for taxon, weight := range taxons { - path := paths[taxon] - if len(*path) > i { - levels[(*path)[i]] += weight - } - total += weight - } - weighMax := 0 - for taxon, weight := range levels { - if weight > weighMax { - weighMax = weight - taxonMax = taxon - } - } - - if total > 0 { - rmax *= float64(weighMax) / float64(total) - } else { - rmax = 0.0 - } - - for taxon := range levels { - delete(levels, taxon) - } - for taxon := range taxons { - path := paths[taxon] - if i < len(*path) { - if (*path)[i] != taxonMax { - delete(paths, taxon) - delete(taxons, taxon) - } - } - } - // if taxonMax != nil { - // log.Println("@@@>", i, taxonMax.ScientificName(), taxonMax.Taxid(), rans, weighMax, total, rmax) - // } else { - // log.Println("@@@>", "--", 0, rmax) - // } - } - // log.Println("###>", answer.ScientificName(), answer.Taxid(), rans) - // log.Print("========================================") - return answer, rans, granTotal - -} - -func AddLCAWorker(taxonomy *Taxonomy, slot_name string, threshold float64) obiseq.SeqWorker { - - if !strings.HasSuffix(slot_name, "taxid") { - slot_name = slot_name + "_taxid" - } - - lca_error := strings.Replace(slot_name, "taxid", "error", 1) - if lca_error == "error" { - lca_error = "lca_error" - } - - lca_name := strings.Replace(slot_name, "taxid", "name", 1) - if lca_name == "name" { - lca_name = "scientific_name" - } - - f := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { - lca, 
rans, _ := taxonomy.LCA(sequence, threshold) - - sequence.SetAttribute(slot_name, lca.Taxid()) - sequence.SetAttribute(lca_name, lca.ScientificName()) - sequence.SetAttribute(lca_error, math.Round((1-rans)*1000)/1000) - - return obiseq.BioSequenceSlice{sequence}, nil - } - - return f -} diff --git a/pkg/obitax/path.go b/pkg/obitax/path.go deleted file mode 100644 index 543ef68..0000000 --- a/pkg/obitax/path.go +++ /dev/null @@ -1,87 +0,0 @@ -package obitax - -import ( - "fmt" - - log "github.com/sirupsen/logrus" -) - -// Path generates the lineage path from the current taxon up to the root. -// -// This method does not take parameters as it is called on a TaxNode receiver. -// It returns a pointer to a TaxonSlice containing the path and an error if -// the taxonomy needs reindexing. -func (taxon *TaxNode) Path() (*TaxonSlice, error) { - - path := make(TaxonSlice, 0, 30) - path = append(path, taxon) - - for taxon != taxon.pparent { - taxon = taxon.pparent - - if taxon == nil { - return nil, fmt.Errorf("Taxonomy must be reindexed") - } - - path = append(path, taxon) - } - - return &path, nil -} - -// TaxonAtRank traverses up the taxonomy tree starting from the current -// node until it finds a node that matches the specified rank. -// -// If a node with the given rank is not found in the path to the root, -// or if the taxonomy tree is not properly indexed (i.e., a node's parent -// is itself), the function will return nil. In case the taxonomy needs -// reindexing, the function will panic. -// -// rank: the taxonomic rank to search for (e.g., "species", "genus"). -// -// Returns a pointer to a TaxNode representing the node at the -// specified rank, or nil if no such node exists in the path. 
-func (taxon *TaxNode) TaxonAtRank(rank string) *TaxNode { - for taxon.rank != rank && taxon != taxon.pparent { - taxon = taxon.pparent - - if taxon == nil { - log.Panicln("Taxonomy must be reindexed") - } - } - - if taxon == taxon.pparent && taxon.rank != rank { - taxon = nil - } - - return taxon -} - -// Species retrieves the TaxNode corresponding to the species rank. -// -// This method does not take any parameters. It is a convenience -// wrapper around the TaxonAtRank method, specifically retrieving -// the species-level taxonomic classification for the calling TaxNode. -// -// Returns a pointer to the TaxNode representing the species. -func (taxon *TaxNode) Species() *TaxNode { - return taxon.TaxonAtRank("species") -} - -func (taxon *TaxNode) Genus() *TaxNode { - return taxon.TaxonAtRank("genus") -} - -func (taxon *TaxNode) Family() *TaxNode { - return taxon.TaxonAtRank("family") -} - -func (taxonomy *Taxonomy) Path(taxid int) (*TaxonSlice, error) { - taxon, err := taxonomy.Taxon(taxid) - - if err != nil { - return nil, err - } - - return taxon.Path() -} diff --git a/pkg/obitax/ranklist.go b/pkg/obitax/ranklist.go deleted file mode 100644 index 7056ab4..0000000 --- a/pkg/obitax/ranklist.go +++ /dev/null @@ -1,16 +0,0 @@ -package obitax - -func (taxonomy *Taxonomy) RankList() []string { - ranks := make([]string, 0, 30) - mranks := make(map[string]bool) - - for _, t := range *taxonomy.nodes { - mranks[t.rank] = true - } - - for r := range mranks { - ranks = append(ranks, r) - } - - return ranks -} diff --git a/pkg/obitax/sequence_methods.go b/pkg/obitax/sequence_methods.go deleted file mode 100644 index 384a14f..0000000 --- a/pkg/obitax/sequence_methods.go +++ /dev/null @@ -1,92 +0,0 @@ -package obitax - -import ( - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" - log "github.com/sirupsen/logrus" -) - -// Setting the taxon at a given rank for a given sequence. -// -// Two attributes are added to the sequence. 
One named by the rank name stores -// the taxid, a second named by the rank name suffixed with '_name' contains the -// Scientific name of the genus. -// If the taxon at the given rank doesn't exist for the taxonomy annotation -// of the sequence, nothing happens. -func (taxonomy *Taxonomy) SetTaxonAtRank(sequence *obiseq.BioSequence, rank string) *TaxNode { - var taxonAtRank *TaxNode - - taxid := sequence.Taxid() - taxon, err := taxonomy.Taxon(taxid) - taxonAtRank = nil - if err == nil { - taxonAtRank = taxon.TaxonAtRank(rank) - if taxonAtRank != nil { - // log.Printf("Taxid: %d Rank: %s --> proposed : %d (%s)", taxid, rank, taxonAtRank.taxid, *(taxonAtRank.scientificname)) - sequence.SetAttribute(rank+"_taxid", taxonAtRank.taxid) - sequence.SetAttribute(rank+"_name", *taxonAtRank.scientificname) - } else { - sequence.SetAttribute(rank+"_taxid", -1) - sequence.SetAttribute(rank+"_name", "NA") - } - } - - return taxonAtRank -} - -// Setting the species of a sequence. -func (taxonomy *Taxonomy) SetSpecies(sequence *obiseq.BioSequence) *TaxNode { - return taxonomy.SetTaxonAtRank(sequence, "species") -} - -// Setting the genus of a sequence. -func (taxonomy *Taxonomy) SetGenus(sequence *obiseq.BioSequence) *TaxNode { - return taxonomy.SetTaxonAtRank(sequence, "genus") -} - -// Setting the family of a sequence. 
-func (taxonomy *Taxonomy) SetFamily(sequence *obiseq.BioSequence) *TaxNode { - return taxonomy.SetTaxonAtRank(sequence, "family") -} - -func (taxonomy *Taxonomy) SetPath(sequence *obiseq.BioSequence) string { - taxid, err := taxonomy.Taxon(sequence.Taxid()) - - if err != nil { - log.Fatalf("Taxid %d not defined in the current taxonomy", sequence.Taxid()) - } - - path, err := taxid.Path() - - if err != nil { - log.Fatalf("Taxonomy index error: %v", err) - } - - tpath := path.String() - sequence.SetAttribute("taxonomic_path", tpath) - - return tpath -} - -func (taxonomy *Taxonomy) SetScientificName(sequence *obiseq.BioSequence) string { - taxid, err := taxonomy.Taxon(sequence.Taxid()) - - if err != nil { - log.Fatalf("Taxid %d not defined in the current taxonomy", sequence.Taxid()) - } - - sequence.SetAttribute("scienctific_name", taxid.ScientificName()) - - return taxid.ScientificName() -} - -func (taxonomy *Taxonomy) SetTaxonomicRank(sequence *obiseq.BioSequence) string { - taxid, err := taxonomy.Taxon(sequence.Taxid()) - - if err != nil { - log.Fatalf("Taxid %d not defined in the current taxonomy", sequence.Taxid()) - } - - sequence.SetAttribute("taxonomic_rank", taxid.Rank()) - - return taxid.Rank() -} diff --git a/pkg/obitax/sequence_predicate.go b/pkg/obitax/sequence_predicate.go deleted file mode 100644 index 551a5a8..0000000 --- a/pkg/obitax/sequence_predicate.go +++ /dev/null @@ -1,91 +0,0 @@ -package obitax - -import ( - log "github.com/sirupsen/logrus" - - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" -) - -func (taxonomy *Taxonomy) IsAValidTaxon(withAutoCorrection ...bool) obiseq.SequencePredicate { - deprecatedTaxidsWarning := make(map[int]bool) - - autocorrection := false - if len(withAutoCorrection) > 0 { - autocorrection = withAutoCorrection[0] - } - - f := func(sequence *obiseq.BioSequence) bool { - taxid := sequence.Taxid() - taxon, err := taxonomy.Taxon(taxid) - 
- if err == nil && taxon.taxid != taxid { - if autocorrection { - sequence.SetTaxid(taxon.taxid) - log.Printf("Sequence %s : Taxid %d updated with %d", - sequence.Id(), - taxid, - taxon.taxid) - } else { - if _, ok := deprecatedTaxidsWarning[taxid]; !ok { - deprecatedTaxidsWarning[taxid] = true - log.Printf("Taxid %d is deprecated and must be replaced by %d", taxid, taxon.taxid) - } - } - } - - return err == nil - } - - return f -} - -// A function that takes a taxonomy and a taxid as arguments and returns a function that takes a -// pointer to a BioSequence as an argument and returns a boolean. -func (taxonomy *Taxonomy) IsSubCladeOf(taxid int) obiseq.SequencePredicate { - parent, err := taxonomy.Taxon(taxid) - - if err != nil { - log.Fatalf("Cannot find taxon : %d (%v)", taxid, err) - } - - f := func(sequence *obiseq.BioSequence) bool { - taxon, err := taxonomy.Taxon(sequence.Taxid()) - return err == nil && taxon.IsSubCladeOf(parent) - } - - return f -} - -func (taxonomy *Taxonomy) IsSubCladeOfSlot(key string) obiseq.SequencePredicate { - - f := func(sequence *obiseq.BioSequence) bool { - val, ok := sequence.GetStringAttribute(key) - - if ok { - parent, err1 := taxonomy.Taxon(val) - taxon, err2 := taxonomy.Taxon(sequence.Taxid()) - return err1 == nil && err2 == nil && taxon.IsSubCladeOf(parent) - } - - return false - } - - return f -} - -func (taxonomy *Taxonomy) HasRequiredRank(rank string) obiseq.SequencePredicate { - - if !obiutils.Contains(taxonomy.RankList(), rank) { - log.Fatalf("%s is not a valid rank (allowed ranks are %v)", - rank, - taxonomy.RankList()) - } - - f := func(sequence *obiseq.BioSequence) bool { - taxon, err := taxonomy.Taxon(sequence.Taxid()) - return err == nil && taxon.HasRankDefined(rank) - } - - return f -} diff --git a/pkg/obitax/sequence_workers.go b/pkg/obitax/sequence_workers.go deleted file mode 100644 index 0e8524f..0000000 --- a/pkg/obitax/sequence_workers.go +++ /dev/null @@ -1,64 +0,0 @@ -package obitax - -import ( - 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" - log "github.com/sirupsen/logrus" -) - -func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiseq.SeqWorker { - - if !obiutils.Contains(taxonomy.RankList(), rank) { - log.Fatalf("%s is not a valid rank (allowed ranks are %v)", - rank, - taxonomy.RankList()) - } - - w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { - taxonomy.SetTaxonAtRank(sequence, rank) - return obiseq.BioSequenceSlice{sequence}, nil - } - - return w -} - -func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiseq.SeqWorker { - - w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { - taxonomy.SetSpecies(sequence) - return obiseq.BioSequenceSlice{sequence}, nil - } - - return w -} - -func (taxonomy *Taxonomy) MakeSetGenusWorker() obiseq.SeqWorker { - - w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { - taxonomy.SetGenus(sequence) - return obiseq.BioSequenceSlice{sequence}, nil - } - - return w -} - -func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiseq.SeqWorker { - - w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { - taxonomy.SetFamily(sequence) - return obiseq.BioSequenceSlice{sequence}, nil - } - - return w -} - -func (taxonomy *Taxonomy) MakeSetPathWorker() obiseq.SeqWorker { - - w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { - taxonomy.SetPath(sequence) - return obiseq.BioSequenceSlice{sequence}, nil - } - - return w - -} diff --git a/pkg/obitax/taxon.go b/pkg/obitax/taxon.go index 7570662..74c9a99 100644 --- a/pkg/obitax/taxon.go +++ b/pkg/obitax/taxon.go @@ -1,75 +1,194 @@ package obitax import ( + "iter" "regexp" + + log "github.com/sirupsen/logrus" ) -type TaxNode struct { - taxid int - parent int - pparent *TaxNode - rank string - scientificname *string - alternatenames *map[string]*string +// Taxon represents a taxon 
within a taxonomy, encapsulating both the taxonomy +// it belongs to and the specific taxon node information. +// +// Fields: +// - Taxonomy: A pointer to the Taxonomy[T] instance that this taxon is part of. +// - Node: A pointer to the TaxNode[T] instance representing the specific taxon. +type Taxon struct { + Taxonomy *Taxonomy + Node *TaxNode } -func NewTaxNode(taxid int, parent int, rank string) *TaxNode { - n := TaxNode{taxid, parent, nil, rank, nil, nil} - return &n +// String returns a string representation of the Taxon. +// It formats the output to include the taxonomy code, the taxon ID, and the scientific name. +// +// Returns: +// - A formatted string representing the Taxon in the form "taxonomy_code:taxon_id [scientific_name]". +func (taxon *Taxon) String() string { + return taxon.Node.String(taxon.Taxonomy.code) } -func (node *TaxNode) ScientificName() string { - n := node.scientificname - if n == nil { - return "" +// ScientificName returns the scientific name of the Taxon. +// It retrieves the scientific name from the underlying TaxNode associated with the taxon. +// +// Returns: +// - The scientific name of the taxon as a string. +func (taxon *Taxon) ScientificName() string { + return taxon.Node.ScientificName() +} + +func (taxon *Taxon) Name(class string) string { + return taxon.Node.Name(class) +} + +func (taxon *Taxon) IsNameEqual(name string) bool { + return taxon.Node.IsNameEqual(name) +} + +func (taxon *Taxon) IsNameMatching(pattern *regexp.Regexp) bool { + return taxon.Node.IsNameMatching(pattern) +} + +func (taxon *Taxon) SetName(name, class string) { + class = taxon.Taxonomy.nameclasses.Innerize(class) + taxon.Node.SetName(name, class) +} + +// Rank returns the rank of the Taxon. +// It retrieves the rank from the underlying TaxNode associated with the taxon. +// +// Returns: +// - The rank of the taxon as a string (e.g., species, genus, family). 
+func (taxon *Taxon) Rank() string { + return taxon.Node.Rank() +} + +// Parent returns a pointer to the parent Taxon of the current Taxon. +// It retrieves the parent identifier from the underlying TaxNode and uses it +// to create a new Taxon instance representing the parent taxon. +// +// Returns: +// - A pointer to the parent Taxon[T]. If the parent does not exist, it returns +// a Taxon with a nil Node. +func (taxon *Taxon) Parent() *Taxon { + pid := taxon.Node.ParentId() + return &Taxon{taxon.Taxonomy, + taxon.Taxonomy.nodes.Get(pid)} +} + +// IPath returns an iterator that yields the path from the current Taxon to the root Taxon +// in the associated Taxonomy. It traverses up the taxonomy hierarchy until it reaches the root. +// +// Returns: +// - An iterator function that takes a yield function as an argument. The yield function +// is called with each Taxon in the path from the current taxon to the root. If the +// taxonomy has no root node, the method logs a fatal error and terminates the program. 
+func (taxon *Taxon) IPath() iter.Seq[*Taxon] { + if taxon.Taxonomy.root == nil { + log.Fatalf("Taxon[%v].IPath(): Taxonomy has no root node", taxon.Taxonomy.name) } - return *n -} - -func (node *TaxNode) Rank() string { - return node.rank -} - -func (node *TaxNode) Taxid() int { - return node.taxid -} - -func (node *TaxNode) Parent() *TaxNode { - return node.pparent -} - -func (node *TaxNode) IsNameEqual(name string) bool { - if *(node.scientificname) == name { - return true - } - if node.alternatenames != nil { - _, ok := (*node.alternatenames)[name] - return ok - } - return false -} - -func (node *TaxNode) IsNameMatching(pattern *regexp.Regexp) bool { - if pattern.MatchString(*(node.scientificname)) { - return true - } - if node.alternatenames != nil { - for n := range *node.alternatenames { - if pattern.MatchString(n) { - return true + return func(yield func(*Taxon) bool) { + for taxon.Node.parent != taxon.Taxonomy.root.id { + if !yield(taxon) { + return } + + taxon = taxon.Parent() + } + + yield(taxon) + + } +} + +// Path returns a slice of TaxNode[T] representing the path from the current Taxon +// to the root Taxon in the associated Taxonomy. It collects all the nodes in the path +// using the IPath method and returns them as a TaxonSlice. +// +// Returns: +// - A pointer to a TaxonSlice[T] containing the TaxNode[T] instances in the path +// from the current taxon to the root. +func (taxon *Taxon) Path() *TaxonSlice { + s := make([]*TaxNode, 0, 10) + + for t := range taxon.IPath() { + s = append(s, t.Node) + } + + return &TaxonSlice{ + slice: s, + taxonomy: taxon.Taxonomy, + } +} + +// HasRankDefined checks if any taxon in the path from the current Taxon to the root +// has the specified rank defined. It iterates through the path using the IPath method +// and returns true if a match is found; otherwise, it returns false. +// +// Parameters: +// - rank: A string representing the rank to check for (e.g., "species", "genus"). 
+// +// Returns: +// - A boolean indicating whether any taxon in the path has the specified rank defined. +func (taxon *Taxon) HasRankDefined(rank string) bool { + for t := range taxon.IPath() { + if t.Node.Rank() == rank { + return true } } return false } -func (node *TaxNode) HasRankDefined(rank string) bool { - - for node.rank != rank && node.parent != node.taxid { - node = node.pparent +// TaxonAtRank returns the first Taxon in the path from the current Taxon to the root +// that has the specified rank defined. It iterates through the path using the IPath method +// and returns the matching Taxon if found; otherwise, it returns nil. +// +// Parameters: +// - rank: A string representing the rank to search for (e.g., "species", "genus"). +// +// Returns: +// - A pointer to the Taxon[T] that matches the specified rank, or nil if no such taxon exists +// in the path to the root. +func (taxon *Taxon) TaxonAtRank(rank string) *Taxon { + for t := range taxon.IPath() { + if t.Node.Rank() == rank { + return t + } } - return node.rank == rank + return nil +} + +// Species returns the first Taxon in the path from the current Taxon to the root +// that has the rank "species" defined. It utilizes the TaxonAtRank method to find +// the matching Taxon. +// +// Returns: +// - A pointer to the Taxon[T] that matches the "species" rank, or nil if no such taxon +// exists in the path to the root. +func (taxon *Taxon) Species() *Taxon { + return taxon.TaxonAtRank("species") +} + +// Genus returns the first Taxon in the path from the current Taxon to the root +// that has the rank "genus" defined. It utilizes the TaxonAtRank method to find +// the matching Taxon. +// +// Returns: +// - A pointer to the Taxon[T] that matches the "genus" rank, or nil if no such taxon +// exists in the path to the root. 
+func (taxon *Taxon) Genus() *Taxon { + return taxon.TaxonAtRank("genus") +} + +// Family returns the first Taxon in the path from the current Taxon to the root +// that has the rank "family" defined. It utilizes the TaxonAtRank method to find +// the matching Taxon. +// +// Returns: +// - A pointer to the Taxon[T] that matches the "family" rank, or nil if no such taxon +// exists in the path to the root. +func (taxon *Taxon) Family() *Taxon { + return taxon.TaxonAtRank("family") } diff --git a/pkg/obitax/taxonnode.go b/pkg/obitax/taxonnode.go new file mode 100644 index 0000000..3c12a50 --- /dev/null +++ b/pkg/obitax/taxonnode.go @@ -0,0 +1,169 @@ +package obitax + +import ( + "fmt" + "regexp" +) + +// TaxNode represents a single taxon in a taxonomy. +// It holds information about the taxon's identifier, parent taxon, rank, +// scientific name, and alternate names. +// +// Fields: +// - id: The unique identifier of the taxon of type T. +// - parent: The identifier of the parent taxon of type T. +// - rank: The rank of the taxon (e.g., species, genus). +// - scientificname: A pointer to a string representing the scientific name of the taxon. +// - alternatenames: A pointer to a map of alternate names for the taxon, where the key is +// a string representing the class name and the value is a pointer to a string +// representing the name. +type TaxNode struct { + id string + parent string + rank string + scientificname *string + alternatenames *map[string]*string +} + +// String returns a string representation of the TaxNode, including the taxonomy code, +// the node ID, and the scientific name. The output format is "taxonomyCode:id [scientificName]". +// +// Parameters: +// - taxonomyCode: A string representing the code of the taxonomy to which the node belongs. +// +// Returns: +// - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]". 
+func (node *TaxNode) String(taxonomyCode string) string { + return fmt.Sprintf("%s:%v [%s]", + taxonomyCode, + node.id, + node.ScientificName()) +} + +// Id returns the unique identifier of the TaxNode. +// It retrieves the identifier of type T associated with the taxon node. +// +// Returns: +// - The unique identifier of the taxon node of type T. +func (node *TaxNode) Id() string { + return node.id +} + +// ParentId returns the identifier of the parent taxon of the TaxNode. +// It retrieves the parent identifier of type T associated with the taxon node. +// +// Returns: +// - The identifier of the parent taxon of type T. +func (node *TaxNode) ParentId() string { + return node.parent +} + +// ScientificName returns the scientific name of the TaxNode. +// It dereferences the pointer to the scientific name string associated with the taxon node. +// +// Returns: +// - The scientific name of the taxon as a string. +// - Note: This method assumes that scientificname is not nil; +// if it may be nil, additional error handling should be implemented. +func (node *TaxNode) ScientificName() string { + return *node.scientificname +} + +// Name retrieves the name of the TaxNode based on the specified class. +// If the class is "scientificname", it returns the scientific name of the taxon. +// If the class corresponds to an alternate name, it retrieves that name from the alternatenames map. +// If the class is not recognized or if no alternate names exist, it returns an empty string. +// +// Parameters: +// - class: A string representing the class of name to retrieve (e.g., "scientificname" or an alternate name class). +// +// Returns: +// - The name of the taxon as a string. If the class is not recognized or if no name is available, +// an empty string is returned. 
+func (node *TaxNode) Name(class string) string { + if class == "scientificname" { + return *node.scientificname + } + + if node.alternatenames == nil { + return "" + } + + if val, ok := (*node.alternatenames)[class]; ok { + if val != nil { + return *val + } + } + + return "" +} + +func (node *TaxNode) SetName(name, class string) { + if class == "scientificname" { + node.scientificname = &name + return + } + + if node.alternatenames == nil { + node.alternatenames = &map[string]*string{} + } + + (*node.alternatenames)[class] = &name +} + +// Rank returns the rank of the TaxNode. +// It retrieves the rank associated with the taxon node, which indicates its level in the taxonomy hierarchy. +// +// Returns: +// - The rank of the taxon as a string (e.g., species, genus, family). +func (node *TaxNode) Rank() string { + return node.rank +} + +// IsNameEqual checks if the provided name matches the scientific name or any alternate names +// associated with the TaxNode. It returns true if there is a match; otherwise, it returns false. +// +// Parameters: +// - name: A string representing the name to compare against the scientific name and alternate names. +// +// Returns: +// - A boolean indicating whether the provided name is equal to the scientific name or exists +// as an alternate name for the taxon. +func (node *TaxNode) IsNameEqual(name string) bool { + if *(node.scientificname) == name { + return true + } + if node.alternatenames != nil { + for _, n := range *node.alternatenames { + if n != nil && *n == name { + return true + } + } + } + return false +} + +// IsNameMatching checks if the scientific name or any alternate names of the TaxNode match +// the provided regular expression pattern. It returns true if there is a match; otherwise, it returns false. +// +// Parameters: +// - pattern: A pointer to a regexp.Regexp object representing the pattern to match against +// the scientific name and alternate names. 
+// +// Returns: +// - A boolean indicating whether the scientific name or any alternate names match the +// provided regular expression pattern. +func (node *TaxNode) IsNameMatching(pattern *regexp.Regexp) bool { + if pattern.MatchString(*(node.scientificname)) { + return true + } + if node.alternatenames != nil { + for _, n := range *node.alternatenames { + if n != nil && pattern.MatchString(*n) { + return true + } + } + } + + return false +} diff --git a/pkg/obitax/taxonomy.go b/pkg/obitax/taxonomy.go index 787f513..47c0fdb 100644 --- a/pkg/obitax/taxonomy.go +++ b/pkg/obitax/taxonomy.go @@ -3,57 +3,222 @@ package obitax import ( "fmt" "regexp" - "strconv" + + log "github.com/sirupsen/logrus" ) -type TaxName struct { - name *string - nameclass *string -} - +// Taxonomy represents a hierarchical classification of taxa. +// It holds information about the taxonomy's name, code, ranks, nodes, root node, aliases, and an index. +// The generic type T is used to specify the type of taxon identifiers. +// +// Fields: +// - name: The name of the taxonomy. +// - code: A unique code representing the taxonomy. +// - ranks: A pointer to an InnerString instance that holds the ranks of the taxa. +// - nodes: A pointer to a TaxonSet containing all the nodes (taxa) in the taxonomy. +// - root: A pointer to the root TaxNode of the taxonomy. +// - index: A map that indexes taxa by their string representation for quick access. type Taxonomy struct { - nodes *TaxonSet - alias map[int]*TaxNode - index map[string]*TaxonSet + name string + code string + ranks *InnerString + nameclasses *InnerString + nodes *TaxonSet + root *TaxNode + matcher *regexp.Regexp + index map[string]*TaxonSet } -func NewTaxonomy() *Taxonomy { - set := make(TaxonSet) - taxonomy := Taxonomy{ - nodes: &set, - alias: make(TaxonSet), - index: make(map[string]*TaxonSet)} - return &taxonomy +// NewTaxonomy creates and initializes a new Taxonomy instance with the specified name and code. 
+// It sets up the necessary internal structures, including ranks, name classes, nodes, the taxid matcher, and an index. +// +// Parameters: +// - name: The name of the taxonomy to be created. +// - code: A unique code representing the taxonomy; codeCharacters: the regexp character class accepted in identifiers (e.g. "[[:alnum:]]"). +// +// Returns: +// - A pointer to the newly created Taxonomy instance. +func NewTaxonomy(name, code, codeCharacters string) *Taxonomy { + set := make(map[string]*TaxNode) + + // codeCharacters := "[[:alnum:]]" // [[:digit:]] + + matcher := regexp.MustCompile(fmt.Sprintf("^[[:blank:]]*(%s:)?(%s+)", regexp.QuoteMeta(code), codeCharacters)) // code is matched literally, codeCharacters is a regexp class + + taxonomy := &Taxonomy{ + name: name, + code: code, + ranks: NewInnerString(), + nameclasses: NewInnerString(), + nodes: &TaxonSet{set: set}, + root: nil, + matcher: matcher, + index: make(map[string]*TaxonSet), + } + + taxonomy.nodes.taxonomy = taxonomy + + return taxonomy } +// Id converts a given taxid string into the corresponding taxon identifier. +// It uses a regular expression to validate and extract the taxid. If the taxid is invalid, +// the method returns an empty string along with an error. +// +// Parameters: +// - taxid: A string representation of the taxon identifier to be converted. +// +// Returns: +// - The taxon identifier extracted from the provided taxid (leading blanks and the optional "code:" prefix removed). +// - An error if the taxid is not valid or cannot be converted. +func (taxonomy *Taxonomy) Id(taxid string) (string, error) { + matches := taxonomy.matcher.FindStringSubmatch(taxid) + + if matches == nil { + return "", fmt.Errorf("Taxid %s is not a valid taxid", taxid) + } + + return matches[2], nil +} + +// TaxidSting retrieves the string representation of a taxon node identified by the given ID. +// It looks up the node in the taxonomy and returns its formatted string representation +// along with the taxonomy code. If the node does not exist, it returns an error. +// +// Parameters: +// - id: The identifier of the taxon node to retrieve.
+// +// Returns: +// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]", +// or an error if the taxon node with the specified ID does not exist in the taxonomy. +func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) { + node := taxonomy.nodes.Get(id) + if node == nil { + return "", fmt.Errorf("Taxid %s is not part of the taxonomy", id) + } + return node.String(taxonomy.code), nil +} + +// Taxon retrieves the Taxon associated with the given taxid string. +// It first converts the taxid to its corresponding identifier using the Id method. +// If the taxid is invalid or the taxon is not found, it logs a fatal error and terminates the program. +// +// Parameters: +// - taxid: A string representation of the taxon identifier to be retrieved. +// +// Returns: +// - A pointer to the Taxon instance associated with the provided taxid. +// - If the taxid is unknown, the method will log a fatal error. +func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon { + id, err := taxonomy.Id(taxid) + + if err != nil { + log.Fatalf("Taxid %s is not a valid taxid", taxid) + } + + node := taxonomy.nodes.Get(id) + + if node == nil { + log.Fatalf("Taxid %s is an unknown taxid", taxid) + } + + return &Taxon{ + Taxonomy: taxonomy, + Node: node, + } +} + +// TaxonSet returns the set of taxon nodes contained within the Taxonomy. +// It provides access to the underlying collection of taxon nodes for further operations. +// +// Returns: +// - A pointer to the TaxonSet representing the collection of taxon nodes in the taxonomy. func (taxonomy *Taxonomy) TaxonSet() *TaxonSet { return taxonomy.nodes } -func (taxonomy *Taxonomy) Alias() *map[int]*TaxNode { - return &(taxonomy.alias) -} - -func (taxonomy *Taxonomy) Index() *map[string]*TaxonSet { - return &(taxonomy.index) -} - +// Len returns the number of taxa in the Taxonomy. +// It delegates the call to the Len method of the underlying nodes set.
+// +// Returns: +// - An integer representing the total count of taxa in the taxonomy (aliases are not counted). func (taxonomy *Taxonomy) Len() int { - return len(*taxonomy.nodes) + return taxonomy.nodes.Len() } -func (taxonomy *Taxonomy) AddNewTaxa(taxid, parent int, rank string, replace bool, init bool) (*TaxNode, error) { - if !replace { - _, ok := (*taxonomy.nodes)[taxid] - if ok { - return nil, fmt.Errorf("trying to add taxoon %d already present in the taxonomy", taxid) - } +// AddTaxon adds a new taxon to the taxonomy with the specified parameters. +// It checks if the taxon already exists and can replace it if specified. +// +// Parameters: +// - taxid: The identifier of the taxon to be added. +// - parent: The identifier of the parent taxon. +// - rank: The rank of the taxon (e.g., species, genus). +// - isRoot: A boolean indicating if this taxon is the root of the taxonomy (the node then becomes its own parent). +// - replace: A boolean indicating whether to replace an existing taxon with the same taxid. +// +// Returns: +// - A pointer to the newly created Taxon instance. +// - An error if the taxon cannot be added (e.g., it already exists and replace is false).
+func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) { + if !replace && taxonomy.nodes.Contains(taxid) { + return nil, fmt.Errorf("trying to add taxon %s already present in the taxonomy", taxid) } - n := NewTaxNode(taxid, parent, rank) - (*taxonomy.nodes)[taxid] = n + rank = taxonomy.ranks.Innerize(rank) - return n, nil + n := &TaxNode{taxid, parent, rank, nil, nil} + + taxonomy.nodes.Insert(n) + + if isRoot { + n.parent = n.id + taxonomy.root = n + } + + return &Taxon{ + Taxonomy: taxonomy, + Node: n, + }, nil +} + +func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Taxon, error) { // registers newtaxid as an alias of the existing taxon oldtaxid + newid, err := taxonomy.Id(newtaxid) + + if err != nil { + return nil, err + } + oldid, err := taxonomy.Id(oldtaxid) + + if err != nil { + return nil, err + } + + if !replace && taxonomy.nodes.Contains(newid) { + return nil, fmt.Errorf("trying to add alias %s already present in the taxonomy", newtaxid) + } + + n := taxonomy.nodes.Get(oldid) + + if n == nil { + return nil, fmt.Errorf("trying to add alias %s to a taxon that does not exist", oldtaxid) + } + + taxonomy.nodes.Alias(newid, n) + + return &Taxon{ + Taxonomy: taxonomy, + Node: n, + }, nil +} + +// RankList returns a slice of strings representing the ranks of the taxa +// in the taxonomy. It retrieves the ranks from the InnerString instance +// associated with the taxonomy. +// +// Returns: +// - A slice of strings containing the ranks of the taxa.
+func (taxonomy *Taxonomy) RankList() []string { + return taxonomy.ranks.Slice() } // func (taxonomy *Taxonomy) Taxon(taxid int) (*TaxNode, error) { @@ -69,93 +234,6 @@ func (taxonomy *Taxonomy) AddNewTaxa(taxid, parent int, rank string, replace boo // return t, nil // } -func (taxonomy *Taxonomy) Taxon(taxid interface{}) (*TaxNode, error) { - var itaxid int - var err error - - switch v := taxid.(type) { - case int: - itaxid = v - case string: - itaxid, err = strconv.Atoi(v) - - if err != nil { - re := regexp.MustCompile(`TX:(\d+)`) - parts := re.FindStringSubmatch(v) - if len(parts) != 2 { - return nil, fmt.Errorf("I cannot parse taxid from %s", v) - } - itaxid, _ = strconv.Atoi(parts[1]) - } - } - - t, ok := (*taxonomy.nodes)[itaxid] - - if !ok { - a, aok := taxonomy.alias[itaxid] - if !aok { - return nil, fmt.Errorf("Taxid %d is not part of the taxonomy", taxid) - } - t = a - } - return t, nil -} -func (taxonomy *Taxonomy) AddNewName(taxid int, name, nameclass *string) error { - node, node_err := taxonomy.Taxon(taxid) - if node_err != nil { - return node_err - } - - if *nameclass == "scientific name" { - node.scientificname = name - } else { - names := node.alternatenames - if names == nil { - n := make(map[string]*string) - names = &n - node.alternatenames = names - } else { - (*names)[*name] = nameclass - } - } - - i, ok := taxonomy.index[*name] - if !ok { - tnm := make(TaxonSet) - i = &tnm - taxonomy.index[*name] = i - } - (*i)[taxid] = node - - return nil -} - -func (taxonomy *Taxonomy) ReindexParent() error { - var ok bool - for _, taxon := range *taxonomy.nodes { - taxon.pparent, ok = (*taxonomy.nodes)[taxon.parent] - if !ok { - return fmt.Errorf("Parent %d of taxon %d is not defined in taxonomy", - taxon.taxid, - taxon.parent) - } - } - - return nil -} - -func MakeTaxName(name, nameclass *string) *TaxName { - tn := TaxName{name, nameclass} - return &tn -} - -func (taxonomy *Taxonomy) AddNewAlias(newtaxid, oldtaxid int) error { - n, node_err := 
taxonomy.Taxon(newtaxid) - if node_err != nil { - return node_err - } - - taxonomy.alias[oldtaxid] = n - - return nil +func (taxonomy *Taxonomy) Index() *map[string]*TaxonSet { + return &(taxonomy.index) } diff --git a/pkg/obitax/taxonset.go b/pkg/obitax/taxonset.go index 7aae346..3f464e1 100644 --- a/pkg/obitax/taxonset.go +++ b/pkg/obitax/taxonset.go @@ -1,15 +1,126 @@ package obitax -type TaxonSet map[int]*TaxNode +import log "github.com/sirupsen/logrus" -func (set *TaxonSet) Get(i int) *TaxNode { - return (*set)[i] +// TaxonSet represents a collection of taxa within a taxonomy. +// It holds a mapping of taxon identifiers to their corresponding TaxNode instances, +// as well as a reference to the associated Taxonomy. +// +// Fields: +// - set: A map that associates string taxon identifiers (true ids and aliases) with their corresponding TaxNode instances. +// - taxonomy: A pointer to the Taxonomy instance that this TaxonSet belongs to. +type TaxonSet struct { + set map[string]*TaxNode + nalias int // number of alias entries in set; subtracted by Len + taxonomy *Taxonomy } +// Get retrieves the TaxNode associated with the specified taxon identifier. +// It returns the TaxNode if it exists in the TaxonSet; otherwise, it returns nil. +// +// Parameters: +// - i: The string taxon identifier for which the TaxNode is to be retrieved. +// +// Returns: +// - A pointer to the TaxNode associated with the provided identifier, or nil +// if no such taxon exists in the set. +func (set *TaxonSet) Get(i string) *TaxNode { + return set.set[i] +} + +// Len returns the number of unique taxa in the TaxonSet. +// It calculates the count by subtracting the number of aliases from the total +// number of entries in the set. +// +// Returns: +// - An integer representing the count of unique taxa in the TaxonSet. func (set *TaxonSet) Len() int { - return len(*set) + return len(set.set) - set.nalias } -func (set *TaxonSet) Inserts(taxon *TaxNode) { - (*set)[taxon.taxid] = taxon +// Insert adds a TaxNode[T] to the TaxonSet.
If a taxon with the same identifier +// already exists in the set, it updates the reference. If the existing entry was +// an alias, the alias count is decremented. +// +// Parameters: +// - taxon: A pointer to the TaxNode instance to be added to the TaxonSet. +// +// Behavior: +// - If a taxon with the same identifier already exists and is different from the +// new taxon, the alias count is decremented. +func (set *TaxonSet) Insert(taxon *TaxNode) { + if old := set.set[taxon.id]; old != nil && old.id != taxon.id { + set.nalias-- + } + set.set[taxon.id] = taxon +} + +// Taxonomy returns a pointer to the Taxonomy instance that this TaxonSet belongs to. +// +// Returns: +// - A pointer to the Taxonomy instance that this TaxonSet belongs to +func (set *TaxonSet) Taxonomy() *Taxonomy { + return set.taxonomy +} + +// Alias associates a given alias identifier with a specified TaxNode in the TaxonSet. +// The identifier is stored as given; no conversion through the Id method happens here. +// If the original taxon is not part of the taxon set, it logs a fatal error and terminates the program. +// +// Parameters: +// - id: A string representing the alias to be associated with the taxon node. +// - node: A pointer to the TaxNode instance that the alias will refer to. +// +// Behavior: +// - If the taxon the alias refers to (node.id) is not part of the taxon set, +// the method will log a fatal error and terminate the program. +func (set *TaxonSet) Alias(id string, node *TaxNode) { + original := set.Get(node.id) + if original == nil { + log.Fatalf("Original taxon %v is not part of taxon set", node.id) + } + set.set[id] = node + set.nalias++ // NOTE(review): also incremented when id was already an alias, which would make Len under-count +} + +// IsAlias checks if the given identifier corresponds to an alias in the TaxonSet. +// It retrieves the TaxNode associated with the identifier and returns true if the +// node exists and its identifier is different from the provided identifier; otherwise, it returns false.
+// +// Parameters: +// - id: The string identifier to be checked for alias status. +// +// Returns: +// - A boolean indicating whether the identifier corresponds to an alias in the set. +func (set *TaxonSet) IsAlias(id string) bool { + node := set.Get(id) + return node != nil && node.id != id +} + +// IsATaxon checks if the given ID corresponds to a valid taxon node in the TaxonSet. +// It returns true if the node exists and its ID matches the provided ID; otherwise, it returns false. +// An id that is only an alias returns false. +// +// Parameters: +// - id: The identifier of the taxon to check. +// +// Returns: +// - A boolean indicating whether the specified ID corresponds to a valid taxon node. +func (set *TaxonSet) IsATaxon(id string) bool { + node := set.Get(id) + return node != nil && node.id == id +} + +// Contains checks if the TaxonSet contains a taxon node with the specified ID. +// It returns true if the node exists in the set; otherwise, it returns false. +// Both alias ids and true taxon ids return true. +// +// Parameters: +// - id: The identifier of the taxon to check for presence in the set. +// +// Returns: +// - A boolean indicating whether the TaxonSet contains a taxon node with the specified ID. +func (set *TaxonSet) Contains(id string) bool { + node := set.Get(id) + return node != nil } diff --git a/pkg/obitax/taxonslice.go b/pkg/obitax/taxonslice.go index 363297c..8c567d9 100644 --- a/pkg/obitax/taxonslice.go +++ b/pkg/obitax/taxonslice.go @@ -5,30 +5,59 @@ import ( "fmt" ) -type TaxonSlice []*TaxNode - -func (set *TaxonSlice) Get(i int) *TaxNode { - return (*set)[i] +// TaxonSlice represents a slice of TaxNode instances within a taxonomy. +// It encapsulates a collection of taxon nodes and the taxonomy they belong to. +// +// Fields: +// - slice: A slice of pointers to TaxNode representing the taxon nodes. +// - taxonomy: A pointer to the Taxonomy instance that these taxon nodes are part of.
+type TaxonSlice struct { + slice []*TaxNode + taxonomy *Taxonomy } -func (set *TaxonSlice) Len() int { - return len(*set) +// Get retrieves the TaxNode at the specified index from the TaxonSlice. +// It returns the taxon node corresponding to the provided index. +// +// Parameters: +// - i: An integer representing the index of the taxon node to retrieve (an out-of-range index panics). +// +// Returns: +// - A pointer to the TaxNode at the specified index in the slice. +func (slice *TaxonSlice) Get(i int) *TaxNode { + return slice.slice[i] } +// Len returns the number of TaxNode instances in the TaxonSlice. +// It provides the count of taxon nodes contained within the slice. +// +// Returns: +// - An integer representing the total number of taxon nodes in the TaxonSlice. +func (slice *TaxonSlice) Len() int { + return len(slice.slice) +} + +// String returns a string representation of the TaxonSlice. +// It formats the output to include the IDs, scientific names, and ranks of the taxon nodes +// in the slice, concatenated in reverse order, separated by vertical bars. +// +// Returns: +// - A formatted string representing the TaxonSlice, with each taxon in the format +// "id@scientific_name@rank". If the slice is empty, it returns an empty string.
func (path *TaxonSlice) String() string { var buffer bytes.Buffer - if len(*path) > 0 { - taxon := (*path)[len(*path)-1] - fmt.Fprintf(&buffer, "%d@%s@%s", - taxon.Taxid(), + if path.Len() > 0 { + taxon := path.slice[path.Len()-1] + fmt.Fprintf(&buffer, "%v@%s@%s", + taxon.Id(), taxon.ScientificName(), taxon.Rank()) - for i := len(*path) - 2; i >= 0; i-- { - taxon := (*path)[i] - fmt.Fprintf(&buffer, "|%d@%s@%s", - taxon.Taxid(), + for i := path.Len() - 2; i >= 0; i-- { + taxon := path.slice[i] + fmt.Fprintf(&buffer, "|%v@%s@%s", + taxon.Id(), taxon.ScientificName(), taxon.Rank()) } diff --git a/pkg/obiutils/goutils.go b/pkg/obiutils/goutils.go index 1bc97b9..e0bbefe 100644 --- a/pkg/obiutils/goutils.go +++ b/pkg/obiutils/goutils.go @@ -396,28 +396,55 @@ func JsonMarshal(i interface{}) ([]byte, error) { // IsAMap checks if the given value is a map. // -// value: the value to be checked. -// returns: a boolean indicating if the value is a map. +// Parameters: +// - value: The value to be checked. +// +// Returns: +// - A boolean indicating if the value is a map. func IsAMap(value interface{}) bool { return reflect.TypeOf(value).Kind() == reflect.Map } // IsAnArray checks if the given value is an array. // -// value: The value to be checked. -// Returns: true if the value is an array, false otherwise. +// Parameters: +// - value: The value to be checked. +// +// Returns: +// - A boolean indicating if the value is an array. func IsAnArray(value interface{}) bool { return reflect.TypeOf(value).Kind() == reflect.Array } // IsASlice determines if the given value is a slice. // -// value: the value to check. -// bool: true if the value is a slice, false otherwise. +// Parameters: +// - value: The value to check. +// +// Returns: +// - A boolean indicating if the value is a slice. func IsASlice(value interface{}) bool { return reflect.TypeOf(value).Kind() == reflect.Slice } +// IsAContainer checks if the given value is a map, array, or slice. 
+// +// Parameters: +// - value: The value to check. +// +// Returns: +// - A boolean indicating if the value is a container (map, array, or slice); false for an untyped nil value. +func IsAContainer(value interface{}) bool { + return value != nil && (IsAMap(value) || IsAnArray(value) || IsASlice(value)) // reflect.TypeOf(nil) is nil, so the kind checks would panic on nil +} + +// IsIntegral checks if the given float64 value is an integral number. +// +// Parameters: +// - val: The float64 value to check. +// +// Returns: +// - A boolean indicating if the value is integral (no fractional part); the check goes through int, so it is only reliable within the int range. func IsIntegral(val float64) bool { return val == float64(int(val)) }