First functional version

This commit is contained in:
Eric Coissac
2024-11-14 19:10:23 +01:00
parent 9471fedfa1
commit 03f4e88a17
26 changed files with 908 additions and 307 deletions

View File

@ -4,31 +4,32 @@ import (
"regexp"
)
func (taxonomy *Taxonomy) IFilterOnName(name string, strict bool) *ITaxonSet {
func (taxonomy *Taxonomy) IFilterOnName(name string, strict bool) *ITaxon {
if strict {
nodes, ok := taxonomy.index[name]
nodes, ok := taxonomy.index[taxonomy.names.Innerize(name)]
if ok {
return nodes.Iterator()
} else {
empty := make(TaxonSet)
return (&empty).Iterator()
empty := taxonomy.NewTaxonSet()
return empty.Iterator()
}
}
return taxonomy.Iterator().IFilterOnName(name, strict)
}
func (iterator *ITaxonSet) IFilterOnName(name string, strict bool) *ITaxonSet {
newIterator := NewITaxonSet()
sentTaxa := make(map[int]bool)
func (iterator *ITaxon) IFilterOnName(name string, strict bool) *ITaxon {
newIterator := NewITaxon()
sentTaxa := make(map[*string]bool)
if strict {
go func() {
for iterator.Next() {
taxon := iterator.Get()
if _, ok := sentTaxa[taxon.taxid]; !ok {
node := taxon.Node
if _, ok := sentTaxa[node.id]; !ok {
if taxon.IsNameEqual(name) {
sentTaxa[taxon.taxid] = true
sentTaxa[node.id] = true
newIterator.source <- taxon
}
}
@ -41,9 +42,10 @@ func (iterator *ITaxonSet) IFilterOnName(name string, strict bool) *ITaxonSet {
go func() {
for iterator.Next() {
taxon := iterator.Get()
if _, ok := sentTaxa[taxon.taxid]; !ok {
node := taxon.Node
if _, ok := sentTaxa[node.id]; !ok {
if taxon.IsNameMatching(pattern) {
sentTaxa[taxon.taxid] = true
sentTaxa[node.id] = true
newIterator.source <- taxon
}
}

View File

@ -1,12 +1,20 @@
package obitax
func (iterator *ITaxonSet) IFilterOnTaxRank(rank string) *ITaxonSet {
newIter := NewITaxonSet()
func (iterator *ITaxon) IFilterOnTaxRank(rank string) *ITaxon {
newIter := NewITaxon()
var prank *string
var ptax *Taxonomy
go func() {
for iterator.Next() {
taxon := iterator.Get()
if taxon.rank == rank {
if ptax != taxon.Taxonomy {
ptax = taxon.Taxonomy
prank = ptax.ranks.Innerize(rank)
}
if taxon.Node.rank == prank {
newIter.source <- taxon
}
}
@ -16,14 +24,14 @@ func (iterator *ITaxonSet) IFilterOnTaxRank(rank string) *ITaxonSet {
return newIter
}
func (set *TaxonSet) IFilterOnTaxRank(rank string) *ITaxonSet {
func (set *TaxonSet) IFilterOnTaxRank(rank string) *ITaxon {
return set.Iterator().IFilterOnTaxRank(rank)
}
func (slice *TaxonSlice) IFilterOnTaxRank(rank string) *ITaxonSet {
func (slice *TaxonSlice) IFilterOnTaxRank(rank string) *ITaxon {
return slice.Iterator().IFilterOnTaxRank(rank)
}
func (taxonomy *Taxonomy) IFilterOnTaxRank(rank string) *ITaxonSet {
func (taxonomy *Taxonomy) IFilterOnTaxRank(rank string) *ITaxon {
return taxonomy.Iterator().IFilterOnTaxRank(rank)
}

View File

@ -1,9 +1,7 @@
package obitax
import "reflect"
func (iterator *ITaxonSet) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
newIter := NewITaxonSet()
func (iterator *ITaxon) IFilterOnSubcladeOf(taxon *Taxon) *ITaxon {
newIter := NewITaxon()
go func() {
for iterator.Next() {
@ -18,32 +16,36 @@ func (iterator *ITaxonSet) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
return newIter
}
func (set *TaxonSet) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
func (set *TaxonSet) IFilterOnSubcladeOf(taxon *Taxon) *ITaxon {
return set.Iterator().IFilterOnSubcladeOf(taxon)
}
func (slice *TaxonSlice) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
func (slice *TaxonSlice) IFilterOnSubcladeOf(taxon *Taxon) *ITaxon {
return slice.Iterator().IFilterOnSubcladeOf(taxon)
}
func (taxonomy *Taxonomy) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
func (taxonomy *Taxonomy) IFilterOnSubcladeOf(taxon *Taxon) *ITaxon {
return taxonomy.Iterator().IFilterOnSubcladeOf(taxon)
}
func (iterator *ITaxonSet) IFilterBelongingSubclades(clades *TaxonSet) *ITaxonSet {
func (iterator *ITaxon) IFilterBelongingSubclades(clades *TaxonSet) *ITaxon {
if len(*clades) == 0 {
if clades.Len() == 0 {
return iterator
}
// Consider the second simplest case, where only
// a single subclade is provided
if len(*clades) == 1 {
keys := reflect.ValueOf(*clades).MapKeys()
return iterator.IFilterOnSubcladeOf((*clades)[int(keys[0].Int())])
if clades.Len() == 1 {
keys := make([]*string, 0, len(clades.set))
for k := range clades.set {
keys = append(keys, k)
}
return iterator.IFilterOnSubcladeOf(clades.Get(keys[0]))
}
newIter := NewITaxonSet()
newIter := NewITaxon()
go func() {
for iterator.Next() {

View File

@ -5,7 +5,7 @@ import "sync"
// InnerString is a struct that holds a map of strings and a read-write lock for concurrent access.
// The index map is used to store key-value pairs of strings.
type InnerString struct {
index map[string]string
index map[string]*string
lock sync.RWMutex
}
@ -13,7 +13,7 @@ type InnerString struct {
// The lock is set to false.
func NewInnerString() *InnerString {
return &InnerString{
index: make(map[string]string),
index: make(map[string]*string),
}
}
@ -26,13 +26,13 @@ func NewInnerString() *InnerString {
//
// Returns:
// - A pointer to the single stored copy of value; calling Innerize again with
// an equal string returns the same pointer.
func (i *InnerString) Innerize(value string) string {
func (i *InnerString) Innerize(value string) *string {
i.lock.Lock()
defer i.lock.Unlock()
s, ok := i.index[value]
if !ok {
i.index[value] = value
s = value
s = &value
i.index[value] = s
}
return s
@ -42,7 +42,7 @@ func (i *InnerString) Slice() []string {
rep := make([]string, len(i.index))
j := 0
for _, v := range i.index {
rep[j] = v
rep[j] = *v
j++
}
return rep

View File

@ -1,6 +1,6 @@
package obitax
import "log"
import log "github.com/sirupsen/logrus"
func (taxon *Taxon) IsSubCladeOf(parent *Taxon) bool {
@ -20,3 +20,18 @@ func (taxon *Taxon) IsSubCladeOf(parent *Taxon) bool {
return false
}
// IsBelongingSubclades reports whether the taxon itself, or any of its
// ancestors up to and including the root, is a member of clades.
//
// Parameters:
//   - clades: the set of taxa against which the taxon and its ancestors
//     are tested with Contains.
//
// Returns:
//   - true as soon as the taxon or one of its ancestors is found in clades;
//     false when the root has been tested without success, or when the walk
//     reaches a nil taxon (nil receiver, or a Parent() call returning nil).
func (taxon *Taxon) IsBelongingSubclades(clades *TaxonSet) bool {
	// A nil taxon ends the walk: there is no node identifier left to test,
	// and dereferencing taxon.Node would panic.
	for taxon != nil {
		if clades.Contains(taxon.Node.id) {
			return true
		}

		// The root has just been tested above; nothing lies beyond it.
		if taxon.IsRoot() {
			return false
		}

		taxon = taxon.Parent()
	}

	return false
}

View File

@ -1,24 +1,31 @@
package obitax
type ITaxonSet struct {
source chan *TaxNode
current *TaxNode
type ITaxon struct {
source chan *Taxon
current *Taxon
finished bool
p_finished *bool
}
func NewITaxonSet() *ITaxonSet {
i := ITaxonSet{make(chan *TaxNode), nil, false, nil}
// NewITaxon allocates an iterator over taxa.
//
// The returned iterator owns a freshly created unbuffered channel of *Taxon,
// has no current taxon, and is not finished.
func NewITaxon() *ITaxon {
	it := new(ITaxon)
	it.source = make(chan *Taxon)

	// p_finished refers to the iterator's own finished flag.
	it.p_finished = &it.finished

	return it
}
func (set *TaxonSet) Iterator() *ITaxonSet {
i := NewITaxonSet()
func (set *TaxonSet) Iterator() *ITaxon {
i := NewITaxon()
go func() {
for _, t := range set.set {
i.source <- t
i.source <- &Taxon{
Taxonomy: set.taxonomy,
Node: t,
}
}
close(i.source)
}()
@ -26,12 +33,15 @@ func (set *TaxonSet) Iterator() *ITaxonSet {
return i
}
func (set *TaxonSlice) Iterator() *ITaxonSet {
i := NewITaxonSet()
func (set *TaxonSlice) Iterator() *ITaxon {
i := NewITaxon()
go func() {
for _, t := range set.slice {
i.source <- t
i.source <- &Taxon{
Taxonomy: set.taxonomy,
Node: t,
}
}
close(i.source)
}()
@ -39,11 +49,11 @@ func (set *TaxonSlice) Iterator() *ITaxonSet {
return i
}
func (taxonmy *Taxonomy) Iterator() *ITaxonSet {
func (taxonmy *Taxonomy) Iterator() *ITaxon {
return taxonmy.nodes.Iterator()
}
func (iterator *ITaxonSet) Next() bool {
func (iterator *ITaxon) Next() bool {
if *(iterator.p_finished) {
return false
}
@ -63,37 +73,21 @@ func (iterator *ITaxonSet) Next() bool {
// currently pointed by the iterator. You have to use the
// 'Next' method to move to the next entry before calling
// 'Get' to retrieve the following instance.
func (iterator *ITaxonSet) Get() *TaxNode {
func (iterator *ITaxon) Get() *Taxon {
return iterator.current
}
// Finished returns 'true' value if no more data is available
// from the iterator.
func (iterator *ITaxonSet) Finished() bool {
func (iterator *ITaxon) Finished() bool {
return *iterator.p_finished
}
func (iterator *ITaxonSet) Split() *ITaxonSet {
newIter := ITaxonSet{iterator.source, nil, false, iterator.p_finished}
return &newIter
}
func (iterator *ITaxonSet) TaxonSet() *TaxonSet {
set := make(TaxonSet)
for iterator.Next() {
taxon := iterator.Get()
set[taxon.id] = taxon
func (iterator *ITaxon) Split() *ITaxon {
return &ITaxon{
source: iterator.source,
current: nil,
finished: false,
p_finished: iterator.p_finished,
}
return &set
}
func (iterator *ITaxonSet) TaxonSlice() *TaxonSlice {
slice := make(TaxonSlice, 0)
for iterator.Next() {
taxon := iterator.Get()
slice = append(slice, taxon)
}
return &slice
}

View File

@ -4,7 +4,7 @@ import (
log "github.com/sirupsen/logrus"
)
func (t1 *TaxNode) LCA(t2 *TaxNode) (*TaxNode, error) {
func (t1 *Taxon) LCA(t2 *Taxon) (*Taxon, error) {
if t1 == nil {
log.Panicf("Try to get LCA of nil taxon")
}
@ -13,25 +13,19 @@ func (t1 *TaxNode) LCA(t2 *TaxNode) (*TaxNode, error) {
log.Panicf("Try to get LCA of nil taxon")
}
p1, err1 := t1.Path()
p1 := t1.Path()
p2 := t2.Path()
if err1 != nil {
return nil, err1
}
i1 := p1.Len() - 1
i2 := p2.Len() - 1
p2, err2 := t2.Path()
if err2 != nil {
return nil, err2
}
i1 := len(*p1) - 1
i2 := len(*p2) - 1
for i1 >= 0 && i2 >= 0 && (*p1)[i1].taxid == (*p2)[i2].taxid {
for i1 >= 0 && i2 >= 0 && p1.slice[i1].id == p2.slice[i2].id {
i1--
i2--
}
return (*p1)[i1+1], nil
return &Taxon{
Taxonomy: t1.Taxonomy,
Node: p1.slice[i1+1],
}, nil
}

View File

@ -24,6 +24,9 @@ type Taxon struct {
// Returns:
// - A formatted string representing the Taxon in the form "taxonomy_code:taxon_id [scientific_name]".
func (taxon *Taxon) String() string {
	if taxon != nil {
		// Formatting is delegated to the node, which only needs the
		// code of the taxonomy it belongs to.
		return taxon.Node.String(taxon.Taxonomy.code)
	}

	// A nil taxon is rendered with the "NA" placeholder.
	return "NA"
}
@ -33,24 +36,52 @@ func (taxon *Taxon) String() string {
// Returns:
// - The scientific name of the taxon as a string.
func (taxon *Taxon) ScientificName() string {
	if taxon != nil {
		return taxon.Node.ScientificName()
	}

	// A nil taxon has no name to report; use the "NA" placeholder.
	return "NA"
}
func (taxon *Taxon) Name(class string) string {
return taxon.Node.Name(class)
if taxon == nil {
return "NA"
}
pclass := taxon.Taxonomy.nameclasses.Innerize(class)
return taxon.Node.Name(pclass)
}
// IsNameEqual reports whether the underlying node answers true to
// IsNameEqual for the given name. A nil taxon never matches.
//
// Parameters:
//   - name: the name to compare against the names of the node.
//
// Returns:
//   - false for a nil taxon, otherwise the result of Node.IsNameEqual(name).
func (taxon *Taxon) IsNameEqual(name string) bool {
	return taxon != nil && taxon.Node.IsNameEqual(name)
}
// IsNameMatching reports whether the underlying node answers true to
// IsNameMatching for the given pattern. A nil taxon never matches.
//
// Parameters:
//   - pattern: the compiled regular expression handed to the node.
//
// Returns:
//   - false for a nil taxon, otherwise the result of
//     Node.IsNameMatching(pattern).
func (taxon *Taxon) IsNameMatching(pattern *regexp.Regexp) bool {
	return taxon != nil && taxon.Node.IsNameMatching(pattern)
}
func (taxon *Taxon) SetName(name, class string) {
class = taxon.Taxonomy.nameclasses.Innerize(class)
taxon.Node.SetName(name, class)
if taxon == nil {
log.Panicf("nil taxon pointer for name %s [%s]", name, class)
}
pclass := taxon.Taxonomy.nameclasses.Innerize(class)
pname := taxon.Taxonomy.names.Innerize(name)
taxon.Node.SetName(pname, pclass)
}
// IsRoot reports whether the taxon wraps the root node of its taxonomy.
//
// Returns:
//   - true when the taxon is nil, or when its Node is the very node stored
//     as root in its Taxonomy (pointer comparison); false otherwise.
func (taxon *Taxon) IsRoot() bool {
	// A nil taxon is treated as a root so that walks toward the root stop.
	return taxon == nil || taxon.Taxonomy.root == taxon.Node
}
// Rank returns the rank of the Taxon.
@ -59,6 +90,9 @@ func (taxon *Taxon) SetName(name, class string) {
// Returns:
// - The rank of the taxon as a string (e.g., species, genus, family).
func (taxon *Taxon) Rank() string {
	if taxon != nil {
		return taxon.Node.Rank()
	}

	// A nil taxon has no rank; use the "NA" placeholder.
	return "NA"
}
@ -70,9 +104,12 @@ func (taxon *Taxon) Rank() string {
// - A pointer to the parent Taxon. If the taxon is nil, or if the parent is not
// present in the taxonomy, it returns nil.
func (taxon *Taxon) Parent() *Taxon {
if taxon == nil {
return nil
}
pid := taxon.Node.ParentId()
return &Taxon{taxon.Taxonomy,
taxon.Taxonomy.nodes.Get(pid)}
return taxon.Taxonomy.nodes.Get(pid)
}
// IPath returns an iterator that yields the path from the current Taxon to the root Taxon
@ -83,12 +120,13 @@ func (taxon *Taxon) Parent() *Taxon {
// is called with each Taxon in the path from the current taxon to the root. If the
// taxonomy has no root node, the method logs a fatal error and terminates the program.
func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
if taxon.Taxonomy.root == nil {
log.Fatalf("Taxon[%v].IPath(): Taxonomy has no root node", taxon.Taxonomy.name)
}
return func(yield func(*Taxon) bool) {
for taxon.Node.parent != taxon.Taxonomy.root.id {
for !taxon.IsRoot() {
if !yield(taxon) {
return
}
@ -96,8 +134,9 @@ func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
taxon = taxon.Parent()
}
yield(taxon)
if taxon != nil {
yield(taxon)
}
}
}
@ -109,6 +148,10 @@ func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
// - A pointer to a TaxonSlice[T] containing the TaxNode[T] instances in the path
// from the current taxon to the root.
func (taxon *Taxon) Path() *TaxonSlice {
if taxon == nil {
return nil
}
s := make([]*TaxNode, 0, 10)
for t := range taxon.IPath() {
@ -131,8 +174,13 @@ func (taxon *Taxon) Path() *TaxonSlice {
// Returns:
// - A boolean indicating whether any taxon in the path has the specified rank defined.
func (taxon *Taxon) HasRankDefined(rank string) bool {
if taxon == nil {
return false
}
prank := taxon.Taxonomy.ranks.Innerize(rank)
for t := range taxon.IPath() {
if t.Node.Rank() == rank {
if t.Node.rank == prank {
return true
}
}
@ -151,8 +199,14 @@ func (taxon *Taxon) HasRankDefined(rank string) bool {
// - A pointer to the Taxon[T] that matches the specified rank, or nil if no such taxon exists
// in the path to the root.
func (taxon *Taxon) TaxonAtRank(rank string) *Taxon {
if taxon == nil {
return nil
}
prank := taxon.Taxonomy.ranks.Innerize(rank)
for t := range taxon.IPath() {
if t.Node.Rank() == rank {
if t.Node.rank == prank {
return t
}
}

View File

@ -2,6 +2,7 @@ package obitax
import (
"fmt"
"log"
"regexp"
)
@ -18,11 +19,11 @@ import (
// a string representing the class name and the value is a pointer to a string
// representing the name.
type TaxNode struct {
id string
parent string
rank string
id *string
parent *string
rank *string
scientificname *string
alternatenames *map[string]*string
alternatenames *map[*string]*string
}
// String returns a string representation of the TaxNode, including the taxonomy code,
@ -36,7 +37,7 @@ type TaxNode struct {
func (node *TaxNode) String(taxonomyCode string) string {
return fmt.Sprintf("%s:%v [%s]",
taxonomyCode,
node.id,
*node.id,
node.ScientificName())
}
@ -45,7 +46,7 @@ func (node *TaxNode) String(taxonomyCode string) string {
//
// Returns:
// - The unique identifier of the taxon node of type T.
func (node *TaxNode) Id() string {
func (node *TaxNode) Id() *string {
return node.id
}
@ -54,7 +55,7 @@ func (node *TaxNode) Id() string {
//
// Returns:
// - The identifier of the parent taxon of type T.
func (node *TaxNode) ParentId() string {
func (node *TaxNode) ParentId() *string {
return node.parent
}
@ -66,6 +67,12 @@ func (node *TaxNode) ParentId() string {
// - Note: When the node itself or its scientificname field is nil, the
// placeholder "NA" is returned instead of dereferencing a nil pointer.
func (node *TaxNode) ScientificName() string {
	// Both a missing node and a node without a scientific name are
	// reported with the same "NA" placeholder.
	if node == nil || node.scientificname == nil {
		return "NA"
	}

	return *node.scientificname
}
@ -80,8 +87,9 @@ func (node *TaxNode) ScientificName() string {
// Returns:
// - The name of the taxon as a string. If the class is not recognized or if no name is available,
// an empty string is returned.
func (node *TaxNode) Name(class string) string {
if class == "scientificname" {
func (node *TaxNode) Name(class *string) string {
if *class == "scientific name" {
return *node.scientificname
}
@ -98,17 +106,21 @@ func (node *TaxNode) Name(class string) string {
return ""
}
func (node *TaxNode) SetName(name, class string) {
if class == "scientificname" {
node.scientificname = &name
func (node *TaxNode) SetName(name, class *string) {
if node == nil {
log.Panic("Cannot set name of nil TaxNode")
}
if *class == "scientific name" {
node.scientificname = name
return
}
if node.alternatenames == nil {
node.alternatenames = &map[string]*string{}
node.alternatenames = &map[*string]*string{}
}
(*node.alternatenames)[class] = &name
(*node.alternatenames)[class] = name
}
// Rank returns the rank of the TaxNode.
@ -117,7 +129,7 @@ func (node *TaxNode) SetName(name, class string) {
// Returns:
// - The rank of the taxon as a string (e.g., species, genus, family).
func (node *TaxNode) Rank() string {
return node.rank
return *node.rank
}
// IsNameEqual checks if the provided name matches the scientific name or any alternate names
@ -154,9 +166,14 @@ func (node *TaxNode) IsNameEqual(name string) bool {
// - A boolean indicating whether the scientific name or any alternate names match the
// provided regular expression pattern.
func (node *TaxNode) IsNameMatching(pattern *regexp.Regexp) bool {
if pattern.MatchString(*(node.scientificname)) {
if node == nil {
return false
}
if node.scientificname != nil && pattern.MatchString(*(node.scientificname)) {
return true
}
if node.alternatenames != nil {
for _, n := range *node.alternatenames {
if n != nil && pattern.MatchString(*n) {

View File

@ -21,12 +21,14 @@ import (
type Taxonomy struct {
name string
code string
ids *InnerString
ranks *InnerString
nameclasses *InnerString
names *InnerString
nodes *TaxonSet
root *TaxNode
matcher *regexp.Regexp
index map[string]*TaxonSet
index map[*string]*TaxonSet
}
// NewTaxonomy creates and initializes a new Taxonomy instance with the specified name and code.
@ -39,7 +41,7 @@ type Taxonomy struct {
// Returns:
// - A pointer to the newly created Taxonomy instance.
func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
set := make(map[string]*TaxNode)
set := make(map[*string]*TaxNode)
// codeCharacters := "[[:alnum:]]" // [[:digit:]]
@ -48,12 +50,14 @@ func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
taxonomy := &Taxonomy{
name: name,
code: code,
ids: NewInnerString(),
ranks: NewInnerString(),
nameclasses: NewInnerString(),
names: NewInnerString(),
nodes: &TaxonSet{set: set},
root: nil,
matcher: matcher,
index: make(map[string]*TaxonSet),
index: make(map[*string]*TaxonSet),
}
taxonomy.nodes.taxonomy = taxonomy
@ -69,16 +73,16 @@ func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
// - taxid: A string representation of the taxon identifier to be converted.
//
// Returns:
// - The taxon identifier of type T corresponding to the provided taxid.
// - The taxon identifier as a *string corresponding to the provided taxid.
// - An error if the taxid is not valid or cannot be converted.
func (taxonomy *Taxonomy) Id(taxid string) (string, error) {
func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
matches := taxonomy.matcher.FindStringSubmatch(taxid)
if matches == nil {
return "", fmt.Errorf("Taxid %s is not a valid taxid", taxid)
return nil, fmt.Errorf("taxid %s is not a valid taxid", taxid)
}
return matches[2], nil
return taxonomy.ids.Innerize(matches[2]), nil
}
// TaxidSting retrieves the string representation of a taxon node identified by the given ID.
@ -92,11 +96,19 @@ func (taxonomy *Taxonomy) Id(taxid string) (string, error) {
// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]",
// or an error if the taxon node with the specified ID does not exist in the taxonomy.
func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
node := taxonomy.nodes.Get(id)
if node == nil {
return "", fmt.Errorf("Taxid %d is part of the taxonomy", id)
pid, err := taxonomy.Id(id)
if err != nil {
return "", err
}
return node.String(taxonomy.code), nil
taxon := taxonomy.nodes.Get(pid)
if taxon == nil {
return "", fmt.Errorf("taxid %s is not part of the taxonomy", id)
}
return taxon.String(), nil
}
// Taxon retrieves the Taxon associated with the given taxid string.
@ -113,19 +125,18 @@ func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
id, err := taxonomy.Id(taxid)
if err != nil {
log.Fatalf("Taxid %s is not a valid taxid", taxid)
log.Fatalf("Taxid %s: %v", taxid, err)
}
node := taxonomy.nodes.Get(id)
taxon := taxonomy.nodes.Get(id)
if node == nil {
log.Fatalf("Taxid %s is an unknown taxid", taxid)
if taxon == nil {
log.Fatalf("Taxid %s is not part of the taxonomy %s",
taxid,
taxonomy.name)
}
return &Taxon{
Taxonomy: taxonomy,
Node: node,
}
return taxon
}
// TaxonSet returns the set of taxon nodes contained within the Taxonomy.
@ -133,7 +144,7 @@ func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
//
// Returns:
// - A pointer to the TaxonSet[T] representing the collection of taxon nodes in the taxonomy.
func (taxonomy *Taxonomy) TaxonSet() *TaxonSet {
func (taxonomy *Taxonomy) AsTaxonSet() *TaxonSet {
return taxonomy.nodes
}
@ -160,13 +171,25 @@ func (taxonomy *Taxonomy) Len() int {
// - A pointer to the newly created Taxon[T] instance.
// - An error if the taxon cannot be added (e.g., it already exists and replace is false).
func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) {
if !replace && taxonomy.nodes.Contains(taxid) {
return nil, fmt.Errorf("trying to add taxon %d already present in the taxonomy", taxid)
parentid, perr := taxonomy.Id(parent)
id, err := taxonomy.Id(taxid)
if perr != nil {
return nil, fmt.Errorf("error in parsing parent taxid %s: %v", parent, perr)
}
rank = taxonomy.ranks.Innerize(rank)
if err != nil {
return nil, fmt.Errorf("error in parsing taxid %s: %v", taxid, err)
}
n := &TaxNode{taxid, parent, rank, nil, nil}
if !replace && taxonomy.nodes.Contains(id) {
return nil, fmt.Errorf("trying to add taxon %s already present in the taxonomy", taxid)
}
prank := taxonomy.ranks.Innerize(rank)
n := &TaxNode{id, parentid, prank, nil, nil}
taxonomy.nodes.Insert(n)
@ -197,18 +220,15 @@ func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Ta
return nil, fmt.Errorf("trying to add alias %s already present in the taxonomy", newtaxid)
}
n := taxonomy.nodes.Get(oldid)
t := taxonomy.nodes.Get(oldid)
if n == nil {
if t == nil {
return nil, fmt.Errorf("trying to add alias %s to a taxon that does not exist", oldtaxid)
}
taxonomy.nodes.Alias(newid, n)
taxonomy.nodes.Alias(newid, t)
return &Taxon{
Taxonomy: taxonomy,
Node: n,
}, nil
return t, nil
}
// RankList returns a slice of strings representing the ranks of the taxa
@ -221,19 +241,14 @@ func (taxonomy *Taxonomy) RankList() []string {
return taxonomy.ranks.Slice()
}
// func (taxonomy *Taxonomy) Taxon(taxid int) (*TaxNode, error) {
// t, ok := (*taxonomy.nodes)[taxid]
// if !ok {
// a, aok := taxonomy.alias[taxid]
// if !aok {
// return nil, fmt.Errorf("Taxid %d is not part of the taxonomy", taxid)
// }
// t = a
// }
// return t, nil
// }
func (taxonomy *Taxonomy) Index() *map[string]*TaxonSet {
func (taxonomy *Taxonomy) Index() *map[*string]*TaxonSet {
return &(taxonomy.index)
}
// Name returns the name of the taxonomy, as stored in its name field.
//
// Returns:
//   - The taxonomy name as a string.
func (taxonomy *Taxonomy) Name() string {
return taxonomy.name
}
// Code returns the code of the taxonomy, as stored in its code field.
//
// Returns:
//   - The taxonomy code as a string.
func (taxonomy *Taxonomy) Code() string {
return taxonomy.code
}

View File

@ -1,3 +1,4 @@
// Package obitax provides functionality for managing taxonomic data structures.
package obitax
import log "github.com/sirupsen/logrus"
@ -7,25 +8,46 @@ import log "github.com/sirupsen/logrus"
// as well as a reference to the associated Taxonomy.
//
// Fields:
// - set: A map that associates taxon identifiers of type T with their corresponding TaxNode[T] instances.
// - taxonomy: A pointer to the Taxonomy[T] instance that this TaxonSet belongs to.
// - set: A map that associates taxon identifiers of type *string with their corresponding TaxNode instances.
// - nalias: The number of aliases in the TaxonSet.
// - taxonomy: A pointer to the Taxonomy instance that this TaxonSet belongs to.
type TaxonSet struct {
set map[string]*TaxNode
set map[*string]*TaxNode
nalias int
taxonomy *Taxonomy
}
// Get retrieves the TaxNode[T] associated with the specified taxon identifier.
// NewTaxonSet creates an empty TaxonSet attached to the taxonomy.
//
// Returns:
//   - A pointer to a TaxonSet with an empty node map, an alias count of
//     zero, and its taxonomy field set to the receiver.
func (taxonomy *Taxonomy) NewTaxonSet() *TaxonSet {
	nodes := make(map[*string]*TaxNode)

	// nalias is left at its zero value: a fresh set holds no alias.
	return &TaxonSet{
		taxonomy: taxonomy,
		set:      nodes,
	}
}
// Get retrieves the TaxNode associated with the specified taxon identifier.
// It returns the TaxNode if it exists in the TaxonSet; otherwise, it returns nil.
//
// Parameters:
// - i: The taxon identifier of type T for which the TaxNode is to be retrieved.
// - id: A pointer to the taxon identifier for which the TaxNode is to be retrieved.
//
// Returns:
// - A pointer to the TaxNode[T] associated with the provided identifier, or nil
// - A pointer to the TaxNode associated with the provided identifier, or nil
// if no such taxon exists in the set.
func (set *TaxonSet) Get(i string) *TaxNode {
return set.set[i]
func (set *TaxonSet) Get(id *string) *Taxon {
	// A nil set contains nothing.
	if set == nil {
		return nil
	}

	// The map is keyed by *string, so the lookup uses the pointer itself,
	// not the string it points to.
	if node, found := set.set[id]; found && node != nil {
		return &Taxon{
			Taxonomy: set.taxonomy,
			Node:     node,
		}
	}

	return nil
}
// Len returns the number of unique taxa in the TaxonSet.
@ -38,27 +60,37 @@ func (set *TaxonSet) Len() int {
return len(set.set) - set.nalias
}
// Insert adds a TaxNode[T] to the TaxonSet. If a taxon with the same identifier
// Insert adds a TaxNode to the TaxonSet. If a taxon with the same identifier
// already exists in the set, it updates the reference. If the existing taxon was
// an alias, its alias count is decremented.
//
// Parameters:
// - taxon: A pointer to the TaxNode[T] instance to be added to the TaxonSet.
// - taxon: A pointer to the TaxNode instance to be added to the TaxonSet.
//
// Behavior:
// - If a taxon with the same identifier already exists and is different from the
// new taxon, the alias count is decremented.
func (set *TaxonSet) Insert(taxon *TaxNode) {
if old := set.set[taxon.id]; old != nil && old.id != taxon.id {
func (set *TaxonSet) Insert(node *TaxNode) {
if old := set.set[node.id]; old != nil && old.id != node.id {
set.nalias--
}
set.set[taxon.id] = taxon
set.set[node.id] = node
}
// Taxonomy returns a pointer to the Taxonomy[T] instance that this TaxonSet belongs to.
// InsertTaxon adds the node wrapped by taxon to the TaxonSet.
//
// The taxon must belong to the same Taxonomy as the set; otherwise a fatal
// error is logged through log.Fatalf.
//
// Parameters:
//   - taxon: A pointer to the Taxon whose Node is inserted into the set.
func (set *TaxonSet) InsertTaxon(taxon *Taxon) {
	if set.taxonomy != taxon.Taxonomy {
		log.Fatalf(
			"Cannot insert taxon %s into taxon set belonging %s taxonomy",
			taxon.String(),
			set.taxonomy.name,
		)
	}

	// Store the node through Insert so that the alias bookkeeping done
	// there is applied to this insertion too.
	set.Insert(taxon.Node)
}
// Taxonomy returns a pointer to the Taxonomy instance that this TaxonSet belongs to.
//
// Returns:
// - A pointer to the Taxonomy[T] instance that this TaxonSet belongs to
// - A pointer to the Taxonomy instance that this TaxonSet belongs to.
func (set *TaxonSet) Taxonomy() *Taxonomy {
return set.taxonomy
}
@ -68,18 +100,18 @@ func (set *TaxonSet) Taxonomy() *Taxonomy {
// If the original taxon is not part of the taxon set, it logs a fatal error and terminates the program.
//
// Parameters:
// - alias: A string representing the alias to be associated with the taxon node.
// - node: A pointer to the TaxNode[T] instance that the alias will refer to.
// - alias: A pointer to a string representing the alias to be associated with the taxon node.
// - node: A pointer to the TaxNode instance that the alias will refer to.
//
// Behavior:
// - If the original taxon corresponding to the alias is not part of the taxon set,
// the method will log a fatal error and terminate the program.
func (set *TaxonSet) Alias(id string, node *TaxNode) {
original := set.Get(node.id)
if original != nil {
func (set *TaxonSet) Alias(id *string, taxon *Taxon) {
original := set.Get(taxon.Node.id)
if original == nil {
log.Fatalf("Original taxon %v is not part of taxon set", id)
}
set.set[id] = node
set.set[id] = taxon.Node
set.nalias++
}
@ -88,39 +120,39 @@ func (set *TaxonSet) Alias(id string, node *TaxNode) {
// node exists and its identifier is different from the provided identifier; otherwise, it returns false.
//
// Parameters:
// - id: The identifier of type T to be checked for alias status.
// - id: A pointer to the identifier to be checked for alias status.
//
// Returns:
// - A boolean indicating whether the identifier corresponds to an alias in the set.
func (set *TaxonSet) IsAlias(id string) bool {
node := set.Get(id)
return node != nil && node.id != id
func (set *TaxonSet) IsAlias(id *string) bool {
taxon := set.Get(id)
return taxon != nil && taxon.Node.id != id
}
// IsATaxon checks if the given ID corresponds to a valid taxon node in the TaxonSet.
// It returns true if the node exists and its ID matches the provided ID; otherwise, it returns false.
// id corresponding to alias returns false.
// If the ID corresponds to an alias, it will return false.
//
// Parameters:
// - id: The identifier of the taxon to check.
// - id: A pointer to the identifier of the taxon to check.
//
// Returns:
// - A boolean indicating whether the specified ID corresponds to a valid taxon node.
func (set *TaxonSet) IsATaxon(id string) bool {
node := set.Get(id)
return node != nil && node.id == id
func (set *TaxonSet) IsATaxon(id *string) bool {
taxon := set.Get(id)
return taxon != nil && taxon.Node.id == id
}
// Contains checks if the TaxonSet contains a taxon node with the specified ID.
// It returns true if the node exists in the set; otherwise, it returns false.
// id corresponding to alias or true taxa returns true.
// If the ID corresponds to an alias, it will return true if the alias exists.
//
// Parameters:
// - id: The identifier of the taxon to check for presence in the set.
// - id: A pointer to the identifier of the taxon to check for presence in the set.
//
// Returns:
// - A boolean indicating whether the TaxonSet contains a taxon node with the specified ID.
func (set *TaxonSet) Contains(id string) bool {
func (set *TaxonSet) Contains(id *string) bool {
node := set.Get(id)
return node != nil
}

View File

@ -3,6 +3,8 @@ package obitax
import (
"bytes"
"fmt"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
// TaxonSlice represents a slice of TaxNode[T] instances within a taxonomy.
@ -16,6 +18,13 @@ type TaxonSlice struct {
taxonomy *Taxonomy
}
// NewTaxonSlice creates a TaxonSlice attached to the taxonomy.
//
// Parameters:
//   - size: the initial length of the underlying slice of nodes; these
//     entries are nil until assigned.
//   - capacity: the capacity reserved for the underlying slice.
//
// Returns:
//   - A pointer to the new TaxonSlice, whose taxonomy field is the receiver.
func (taxonomy *Taxonomy) NewTaxonSlice(size, capacity int) *TaxonSlice {
	nodes := make([]*TaxNode, size, capacity)

	return &TaxonSlice{
		taxonomy: taxonomy,
		slice:    nodes,
	}
}
// Get retrieves the TaxNode[T] at the specified index from the TaxonSlice.
// It returns the taxon node corresponding to the provided index.
//
@ -25,6 +34,9 @@ type TaxonSlice struct {
// Returns:
// - A pointer to the TaxNode[T] at the specified index in the slice.
func (slice *TaxonSlice) Get(i int) *TaxNode {
	if slice != nil {
		// No bounds check is done here: an out-of-range index panics
		// like any Go slice access.
		return slice.slice[i]
	}

	// A nil TaxonSlice holds no node.
	return nil
}
@ -34,6 +46,9 @@ func (slice *TaxonSlice) Get(i int) *TaxNode {
// Returns:
// - An integer representing the total number of taxon nodes in the TaxonSlice.
func (slice *TaxonSlice) Len() int {
	if slice != nil {
		return len(slice.slice)
	}

	// A nil TaxonSlice is treated as empty.
	return 0
}
@ -65,3 +80,19 @@ func (path *TaxonSlice) String() string {
return buffer.String()
}
// Reverse reverses the order of the nodes of the TaxonSlice by delegating
// to obiutils.Reverse.
//
// Parameters:
//   - inplace: forwarded to obiutils.Reverse; it also selects which
//     TaxonSlice is returned.
//
// Returns:
//   - nil when the receiver is nil;
//   - the receiver itself when inplace is true;
//   - otherwise a new TaxonSlice wrapping the value returned by
//     obiutils.Reverse and attached to the same taxonomy.
//
// NOTE(review): presumably obiutils.Reverse reorders slice.slice itself when
// inplace is true and returns a reversed copy otherwise — confirm against
// the obiutils package.
func (slice *TaxonSlice) Reverse(inplace bool) *TaxonSlice {
	if slice == nil {
		return nil
	}

	reversed := obiutils.Reverse(slice.slice, inplace)

	if !inplace {
		return &TaxonSlice{
			slice:    reversed,
			taxonomy: slice.taxonomy,
		}
	}

	return slice
}