a functional new version of obifind

This commit is contained in:
Eric Coissac
2024-11-24 19:33:24 +01:00
parent 36327c79c8
commit 3d06978808
21 changed files with 1805 additions and 343 deletions

View File

@ -14,18 +14,8 @@ import (
//
// Returns:
// - A pointer to a new ITaxon iterator containing only the Taxon instances that match the specified name.
func (taxonomy *Taxonomy) IFilterOnName(name string, strict bool) *ITaxon {
if strict {
nodes, ok := taxonomy.index[taxonomy.names.Innerize(name)]
if ok {
return nodes.Iterator()
} else {
empty := taxonomy.NewTaxonSet()
return empty.Iterator()
}
}
return taxonomy.Iterator().IFilterOnName(name, strict)
// IFilterOnName iterates over every taxon of the taxonomy and keeps those
// whose name matches name. The strict and ignoreCase flags are forwarded
// unchanged to ITaxon.IFilterOnName, which performs the actual filtering.
func (taxonomy *Taxonomy) IFilterOnName(name string, strict bool, ignoreCase bool) *ITaxon {
	return taxonomy.Iterator().IFilterOnName(name, strict, ignoreCase)
}
// IFilterOnName filters the Taxon instances in the iterator based on the specified name.
@ -38,7 +28,7 @@ func (taxonomy *Taxonomy) IFilterOnName(name string, strict bool) *ITaxon {
//
// Returns:
// - A pointer to a new ITaxon iterator containing only the Taxon instances that match the specified name.
func (iterator *ITaxon) IFilterOnName(name string, strict bool) *ITaxon {
func (iterator *ITaxon) IFilterOnName(name string, strict bool, ignoreCase bool) *ITaxon {
newIterator := NewITaxon()
sentTaxa := make(map[*string]bool)
@ -48,16 +38,21 @@ func (iterator *ITaxon) IFilterOnName(name string, strict bool) *ITaxon {
taxon := iterator.Get()
node := taxon.Node
if _, ok := sentTaxa[node.id]; !ok {
if taxon.IsNameEqual(name) {
if taxon.IsNameEqual(name, ignoreCase) {
sentTaxa[node.id] = true
newIterator.source <- taxon
newIterator.Push(taxon)
}
}
}
close(newIterator.source)
}()
} else {
pattern := regexp.MustCompile(name)
var pattern *regexp.Regexp
if ignoreCase {
pattern = regexp.MustCompile("(?i)" + name)
} else {
pattern = regexp.MustCompile(name)
}
go func() {
for iterator.Next() {
@ -66,11 +61,11 @@ func (iterator *ITaxon) IFilterOnName(name string, strict bool) *ITaxon {
if _, ok := sentTaxa[node.id]; !ok {
if taxon.IsNameMatching(pattern) {
sentTaxa[node.id] = true
newIterator.source <- taxon
newIterator.Push(taxon)
}
}
}
close(newIterator.source)
newIterator.Close()
}()
}

View File

@ -28,10 +28,12 @@ func (set *TaxonSet) Iterator() *ITaxon {
go func() {
for _, t := range set.set {
i.source <- &Taxon{
taxon := &Taxon{
Taxonomy: set.taxonomy,
Metadata: nil,
Node: t,
}
i.Push(taxon)
}
close(i.source)
}()
@ -46,17 +48,25 @@ func (set *TaxonSlice) Iterator() *ITaxon {
go func() {
for _, t := range set.slice {
i.source <- &Taxon{
i.Push(&Taxon{
Taxonomy: set.taxonomy,
Node: t,
}
})
}
close(i.source)
i.Close()
}()
return i
}
// Push sends taxon on the iterator's source channel.
func (iterator *ITaxon) Push(taxon *Taxon) {
iterator.source <- taxon
}
// Close closes the iterator's source channel. Producers in this file call it
// once, after their last Push.
func (iterator *ITaxon) Close() {
close(iterator.source)
}
// Iterator creates a new ITaxon iterator for the Taxonomy's nodes.
func (taxonomy *Taxonomy) Iterator() *ITaxon {
return taxonomy.nodes.Iterator()
@ -83,17 +93,28 @@ func (iterator *ITaxon) Next() bool {
// Get returns the current Taxon instance pointed to by the iterator.
// You must call 'Next' before calling 'Get' to retrieve the next instance.
// Calling Get on a nil iterator returns nil.
func (iterator *ITaxon) Get() *Taxon {
if iterator == nil {
return nil
}
return iterator.current
}
// Finished returns true if no more data is available from the iterator.
// A nil iterator is reported as finished; otherwise the value is read
// through the p_finished pointer.
func (iterator *ITaxon) Finished() bool {
if iterator == nil {
return true
}
return *iterator.p_finished
}
// Split creates a new ITaxon iterator that shares the same source channel
// and finished status as the original iterator.
func (iterator *ITaxon) Split() *ITaxon {
if iterator == nil {
return nil
}
return &ITaxon{
source: iterator.source,
current: nil,
@ -101,3 +122,49 @@ func (iterator *ITaxon) Split() *ITaxon {
p_finished: iterator.p_finished,
}
}
// AddMetadata returns a new iterator that yields every taxon of the receiver
// after storing value under the metadata key name on it (via SetMetadata).
// The taxa are annotated in place, not copied. A nil receiver yields nil.
//
// The receiver is consumed by a goroutine; the returned iterator is closed
// once the receiver is exhausted.
func (iterator *ITaxon) AddMetadata(name string, value interface{}) *ITaxon {
	if iterator == nil {
		return nil
	}

	tagged := NewITaxon()

	go func() {
		for iterator.Next() {
			// SetMetadata hands back the taxon it was called on, so the
			// annotated taxon can be forwarded directly.
			tagged.Push(iterator.Get().SetMetadata(name, value))
		}
		tagged.Close()
	}()

	return tagged
}
// Concat returns a new iterator that yields all taxa of the receiver followed
// by all taxa of each iterator in iterators, in argument order. Nil iterators
// (including a nil receiver) are skipped.
//
// The inputs are drained by a goroutine; the returned iterator is closed once
// the last input is exhausted.
func (iterator *ITaxon) Concat(iterators ...*ITaxon) *ITaxon {
	merged := NewITaxon()

	go func() {
		// Receiver first, then the variadic arguments, handled uniformly.
		sources := make([]*ITaxon, 0, len(iterators)+1)
		sources = append(sources, iterator)
		sources = append(sources, iterators...)

		for _, src := range sources {
			if src == nil {
				continue
			}
			for src.Next() {
				merged.Push(src.Get())
			}
		}

		merged.Close()
	}()

	return merged
}

View File

@ -4,6 +4,7 @@ import (
"iter"
"regexp"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
log "github.com/sirupsen/logrus"
)
@ -15,6 +16,7 @@ import (
// - Node: A pointer to the TaxNode instance representing the specific taxon.
type Taxon struct {
// Taxonomy is the taxonomy this taxon is attached to.
Taxonomy *Taxonomy
// Metadata holds optional named annotations attached to this taxon.
// It stays nil until SetMetadata stores a first value.
Metadata *map[string]*interface{}
// Node is the underlying taxonomy node.
Node *TaxNode
}
@ -70,12 +72,12 @@ func (taxon *Taxon) Name(class string) string {
//
// Returns:
// - A boolean indicating whether the names are equal.
func (taxon *Taxon) IsNameEqual(name string) bool {
func (taxon *Taxon) IsNameEqual(name string, ignoreCase bool) bool {
if taxon == nil {
return false
}
return taxon.Node.IsNameEqual(name)
return taxon.Node.IsNameEqual(name, ignoreCase)
}
// IsNameMatching checks if the name of the Taxon matches the given regular expression pattern.
@ -283,3 +285,89 @@ func (taxon *Taxon) Genus() *Taxon {
// Family returns the result of TaxonAtRank("family") for this taxon.
func (taxon *Taxon) Family() *Taxon {
return taxon.TaxonAtRank("family")
}
// SetMetadata stores value under the key name in the taxon's metadata,
// allocating the metadata map on first use, and overwrites any previous value
// for that key. It returns the receiver so calls can be chained; a nil
// receiver is returned unchanged.
func (taxon *Taxon) SetMetadata(name string, value interface{}) *Taxon {
	if taxon != nil {
		if taxon.Metadata == nil {
			taxon.Metadata = &map[string]*interface{}{}
		}
		entries := *taxon.Metadata
		entries[name] = &value
	}
	return taxon
}
// GetMetadata returns the pointer stored under the key name in the taxon's
// metadata. It returns nil when the taxon is nil, has no metadata, or has no
// entry for name.
func (taxon *Taxon) GetMetadata(name string) *interface{} {
	if taxon != nil && taxon.Metadata != nil {
		entries := *taxon.Metadata
		return entries[name]
	}
	return nil
}
// HasMetadata reports whether the taxon's metadata contains the key name.
// A nil taxon or a taxon without metadata has no keys.
func (taxon *Taxon) HasMetadata(name string) bool {
	if taxon == nil {
		return false
	}
	if entries := taxon.Metadata; entries != nil {
		_, found := (*entries)[name]
		return found
	}
	return false
}
// RemoveMetadata deletes the key name from the taxon's metadata. It does
// nothing when the taxon is nil, has no metadata, or lacks that key.
func (taxon *Taxon) RemoveMetadata(name string) {
	if taxon != nil && taxon.Metadata != nil {
		delete(*taxon.Metadata, name)
	}
}
// MetadataAsString returns the metadata value stored under name converted to
// a string with obiutils.InterfaceToString. It returns the empty string when
// there is no such entry or when the conversion reports an error.
func (taxon *Taxon) MetadataAsString(name string) string {
	if meta := taxon.GetMetadata(name); meta != nil {
		if text, err := obiutils.InterfaceToString(*meta); err == nil {
			return text
		}
	}
	return ""
}
// MetadataKeys returns the keys of the taxon's metadata, in the unspecified
// order of Go map iteration. It returns nil when the taxon is nil or has no
// metadata.
func (taxon *Taxon) MetadataKeys() []string {
	if taxon == nil || taxon.Metadata == nil {
		return nil
	}

	entries := *taxon.Metadata
	keys := make([]string, len(entries))
	next := 0
	for key := range entries {
		keys[next] = key
		next++
	}
	return keys
}
// MetadataValues returns the values stored in the taxon's metadata, in the
// unspecified order of Go map iteration. It returns nil when the taxon is nil
// or has no metadata.
//
// The metadata map stores pointers to the values; each pointer is
// dereferenced so callers receive the values themselves, as they were passed
// to SetMetadata. A nil pointer in the map is reported as a nil value.
func (taxon *Taxon) MetadataValues() []interface{} {
	if taxon == nil || taxon.Metadata == nil {
		return nil
	}

	values := make([]interface{}, 0, len(*taxon.Metadata))
	for _, v := range *taxon.Metadata {
		if v == nil {
			values = append(values, nil)
			continue
		}
		values = append(values, *v)
	}

	return values
}
// MetadataStringValues returns the values stored in the taxon's metadata,
// each converted to a string with obiutils.InterfaceToString, in the
// unspecified order of Go map iteration. It returns nil when the taxon is nil
// or has no metadata.
//
// The metadata map stores pointers to the values; the pointed-to value is
// what gets converted, as MetadataAsString does. A nil pointer or a value
// whose conversion reports an error contributes an empty string.
func (taxon *Taxon) MetadataStringValues() []string {
	if taxon == nil || taxon.Metadata == nil {
		return nil
	}

	values := make([]string, 0, len(*taxon.Metadata))
	for _, v := range *taxon.Metadata {
		var value string
		if v != nil {
			if s, err := obiutils.InterfaceToString(*v); err == nil {
				value = s
			}
		}
		values = append(values, value)
	}

	return values
}

View File

@ -2,8 +2,10 @@ package obitax
import (
"fmt"
"log"
"regexp"
"strings"
log "github.com/sirupsen/logrus"
)
// TaxNode represents a single taxon in a taxonomy.
@ -161,13 +163,17 @@ func (node *TaxNode) Rank() string {
// Returns:
// - A boolean indicating whether the provided name is equal to the scientific name or exists
// as an alternate name for the taxon.
func (node *TaxNode) IsNameEqual(name string) bool {
if *(node.scientificname) == name {
func (node *TaxNode) IsNameEqual(name string, ignoreCase bool) bool {
if node == nil {
return false
}
if *(node.scientificname) == name || (ignoreCase && strings.EqualFold(*(node.scientificname), name)) {
return true
}
if node.alternatenames != nil {
for _, n := range *node.alternatenames {
if n != nil && *n == name {
// An alternate name matches on exact equality, or case-insensitively when ignoreCase is set.
if n != nil && (*n == name || (ignoreCase && strings.EqualFold(*n, name))) {
return true
}
}

View File

@ -84,14 +84,14 @@ func (path *TaxonSlice) String() string {
if path.Len() > 0 {
taxon := path.slice[path.Len()-1]
fmt.Fprintf(&buffer, "%v@%s@%s",
taxon.Id(),
*taxon.Id(),
taxon.ScientificName(),
taxon.Rank())
for i := path.Len() - 2; i >= 0; i-- {
taxon := path.slice[i]
fmt.Fprintf(&buffer, "|%v@%s@%s",
taxon.Id(),
*taxon.Id(),
taxon.ScientificName(),
taxon.Rank())
}