mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
First functional version
This commit is contained in:
@ -22,26 +22,26 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case obifind.CLIRequestsPathForTaxid() >= 0:
|
case obifind.CLIRequestsPathForTaxid() != "NA":
|
||||||
taxonomy, err := obifind.CLILoadSelectedTaxonomy()
|
taxonomy, err := obifind.CLILoadSelectedTaxonomy()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("%+v", err)
|
fmt.Printf("%+v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
taxon, err := taxonomy.Taxon(obifind.CLIRequestsPathForTaxid())
|
taxon := taxonomy.Taxon(obifind.CLIRequestsPathForTaxid())
|
||||||
|
|
||||||
if err != nil {
|
if taxon == nil {
|
||||||
fmt.Printf("%+v", err)
|
fmt.Printf("%+v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
s, err := taxon.Path()
|
s := taxon.Path()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("%+v", err)
|
fmt.Printf("%+v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
obifind.TaxonWriter(s.Iterator(),
|
obifind.TaxonWriter(s.Iterator(),
|
||||||
fmt.Sprintf("path:%d", taxon.Taxid()))
|
fmt.Sprintf("path:%s", taxon.String()))
|
||||||
|
|
||||||
case len(args) == 0:
|
case len(args) == 0:
|
||||||
taxonomy, err := obifind.CLILoadSelectedTaxonomy()
|
taxonomy, err := obifind.CLILoadSelectedTaxonomy()
|
||||||
|
@ -30,15 +30,11 @@ func CSVRecord(sequence *obiseq.BioSequence, opt Options) []string {
|
|||||||
|
|
||||||
if opt.CSVTaxon() {
|
if opt.CSVTaxon() {
|
||||||
taxid := sequence.Taxid()
|
taxid := sequence.Taxid()
|
||||||
sn, ok := sequence.GetAttribute("scientific_name")
|
sn, ok := sequence.GetStringAttribute("scientific_name")
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
if taxid == 1 {
|
|
||||||
sn = "root"
|
|
||||||
} else {
|
|
||||||
sn = opt.CSVNAValue()
|
sn = opt.CSVNAValue()
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
record = append(record, fmt.Sprint(taxid), fmt.Sprint(sn))
|
record = append(record, fmt.Sprint(taxid), fmt.Sprint(sn))
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,6 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@ -26,24 +25,16 @@ func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) {
|
|||||||
|
|
||||||
for record, err := file.Read(); err == nil; record, err = file.Read() {
|
for record, err := file.Read(); err == nil; record, err = file.Read() {
|
||||||
n++
|
n++
|
||||||
taxid, err := strconv.Atoi(strings.TrimSpace(record[0]))
|
taxid := strings.TrimSpace(record[0])
|
||||||
|
parent := strings.TrimSpace(record[1])
|
||||||
if err != nil {
|
|
||||||
log.Panicf("Cannot read taxon taxid at line %d: %v", n, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
parent, err := strconv.Atoi(strings.TrimSpace(record[1]))
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Panicf("Cannot read taxon parent taxid at line %d: %v", n, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
rank := strings.TrimSpace(record[2])
|
rank := strings.TrimSpace(record[2])
|
||||||
|
|
||||||
taxonomy.AddNewTaxa(taxid, parent, rank, true, true)
|
_, err := taxonomy.AddTaxon(taxid, parent, rank, taxid == "1", false)
|
||||||
}
|
|
||||||
|
|
||||||
taxonomy.ReindexParent()
|
if err != nil {
|
||||||
|
log.Fatalf("Error adding taxon %s: %v\n", taxid, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int {
|
func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int {
|
||||||
@ -65,18 +56,14 @@ func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int
|
|||||||
}
|
}
|
||||||
|
|
||||||
record := strings.Split(string(line), "|")
|
record := strings.Split(string(line), "|")
|
||||||
taxid, err := strconv.Atoi(strings.TrimSpace(record[0]))
|
taxid := strings.TrimSpace(record[0])
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Panicf("Cannot read taxon name taxid at line %d: %v", l, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
name := strings.TrimSpace(record[1])
|
name := strings.TrimSpace(record[1])
|
||||||
classname := strings.TrimSpace(record[3])
|
classname := strings.TrimSpace(record[3])
|
||||||
|
|
||||||
if !onlysn || classname == "scientific name" {
|
if !onlysn || classname == "scientific name" {
|
||||||
n++
|
n++
|
||||||
taxonomy.AddNewName(taxid, &name, &classname)
|
taxonomy.Taxon(taxid).SetName(name, classname)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -94,18 +81,10 @@ func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
|
|||||||
|
|
||||||
for record, err := file.Read(); err == nil; record, err = file.Read() {
|
for record, err := file.Read(); err == nil; record, err = file.Read() {
|
||||||
n++
|
n++
|
||||||
oldtaxid, err := strconv.Atoi(strings.TrimSpace(record[0]))
|
oldtaxid := strings.TrimSpace(record[0])
|
||||||
|
newtaxid := strings.TrimSpace(record[1])
|
||||||
|
|
||||||
if err != nil {
|
taxonomy.AddAlias(newtaxid, oldtaxid, false)
|
||||||
log.Panicf("Cannot read alias taxid at line %d: %v", n, err)
|
|
||||||
}
|
|
||||||
newtaxid, err := strconv.Atoi(strings.TrimSpace(record[1]))
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Panicf("Cannot read alias new taxid at line %d: %v", n, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
taxonomy.AddNewAlias(newtaxid, oldtaxid)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return n
|
return n
|
||||||
@ -113,7 +92,7 @@ func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int {
|
|||||||
|
|
||||||
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
|
func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) {
|
||||||
|
|
||||||
taxonomy := obitax.NewTaxonomy()
|
taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", "[[:digit:]]")
|
||||||
|
|
||||||
//
|
//
|
||||||
// Load the Taxonomy nodes
|
// Load the Taxonomy nodes
|
||||||
|
@ -147,10 +147,10 @@ func bioSequenceGetSetCount(luaState *lua.LState) int {
|
|||||||
func bioSequenceGetSetTaxid(luaState *lua.LState) int {
|
func bioSequenceGetSetTaxid(luaState *lua.LState) int {
|
||||||
s := checkBioSequence(luaState)
|
s := checkBioSequence(luaState)
|
||||||
if luaState.GetTop() == 2 {
|
if luaState.GetTop() == 2 {
|
||||||
s.SetTaxid(luaState.CheckInt(2))
|
s.SetTaxid(luaState.CheckString(2))
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
luaState.Push(lua.LNumber(s.Taxid()))
|
luaState.Push(lua.LString(s.Taxid()))
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
// TODO: The version number is extracted from git. This induces that the version
|
// TODO: The version number is extracted from git. This induces that the version
|
||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// committed
|
// committed
|
||||||
var _Commit = "3e00d39"
|
var _Commit = "9471fed"
|
||||||
var _Version = "Release 4.2.0"
|
var _Version = "Release 4.2.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
@ -9,17 +9,18 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// AttributeKeys returns the keys of the attributes in the BioSequence.
|
// AttributeKeys returns the keys of the attributes in the BioSequence.
|
||||||
|
// It optionally skips keys associated with container values based on the skip_container parameter.
|
||||||
//
|
//
|
||||||
// It does not take any parameters.
|
// Parameters:
|
||||||
|
// - skip_container: A boolean indicating whether to skip keys associated with a container value.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
//
|
// - A set of strings containing the keys of the BioSequence attributes.
|
||||||
// []string: The keys of the BioSequence.
|
func (s *BioSequence) AttributeKeys(skip_container bool) obiutils.Set[string] {
|
||||||
func (s *BioSequence) AttributeKeys(skip_map bool) obiutils.Set[string] {
|
|
||||||
keys := obiutils.MakeSet[string]()
|
keys := obiutils.MakeSet[string]()
|
||||||
|
|
||||||
for k, v := range s.Annotations() {
|
for k, v := range s.Annotations() {
|
||||||
if !skip_map || !obiutils.IsAMap(v) {
|
if !skip_container || !obiutils.IsAContainer(v) {
|
||||||
keys.Add(k)
|
keys.Add(k)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -27,17 +28,18 @@ func (s *BioSequence) AttributeKeys(skip_map bool) obiutils.Set[string] {
|
|||||||
return keys
|
return keys
|
||||||
}
|
}
|
||||||
|
|
||||||
// Keys returns the keys of the BioSequence.
|
// Keys returns the keys of the BioSequence, including standard keys and attribute keys.
|
||||||
//
|
//
|
||||||
// It returns a slice of strings containing the keys of the BioSequence.
|
// It returns a set of strings containing the keys of the BioSequence.
|
||||||
// The keys include "id", "sequence", "qualities", and the attribute keys
|
// The keys include "id", "sequence", "qualities", and the attribute keys of the BioSequence.
|
||||||
// of the BioSequence.
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - skip_container: A boolean indicating whether to skip keys associated with container values.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
//
|
// - A set of strings containing the keys of the BioSequence.
|
||||||
// []string: The keys of the BioSequence.
|
func (s *BioSequence) Keys(skip_container bool) obiutils.Set[string] {
|
||||||
func (s *BioSequence) Keys(skip_map bool) obiutils.Set[string] {
|
keys := s.AttributeKeys(skip_container)
|
||||||
keys := s.AttributeKeys(skip_map)
|
|
||||||
keys.Add("id")
|
keys.Add("id")
|
||||||
|
|
||||||
if s.HasSequence() {
|
if s.HasSequence() {
|
||||||
@ -53,10 +55,10 @@ func (s *BioSequence) Keys(skip_map bool) obiutils.Set[string] {
|
|||||||
// HasAttribute checks if the BioSequence has the specified attribute.
|
// HasAttribute checks if the BioSequence has the specified attribute.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
// - key: a string representing the attribute key to check.
|
// - key: A string representing the attribute key to check.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - a boolean indicating whether the BioSequence has the attribute.
|
// - A boolean indicating whether the BioSequence has the attribute.
|
||||||
func (s *BioSequence) HasAttribute(key string) bool {
|
func (s *BioSequence) HasAttribute(key string) bool {
|
||||||
if key == "id" {
|
if key == "id" {
|
||||||
return true
|
return true
|
||||||
@ -386,31 +388,14 @@ func (s *BioSequence) SetCount(count int) {
|
|||||||
s.SetAttribute("count", count)
|
s.SetAttribute("count", count)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Taxid returns the taxonomic ID associated with the BioSequence.
|
|
||||||
//
|
|
||||||
// It retrieves the "taxid" attribute from the BioSequence's attributes map.
|
|
||||||
// If the attribute is not found, the function returns 1 as the default taxonomic ID.
|
|
||||||
// The taxid 1 corresponds to the root taxonomic level.
|
|
||||||
//
|
|
||||||
// The function returns an integer representing the taxonomic ID.
|
|
||||||
func (s *BioSequence) Taxid() int {
|
|
||||||
taxid, ok := s.GetIntAttribute("taxid")
|
|
||||||
|
|
||||||
if !ok {
|
|
||||||
taxid = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
return taxid
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetTaxid sets the taxid for the BioSequence.
|
// SetTaxid sets the taxid for the BioSequence.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
//
|
//
|
||||||
// taxid - the taxid to set.
|
// taxid - the taxid to set.
|
||||||
func (s *BioSequence) SetTaxid(taxid int) {
|
func (s *BioSequence) SetTaxid(taxid string) {
|
||||||
if taxid < 1 {
|
if taxid == "" {
|
||||||
taxid = 1
|
taxid = "NA"
|
||||||
}
|
}
|
||||||
s.SetAttribute("taxid", taxid)
|
s.SetAttribute("taxid", taxid)
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@ import (
|
|||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
@ -63,6 +64,7 @@ type BioSequence struct {
|
|||||||
sequence []byte // The sequence itself, it is accessible by the methode Sequence
|
sequence []byte // The sequence itself, it is accessible by the methode Sequence
|
||||||
qualities []byte // The quality scores of the sequence.
|
qualities []byte // The quality scores of the sequence.
|
||||||
feature []byte
|
feature []byte
|
||||||
|
taxon *obitax.Taxon
|
||||||
paired *BioSequence // A pointer to the paired sequence
|
paired *BioSequence // A pointer to the paired sequence
|
||||||
revcomp *BioSequence // A pointer to the reverse complemented sequence
|
revcomp *BioSequence // A pointer to the reverse complemented sequence
|
||||||
annotations Annotation
|
annotations Annotation
|
||||||
@ -90,6 +92,7 @@ func NewEmptyBioSequence(preallocate int) *BioSequence {
|
|||||||
sequence: seq,
|
sequence: seq,
|
||||||
qualities: nil,
|
qualities: nil,
|
||||||
feature: nil,
|
feature: nil,
|
||||||
|
taxon: nil,
|
||||||
paired: nil,
|
paired: nil,
|
||||||
revcomp: nil,
|
revcomp: nil,
|
||||||
annotations: nil,
|
annotations: nil,
|
||||||
@ -223,7 +226,7 @@ func (s *BioSequence) HasDefinition() bool {
|
|||||||
// No parameters.
|
// No parameters.
|
||||||
// Returns a boolean.
|
// Returns a boolean.
|
||||||
func (s *BioSequence) HasSequence() bool {
|
func (s *BioSequence) HasSequence() bool {
|
||||||
return s.sequence != nil && len(s.sequence) > 0
|
return len(s.sequence) > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sequence returns the sequence of the BioSequence.
|
// Sequence returns the sequence of the BioSequence.
|
||||||
@ -258,7 +261,7 @@ func (s *BioSequence) Len() int {
|
|||||||
// This function does not have any parameters.
|
// This function does not have any parameters.
|
||||||
// It returns a boolean value indicating whether the BioSequence has qualities.
|
// It returns a boolean value indicating whether the BioSequence has qualities.
|
||||||
func (s *BioSequence) HasQualities() bool {
|
func (s *BioSequence) HasQualities() bool {
|
||||||
return s.qualities != nil && len(s.qualities) > 0
|
return len(s.qualities) > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Qualities returns the sequence quality scores of the BioSequence.
|
// Qualities returns the sequence quality scores of the BioSequence.
|
||||||
|
79
pkg/obiseq/taxonomy_classifier.go
Normal file
79
pkg/obiseq/taxonomy_classifier.go
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
package obiseq
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TaxonomyClassifier is a function that creates a new instance of the BioSequenceClassifier
|
||||||
|
// for taxonomic classification based on a given taxonomic rank, taxonomy, and abort flag.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - taxonomicRank: the taxonomic rank to classify the sequences at.
|
||||||
|
// - taxonomy: the taxonomy object used for classification.
|
||||||
|
// - abortOnMissing: a flag indicating whether to abort if a taxon is missing in the taxonomy.
|
||||||
|
//
|
||||||
|
// Return:
|
||||||
|
// - *obiseq.BioSequenceClassifier: the new instance of the BioSequenceClassifier.
|
||||||
|
func TaxonomyClassifier(taxonomicRank string,
|
||||||
|
taxonomy *obitax.Taxonomy,
|
||||||
|
abortOnMissing bool) *BioSequenceClassifier {
|
||||||
|
|
||||||
|
keys := make(map[*obitax.TaxNode]int)
|
||||||
|
codes := make([]*obitax.TaxNode, 1)
|
||||||
|
codes[0] = nil
|
||||||
|
keys[nil] = 0
|
||||||
|
|
||||||
|
code := func(sequence *BioSequence) int {
|
||||||
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
if taxon != nil {
|
||||||
|
ttaxon := taxon.TaxonAtRank(taxonomicRank)
|
||||||
|
if abortOnMissing && ttaxon == nil {
|
||||||
|
log.Fatalf("Taxon at rank %s not found in taxonomy for taxid %d", taxonomicRank, taxon.String())
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if abortOnMissing {
|
||||||
|
log.Fatalf("Sequence %s: Taxid %s not found in taxonomy",
|
||||||
|
sequence.Id(),
|
||||||
|
sequence.Taxid())
|
||||||
|
}
|
||||||
|
taxon = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
k, ok := keys[taxon.Node]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
return k
|
||||||
|
}
|
||||||
|
|
||||||
|
k = len(codes)
|
||||||
|
keys[taxon.Node] = k
|
||||||
|
codes = append(codes, taxon.Node)
|
||||||
|
|
||||||
|
return k
|
||||||
|
}
|
||||||
|
|
||||||
|
value := func(k int) string {
|
||||||
|
taxon := codes[k]
|
||||||
|
return taxon.ScientificName()
|
||||||
|
}
|
||||||
|
|
||||||
|
reset := func() {
|
||||||
|
keys = make(map[*obitax.TaxNode]int)
|
||||||
|
codes = make([]*obitax.TaxNode, 1)
|
||||||
|
codes[0] = nil
|
||||||
|
keys[nil] = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
clone := func() *BioSequenceClassifier {
|
||||||
|
return TaxonomyClassifier(taxonomicRank, taxonomy, abortOnMissing)
|
||||||
|
}
|
||||||
|
|
||||||
|
c := BioSequenceClassifier{
|
||||||
|
Code: code,
|
||||||
|
Value: value,
|
||||||
|
Reset: reset,
|
||||||
|
Clone: clone,
|
||||||
|
Type: "TaxonomyClassifier"}
|
||||||
|
return &c
|
||||||
|
}
|
131
pkg/obiseq/taxonomy_lca.go
Normal file
131
pkg/obiseq/taxonomy_lca.go
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
package obiseq
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) map[*obitax.TaxNode]int {
|
||||||
|
taxids := sequence.StatsOn(MakeStatsOnDescription("taxid"), "na")
|
||||||
|
taxons := make(map[*obitax.TaxNode]int, len(taxids))
|
||||||
|
|
||||||
|
for taxid, v := range taxids {
|
||||||
|
t := taxonomy.Taxon(taxid)
|
||||||
|
if t == nil {
|
||||||
|
log.Fatalf(
|
||||||
|
"On sequence %s taxid %s is not defined in taxonomy: %s",
|
||||||
|
sequence.Id(),
|
||||||
|
taxid,
|
||||||
|
taxonomy.Name())
|
||||||
|
}
|
||||||
|
taxons[t.Node] = v
|
||||||
|
}
|
||||||
|
return taxons
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sequence *BioSequence) LCA(taxonomy *obitax.Taxonomy, threshold float64) (*obitax.Taxon, float64, int) {
|
||||||
|
taxons := sequence.TaxonomicDistribution(taxonomy)
|
||||||
|
paths := make(map[*obitax.TaxNode]*obitax.TaxonSlice, len(taxons))
|
||||||
|
answer := (*obitax.TaxNode)(nil)
|
||||||
|
rans := 1.0
|
||||||
|
granTotal := 0
|
||||||
|
|
||||||
|
for t, w := range taxons {
|
||||||
|
p := (&obitax.Taxon{Taxonomy: taxonomy,
|
||||||
|
Node: t,
|
||||||
|
}).Path()
|
||||||
|
if p == nil {
|
||||||
|
log.Panicf("Sequence %s: taxonomic path cannot be retreived from Taxid %d : %v", sequence.Id(), t.String(taxonomy.Code()))
|
||||||
|
}
|
||||||
|
|
||||||
|
p.Reverse(true)
|
||||||
|
paths[t] = p
|
||||||
|
answer = p.Get(0)
|
||||||
|
granTotal += w
|
||||||
|
}
|
||||||
|
|
||||||
|
rmax := 1.0
|
||||||
|
levels := make(map[*obitax.TaxNode]int, len(paths))
|
||||||
|
taxonMax := answer
|
||||||
|
|
||||||
|
for i := 0; rmax >= threshold; i++ {
|
||||||
|
answer = taxonMax
|
||||||
|
rans = rmax
|
||||||
|
taxonMax = nil
|
||||||
|
total := 0
|
||||||
|
for taxon, weight := range taxons {
|
||||||
|
path := paths[taxon]
|
||||||
|
if path.Len() > i {
|
||||||
|
levels[path.Get(i)] += weight
|
||||||
|
}
|
||||||
|
total += weight
|
||||||
|
}
|
||||||
|
weighMax := 0
|
||||||
|
for taxon, weight := range levels {
|
||||||
|
if weight > weighMax {
|
||||||
|
weighMax = weight
|
||||||
|
taxonMax = taxon
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if total > 0 {
|
||||||
|
rmax *= float64(weighMax) / float64(total)
|
||||||
|
} else {
|
||||||
|
rmax = 0.0
|
||||||
|
}
|
||||||
|
|
||||||
|
for taxon := range levels {
|
||||||
|
delete(levels, taxon)
|
||||||
|
}
|
||||||
|
for taxon := range taxons {
|
||||||
|
path := paths[taxon]
|
||||||
|
if i < path.Len() {
|
||||||
|
if path.Get(i) != taxonMax {
|
||||||
|
delete(paths, taxon)
|
||||||
|
delete(taxons, taxon)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if taxonMax != nil {
|
||||||
|
// log.Println("@@@>", i, taxonMax.ScientificName(), taxonMax.Taxid(), rans, weighMax, total, rmax)
|
||||||
|
// } else {
|
||||||
|
// log.Println("@@@>", "--", 0, rmax)
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
// log.Println("###>", answer.ScientificName(), answer.Taxid(), rans)
|
||||||
|
// log.Print("========================================")
|
||||||
|
return &obitax.Taxon{Taxonomy: taxonomy, Node: answer}, rans, granTotal
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func AddLCAWorker(taxonomy *obitax.Taxonomy, slot_name string, threshold float64) SeqWorker {
|
||||||
|
|
||||||
|
if !strings.HasSuffix(slot_name, "taxid") {
|
||||||
|
slot_name = slot_name + "_taxid"
|
||||||
|
}
|
||||||
|
|
||||||
|
lca_error := strings.Replace(slot_name, "taxid", "error", 1)
|
||||||
|
if lca_error == "error" {
|
||||||
|
lca_error = "lca_error"
|
||||||
|
}
|
||||||
|
|
||||||
|
lca_name := strings.Replace(slot_name, "taxid", "name", 1)
|
||||||
|
if lca_name == "name" {
|
||||||
|
lca_name = "scientific_name"
|
||||||
|
}
|
||||||
|
|
||||||
|
f := func(sequence *BioSequence) (BioSequenceSlice, error) {
|
||||||
|
lca, rans, _ := sequence.LCA(taxonomy, threshold)
|
||||||
|
|
||||||
|
sequence.SetAttribute(slot_name, lca.String())
|
||||||
|
sequence.SetAttribute(lca_name, lca.ScientificName())
|
||||||
|
sequence.SetAttribute(lca_error, math.Round((1-rans)*1000)/1000)
|
||||||
|
|
||||||
|
return BioSequenceSlice{sequence}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
106
pkg/obiseq/taxonomy_methods.go
Normal file
106
pkg/obiseq/taxonomy_methods.go
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
package obiseq
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||||
|
taxid := s.Taxid()
|
||||||
|
if taxid == "NA" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return taxonomy.Taxon(taxid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Taxid returns the taxonomic ID associated with the BioSequence.
|
||||||
|
//
|
||||||
|
// It retrieves the "taxid" attribute from the BioSequence's attributes map.
|
||||||
|
// If the attribute is not found, the function returns 1 as the default taxonomic ID.
|
||||||
|
// The taxid 1 corresponds to the root taxonomic level.
|
||||||
|
//
|
||||||
|
// The function returns an integer representing the taxonomic ID.
|
||||||
|
func (s *BioSequence) Taxid() (taxid string) {
|
||||||
|
var ok bool
|
||||||
|
if s.taxon != nil {
|
||||||
|
taxid = s.taxon.String()
|
||||||
|
ok = true
|
||||||
|
} else {
|
||||||
|
taxid, ok = s.GetStringAttribute("taxid")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
taxid = "NA"
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxid
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setting the taxon at a given rank for a given sequence.
|
||||||
|
//
|
||||||
|
// Two attributes are added to the sequence. One named by the rank name stores
|
||||||
|
// the taxid, a second named by the rank name suffixed with '_name' contains the
|
||||||
|
// Scientific name of the genus.
|
||||||
|
// If the taxon at the given rank doesn't exist for the taxonomy annotation
|
||||||
|
// of the sequence, nothing happens.
|
||||||
|
func (sequence *BioSequence) SetTaxonAtRank(taxonomy *obitax.Taxonomy, rank string) *obitax.Taxon {
|
||||||
|
var taxonAtRank *obitax.Taxon
|
||||||
|
|
||||||
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
taxonAtRank = nil
|
||||||
|
if taxon != nil {
|
||||||
|
taxonAtRank = taxon.TaxonAtRank(rank)
|
||||||
|
if taxonAtRank != nil {
|
||||||
|
// log.Printf("Taxid: %d Rank: %s --> proposed : %d (%s)", taxid, rank, taxonAtRank.taxid, *(taxonAtRank.scientificname))
|
||||||
|
sequence.SetAttribute(rank+"_taxid", taxonAtRank.String())
|
||||||
|
sequence.SetAttribute(rank+"_name", taxonAtRank.ScientificName())
|
||||||
|
} else {
|
||||||
|
sequence.SetAttribute(rank+"_taxid", "NA")
|
||||||
|
sequence.SetAttribute(rank+"_name", "NA")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxonAtRank
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setting the species of a sequence.
|
||||||
|
func (sequence *BioSequence) SetSpecies(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||||
|
return sequence.SetTaxonAtRank(taxonomy, "species")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setting the genus of a sequence.
|
||||||
|
func (sequence *BioSequence) SetGenus(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||||
|
return sequence.SetTaxonAtRank(taxonomy, "genus")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setting the family of a sequence.
|
||||||
|
func (sequence *BioSequence) SetFamily(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||||
|
return sequence.SetTaxonAtRank(taxonomy, "family")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) string {
|
||||||
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
path := taxon.Path()
|
||||||
|
|
||||||
|
tpath := path.String()
|
||||||
|
sequence.SetAttribute("taxonomic_path", tpath)
|
||||||
|
|
||||||
|
return tpath
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sequence *BioSequence) SetScientificName(taxonomy *obitax.Taxonomy) string {
|
||||||
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
name := taxon.ScientificName()
|
||||||
|
|
||||||
|
sequence.SetAttribute("scienctific_name", name)
|
||||||
|
|
||||||
|
return name
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sequence *BioSequence) SetTaxonomicRank(taxonomy *obitax.Taxonomy) string {
|
||||||
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
rank := taxon.Rank()
|
||||||
|
|
||||||
|
sequence.SetAttribute("taxonomic_rank", rank)
|
||||||
|
|
||||||
|
return rank
|
||||||
|
}
|
98
pkg/obiseq/taxonomy_predicate.go
Normal file
98
pkg/obiseq/taxonomy_predicate.go
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
package obiseq
|
||||||
|
|
||||||
|
import (
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
)
|
||||||
|
|
||||||
|
func IsAValidTaxon(taxonomy *obitax.Taxonomy, withAutoCorrection ...bool) SequencePredicate {
|
||||||
|
// deprecatedTaxidsWarning := make(map[string]bool)
|
||||||
|
|
||||||
|
autocorrection := false
|
||||||
|
if len(withAutoCorrection) > 0 {
|
||||||
|
autocorrection = withAutoCorrection[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
f := func(sequence *BioSequence) bool {
|
||||||
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
|
||||||
|
if taxon != nil {
|
||||||
|
taxid := sequence.Taxid()
|
||||||
|
ttaxid := taxon.String()
|
||||||
|
if taxid != ttaxid {
|
||||||
|
if autocorrection {
|
||||||
|
sequence.SetTaxid(ttaxid)
|
||||||
|
log.Printf(
|
||||||
|
"Sequence %s : Taxid %d updated with %d",
|
||||||
|
sequence.Id(),
|
||||||
|
taxid,
|
||||||
|
ttaxid,
|
||||||
|
)
|
||||||
|
} // else {
|
||||||
|
// if _, ok := deprecatedTaxidsWarning[taxid]; !ok {
|
||||||
|
// deprecatedTaxidsWarning[taxid] = true
|
||||||
|
// log.Printf("Taxid %d is deprecated and must be replaced by %d", taxid, taxon.taxid)
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxon != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
// A function that takes a taxonomy and a taxid as arguments and returns a function that takes a
|
||||||
|
// pointer to a BioSequence as an argument and returns a boolean.
|
||||||
|
func IsSubCladeOf(taxonomy *obitax.Taxonomy, taxid string) SequencePredicate {
|
||||||
|
parent := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
|
if parent == nil {
|
||||||
|
log.Fatalf("Cannot find taxon : %s in taxonomy %s",
|
||||||
|
taxid,
|
||||||
|
taxonomy.Name())
|
||||||
|
}
|
||||||
|
|
||||||
|
f := func(sequence *BioSequence) bool {
|
||||||
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
return taxon != nil && taxon.IsSubCladeOf(parent)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func IsSubCladeOfSlot(taxonomy *obitax.Taxonomy, key string) SequencePredicate {
|
||||||
|
|
||||||
|
f := func(sequence *BioSequence) bool {
|
||||||
|
val, ok := sequence.GetStringAttribute(key)
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
parent := taxonomy.Taxon(val)
|
||||||
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
return parent != nil && taxon != nil && taxon.IsSubCladeOf(parent)
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func HasRequiredRank(taxonomy *obitax.Taxonomy, rank string) SequencePredicate {
|
||||||
|
|
||||||
|
if !obiutils.Contains(taxonomy.RankList(), rank) {
|
||||||
|
log.Fatalf("%s is not a valid rank (allowed ranks are %v)",
|
||||||
|
rank,
|
||||||
|
taxonomy.RankList())
|
||||||
|
}
|
||||||
|
|
||||||
|
f := func(sequence *BioSequence) bool {
|
||||||
|
taxon := sequence.Taxon(taxonomy)
|
||||||
|
return taxon != nil && taxon.HasRankDefined(rank)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
64
pkg/obiseq/taxonomy_workers.go
Normal file
64
pkg/obiseq/taxonomy_workers.go
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
package obiseq
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
func MakeSetTaxonAtRankWorker(taxonomy *obitax.Taxonomy, rank string) SeqWorker {
|
||||||
|
|
||||||
|
if !obiutils.Contains(taxonomy.RankList(), rank) {
|
||||||
|
log.Fatalf("%s is not a valid rank (allowed ranks are %v)",
|
||||||
|
rank,
|
||||||
|
taxonomy.RankList())
|
||||||
|
}
|
||||||
|
|
||||||
|
w := func(sequence *BioSequence) (BioSequenceSlice, error) {
|
||||||
|
sequence.SetTaxonAtRank(taxonomy, rank)
|
||||||
|
return BioSequenceSlice{sequence}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return w
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeSetSpeciesWorker(taxonomy *obitax.Taxonomy) SeqWorker {
|
||||||
|
|
||||||
|
w := func(sequence *BioSequence) (BioSequenceSlice, error) {
|
||||||
|
sequence.SetSpecies(taxonomy)
|
||||||
|
return BioSequenceSlice{sequence}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return w
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeSetGenusWorker(taxonomy *obitax.Taxonomy) SeqWorker {
|
||||||
|
|
||||||
|
w := func(sequence *BioSequence) (BioSequenceSlice, error) {
|
||||||
|
sequence.SetGenus(taxonomy)
|
||||||
|
return BioSequenceSlice{sequence}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return w
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeSetFamilyWorker(taxonomy *obitax.Taxonomy) SeqWorker {
|
||||||
|
|
||||||
|
w := func(sequence *BioSequence) (BioSequenceSlice, error) {
|
||||||
|
sequence.SetFamily(taxonomy)
|
||||||
|
return BioSequenceSlice{sequence}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return w
|
||||||
|
}
|
||||||
|
|
||||||
|
func MakeSetPathWorker(taxonomy *obitax.Taxonomy) SeqWorker {
|
||||||
|
|
||||||
|
w := func(sequence *BioSequence) (BioSequenceSlice, error) {
|
||||||
|
sequence.SetPath(taxonomy)
|
||||||
|
return BioSequenceSlice{sequence}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return w
|
||||||
|
|
||||||
|
}
|
@ -4,31 +4,32 @@ import (
|
|||||||
"regexp"
|
"regexp"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (taxonomy *Taxonomy) IFilterOnName(name string, strict bool) *ITaxonSet {
|
func (taxonomy *Taxonomy) IFilterOnName(name string, strict bool) *ITaxon {
|
||||||
if strict {
|
if strict {
|
||||||
nodes, ok := taxonomy.index[name]
|
nodes, ok := taxonomy.index[taxonomy.names.Innerize(name)]
|
||||||
if ok {
|
if ok {
|
||||||
return nodes.Iterator()
|
return nodes.Iterator()
|
||||||
} else {
|
} else {
|
||||||
empty := make(TaxonSet)
|
empty := taxonomy.NewTaxonSet()
|
||||||
return (&empty).Iterator()
|
return empty.Iterator()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return taxonomy.Iterator().IFilterOnName(name, strict)
|
return taxonomy.Iterator().IFilterOnName(name, strict)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator *ITaxonSet) IFilterOnName(name string, strict bool) *ITaxonSet {
|
func (iterator *ITaxon) IFilterOnName(name string, strict bool) *ITaxon {
|
||||||
newIterator := NewITaxonSet()
|
newIterator := NewITaxon()
|
||||||
sentTaxa := make(map[int]bool)
|
sentTaxa := make(map[*string]bool)
|
||||||
|
|
||||||
if strict {
|
if strict {
|
||||||
go func() {
|
go func() {
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
taxon := iterator.Get()
|
taxon := iterator.Get()
|
||||||
if _, ok := sentTaxa[taxon.taxid]; !ok {
|
node := taxon.Node
|
||||||
|
if _, ok := sentTaxa[node.id]; !ok {
|
||||||
if taxon.IsNameEqual(name) {
|
if taxon.IsNameEqual(name) {
|
||||||
sentTaxa[taxon.taxid] = true
|
sentTaxa[node.id] = true
|
||||||
newIterator.source <- taxon
|
newIterator.source <- taxon
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -41,9 +42,10 @@ func (iterator *ITaxonSet) IFilterOnName(name string, strict bool) *ITaxonSet {
|
|||||||
go func() {
|
go func() {
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
taxon := iterator.Get()
|
taxon := iterator.Get()
|
||||||
if _, ok := sentTaxa[taxon.taxid]; !ok {
|
node := taxon.Node
|
||||||
|
if _, ok := sentTaxa[node.id]; !ok {
|
||||||
if taxon.IsNameMatching(pattern) {
|
if taxon.IsNameMatching(pattern) {
|
||||||
sentTaxa[taxon.taxid] = true
|
sentTaxa[node.id] = true
|
||||||
newIterator.source <- taxon
|
newIterator.source <- taxon
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,12 +1,20 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
func (iterator *ITaxonSet) IFilterOnTaxRank(rank string) *ITaxonSet {
|
func (iterator *ITaxon) IFilterOnTaxRank(rank string) *ITaxon {
|
||||||
newIter := NewITaxonSet()
|
newIter := NewITaxon()
|
||||||
|
var prank *string
|
||||||
|
var ptax *Taxonomy
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
|
|
||||||
taxon := iterator.Get()
|
taxon := iterator.Get()
|
||||||
if taxon.rank == rank {
|
if ptax != taxon.Taxonomy {
|
||||||
|
ptax = taxon.Taxonomy
|
||||||
|
prank = ptax.ranks.Innerize(rank)
|
||||||
|
}
|
||||||
|
|
||||||
|
if taxon.Node.rank == prank {
|
||||||
newIter.source <- taxon
|
newIter.source <- taxon
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -16,14 +24,14 @@ func (iterator *ITaxonSet) IFilterOnTaxRank(rank string) *ITaxonSet {
|
|||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
func (set *TaxonSet) IFilterOnTaxRank(rank string) *ITaxonSet {
|
func (set *TaxonSet) IFilterOnTaxRank(rank string) *ITaxon {
|
||||||
return set.Iterator().IFilterOnTaxRank(rank)
|
return set.Iterator().IFilterOnTaxRank(rank)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (slice *TaxonSlice) IFilterOnTaxRank(rank string) *ITaxonSet {
|
func (slice *TaxonSlice) IFilterOnTaxRank(rank string) *ITaxon {
|
||||||
return slice.Iterator().IFilterOnTaxRank(rank)
|
return slice.Iterator().IFilterOnTaxRank(rank)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (taxonomy *Taxonomy) IFilterOnTaxRank(rank string) *ITaxonSet {
|
func (taxonomy *Taxonomy) IFilterOnTaxRank(rank string) *ITaxon {
|
||||||
return taxonomy.Iterator().IFilterOnTaxRank(rank)
|
return taxonomy.Iterator().IFilterOnTaxRank(rank)
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import "reflect"
|
func (iterator *ITaxon) IFilterOnSubcladeOf(taxon *Taxon) *ITaxon {
|
||||||
|
newIter := NewITaxon()
|
||||||
func (iterator *ITaxonSet) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
|
|
||||||
newIter := NewITaxonSet()
|
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
@ -18,32 +16,36 @@ func (iterator *ITaxonSet) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
|
|||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
func (set *TaxonSet) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
|
func (set *TaxonSet) IFilterOnSubcladeOf(taxon *Taxon) *ITaxon {
|
||||||
return set.Iterator().IFilterOnSubcladeOf(taxon)
|
return set.Iterator().IFilterOnSubcladeOf(taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (slice *TaxonSlice) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
|
func (slice *TaxonSlice) IFilterOnSubcladeOf(taxon *Taxon) *ITaxon {
|
||||||
return slice.Iterator().IFilterOnSubcladeOf(taxon)
|
return slice.Iterator().IFilterOnSubcladeOf(taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (taxonomy *Taxonomy) IFilterOnSubcladeOf(taxon *TaxNode) *ITaxonSet {
|
func (taxonomy *Taxonomy) IFilterOnSubcladeOf(taxon *Taxon) *ITaxon {
|
||||||
return taxonomy.Iterator().IFilterOnSubcladeOf(taxon)
|
return taxonomy.Iterator().IFilterOnSubcladeOf(taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator *ITaxonSet) IFilterBelongingSubclades(clades *TaxonSet) *ITaxonSet {
|
func (iterator *ITaxon) IFilterBelongingSubclades(clades *TaxonSet) *ITaxon {
|
||||||
|
|
||||||
if len(*clades) == 0 {
|
if clades.Len() == 0 {
|
||||||
return iterator
|
return iterator
|
||||||
}
|
}
|
||||||
|
|
||||||
// Considers the second simplest case when only
|
// Considers the second simplest case when only
|
||||||
// a single subclade is provided
|
// a single subclade is provided
|
||||||
if len(*clades) == 1 {
|
if clades.Len() == 1 {
|
||||||
keys := reflect.ValueOf(*clades).MapKeys()
|
keys := make([]*string, 0, len(clades.set))
|
||||||
return iterator.IFilterOnSubcladeOf((*clades)[int(keys[0].Int())])
|
for k := range clades.set {
|
||||||
|
keys = append(keys, k)
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter := NewITaxonSet()
|
return iterator.IFilterOnSubcladeOf(clades.Get(keys[0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter := NewITaxon()
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
|
@ -5,7 +5,7 @@ import "sync"
|
|||||||
// InnerString is a struct that holds a map of strings and a read-write lock for concurrent access.
|
// InnerString is a struct that holds a map of strings and a read-write lock for concurrent access.
|
||||||
// The index map is used to store key-value pairs of strings.
|
// The index map is used to store key-value pairs of strings.
|
||||||
type InnerString struct {
|
type InnerString struct {
|
||||||
index map[string]string
|
index map[string]*string
|
||||||
lock sync.RWMutex
|
lock sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -13,7 +13,7 @@ type InnerString struct {
|
|||||||
// The lock is set to false.
|
// The lock is set to false.
|
||||||
func NewInnerString() *InnerString {
|
func NewInnerString() *InnerString {
|
||||||
return &InnerString{
|
return &InnerString{
|
||||||
index: make(map[string]string),
|
index: make(map[string]*string),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -26,13 +26,13 @@ func NewInnerString() *InnerString {
|
|||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - The string value associated with the key.
|
// - The string value associated with the key.
|
||||||
func (i *InnerString) Innerize(value string) string {
|
func (i *InnerString) Innerize(value string) *string {
|
||||||
i.lock.Lock()
|
i.lock.Lock()
|
||||||
defer i.lock.Unlock()
|
defer i.lock.Unlock()
|
||||||
s, ok := i.index[value]
|
s, ok := i.index[value]
|
||||||
if !ok {
|
if !ok {
|
||||||
i.index[value] = value
|
s = &value
|
||||||
s = value
|
i.index[value] = s
|
||||||
}
|
}
|
||||||
|
|
||||||
return s
|
return s
|
||||||
@ -42,7 +42,7 @@ func (i *InnerString) Slice() []string {
|
|||||||
rep := make([]string, len(i.index))
|
rep := make([]string, len(i.index))
|
||||||
j := 0
|
j := 0
|
||||||
for _, v := range i.index {
|
for _, v := range i.index {
|
||||||
rep[j] = v
|
rep[j] = *v
|
||||||
j++
|
j++
|
||||||
}
|
}
|
||||||
return rep
|
return rep
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import "log"
|
import log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
func (taxon *Taxon) IsSubCladeOf(parent *Taxon) bool {
|
func (taxon *Taxon) IsSubCladeOf(parent *Taxon) bool {
|
||||||
|
|
||||||
@ -20,3 +20,18 @@ func (taxon *Taxon) IsSubCladeOf(parent *Taxon) bool {
|
|||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (taxon *Taxon) IsBelongingSubclades(clades *TaxonSet) bool {
|
||||||
|
ok := clades.Contains(taxon.Node.id)
|
||||||
|
|
||||||
|
for !ok && !taxon.IsRoot() {
|
||||||
|
taxon = taxon.Parent()
|
||||||
|
ok = clades.Contains(taxon.Node.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
if taxon.IsRoot() {
|
||||||
|
ok = clades.Contains(taxon.Node.id)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ok
|
||||||
|
}
|
||||||
|
@ -1,24 +1,31 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
type ITaxonSet struct {
|
type ITaxon struct {
|
||||||
source chan *TaxNode
|
source chan *Taxon
|
||||||
current *TaxNode
|
current *Taxon
|
||||||
finished bool
|
finished bool
|
||||||
p_finished *bool
|
p_finished *bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewITaxonSet() *ITaxonSet {
|
func NewITaxon() *ITaxon {
|
||||||
i := ITaxonSet{make(chan *TaxNode), nil, false, nil}
|
i := ITaxon{
|
||||||
|
source: make(chan *Taxon),
|
||||||
|
current: nil,
|
||||||
|
finished: false,
|
||||||
|
p_finished: nil}
|
||||||
i.p_finished = &i.finished
|
i.p_finished = &i.finished
|
||||||
return &i
|
return &i
|
||||||
}
|
}
|
||||||
|
|
||||||
func (set *TaxonSet) Iterator() *ITaxonSet {
|
func (set *TaxonSet) Iterator() *ITaxon {
|
||||||
i := NewITaxonSet()
|
i := NewITaxon()
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
for _, t := range set.set {
|
for _, t := range set.set {
|
||||||
i.source <- t
|
i.source <- &Taxon{
|
||||||
|
Taxonomy: set.taxonomy,
|
||||||
|
Node: t,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
close(i.source)
|
close(i.source)
|
||||||
}()
|
}()
|
||||||
@ -26,12 +33,15 @@ func (set *TaxonSet) Iterator() *ITaxonSet {
|
|||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
|
||||||
func (set *TaxonSlice) Iterator() *ITaxonSet {
|
func (set *TaxonSlice) Iterator() *ITaxon {
|
||||||
i := NewITaxonSet()
|
i := NewITaxon()
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
for _, t := range set.slice {
|
for _, t := range set.slice {
|
||||||
i.source <- t
|
i.source <- &Taxon{
|
||||||
|
Taxonomy: set.taxonomy,
|
||||||
|
Node: t,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
close(i.source)
|
close(i.source)
|
||||||
}()
|
}()
|
||||||
@ -39,11 +49,11 @@ func (set *TaxonSlice) Iterator() *ITaxonSet {
|
|||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
|
||||||
func (taxonmy *Taxonomy) Iterator() *ITaxonSet {
|
func (taxonmy *Taxonomy) Iterator() *ITaxon {
|
||||||
return taxonmy.nodes.Iterator()
|
return taxonmy.nodes.Iterator()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator *ITaxonSet) Next() bool {
|
func (iterator *ITaxon) Next() bool {
|
||||||
if *(iterator.p_finished) {
|
if *(iterator.p_finished) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -63,37 +73,21 @@ func (iterator *ITaxonSet) Next() bool {
|
|||||||
// currently pointed by the iterator. You have to use the
|
// currently pointed by the iterator. You have to use the
|
||||||
// 'Next' method to move to the next entry before calling
|
// 'Next' method to move to the next entry before calling
|
||||||
// 'Get' to retrieve the following instance.
|
// 'Get' to retrieve the following instance.
|
||||||
func (iterator *ITaxonSet) Get() *TaxNode {
|
func (iterator *ITaxon) Get() *Taxon {
|
||||||
return iterator.current
|
return iterator.current
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finished returns 'true' value if no more data is available
|
// Finished returns 'true' value if no more data is available
|
||||||
// from the iterator.
|
// from the iterator.
|
||||||
func (iterator *ITaxonSet) Finished() bool {
|
func (iterator *ITaxon) Finished() bool {
|
||||||
return *iterator.p_finished
|
return *iterator.p_finished
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator *ITaxonSet) Split() *ITaxonSet {
|
func (iterator *ITaxon) Split() *ITaxon {
|
||||||
newIter := ITaxonSet{iterator.source, nil, false, iterator.p_finished}
|
return &ITaxon{
|
||||||
return &newIter
|
source: iterator.source,
|
||||||
|
current: nil,
|
||||||
|
finished: false,
|
||||||
|
p_finished: iterator.p_finished,
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator *ITaxonSet) TaxonSet() *TaxonSet {
|
|
||||||
set := make(TaxonSet)
|
|
||||||
|
|
||||||
for iterator.Next() {
|
|
||||||
taxon := iterator.Get()
|
|
||||||
set[taxon.id] = taxon
|
|
||||||
}
|
|
||||||
return &set
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator *ITaxonSet) TaxonSlice() *TaxonSlice {
|
|
||||||
slice := make(TaxonSlice, 0)
|
|
||||||
|
|
||||||
for iterator.Next() {
|
|
||||||
taxon := iterator.Get()
|
|
||||||
slice = append(slice, taxon)
|
|
||||||
}
|
|
||||||
return &slice
|
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (t1 *TaxNode) LCA(t2 *TaxNode) (*TaxNode, error) {
|
func (t1 *Taxon) LCA(t2 *Taxon) (*Taxon, error) {
|
||||||
if t1 == nil {
|
if t1 == nil {
|
||||||
log.Panicf("Try to get LCA of nil taxon")
|
log.Panicf("Try to get LCA of nil taxon")
|
||||||
}
|
}
|
||||||
@ -13,25 +13,19 @@ func (t1 *TaxNode) LCA(t2 *TaxNode) (*TaxNode, error) {
|
|||||||
log.Panicf("Try to get LCA of nil taxon")
|
log.Panicf("Try to get LCA of nil taxon")
|
||||||
}
|
}
|
||||||
|
|
||||||
p1, err1 := t1.Path()
|
p1 := t1.Path()
|
||||||
|
p2 := t2.Path()
|
||||||
|
|
||||||
if err1 != nil {
|
i1 := p1.Len() - 1
|
||||||
return nil, err1
|
i2 := p2.Len() - 1
|
||||||
}
|
|
||||||
|
|
||||||
p2, err2 := t2.Path()
|
for i1 >= 0 && i2 >= 0 && p1.slice[i1].id == p2.slice[i2].id {
|
||||||
|
|
||||||
if err2 != nil {
|
|
||||||
return nil, err2
|
|
||||||
}
|
|
||||||
|
|
||||||
i1 := len(*p1) - 1
|
|
||||||
i2 := len(*p2) - 1
|
|
||||||
|
|
||||||
for i1 >= 0 && i2 >= 0 && (*p1)[i1].taxid == (*p2)[i2].taxid {
|
|
||||||
i1--
|
i1--
|
||||||
i2--
|
i2--
|
||||||
}
|
}
|
||||||
|
|
||||||
return (*p1)[i1+1], nil
|
return &Taxon{
|
||||||
|
Taxonomy: t1.Taxonomy,
|
||||||
|
Node: p1.slice[i1+1],
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,9 @@ type Taxon struct {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - A formatted string representing the Taxon in the form "taxonomy_code:taxon_id [scientific_name]".
|
// - A formatted string representing the Taxon in the form "taxonomy_code:taxon_id [scientific_name]".
|
||||||
func (taxon *Taxon) String() string {
|
func (taxon *Taxon) String() string {
|
||||||
|
if taxon == nil {
|
||||||
|
return "NA"
|
||||||
|
}
|
||||||
return taxon.Node.String(taxon.Taxonomy.code)
|
return taxon.Node.String(taxon.Taxonomy.code)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -33,24 +36,52 @@ func (taxon *Taxon) String() string {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - The scientific name of the taxon as a string.
|
// - The scientific name of the taxon as a string.
|
||||||
func (taxon *Taxon) ScientificName() string {
|
func (taxon *Taxon) ScientificName() string {
|
||||||
|
if taxon == nil {
|
||||||
|
return "NA"
|
||||||
|
}
|
||||||
return taxon.Node.ScientificName()
|
return taxon.Node.ScientificName()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (taxon *Taxon) Name(class string) string {
|
func (taxon *Taxon) Name(class string) string {
|
||||||
return taxon.Node.Name(class)
|
if taxon == nil {
|
||||||
|
return "NA"
|
||||||
|
}
|
||||||
|
pclass := taxon.Taxonomy.nameclasses.Innerize(class)
|
||||||
|
return taxon.Node.Name(pclass)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (taxon *Taxon) IsNameEqual(name string) bool {
|
func (taxon *Taxon) IsNameEqual(name string) bool {
|
||||||
|
if taxon == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
return taxon.Node.IsNameEqual(name)
|
return taxon.Node.IsNameEqual(name)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (taxon *Taxon) IsNameMatching(pattern *regexp.Regexp) bool {
|
func (taxon *Taxon) IsNameMatching(pattern *regexp.Regexp) bool {
|
||||||
|
if taxon == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
return taxon.Node.IsNameMatching(pattern)
|
return taxon.Node.IsNameMatching(pattern)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (taxon *Taxon) SetName(name, class string) {
|
func (taxon *Taxon) SetName(name, class string) {
|
||||||
class = taxon.Taxonomy.nameclasses.Innerize(class)
|
if taxon == nil {
|
||||||
taxon.Node.SetName(name, class)
|
log.Panicf("nil taxon pointer for name %s [%s]", name, class)
|
||||||
|
}
|
||||||
|
|
||||||
|
pclass := taxon.Taxonomy.nameclasses.Innerize(class)
|
||||||
|
pname := taxon.Taxonomy.names.Innerize(name)
|
||||||
|
taxon.Node.SetName(pname, pclass)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (taxon *Taxon) IsRoot() bool {
|
||||||
|
if taxon == nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxon.Taxonomy.root == taxon.Node
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rank returns the rank of the Taxon.
|
// Rank returns the rank of the Taxon.
|
||||||
@ -59,6 +90,9 @@ func (taxon *Taxon) SetName(name, class string) {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - The rank of the taxon as a string (e.g., species, genus, family).
|
// - The rank of the taxon as a string (e.g., species, genus, family).
|
||||||
func (taxon *Taxon) Rank() string {
|
func (taxon *Taxon) Rank() string {
|
||||||
|
if taxon == nil {
|
||||||
|
return "NA"
|
||||||
|
}
|
||||||
return taxon.Node.Rank()
|
return taxon.Node.Rank()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -70,9 +104,12 @@ func (taxon *Taxon) Rank() string {
|
|||||||
// - A pointer to the parent Taxon[T]. If the parent does not exist, it returns
|
// - A pointer to the parent Taxon[T]. If the parent does not exist, it returns
|
||||||
// a Taxon with a nil Node.
|
// a Taxon with a nil Node.
|
||||||
func (taxon *Taxon) Parent() *Taxon {
|
func (taxon *Taxon) Parent() *Taxon {
|
||||||
|
if taxon == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
pid := taxon.Node.ParentId()
|
pid := taxon.Node.ParentId()
|
||||||
return &Taxon{taxon.Taxonomy,
|
return taxon.Taxonomy.nodes.Get(pid)
|
||||||
taxon.Taxonomy.nodes.Get(pid)}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IPath returns an iterator that yields the path from the current Taxon to the root Taxon
|
// IPath returns an iterator that yields the path from the current Taxon to the root Taxon
|
||||||
@ -83,12 +120,13 @@ func (taxon *Taxon) Parent() *Taxon {
|
|||||||
// is called with each Taxon in the path from the current taxon to the root. If the
|
// is called with each Taxon in the path from the current taxon to the root. If the
|
||||||
// taxonomy has no root node, the method logs a fatal error and terminates the program.
|
// taxonomy has no root node, the method logs a fatal error and terminates the program.
|
||||||
func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
|
func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
|
||||||
|
|
||||||
if taxon.Taxonomy.root == nil {
|
if taxon.Taxonomy.root == nil {
|
||||||
log.Fatalf("Taxon[%v].IPath(): Taxonomy has no root node", taxon.Taxonomy.name)
|
log.Fatalf("Taxon[%v].IPath(): Taxonomy has no root node", taxon.Taxonomy.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
return func(yield func(*Taxon) bool) {
|
return func(yield func(*Taxon) bool) {
|
||||||
for taxon.Node.parent != taxon.Taxonomy.root.id {
|
for !taxon.IsRoot() {
|
||||||
if !yield(taxon) {
|
if !yield(taxon) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@ -96,8 +134,9 @@ func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
|
|||||||
taxon = taxon.Parent()
|
taxon = taxon.Parent()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if taxon != nil {
|
||||||
yield(taxon)
|
yield(taxon)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,6 +148,10 @@ func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
|
|||||||
// - A pointer to a TaxonSlice[T] containing the TaxNode[T] instances in the path
|
// - A pointer to a TaxonSlice[T] containing the TaxNode[T] instances in the path
|
||||||
// from the current taxon to the root.
|
// from the current taxon to the root.
|
||||||
func (taxon *Taxon) Path() *TaxonSlice {
|
func (taxon *Taxon) Path() *TaxonSlice {
|
||||||
|
if taxon == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
s := make([]*TaxNode, 0, 10)
|
s := make([]*TaxNode, 0, 10)
|
||||||
|
|
||||||
for t := range taxon.IPath() {
|
for t := range taxon.IPath() {
|
||||||
@ -131,8 +174,13 @@ func (taxon *Taxon) Path() *TaxonSlice {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - A boolean indicating whether any taxon in the path has the specified rank defined.
|
// - A boolean indicating whether any taxon in the path has the specified rank defined.
|
||||||
func (taxon *Taxon) HasRankDefined(rank string) bool {
|
func (taxon *Taxon) HasRankDefined(rank string) bool {
|
||||||
|
if taxon == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
prank := taxon.Taxonomy.ranks.Innerize(rank)
|
||||||
for t := range taxon.IPath() {
|
for t := range taxon.IPath() {
|
||||||
if t.Node.Rank() == rank {
|
if t.Node.rank == prank {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -151,8 +199,14 @@ func (taxon *Taxon) HasRankDefined(rank string) bool {
|
|||||||
// - A pointer to the Taxon[T] that matches the specified rank, or nil if no such taxon exists
|
// - A pointer to the Taxon[T] that matches the specified rank, or nil if no such taxon exists
|
||||||
// in the path to the root.
|
// in the path to the root.
|
||||||
func (taxon *Taxon) TaxonAtRank(rank string) *Taxon {
|
func (taxon *Taxon) TaxonAtRank(rank string) *Taxon {
|
||||||
|
if taxon == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
prank := taxon.Taxonomy.ranks.Innerize(rank)
|
||||||
|
|
||||||
for t := range taxon.IPath() {
|
for t := range taxon.IPath() {
|
||||||
if t.Node.Rank() == rank {
|
if t.Node.rank == prank {
|
||||||
return t
|
return t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,7 @@ package obitax
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"regexp"
|
"regexp"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -18,11 +19,11 @@ import (
|
|||||||
// a string representing the class name and the value is a pointer to a string
|
// a string representing the class name and the value is a pointer to a string
|
||||||
// representing the name.
|
// representing the name.
|
||||||
type TaxNode struct {
|
type TaxNode struct {
|
||||||
id string
|
id *string
|
||||||
parent string
|
parent *string
|
||||||
rank string
|
rank *string
|
||||||
scientificname *string
|
scientificname *string
|
||||||
alternatenames *map[string]*string
|
alternatenames *map[*string]*string
|
||||||
}
|
}
|
||||||
|
|
||||||
// String returns a string representation of the TaxNode, including the taxonomy code,
|
// String returns a string representation of the TaxNode, including the taxonomy code,
|
||||||
@ -36,7 +37,7 @@ type TaxNode struct {
|
|||||||
func (node *TaxNode) String(taxonomyCode string) string {
|
func (node *TaxNode) String(taxonomyCode string) string {
|
||||||
return fmt.Sprintf("%s:%v [%s]",
|
return fmt.Sprintf("%s:%v [%s]",
|
||||||
taxonomyCode,
|
taxonomyCode,
|
||||||
node.id,
|
*node.id,
|
||||||
node.ScientificName())
|
node.ScientificName())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -45,7 +46,7 @@ func (node *TaxNode) String(taxonomyCode string) string {
|
|||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - The unique identifier of the taxon node of type T.
|
// - The unique identifier of the taxon node of type T.
|
||||||
func (node *TaxNode) Id() string {
|
func (node *TaxNode) Id() *string {
|
||||||
return node.id
|
return node.id
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -54,7 +55,7 @@ func (node *TaxNode) Id() string {
|
|||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - The identifier of the parent taxon of type T.
|
// - The identifier of the parent taxon of type T.
|
||||||
func (node *TaxNode) ParentId() string {
|
func (node *TaxNode) ParentId() *string {
|
||||||
return node.parent
|
return node.parent
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -66,6 +67,12 @@ func (node *TaxNode) ParentId() string {
|
|||||||
// - Note: This method assumes that scientificname is not nil;
|
// - Note: This method assumes that scientificname is not nil;
|
||||||
// if it may be nil, additional error handling should be implemented.
|
// if it may be nil, additional error handling should be implemented.
|
||||||
func (node *TaxNode) ScientificName() string {
|
func (node *TaxNode) ScientificName() string {
|
||||||
|
if node == nil {
|
||||||
|
return "NA"
|
||||||
|
}
|
||||||
|
if node.scientificname == nil {
|
||||||
|
return "NA"
|
||||||
|
}
|
||||||
return *node.scientificname
|
return *node.scientificname
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,8 +87,9 @@ func (node *TaxNode) ScientificName() string {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - The name of the taxon as a string. If the class is not recognized or if no name is available,
|
// - The name of the taxon as a string. If the class is not recognized or if no name is available,
|
||||||
// an empty string is returned.
|
// an empty string is returned.
|
||||||
func (node *TaxNode) Name(class string) string {
|
func (node *TaxNode) Name(class *string) string {
|
||||||
if class == "scientificname" {
|
|
||||||
|
if *class == "scientific name" {
|
||||||
return *node.scientificname
|
return *node.scientificname
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,17 +106,21 @@ func (node *TaxNode) Name(class string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
func (node *TaxNode) SetName(name, class string) {
|
func (node *TaxNode) SetName(name, class *string) {
|
||||||
if class == "scientificname" {
|
if node == nil {
|
||||||
node.scientificname = &name
|
log.Panic("Cannot set name of nil TaxNode")
|
||||||
|
}
|
||||||
|
|
||||||
|
if *class == "scientific name" {
|
||||||
|
node.scientificname = name
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if node.alternatenames == nil {
|
if node.alternatenames == nil {
|
||||||
node.alternatenames = &map[string]*string{}
|
node.alternatenames = &map[*string]*string{}
|
||||||
}
|
}
|
||||||
|
|
||||||
(*node.alternatenames)[class] = &name
|
(*node.alternatenames)[class] = name
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rank returns the rank of the TaxNode.
|
// Rank returns the rank of the TaxNode.
|
||||||
@ -117,7 +129,7 @@ func (node *TaxNode) SetName(name, class string) {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - The rank of the taxon as a string (e.g., species, genus, family).
|
// - The rank of the taxon as a string (e.g., species, genus, family).
|
||||||
func (node *TaxNode) Rank() string {
|
func (node *TaxNode) Rank() string {
|
||||||
return node.rank
|
return *node.rank
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsNameEqual checks if the provided name matches the scientific name or any alternate names
|
// IsNameEqual checks if the provided name matches the scientific name or any alternate names
|
||||||
@ -154,9 +166,14 @@ func (node *TaxNode) IsNameEqual(name string) bool {
|
|||||||
// - A boolean indicating whether the scientific name or any alternate names match the
|
// - A boolean indicating whether the scientific name or any alternate names match the
|
||||||
// provided regular expression pattern.
|
// provided regular expression pattern.
|
||||||
func (node *TaxNode) IsNameMatching(pattern *regexp.Regexp) bool {
|
func (node *TaxNode) IsNameMatching(pattern *regexp.Regexp) bool {
|
||||||
if pattern.MatchString(*(node.scientificname)) {
|
if node == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if node.scientificname != nil && pattern.MatchString(*(node.scientificname)) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
if node.alternatenames != nil {
|
if node.alternatenames != nil {
|
||||||
for _, n := range *node.alternatenames {
|
for _, n := range *node.alternatenames {
|
||||||
if n != nil && pattern.MatchString(*n) {
|
if n != nil && pattern.MatchString(*n) {
|
||||||
|
@ -21,12 +21,14 @@ import (
|
|||||||
type Taxonomy struct {
|
type Taxonomy struct {
|
||||||
name string
|
name string
|
||||||
code string
|
code string
|
||||||
|
ids *InnerString
|
||||||
ranks *InnerString
|
ranks *InnerString
|
||||||
nameclasses *InnerString
|
nameclasses *InnerString
|
||||||
|
names *InnerString
|
||||||
nodes *TaxonSet
|
nodes *TaxonSet
|
||||||
root *TaxNode
|
root *TaxNode
|
||||||
matcher *regexp.Regexp
|
matcher *regexp.Regexp
|
||||||
index map[string]*TaxonSet
|
index map[*string]*TaxonSet
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewTaxonomy creates and initializes a new Taxonomy instance with the specified name and code.
|
// NewTaxonomy creates and initializes a new Taxonomy instance with the specified name and code.
|
||||||
@ -39,7 +41,7 @@ type Taxonomy struct {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to the newly created Taxonomy instance.
|
// - A pointer to the newly created Taxonomy instance.
|
||||||
func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
|
func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
|
||||||
set := make(map[string]*TaxNode)
|
set := make(map[*string]*TaxNode)
|
||||||
|
|
||||||
// codeCharacters := "[[:alnum:]]" // [[:digit:]]
|
// codeCharacters := "[[:alnum:]]" // [[:digit:]]
|
||||||
|
|
||||||
@ -48,12 +50,14 @@ func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
|
|||||||
taxonomy := &Taxonomy{
|
taxonomy := &Taxonomy{
|
||||||
name: name,
|
name: name,
|
||||||
code: code,
|
code: code,
|
||||||
|
ids: NewInnerString(),
|
||||||
ranks: NewInnerString(),
|
ranks: NewInnerString(),
|
||||||
nameclasses: NewInnerString(),
|
nameclasses: NewInnerString(),
|
||||||
|
names: NewInnerString(),
|
||||||
nodes: &TaxonSet{set: set},
|
nodes: &TaxonSet{set: set},
|
||||||
root: nil,
|
root: nil,
|
||||||
matcher: matcher,
|
matcher: matcher,
|
||||||
index: make(map[string]*TaxonSet),
|
index: make(map[*string]*TaxonSet),
|
||||||
}
|
}
|
||||||
|
|
||||||
taxonomy.nodes.taxonomy = taxonomy
|
taxonomy.nodes.taxonomy = taxonomy
|
||||||
@ -69,16 +73,16 @@ func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
|
|||||||
// - taxid: A string representation of the taxon identifier to be converted.
|
// - taxid: A string representation of the taxon identifier to be converted.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - The taxon identifier of type T corresponding to the provided taxid.
|
// - The taxon identifier as a *string corresponding to the provided taxid.
|
||||||
// - An error if the taxid is not valid or cannot be converted.
|
// - An error if the taxid is not valid or cannot be converted.
|
||||||
func (taxonomy *Taxonomy) Id(taxid string) (string, error) {
|
func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
|
||||||
matches := taxonomy.matcher.FindStringSubmatch(taxid)
|
matches := taxonomy.matcher.FindStringSubmatch(taxid)
|
||||||
|
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return "", fmt.Errorf("Taxid %s is not a valid taxid", taxid)
|
return nil, fmt.Errorf("taxid %s is not a valid taxid", taxid)
|
||||||
}
|
}
|
||||||
|
|
||||||
return matches[2], nil
|
return taxonomy.ids.Innerize(matches[2]), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// TaxidSting retrieves the string representation of a taxon node identified by the given ID.
|
// TaxidSting retrieves the string representation of a taxon node identified by the given ID.
|
||||||
@ -92,11 +96,19 @@ func (taxonomy *Taxonomy) Id(taxid string) (string, error) {
|
|||||||
// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]",
|
// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]",
|
||||||
// or an error if the taxon node with the specified ID does not exist in the taxonomy.
|
// or an error if the taxon node with the specified ID does not exist in the taxonomy.
|
||||||
func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
|
func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
|
||||||
node := taxonomy.nodes.Get(id)
|
pid, err := taxonomy.Id(id)
|
||||||
if node == nil {
|
|
||||||
return "", fmt.Errorf("Taxid %d is part of the taxonomy", id)
|
if err != nil {
|
||||||
|
return "", err
|
||||||
}
|
}
|
||||||
return node.String(taxonomy.code), nil
|
|
||||||
|
taxon := taxonomy.nodes.Get(pid)
|
||||||
|
|
||||||
|
if taxon == nil {
|
||||||
|
return "", fmt.Errorf("taxid %s is not part of the taxonomy", id)
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxon.String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Taxon retrieves the Taxon associated with the given taxid string.
|
// Taxon retrieves the Taxon associated with the given taxid string.
|
||||||
@ -113,19 +125,18 @@ func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
|
|||||||
id, err := taxonomy.Id(taxid)
|
id, err := taxonomy.Id(taxid)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Taxid %s is not a valid taxid", taxid)
|
log.Fatalf("Taxid %s: %v", taxid, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
node := taxonomy.nodes.Get(id)
|
taxon := taxonomy.nodes.Get(id)
|
||||||
|
|
||||||
if node == nil {
|
if taxon == nil {
|
||||||
log.Fatalf("Taxid %s is an unknown taxid", taxid)
|
log.Fatalf("Taxid %s is not part of the taxonomy %s",
|
||||||
|
taxid,
|
||||||
|
taxonomy.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Taxon{
|
return taxon
|
||||||
Taxonomy: taxonomy,
|
|
||||||
Node: node,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TaxonSet returns the set of taxon nodes contained within the Taxonomy.
|
// TaxonSet returns the set of taxon nodes contained within the Taxonomy.
|
||||||
@ -133,7 +144,7 @@ func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
|
|||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to the TaxonSet[T] representing the collection of taxon nodes in the taxonomy.
|
// - A pointer to the TaxonSet[T] representing the collection of taxon nodes in the taxonomy.
|
||||||
func (taxonomy *Taxonomy) TaxonSet() *TaxonSet {
|
func (taxonomy *Taxonomy) AsTaxonSet() *TaxonSet {
|
||||||
return taxonomy.nodes
|
return taxonomy.nodes
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,13 +171,25 @@ func (taxonomy *Taxonomy) Len() int {
|
|||||||
// - A pointer to the newly created Taxon[T] instance.
|
// - A pointer to the newly created Taxon[T] instance.
|
||||||
// - An error if the taxon cannot be added (e.g., it already exists and replace is false).
|
// - An error if the taxon cannot be added (e.g., it already exists and replace is false).
|
||||||
func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) {
|
func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) {
|
||||||
if !replace && taxonomy.nodes.Contains(taxid) {
|
|
||||||
return nil, fmt.Errorf("trying to add taxon %d already present in the taxonomy", taxid)
|
parentid, perr := taxonomy.Id(parent)
|
||||||
|
id, err := taxonomy.Id(taxid)
|
||||||
|
|
||||||
|
if perr != nil {
|
||||||
|
return nil, fmt.Errorf("error in parsing parent taxid %s: %v", parent, perr)
|
||||||
}
|
}
|
||||||
|
|
||||||
rank = taxonomy.ranks.Innerize(rank)
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error in parsing taxid %s: %v", taxid, err)
|
||||||
|
}
|
||||||
|
|
||||||
n := &TaxNode{taxid, parent, rank, nil, nil}
|
if !replace && taxonomy.nodes.Contains(id) {
|
||||||
|
return nil, fmt.Errorf("trying to add taxon %s already present in the taxonomy", taxid)
|
||||||
|
}
|
||||||
|
|
||||||
|
prank := taxonomy.ranks.Innerize(rank)
|
||||||
|
|
||||||
|
n := &TaxNode{id, parentid, prank, nil, nil}
|
||||||
|
|
||||||
taxonomy.nodes.Insert(n)
|
taxonomy.nodes.Insert(n)
|
||||||
|
|
||||||
@ -197,18 +220,15 @@ func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Ta
|
|||||||
return nil, fmt.Errorf("trying to add alias %s already present in the taxonomy", newtaxid)
|
return nil, fmt.Errorf("trying to add alias %s already present in the taxonomy", newtaxid)
|
||||||
}
|
}
|
||||||
|
|
||||||
n := taxonomy.nodes.Get(oldid)
|
t := taxonomy.nodes.Get(oldid)
|
||||||
|
|
||||||
if n == nil {
|
if t == nil {
|
||||||
return nil, fmt.Errorf("trying to add alias %s to a taxon that does not exist", oldtaxid)
|
return nil, fmt.Errorf("trying to add alias %s to a taxon that does not exist", oldtaxid)
|
||||||
}
|
}
|
||||||
|
|
||||||
taxonomy.nodes.Alias(newid, n)
|
taxonomy.nodes.Alias(newid, t)
|
||||||
|
|
||||||
return &Taxon{
|
return t, nil
|
||||||
Taxonomy: taxonomy,
|
|
||||||
Node: n,
|
|
||||||
}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// RankList returns a slice of strings representing the ranks of the taxa
|
// RankList returns a slice of strings representing the ranks of the taxa
|
||||||
@ -221,19 +241,14 @@ func (taxonomy *Taxonomy) RankList() []string {
|
|||||||
return taxonomy.ranks.Slice()
|
return taxonomy.ranks.Slice()
|
||||||
}
|
}
|
||||||
|
|
||||||
// func (taxonomy *Taxonomy) Taxon(taxid int) (*TaxNode, error) {
|
func (taxonomy *Taxonomy) Index() *map[*string]*TaxonSet {
|
||||||
// t, ok := (*taxonomy.nodes)[taxid]
|
|
||||||
|
|
||||||
// if !ok {
|
|
||||||
// a, aok := taxonomy.alias[taxid]
|
|
||||||
// if !aok {
|
|
||||||
// return nil, fmt.Errorf("Taxid %d is not part of the taxonomy", taxid)
|
|
||||||
// }
|
|
||||||
// t = a
|
|
||||||
// }
|
|
||||||
// return t, nil
|
|
||||||
// }
|
|
||||||
|
|
||||||
func (taxonomy *Taxonomy) Index() *map[string]*TaxonSet {
|
|
||||||
return &(taxonomy.index)
|
return &(taxonomy.index)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (taxonomy *Taxonomy) Name() string {
|
||||||
|
return taxonomy.name
|
||||||
|
}
|
||||||
|
|
||||||
|
func (taxonomy *Taxonomy) Code() string {
|
||||||
|
return taxonomy.code
|
||||||
|
}
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
// Package obitax provides functionality for managing taxonomic data structures.
|
||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import log "github.com/sirupsen/logrus"
|
import log "github.com/sirupsen/logrus"
|
||||||
@ -7,25 +8,46 @@ import log "github.com/sirupsen/logrus"
|
|||||||
// as well as a reference to the associated Taxonomy.
|
// as well as a reference to the associated Taxonomy.
|
||||||
//
|
//
|
||||||
// Fields:
|
// Fields:
|
||||||
// - set: A map that associates taxon identifiers of type T with their corresponding TaxNode[T] instances.
|
// - set: A map that associates taxon identifiers of type *string with their corresponding TaxNode instances.
|
||||||
// - taxonomy: A pointer to the Taxonomy[T] instance that this TaxonSet belongs to.
|
// - nalias: The number of aliases in the TaxonSet.
|
||||||
|
// - taxonomy: A pointer to the Taxonomy instance that this TaxonSet belongs to.
|
||||||
type TaxonSet struct {
|
type TaxonSet struct {
|
||||||
set map[string]*TaxNode
|
set map[*string]*TaxNode
|
||||||
nalias int
|
nalias int
|
||||||
taxonomy *Taxonomy
|
taxonomy *Taxonomy
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get retrieves the TaxNode[T] associated with the specified taxon identifier.
|
func (taxonomy *Taxonomy) NewTaxonSet() *TaxonSet {
|
||||||
|
return &TaxonSet{
|
||||||
|
set: make(map[*string]*TaxNode),
|
||||||
|
nalias: 0,
|
||||||
|
taxonomy: taxonomy,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get retrieves the TaxNode associated with the specified taxon identifier.
|
||||||
// It returns the TaxNode if it exists in the TaxonSet; otherwise, it returns nil.
|
// It returns the TaxNode if it exists in the TaxonSet; otherwise, it returns nil.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
// - i: The taxon identifier of type T for which the TaxNode is to be retrieved.
|
// - id: A pointer to the taxon identifier for which the TaxNode is to be retrieved.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to the TaxNode[T] associated with the provided identifier, or nil
|
// - A pointer to the TaxNode associated with the provided identifier, or nil
|
||||||
// if no such taxon exists in the set.
|
// if no such taxon exists in the set.
|
||||||
func (set *TaxonSet) Get(i string) *TaxNode {
|
func (set *TaxonSet) Get(id *string) *Taxon {
|
||||||
return set.set[i]
|
if set == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
node := set.set[id]
|
||||||
|
if node == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Taxon{
|
||||||
|
Taxonomy: set.taxonomy,
|
||||||
|
Node: set.set[id],
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Len returns the number of unique taxa in the TaxonSet.
|
// Len returns the number of unique taxa in the TaxonSet.
|
||||||
@ -38,27 +60,37 @@ func (set *TaxonSet) Len() int {
|
|||||||
return len(set.set) - set.nalias
|
return len(set.set) - set.nalias
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert adds a TaxNode[T] to the TaxonSet. If a taxon with the same identifier
|
// Insert adds a TaxNode to the TaxonSet. If a taxon with the same identifier
|
||||||
// already exists in the set, it updates the reference. If the existing taxon was
|
// already exists in the set, it updates the reference. If the existing taxon was
|
||||||
// an alias, its alias count is decremented.
|
// an alias, its alias count is decremented.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
// - taxon: A pointer to the TaxNode[T] instance to be added to the TaxonSet.
|
// - taxon: A pointer to the TaxNode instance to be added to the TaxonSet.
|
||||||
//
|
//
|
||||||
// Behavior:
|
// Behavior:
|
||||||
// - If a taxon with the same identifier already exists and is different from the
|
// - If a taxon with the same identifier already exists and is different from the
|
||||||
// new taxon, the alias count is decremented.
|
// new taxon, the alias count is decremented.
|
||||||
func (set *TaxonSet) Insert(taxon *TaxNode) {
|
func (set *TaxonSet) Insert(node *TaxNode) {
|
||||||
if old := set.set[taxon.id]; old != nil && old.id != taxon.id {
|
if old := set.set[node.id]; old != nil && old.id != node.id {
|
||||||
set.nalias--
|
set.nalias--
|
||||||
}
|
}
|
||||||
set.set[taxon.id] = taxon
|
set.set[node.id] = node
|
||||||
}
|
}
|
||||||
|
|
||||||
// Taxonomy returns a pointer to the Taxonomy[T] instance that this TaxonSet belongs to.
|
func (set *TaxonSet) InsertTaxon(taxon *Taxon) {
|
||||||
|
if set.taxonomy != taxon.Taxonomy {
|
||||||
|
log.Fatalf(
|
||||||
|
"Cannot insert taxon %s into taxon set belonging %s taxonomy",
|
||||||
|
taxon.String(),
|
||||||
|
set.taxonomy.name,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Taxonomy returns a pointer to the Taxonomy instance that this TaxonSet belongs to.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to the Taxonomy[T] instance that this TaxonSet belongs to
|
// - A pointer to the Taxonomy instance that this TaxonSet belongs to.
|
||||||
func (set *TaxonSet) Taxonomy() *Taxonomy {
|
func (set *TaxonSet) Taxonomy() *Taxonomy {
|
||||||
return set.taxonomy
|
return set.taxonomy
|
||||||
}
|
}
|
||||||
@ -68,18 +100,18 @@ func (set *TaxonSet) Taxonomy() *Taxonomy {
|
|||||||
// If the original taxon is not part of the taxon set, it logs a fatal error and terminates the program.
|
// If the original taxon is not part of the taxon set, it logs a fatal error and terminates the program.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
// - alias: A string representing the alias to be associated with the taxon node.
|
// - alias: A pointer to a string representing the alias to be associated with the taxon node.
|
||||||
// - node: A pointer to the TaxNode[T] instance that the alias will refer to.
|
// - node: A pointer to the TaxNode instance that the alias will refer to.
|
||||||
//
|
//
|
||||||
// Behavior:
|
// Behavior:
|
||||||
// - If the original taxon corresponding to the alias is not part of the taxon set,
|
// - If the original taxon corresponding to the alias is not part of the taxon set,
|
||||||
// the method will log a fatal error and terminate the program.
|
// the method will log a fatal error and terminate the program.
|
||||||
func (set *TaxonSet) Alias(id string, node *TaxNode) {
|
func (set *TaxonSet) Alias(id *string, taxon *Taxon) {
|
||||||
original := set.Get(node.id)
|
original := set.Get(taxon.Node.id)
|
||||||
if original != nil {
|
if original == nil {
|
||||||
log.Fatalf("Original taxon %v is not part of taxon set", id)
|
log.Fatalf("Original taxon %v is not part of taxon set", id)
|
||||||
}
|
}
|
||||||
set.set[id] = node
|
set.set[id] = taxon.Node
|
||||||
set.nalias++
|
set.nalias++
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,39 +120,39 @@ func (set *TaxonSet) Alias(id string, node *TaxNode) {
|
|||||||
// node exists and its identifier is different from the provided identifier; otherwise, it returns false.
|
// node exists and its identifier is different from the provided identifier; otherwise, it returns false.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
// - id: The identifier of type T to be checked for alias status.
|
// - id: A pointer to the identifier to be checked for alias status.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - A boolean indicating whether the identifier corresponds to an alias in the set.
|
// - A boolean indicating whether the identifier corresponds to an alias in the set.
|
||||||
func (set *TaxonSet) IsAlias(id string) bool {
|
func (set *TaxonSet) IsAlias(id *string) bool {
|
||||||
node := set.Get(id)
|
taxon := set.Get(id)
|
||||||
return node != nil && node.id != id
|
return taxon != nil && taxon.Node.id != id
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsATaxon checks if the given ID corresponds to a valid taxon node in the TaxonSet.
|
// IsATaxon checks if the given ID corresponds to a valid taxon node in the TaxonSet.
|
||||||
// It returns true if the node exists and its ID matches the provided ID; otherwise, it returns false.
|
// It returns true if the node exists and its ID matches the provided ID; otherwise, it returns false.
|
||||||
// id corresponding to alias returns false.
|
// If the ID corresponds to an alias, it will return false.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
// - id: The identifier of the taxon to check.
|
// - id: A pointer to the identifier of the taxon to check.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - A boolean indicating whether the specified ID corresponds to a valid taxon node.
|
// - A boolean indicating whether the specified ID corresponds to a valid taxon node.
|
||||||
func (set *TaxonSet) IsATaxon(id string) bool {
|
func (set *TaxonSet) IsATaxon(id *string) bool {
|
||||||
node := set.Get(id)
|
taxon := set.Get(id)
|
||||||
return node != nil && node.id == id
|
return taxon != nil && taxon.Node.id == id
|
||||||
}
|
}
|
||||||
|
|
||||||
// Contains checks if the TaxonSet contains a taxon node with the specified ID.
|
// Contains checks if the TaxonSet contains a taxon node with the specified ID.
|
||||||
// It returns true if the node exists in the set; otherwise, it returns false.
|
// It returns true if the node exists in the set; otherwise, it returns false.
|
||||||
// id corresponding to alias or true taxa returns true.
|
// If the ID corresponds to an alias, it will return true if the alias exists.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
// - id: The identifier of the taxon to check for presence in the set.
|
// - id: A pointer to the identifier of the taxon to check for presence in the set.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
// - A boolean indicating whether the TaxonSet contains a taxon node with the specified ID.
|
// - A boolean indicating whether the TaxonSet contains a taxon node with the specified ID.
|
||||||
func (set *TaxonSet) Contains(id string) bool {
|
func (set *TaxonSet) Contains(id *string) bool {
|
||||||
node := set.Get(id)
|
node := set.Get(id)
|
||||||
return node != nil
|
return node != nil
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,8 @@ package obitax
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TaxonSlice represents a slice of TaxNode[T] instances within a taxonomy.
|
// TaxonSlice represents a slice of TaxNode[T] instances within a taxonomy.
|
||||||
@ -16,6 +18,13 @@ type TaxonSlice struct {
|
|||||||
taxonomy *Taxonomy
|
taxonomy *Taxonomy
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (taxonomy *Taxonomy) NewTaxonSlice(size, capacity int) *TaxonSlice {
|
||||||
|
return &TaxonSlice{
|
||||||
|
slice: make([]*TaxNode, size, capacity),
|
||||||
|
taxonomy: taxonomy,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Get retrieves the TaxNode[T] at the specified index from the TaxonSlice.
|
// Get retrieves the TaxNode[T] at the specified index from the TaxonSlice.
|
||||||
// It returns the taxon node corresponding to the provided index.
|
// It returns the taxon node corresponding to the provided index.
|
||||||
//
|
//
|
||||||
@ -25,6 +34,9 @@ type TaxonSlice struct {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to the TaxNode[T] at the specified index in the slice.
|
// - A pointer to the TaxNode[T] at the specified index in the slice.
|
||||||
func (slice *TaxonSlice) Get(i int) *TaxNode {
|
func (slice *TaxonSlice) Get(i int) *TaxNode {
|
||||||
|
if slice == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return slice.slice[i]
|
return slice.slice[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -34,6 +46,9 @@ func (slice *TaxonSlice) Get(i int) *TaxNode {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - An integer representing the total number of taxon nodes in the TaxonSlice.
|
// - An integer representing the total number of taxon nodes in the TaxonSlice.
|
||||||
func (slice *TaxonSlice) Len() int {
|
func (slice *TaxonSlice) Len() int {
|
||||||
|
if slice == nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
return len(slice.slice)
|
return len(slice.slice)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -65,3 +80,19 @@ func (path *TaxonSlice) String() string {
|
|||||||
|
|
||||||
return buffer.String()
|
return buffer.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (slice *TaxonSlice) Reverse(inplace bool) *TaxonSlice {
|
||||||
|
if slice == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
rep := obiutils.Reverse(slice.slice, inplace)
|
||||||
|
if inplace {
|
||||||
|
return slice
|
||||||
|
}
|
||||||
|
|
||||||
|
return &TaxonSlice{
|
||||||
|
taxonomy: slice.taxonomy,
|
||||||
|
slice: rep,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -7,13 +7,13 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
)
|
)
|
||||||
|
|
||||||
func IFilterRankRestriction() func(*obitax.ITaxonSet) *obitax.ITaxonSet {
|
func IFilterRankRestriction() func(*obitax.ITaxon) *obitax.ITaxon {
|
||||||
f := func(s *obitax.ITaxonSet) *obitax.ITaxonSet {
|
f := func(s *obitax.ITaxon) *obitax.ITaxon {
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
if __restrict_rank__ != "" {
|
if __restrict_rank__ != "" {
|
||||||
f = func(s *obitax.ITaxonSet) *obitax.ITaxonSet {
|
f = func(s *obitax.ITaxon) *obitax.ITaxon {
|
||||||
return s.IFilterOnTaxRank(__restrict_rank__)
|
return s.IFilterOnTaxRank(__restrict_rank__)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -21,21 +21,21 @@ func IFilterRankRestriction() func(*obitax.ITaxonSet) *obitax.ITaxonSet {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func ITaxonNameMatcher() (func(string) *obitax.ITaxonSet, error) {
|
func ITaxonNameMatcher() (func(string) *obitax.ITaxon, error) {
|
||||||
taxonomy, err := CLILoadSelectedTaxonomy()
|
taxonomy, err := CLILoadSelectedTaxonomy()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
fun := func(name string) *obitax.ITaxonSet {
|
fun := func(name string) *obitax.ITaxon {
|
||||||
return taxonomy.IFilterOnName(name, __fixed_pattern__)
|
return taxonomy.IFilterOnName(name, __fixed_pattern__)
|
||||||
}
|
}
|
||||||
|
|
||||||
return fun, nil
|
return fun, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ITaxonRestrictions() (func(*obitax.ITaxonSet) *obitax.ITaxonSet, error) {
|
func ITaxonRestrictions() (func(*obitax.ITaxon) *obitax.ITaxon, error) {
|
||||||
|
|
||||||
clades, err := CLITaxonomicalRestrictions()
|
clades, err := CLITaxonomicalRestrictions()
|
||||||
|
|
||||||
@ -45,23 +45,19 @@ func ITaxonRestrictions() (func(*obitax.ITaxonSet) *obitax.ITaxonSet, error) {
|
|||||||
|
|
||||||
rankfilter := IFilterRankRestriction()
|
rankfilter := IFilterRankRestriction()
|
||||||
|
|
||||||
fun := func(iterator *obitax.ITaxonSet) *obitax.ITaxonSet {
|
fun := func(iterator *obitax.ITaxon) *obitax.ITaxon {
|
||||||
return rankfilter(iterator).IFilterBelongingSubclades(clades)
|
return rankfilter(iterator).IFilterBelongingSubclades(clades)
|
||||||
}
|
}
|
||||||
|
|
||||||
return fun, nil
|
return fun, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func TaxonAsString(taxon *obitax.TaxNode, pattern string) string {
|
func TaxonAsString(taxon *obitax.Taxon, pattern string) string {
|
||||||
text := taxon.ScientificName()
|
text := taxon.ScientificName()
|
||||||
|
|
||||||
if __with_path__ {
|
if __with_path__ {
|
||||||
var bf bytes.Buffer
|
var bf bytes.Buffer
|
||||||
path, err := taxon.Path()
|
path := taxon.Path()
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
fmt.Printf("%+v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
bf.WriteString(path.Get(path.Len() - 1).ScientificName())
|
bf.WriteString(path.Get(path.Len() - 1).ScientificName())
|
||||||
|
|
||||||
@ -72,15 +68,15 @@ func TaxonAsString(taxon *obitax.TaxNode, pattern string) string {
|
|||||||
text = bf.String()
|
text = bf.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
return fmt.Sprintf("%-20s | %10d | %10d | %-20s | %s",
|
return fmt.Sprintf("%-20s | %10s | %10s | %-20s | %s",
|
||||||
pattern,
|
pattern,
|
||||||
taxon.Taxid(),
|
taxon.String(),
|
||||||
taxon.Parent().Taxid(),
|
taxon.Parent().String(),
|
||||||
taxon.Rank(),
|
taxon.Rank(),
|
||||||
text)
|
text)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TaxonWriter(itaxa *obitax.ITaxonSet, pattern string) {
|
func TaxonWriter(itaxa *obitax.ITaxon, pattern string) {
|
||||||
for itaxa.Next() {
|
for itaxa.Next() {
|
||||||
fmt.Println(TaxonAsString(itaxa.Get(), pattern))
|
fmt.Println(TaxonAsString(itaxa.Get(), pattern))
|
||||||
}
|
}
|
||||||
|
@ -12,12 +12,12 @@ var __taxdump__ = ""
|
|||||||
var __alternative_name__ = false
|
var __alternative_name__ = false
|
||||||
var __rank_list__ = false
|
var __rank_list__ = false
|
||||||
var __selected_taxonomy__ = (*obitax.Taxonomy)(nil)
|
var __selected_taxonomy__ = (*obitax.Taxonomy)(nil)
|
||||||
var __taxonomical_restriction__ = make([]int, 0)
|
var __taxonomical_restriction__ = make([]string, 0)
|
||||||
|
|
||||||
var __fixed_pattern__ = false
|
var __fixed_pattern__ = false
|
||||||
var __with_path__ = false
|
var __with_path__ = false
|
||||||
var __taxid_path__ = -1
|
var __taxid_path__ = "NA"
|
||||||
var __taxid_sons__ = -1
|
var __taxid_sons__ = "NA"
|
||||||
var __restrict_rank__ = ""
|
var __restrict_rank__ = ""
|
||||||
|
|
||||||
func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bool) {
|
func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bool) {
|
||||||
@ -43,7 +43,7 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.Alias("l"),
|
options.Alias("l"),
|
||||||
options.Description("List every taxonomic rank available in the taxonomy."))
|
options.Description("List every taxonomic rank available in the taxonomy."))
|
||||||
|
|
||||||
options.IntSliceVar(&__taxonomical_restriction__, "restrict-to-taxon", 1, 1,
|
options.StringSliceVar(&__taxonomical_restriction__, "restrict-to-taxon", 1, 1,
|
||||||
options.Alias("r"),
|
options.Alias("r"),
|
||||||
options.Description("Restrict output to some subclades."))
|
options.Description("Restrict output to some subclades."))
|
||||||
}
|
}
|
||||||
@ -67,18 +67,18 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
ts := make(obitax.TaxonSet)
|
ts := taxonomy.NewTaxonSet()
|
||||||
for _, taxid := range __taxonomical_restriction__ {
|
for _, taxid := range __taxonomical_restriction__ {
|
||||||
tx, err := taxonomy.Taxon(taxid)
|
tx := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
ts.Inserts(tx)
|
ts.InsertTaxon(tx)
|
||||||
}
|
}
|
||||||
|
|
||||||
return &ts, nil
|
return ts, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLILoadSelectedTaxonomy() (*obitax.Taxonomy, error) {
|
func CLILoadSelectedTaxonomy() (*obitax.Taxonomy, error) {
|
||||||
@ -106,17 +106,17 @@ func OptionSet(options *getoptions.GetOpt) {
|
|||||||
options.BoolVar(&__with_path__, "with-path", false,
|
options.BoolVar(&__with_path__, "with-path", false,
|
||||||
options.Alias("P"),
|
options.Alias("P"),
|
||||||
options.Description("Adds a column containing the full path for each displayed taxon."))
|
options.Description("Adds a column containing the full path for each displayed taxon."))
|
||||||
options.IntVar(&__taxid_path__, "parents", -1,
|
options.StringVar(&__taxid_path__, "parents", "NA",
|
||||||
options.Alias("p"),
|
options.Alias("p"),
|
||||||
options.Description("Displays every parental tree's information for the provided taxid."))
|
options.Description("Displays every parental tree's information for the provided taxid."))
|
||||||
options.StringVar(&__restrict_rank__, "rank", "",
|
options.StringVar(&__restrict_rank__, "rank", "",
|
||||||
options.Description("Restrict to the given taxonomic rank."))
|
options.Description("Restrict to the given taxonomic rank."))
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLIRequestsPathForTaxid() int {
|
func CLIRequestsPathForTaxid() string {
|
||||||
return __taxid_path__
|
return __taxid_path__
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLIRequestsSonsForTaxid() int {
|
func CLIRequestsSonsForTaxid() string {
|
||||||
return __taxid_sons__
|
return __taxid_sons__
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user