mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Changes to be committed:
modified: cmd/obitools/obitag/main.go modified: cmd/obitools/obitag2/main.go modified: go.mod modified: go.sum modified: pkg/obiformats/ncbitaxdump/read.go modified: pkg/obioptions/version.go modified: pkg/obiseq/attributes.go modified: pkg/obiseq/taxonomy_lca.go modified: pkg/obiseq/taxonomy_methods.go modified: pkg/obiseq/taxonomy_predicate.go modified: pkg/obitax/inner.go modified: pkg/obitax/lca.go new file: pkg/obitax/taxid.go modified: pkg/obitax/taxon.go modified: pkg/obitax/taxonomy.go modified: pkg/obitax/taxonslice.go modified: pkg/obitools/obicleandb/obicleandb.go modified: pkg/obitools/obigrep/options.go modified: pkg/obitools/obilandmark/obilandmark.go modified: pkg/obitools/obilandmark/options.go modified: pkg/obitools/obirefidx/famlilyindexing.go modified: pkg/obitools/obirefidx/geomindexing.go modified: pkg/obitools/obirefidx/obirefidx.go modified: pkg/obitools/obirefidx/options.go modified: pkg/obitools/obitag/obigeomtag.go modified: pkg/obitools/obitag/obitag.go modified: pkg/obitools/obitag/options.go modified: pkg/obiutils/strings.go
This commit is contained in:
@ -1,6 +1,9 @@
|
||||
package obitax
|
||||
|
||||
import "sync"
|
||||
import (
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// InnerString is a struct that holds a map of strings and a read-write lock for concurrent access.
|
||||
// The index map is used to store key-value pairs of strings.
|
||||
@ -31,10 +34,10 @@ func (i *InnerString) Innerize(value string) *string {
|
||||
defer i.lock.Unlock()
|
||||
s, ok := i.index[value]
|
||||
if !ok {
|
||||
value = strings.Clone(value)
|
||||
s = &value
|
||||
i.index[value] = s
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
|
@ -16,11 +16,24 @@ import (
|
||||
// if either of the taxa is nil, if they are not in the same taxonomy, or
|
||||
// if the taxonomy is unrooted.
|
||||
func (t1 *Taxon) LCA(t2 *Taxon) (*Taxon, error) {
|
||||
if t1 == nil || t1.Node == nil {
|
||||
return nil, fmt.Errorf("try to get LCA of nil taxon")
|
||||
if t1 == nil && t2 != nil {
|
||||
return t2, nil
|
||||
}
|
||||
|
||||
if t2 == nil || t2.Node == nil {
|
||||
if t2 == nil && t1 != nil {
|
||||
|
||||
return t1, nil
|
||||
}
|
||||
|
||||
if t1 == nil && t2 == nil {
|
||||
return nil, fmt.Errorf("try to get LCA of nil taxa")
|
||||
}
|
||||
|
||||
if t1.Node == nil {
|
||||
return nil, fmt.Errorf("try to get LCA of nil taxa")
|
||||
}
|
||||
|
||||
if t2.Node == nil {
|
||||
return nil, fmt.Errorf("try to get LCA of nil taxon")
|
||||
}
|
||||
|
||||
|
60
pkg/obitax/taxid.go
Normal file
60
pkg/obitax/taxid.go
Normal file
@ -0,0 +1,60 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
// Taxid represents a taxonomic identifier as a pointer to a string.
// The TaxidFactory methods in this file build Taxid values from the
// pointer returned by InnerString.Innerize.
|
||||
type Taxid *string
|
||||
|
||||
// TaxidFactory is a factory for creating Taxid instances from strings and integers.
// Every identifier it produces goes through the same InnerString instance.
|
||||
type TaxidFactory struct {
|
||||
// inner is the InnerString used by FromString and FromInt to innerize identifiers.
inner *InnerString
|
||||
// code is the taxonomy code followed by ':' (NewTaxidFactory appends the colon).
code string
|
||||
// alphabet is the AsciiSet whose FirstWord method FromString applies to its input.
alphabet obiutils.AsciiSet
|
||||
}
|
||||
|
||||
// NewTaxidFactory creates and returns a new instance of TaxidFactory.
|
||||
func NewTaxidFactory(code string, alphabet obiutils.AsciiSet) *TaxidFactory {
|
||||
return &TaxidFactory{
|
||||
inner: NewInnerString(),
|
||||
code: code + ":",
|
||||
alphabet: alphabet,
|
||||
}
|
||||
// Initialize and return a new TaxidFactory.
|
||||
}
|
||||
|
||||
// FromString converts a string representation of a taxonomic identifier into a Taxid.
|
||||
// It extracts the relevant part of the string after the first colon (':') if present.
|
||||
func (f *TaxidFactory) FromString(taxid string) (Taxid, error) {
|
||||
taxid = obiutils.AsciiSpaceSet.TrimLeft(taxid)
|
||||
part1, part2 := obiutils.SplitInTwo(taxid, ':')
|
||||
if len(part2) == 0 {
|
||||
taxid = part1
|
||||
} else {
|
||||
if part1 != f.code {
|
||||
return nil, fmt.Errorf("taxid %s string does not start with taxonomy code %s", taxid, f.code)
|
||||
}
|
||||
taxid = part2
|
||||
}
|
||||
|
||||
taxid, err := f.alphabet.FirstWord(taxid) // Get the first word from the input string.
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Return a new Taxid by innerizing the extracted taxid string.
|
||||
rep := Taxid(f.inner.Innerize(taxid))
|
||||
return rep, nil
|
||||
}
|
||||
|
||||
// FromInt converts an integer taxonomic identifier into a Taxid.
|
||||
// It first converts the integer to a string and then innerizes it.
|
||||
func (f *TaxidFactory) FromInt(taxid int) (Taxid, error) {
|
||||
s := strconv.Itoa(taxid) // Convert the integer to a string.
|
||||
return f.inner.Innerize(s), nil // Return a new Taxid by innerizing the string.
|
||||
}
|
@ -179,9 +179,9 @@ func (taxon *Taxon) IPath() iter.Seq[*Taxon] {
|
||||
}
|
||||
}
|
||||
|
||||
// Path returns a slice of TaxNode representing the path from the current Taxon
|
||||
// to the root Taxon in the associated Taxonomy. It collects all the nodes in the path
|
||||
// using the IPath method and returns them as a TaxonSlice.
|
||||
// Path returns a slice of TaxNode representing the path from the current Taxon.
|
||||
// The first element of the slice is the current Taxon, and the last element is the
|
||||
// root Taxon in the associated Taxonomy.
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to a TaxonSlice containing the TaxNode instances in the path
|
||||
@ -371,3 +371,11 @@ func (taxon *Taxon) MetadataStringValues() []string {
|
||||
}
|
||||
return values
|
||||
}
|
||||
|
||||
func (taxon *Taxon) SameAs(other *Taxon) bool {
|
||||
if taxon == nil || other == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return taxon.Taxonomy == other.Taxonomy && taxon.Node.id == other.Node.id
|
||||
}
|
||||
|
@ -8,9 +8,10 @@ and retrieving information about taxa.
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
@ -32,13 +33,12 @@ import (
|
||||
type Taxonomy struct {
|
||||
name string
|
||||
code string
|
||||
ids *InnerString
|
||||
ids *TaxidFactory
|
||||
ranks *InnerString
|
||||
nameclasses *InnerString
|
||||
names *InnerString
|
||||
nodes *TaxonSet
|
||||
root *TaxNode
|
||||
matcher *regexp.Regexp
|
||||
index map[*string]*TaxonSet
|
||||
}
|
||||
|
||||
@ -52,21 +52,18 @@ type Taxonomy struct {
|
||||
//
|
||||
// Returns:
|
||||
// - A pointer to the newly created Taxonomy instance.
|
||||
func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
|
||||
func NewTaxonomy(name, code string, codeCharacters obiutils.AsciiSet) *Taxonomy {
|
||||
set := make(map[*string]*TaxNode)
|
||||
|
||||
matcher := regexp.MustCompile(fmt.Sprintf("^[[:blank:]]*(%s:)?(%s+)", code, codeCharacters))
|
||||
|
||||
taxonomy := &Taxonomy{
|
||||
name: name,
|
||||
code: code,
|
||||
ids: NewInnerString(),
|
||||
ids: NewTaxidFactory(code, codeCharacters),
|
||||
ranks: NewInnerString(),
|
||||
nameclasses: NewInnerString(),
|
||||
names: NewInnerString(),
|
||||
nodes: &TaxonSet{set: set},
|
||||
root: nil,
|
||||
matcher: matcher,
|
||||
index: make(map[*string]*TaxonSet),
|
||||
}
|
||||
|
||||
@ -85,23 +82,17 @@ func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
|
||||
// Returns:
|
||||
// - The taxon identifier as a *string corresponding to the provided taxid.
|
||||
// - An error if the taxid is not valid or cannot be converted.
|
||||
func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
|
||||
func (taxonomy *Taxonomy) Id(taxid string) (Taxid, error) {
|
||||
taxonomy = taxonomy.OrDefault(false)
|
||||
|
||||
if taxonomy == nil {
|
||||
return nil, fmt.Errorf("Cannot extract Id from nil Taxonomy")
|
||||
return nil, errors.New("Cannot extract Id from nil Taxonomy")
|
||||
}
|
||||
|
||||
matches := taxonomy.matcher.FindStringSubmatch(taxid)
|
||||
|
||||
if matches == nil {
|
||||
return nil, fmt.Errorf("taxid %s is not a valid taxid", taxid)
|
||||
}
|
||||
|
||||
return taxonomy.ids.Innerize(matches[2]), nil
|
||||
return taxonomy.ids.FromString(taxid)
|
||||
}
|
||||
|
||||
// TaxidSting retrieves the string representation of a taxon node identified by the given ID.
|
||||
// TaxidString retrieves the string representation of a taxon node identified by the given ID.
|
||||
// It looks up the node in the taxonomy and returns its formatted string representation
|
||||
// along with the taxonomy code. If the node does not exist, it returns an error.
|
||||
//
|
||||
@ -111,7 +102,7 @@ func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
|
||||
// Returns:
|
||||
// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]",
|
||||
// or an error if the taxon node with the specified ID does not exist in the taxonomy.
|
||||
func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
|
||||
func (taxonomy *Taxonomy) TaxidString(id string) (string, error) {
|
||||
taxonomy = taxonomy.OrDefault(false)
|
||||
|
||||
pid, err := taxonomy.Id(id)
|
||||
|
@ -13,6 +13,7 @@ package obitax
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
@ -59,6 +60,16 @@ func (slice *TaxonSlice) Get(i int) *TaxNode {
|
||||
return slice.slice[i]
|
||||
}
|
||||
|
||||
func (slice *TaxonSlice) Taxon(i int) *Taxon {
|
||||
if slice == nil {
|
||||
return nil
|
||||
}
|
||||
return &Taxon{
|
||||
Node: slice.slice[i],
|
||||
Taxonomy: slice.taxonomy,
|
||||
}
|
||||
}
|
||||
|
||||
// Len returns the number of TaxNode instances in the TaxonSlice.
|
||||
// It provides the count of taxon nodes contained within the slice.
|
||||
//
|
||||
@ -124,3 +135,13 @@ func (slice *TaxonSlice) Reverse(inplace bool) *TaxonSlice {
|
||||
slice: rep,
|
||||
}
|
||||
}
|
||||
|
||||
func (slice *TaxonSlice) Set(index int, taxon *Taxon) *TaxonSlice {
|
||||
if slice.taxonomy != taxon.Taxonomy {
|
||||
log.Panic("Cannot add taxon from a different taxonomy")
|
||||
}
|
||||
|
||||
slice.slice[index] = taxon.Node
|
||||
|
||||
return slice
|
||||
}
|
||||
|
Reference in New Issue
Block a user