Changes to be committed:

modified:   .gitignore
	new file:   pkg/obitax/default_taxonomy.go
	modified:   pkg/obitax/taxon.go
	modified:   pkg/obitax/taxonnode.go
	modified:   pkg/obitax/taxonomy.go
	modified:   pkg/obitax/taxonset.go
	modified:   pkg/obitax/taxonslice.go
	modified:   pkg/obitools/obifind/iterator.go
	modified:   pkg/obitools/obifind/options.go
This commit is contained in:
Eric Coissac
2024-11-16 10:01:49 +01:00
parent f3d8707c08
commit 36327c79c8
9 changed files with 153 additions and 87 deletions

9
.gitignore vendored
View File

@ -118,3 +118,12 @@ doc/book/wolf_data/Release-253/ncbitaxo/readme.txt
doc/book/results/toto.tasta
sample/.DS_Store
GO
ncbitaxo/citations.dmp
ncbitaxo/delnodes.dmp
ncbitaxo/division.dmp
ncbitaxo/gc.prt
ncbitaxo/gencode.dmp
ncbitaxo/merged.dmp
ncbitaxo/names.dmp
ncbitaxo/nodes.dmp
ncbitaxo/readme.txt

View File

@ -0,0 +1,29 @@
package obitax
import "log"
// __defaut_taxonomy__ holds the package-wide default taxonomy.
// It starts out nil and is assigned by SetAsDefault.
var __defaut_taxonomy__ *Taxonomy
// SetAsDefault registers the receiver as the package-wide default taxonomy.
//
// The receiver is stored as-is, so calling the method on a nil *Taxonomy
// resets the default to nil.
func (taxonomy *Taxonomy) SetAsDefault() {
__defaut_taxonomy__ = taxonomy
}
// OrDefault resolves the taxonomy a method should work on.
//
// It returns the receiver when it is non-nil and falls back to the
// package-wide default taxonomy otherwise.
//
// Parameters:
//   - panicOnNil: when true, the method panics (through log.Panic) if neither
//     the receiver nor the default taxonomy is available.
//
// Returns:
//   - The receiver, the default taxonomy, or nil when both are nil and
//     panicOnNil is false.
func (taxonomy *Taxonomy) OrDefault(panicOnNil bool) *Taxonomy {
	if taxonomy == nil {
		// Fall back to the default instead of returning right away, so that
		// the panicOnNil check below also covers an undefined default.
		taxonomy = __defaut_taxonomy__
	}

	if panicOnNil && taxonomy == nil {
		log.Panic("Cannot deal with nil taxonomy")
	}

	return taxonomy
}
func IsDefaultTaxonomyDefined() bool {
return __defaut_taxonomy__ != nil
}
// DefaultTaxonomy returns the package-wide default taxonomy.
//
// Returns:
//   - The taxonomy stored in the package-level default, or nil when none
//     has been assigned.
func DefaultTaxonomy() *Taxonomy {
return __defaut_taxonomy__
}

View File

@ -30,6 +30,10 @@ func (taxon *Taxon) String() string {
return taxon.Node.String(taxon.Taxonomy.code)
}
// HasScientificName reports whether the taxon carries a scientific name.
//
// Returns:
//   - false for a nil Taxon; otherwise the answer given by the underlying
//     TaxNode's HasScientificName method.
func (taxon *Taxon) HasScientificName() bool {
	if taxon == nil {
		return false
	}

	return taxon.Node.HasScientificName()
}
// ScientificName returns the scientific name of the Taxon.
// It retrieves the scientific name from the underlying TaxNode associated with the taxon.
//

View File

@ -35,10 +35,17 @@ type TaxNode struct {
// Returns:
// - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]",
//   or "taxonomyCode:id" when the node has no scientific name.
func (node *TaxNode) String(taxonomyCode string) string {
return fmt.Sprintf("%s:%v [%s]",
if node.HasScientificName() {
return fmt.Sprintf("%s:%v [%s]",
taxonomyCode,
*node.id,
node.ScientificName())
}
return fmt.Sprintf("%s:%v",
taxonomyCode,
*node.id,
node.ScientificName())
*node.id)
}
// Id returns the unique identifier of the TaxNode.
@ -59,6 +66,10 @@ func (node *TaxNode) ParentId() *string {
return node.parent
}
// HasScientificName reports whether a scientific name is attached to the node.
//
// Returns:
//   - false for a nil TaxNode or when its scientificname pointer is nil,
//     true otherwise.
func (node *TaxNode) HasScientificName() bool {
	if node == nil {
		return false
	}

	return node.scientificname != nil
}
// ScientificName returns the scientific name of the TaxNode.
// It dereferences the pointer to the scientific name string associated with the taxon node.
//

View File

@ -86,6 +86,12 @@ func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
// - The taxon identifier as a *string corresponding to the provided taxid.
// - An error if the taxid is not valid or cannot be converted.
func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
taxonomy = taxonomy.OrDefault(false)
if taxonomy == nil {
return nil, fmt.Errorf("Cannot extract Id from nil Taxonomy")
}
matches := taxonomy.matcher.FindStringSubmatch(taxid)
if matches == nil {
@ -106,6 +112,8 @@ func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]",
// or an error if the taxon node with the specified ID does not exist in the taxonomy.
func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
taxonomy = taxonomy.OrDefault(false)
pid, err := taxonomy.Id(id)
if err != nil {
@ -132,6 +140,8 @@ func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
// - A pointer to the Taxon instance associated with the provided taxid.
// - If the taxid is unknown, the method will log a fatal error.
func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
taxonomy = taxonomy.OrDefault(false)
id, err := taxonomy.Id(taxid)
if err != nil {
@ -155,6 +165,12 @@ func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
// Returns:
// - A pointer to the TaxonSet representing the collection of taxon nodes in the taxonomy.
func (taxonomy *Taxonomy) AsTaxonSet() *TaxonSet {
	// A nil receiver is resolved to the default taxonomy; when no taxonomy is
	// available at all there is no node set to expose.
	if resolved := taxonomy.OrDefault(false); resolved != nil {
		return resolved.nodes
	}

	return nil
}
@ -164,6 +180,12 @@ func (taxonomy *Taxonomy) AsTaxonSet() *TaxonSet {
// Returns:
// - An integer representing the total count of taxa in the taxonomy.
func (taxonomy *Taxonomy) Len() int {
	// A nil receiver is resolved to the default taxonomy; with no taxonomy
	// available the count is zero.
	if resolved := taxonomy.OrDefault(false); resolved != nil {
		return resolved.nodes.Len()
	}

	return 0
}
@ -181,14 +203,14 @@ func (taxonomy *Taxonomy) Len() int {
// - A pointer to the newly created Taxon instance.
// - An error if the taxon cannot be added (e.g., it already exists and replace is false).
func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) {
taxonomy = taxonomy.OrDefault(false)
parentid, perr := taxonomy.Id(parent)
id, err := taxonomy.Id(taxid)
if perr != nil {
return nil, fmt.Errorf("error in parsing parent taxid %s: %v", parent, perr)
}
id, err := taxonomy.Id(taxid)
if err != nil {
return nil, fmt.Errorf("error in parsing taxid %s: %v", taxid, err)
}
@ -228,6 +250,8 @@ func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot boo
// - A pointer to the Taxon associated with the oldtaxid.
// - An error if the alias cannot be added (e.g., the old taxon does not exist).
func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Taxon, error) {
taxonomy = taxonomy.OrDefault(false)
newid, err := taxonomy.Id(newtaxid)
if err != nil {
@ -261,6 +285,12 @@ func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Ta
// Returns:
// - A slice of strings containing the ranks of the taxa.
func (taxonomy *Taxonomy) RankList() []string {
	// A nil receiver is resolved to the default taxonomy.
	if resolved := taxonomy.OrDefault(false); resolved != nil {
		return resolved.ranks.Slice()
	}

	// No taxonomy at all: answer with an empty, non-nil slice.
	return []string{}
}
@ -270,6 +300,12 @@ func (taxonomy *Taxonomy) RankList() []string {
// Returns:
// - A pointer to the map that indexes taxa in the taxonomy.
func (taxonomy *Taxonomy) Index() *map[*string]*TaxonSet {
	// A nil receiver is resolved to the default taxonomy; the pointer handed
	// back addresses the index field of that taxonomy, not a copy.
	if resolved := taxonomy.OrDefault(false); resolved != nil {
		return &resolved.index
	}

	return nil
}
@ -278,6 +314,7 @@ func (taxonomy *Taxonomy) Index() *map[*string]*TaxonSet {
// Returns:
// - A string representing the name of the taxonomy.
func (taxonomy *Taxonomy) Name() string {
	// OrDefault is asked to panic when no taxonomy is available, so the
	// field access is done directly on its result.
	return taxonomy.OrDefault(true).name
}
@ -286,6 +323,7 @@ func (taxonomy *Taxonomy) Name() string {
// Returns:
// - A string representing the unique code of the taxonomy.
func (taxonomy *Taxonomy) Code() string {
	// OrDefault is asked to panic when no taxonomy is available, so the
	// field access is done directly on its result.
	return taxonomy.OrDefault(true).code
}
@ -295,6 +333,7 @@ func (taxonomy *Taxonomy) Code() string {
// Parameters:
// - root: A pointer to the Taxon instance to be set as the root.
func (taxonomy *Taxonomy) SetRoot(root *Taxon) {
	// Resolve the target first (receiver, else default taxonomy; OrDefault is
	// asked to panic when none is available), then record the node wrapped by
	// root as its root.
	target := taxonomy.OrDefault(true)
	target.root = root.Node
}
@ -304,6 +343,8 @@ func (taxonomy *Taxonomy) SetRoot(root *Taxon) {
// Returns:
// - A pointer to the Taxon instance representing the root of the taxonomy.
func (taxonomy *Taxonomy) Root() *Taxon {
taxonomy = taxonomy.OrDefault(true)
return &Taxon{
Taxonomy: taxonomy,
Node: taxonomy.root,
@ -315,5 +356,6 @@ func (taxonomy *Taxonomy) Root() *Taxon {
// Returns:
// - A boolean indicating whether the Taxonomy has a root node (true) or not (false).
func (taxonomy *Taxonomy) HasRoot() bool {
	// Resolve a nil receiver to the default taxonomy before touching any
	// field: reading taxonomy.root first would panic on a nil receiver and
	// would leave the nil-safe check below unreachable.
	taxonomy = taxonomy.OrDefault(false)

	return taxonomy != nil && taxonomy.root != nil
}

View File

@ -32,7 +32,7 @@ func (taxonomy *Taxonomy) NewTaxonSet() *TaxonSet {
return &TaxonSet{
set: make(map[*string]*TaxNode),
nalias: 0,
taxonomy: taxonomy,
taxonomy: taxonomy.OrDefault(true),
}
}
@ -68,6 +68,9 @@ func (set *TaxonSet) Get(id *string) *Taxon {
// Returns:
// - An integer representing the count of unique taxa in the TaxonSet.
func (set *TaxonSet) Len() int {
	// A nil set holds nothing.
	if set == nil {
		return 0
	}

	// The alias counter is subtracted from the number of map entries.
	entries := len(set.set)

	return entries - set.nalias
}
@ -81,11 +84,17 @@ func (set *TaxonSet) Len() int {
// Behavior:
// - If a taxon with the same identifier already exists and is different from the
// new taxon, the alias count is decremented.
func (set *TaxonSet) Insert(node *TaxNode) {
func (set *TaxonSet) Insert(node *TaxNode) *TaxonSet {
if set == nil {
log.Panic("Cannot insert node into nil TaxonSet")
}
if old := set.set[node.id]; old != nil && old.id != node.id {
set.nalias--
}
set.set[node.id] = node
return set
}
// InsertTaxon adds a Taxon to the TaxonSet. It verifies that the Taxon belongs
@ -94,7 +103,11 @@ func (set *TaxonSet) Insert(node *TaxNode) {
//
// Parameters:
// - taxon: A pointer to the Taxon instance to be added to the TaxonSet.
func (set *TaxonSet) InsertTaxon(taxon *Taxon) {
func (set *TaxonSet) InsertTaxon(taxon *Taxon) *TaxonSet {
if set == nil {
set = taxon.Taxonomy.NewTaxonSet()
}
if set.taxonomy != taxon.Taxonomy {
log.Fatalf(
"Cannot insert taxon %s into taxon set belonging to %s taxonomy",
@ -102,6 +115,8 @@ func (set *TaxonSet) InsertTaxon(taxon *Taxon) {
set.taxonomy.name,
)
}
return set.Insert(taxon.Node)
}
// Taxonomy returns a pointer to the Taxonomy instance that this TaxonSet belongs to.
@ -109,6 +124,10 @@ func (set *TaxonSet) InsertTaxon(taxon *Taxon) {
// Returns:
// - A pointer to the Taxonomy instance that this TaxonSet belongs to.
func (set *TaxonSet) Taxonomy() *Taxonomy {
	// Only a non-nil set has an owning taxonomy to report.
	if set != nil {
		return set.taxonomy
	}

	return nil
}
@ -124,6 +143,10 @@ func (set *TaxonSet) Taxonomy() *Taxonomy {
// - If the original taxon corresponding to the alias is not part of the taxon set,
// the method will log a fatal error and terminate the program.
func (set *TaxonSet) Alias(id *string, taxon *Taxon) {
if set == nil {
log.Panic("Cannot add alias to a nil TaxonSet")
}
original := set.Get(taxon.Node.id)
if original == nil {
log.Fatalf("Original taxon %v is not part of taxon set", id)

View File

@ -40,7 +40,7 @@ type TaxonSlice struct {
func (taxonomy *Taxonomy) NewTaxonSlice(size, capacity int) *TaxonSlice {
return &TaxonSlice{
slice: make([]*TaxNode, size, capacity),
taxonomy: taxonomy,
taxonomy: taxonomy.OrDefault(true),
}
}

View File

@ -1,7 +1,6 @@
package obifind
import (
"bytes"
"fmt"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
@ -22,11 +21,7 @@ func IFilterRankRestriction() func(*obitax.ITaxon) *obitax.ITaxon {
}
func ITaxonNameMatcher() (func(string) *obitax.ITaxon, error) {
taxonomy, err := CLILoadSelectedTaxonomy()
if err != nil {
return nil, err
}
taxonomy := obitax.DefaultTaxonomy()
fun := func(name string) *obitax.ITaxon {
return taxonomy.IFilterOnName(name, __fixed_pattern__)
@ -53,27 +48,25 @@ func ITaxonRestrictions() (func(*obitax.ITaxon) *obitax.ITaxon, error) {
}
func TaxonAsString(taxon *obitax.Taxon, pattern string) string {
text := taxon.ScientificName()
// var text string
// if __with_path__ {
// var bf bytes.Buffer
// path := taxon.Path()
if __with_path__ {
var bf bytes.Buffer
path := taxon.Path()
// bf.WriteString(path.Get(path.Len() - 1).ScientificName())
bf.WriteString(path.Get(path.Len() - 1).ScientificName())
// for i := path.Len() - 2; i >= 0; i-- {
// fmt.Fprintf(&bf, ":%s", path.Get(i).ScientificName())
// }
for i := path.Len() - 2; i >= 0; i-- {
fmt.Fprintf(&bf, ":%s", path.Get(i).ScientificName())
}
// text = bf.String()
// }
text = bf.String()
}
return fmt.Sprintf("%-20s | %10s | %10s | %-20s | %s",
return fmt.Sprintf("%-20s | %10s | %10s | %-20s",
pattern,
taxon.String(),
taxon.Parent().String(),
taxon.Rank(),
text)
taxon.Rank())
}
func TaxonWriter(itaxa *obitax.ITaxon, pattern string) {

View File

@ -1,17 +1,14 @@
package obifind
import (
"errors"
"fmt"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats/ncbitaxdump"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
"github.com/DavidGamba/go-getoptions"
)
// __taxdump__ receives the value of the "taxdump" option (alias "t"): the
// directory containing the NCBI Taxonomy database dump. Empty when unset.
var __taxdump__ = ""
// __alternative_name__ is bound to the "alternative-names" option (alias "a").
var __alternative_name__ = false
// __rank_list__ is bound to the "rank-list" option (alias "l").
var __rank_list__ = false
// __selected_taxonomy__ caches the taxonomy loaded by CLILoadSelectedTaxonomy;
// it stays nil until a load has been attempted.
var __selected_taxonomy__ = (*obitax.Taxonomy)(nil)
// __taxonomical_restriction__ lists the taxids that
// CLITaxonomicalRestrictions turns into a TaxonSet.
var __taxonomical_restriction__ = make([]string, 0)
// __fixed_pattern__ is bound to the "fixed" option (alias "F") and is passed
// along with the name when filtering taxa on their name.
var __fixed_pattern__ = false
@ -20,24 +17,6 @@ var __taxid_path__ = "NA"
var __taxid_sons__ = "NA"
var __restrict_rank__ = ""
// LoadTaxonomyOptionSet registers the command line options used to select
// the taxonomy to load.
//
// Parameters:
//   - options: the option parser on which the options are declared.
//   - required: when true, the "taxdump" option is declared as required.
//   - alternatiive: when true, the "alternative-names" option is declared too.
func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bool) {
// Both branches declare the same "taxdump" option (alias "t") bound to
// __taxdump__; they differ only by the options.Required() modifier.
if required {
options.StringVar(&__taxdump__, "taxdump", "",
options.Alias("t"),
options.Required(),
options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
} else {
options.StringVar(&__taxdump__, "taxdump", "",
options.Alias("t"),
options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
}
// The "alternative-names" flag (alias "a") is bound to __alternative_name__.
if alternatiive {
options.BoolVar(&__alternative_name__, "alternative-names", false,
options.Alias("a"),
options.Description("Enable the search on all alternative names and not only scientific names."))
}
}
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
options.BoolVar(&__rank_list__, "rank-list", false,
options.Alias("l"),
@ -48,31 +27,23 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
options.Description("Restrict output to some subclades."))
}
// CLISelectedNCBITaxDump returns the value bound to the "taxdump" option,
// or the empty string when the option was not set.
func CLISelectedNCBITaxDump() string {
return __taxdump__
}
func CLIHasSelectedTaxonomy() bool {
return __taxdump__ != ""
}
// CLIAreAlternativeNamesSelected returns the value bound to the
// "alternative-names" option (false by default).
func CLIAreAlternativeNamesSelected() bool {
return __alternative_name__
}
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
taxonomy, err := CLILoadSelectedTaxonomy()
taxonomy := obitax.DefaultTaxonomy()
if err != nil {
return nil, err
if taxonomy == nil {
return nil, fmt.Errorf("no taxonomy loaded")
}
ts := taxonomy.NewTaxonSet()
for _, taxid := range __taxonomical_restriction__ {
tx := taxonomy.Taxon(taxid)
if err != nil {
return nil, err
if tx == nil {
return nil, fmt.Errorf(
"cannot find taxon %s in taxonomy %s",
taxid,
taxonomy.Name(),
)
}
ts.InsertTaxon(tx)
@ -81,24 +52,8 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
return ts, nil
}
// CLILoadSelectedTaxonomy returns the taxonomy designated by the "taxdump"
// option, loading it on first use and caching it in __selected_taxonomy__.
//
// Returns:
//   - The cached or freshly loaded taxonomy.
//   - An error when no taxdump has been selected or when loading it fails.
func CLILoadSelectedTaxonomy() (*obitax.Taxonomy, error) {
	if CLISelectedNCBITaxDump() == "" {
		return nil, errors.New("no NCBI taxdump selected using option -t|--taxdump")
	}

	// Already loaded by a previous call: reuse the cached instance.
	if __selected_taxonomy__ != nil {
		return __selected_taxonomy__, nil
	}

	// The negated alternative-names flag is forwarded as the loader's second
	// argument (presumably "scientific names only" — confirm against
	// LoadNCBITaxDump).
	var loadErr error
	__selected_taxonomy__, loadErr = ncbitaxdump.LoadNCBITaxDump(
		CLISelectedNCBITaxDump(),
		!CLIAreAlternativeNamesSelected(),
	)
	if loadErr != nil {
		return nil, loadErr
	}

	return __selected_taxonomy__, nil
}
func OptionSet(options *getoptions.GetOpt) {
LoadTaxonomyOptionSet(options, true, true)
obioptions.LoadTaxonomyOptionSet(options, true, true)
FilterTaxonomyOptionSet(options)
options.BoolVar(&__fixed_pattern__, "fixed", false,
options.Alias("F"),