Changes to be committed:

modified:   .gitignore
	new file:   pkg/obitax/default_taxonomy.go
	modified:   pkg/obitax/taxon.go
	modified:   pkg/obitax/taxonnode.go
	modified:   pkg/obitax/taxonomy.go
	modified:   pkg/obitax/taxonset.go
	modified:   pkg/obitax/taxonslice.go
	modified:   pkg/obitools/obifind/iterator.go
	modified:   pkg/obitools/obifind/options.go
This commit is contained in:
Eric Coissac
2024-11-16 10:01:49 +01:00
parent f3d8707c08
commit 36327c79c8
9 changed files with 153 additions and 87 deletions

9
.gitignore vendored
View File

@ -118,3 +118,12 @@ doc/book/wolf_data/Release-253/ncbitaxo/readme.txt
doc/book/results/toto.tasta doc/book/results/toto.tasta
sample/.DS_Store sample/.DS_Store
GO GO
ncbitaxo/citations.dmp
ncbitaxo/delnodes.dmp
ncbitaxo/division.dmp
ncbitaxo/gc.prt
ncbitaxo/gencode.dmp
ncbitaxo/merged.dmp
ncbitaxo/names.dmp
ncbitaxo/nodes.dmp
ncbitaxo/readme.txt

View File

@ -0,0 +1,29 @@
package obitax
import "log"
var __defaut_taxonomy__ *Taxonomy
// SetAsDefault registers the receiver as the package-level default taxonomy.
//
// The stored pointer is the one later handed back by DefaultTaxonomy and used
// as the fallback by OrDefault. The receiver is stored as is, so calling this
// method on a nil *Taxonomy clears the default.
func (taxonomy *Taxonomy) SetAsDefault() {
	__defaut_taxonomy__ = taxonomy
}
// OrDefault returns the receiver when it is non-nil, and falls back to the
// package-level default taxonomy otherwise.
//
// Parameters:
//   - panicOnNil: when true, the method panics (through log.Panic) if neither
//     the receiver nor the default taxonomy is set.
//
// Returns:
//   - The receiver if it is non-nil, else the default taxonomy. The result is
//     nil only when both are unset and panicOnNil is false.
func (taxonomy *Taxonomy) OrDefault(panicOnNil bool) *Taxonomy {
	if taxonomy == nil {
		// Substitute the default rather than returning it right away, so
		// that the nil guard below also covers an unset default taxonomy.
		taxonomy = __defaut_taxonomy__
	}

	if panicOnNil && taxonomy == nil {
		log.Panic("Cannot deal with nil taxonomy")
	}

	return taxonomy
}
// IsDefaultTaxonomyDefined reports whether a package-level default taxonomy
// is currently registered.
//
// Returns:
//   - true if the default taxonomy pointer is non-nil, false otherwise.
func IsDefaultTaxonomyDefined() bool {
	current := __defaut_taxonomy__
	return current != nil
}
// DefaultTaxonomy returns the package-level default taxonomy.
//
// Returns:
//   - The currently stored default *Taxonomy, or nil when none is stored.
func DefaultTaxonomy() *Taxonomy {
	return __defaut_taxonomy__
}

View File

@ -30,6 +30,10 @@ func (taxon *Taxon) String() string {
return taxon.Node.String(taxon.Taxonomy.code) return taxon.Node.String(taxon.Taxonomy.code)
} }
// HasScientificName reports whether the Taxon carries a scientific name.
// A nil Taxon never has one; otherwise the question is delegated to the
// underlying TaxNode.
//
// Returns:
//   - true if the taxon is non-nil and its node reports a scientific name.
func (taxon *Taxon) HasScientificName() bool {
	if taxon == nil {
		return false
	}

	return taxon.Node.HasScientificName()
}
// ScientificName returns the scientific name of the Taxon. // ScientificName returns the scientific name of the Taxon.
// It retrieves the scientific name from the underlying TaxNode associated with the taxon. // It retrieves the scientific name from the underlying TaxNode associated with the taxon.
// //

View File

@ -35,10 +35,17 @@ type TaxNode struct {
// Returns: // Returns:
// - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]". // - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]".
func (node *TaxNode) String(taxonomyCode string) string { func (node *TaxNode) String(taxonomyCode string) string {
if node.HasScientificName() {
return fmt.Sprintf("%s:%v [%s]", return fmt.Sprintf("%s:%v [%s]",
taxonomyCode, taxonomyCode,
*node.id, *node.id,
node.ScientificName()) node.ScientificName())
}
return fmt.Sprintf("%s:%v",
taxonomyCode,
*node.id)
} }
// Id returns the unique identifier of the TaxNode. // Id returns the unique identifier of the TaxNode.
@ -59,6 +66,10 @@ func (node *TaxNode) ParentId() *string {
return node.parent return node.parent
} }
// HasScientificName reports whether the TaxNode has a scientific name set.
// It is safe to call on a nil *TaxNode.
//
// Returns:
//   - true if the node is non-nil and its scientific name pointer is non-nil.
func (node *TaxNode) HasScientificName() bool {
	if node == nil {
		return false
	}

	return node.scientificname != nil
}
// ScientificName returns the scientific name of the TaxNode. // ScientificName returns the scientific name of the TaxNode.
// It dereferences the pointer to the scientific name string associated with the taxon node. // It dereferences the pointer to the scientific name string associated with the taxon node.
// //

View File

@ -86,6 +86,12 @@ func NewTaxonomy(name, code, codeCharacters string) *Taxonomy {
// - The taxon identifier as a *string corresponding to the provided taxid. // - The taxon identifier as a *string corresponding to the provided taxid.
// - An error if the taxid is not valid or cannot be converted. // - An error if the taxid is not valid or cannot be converted.
func (taxonomy *Taxonomy) Id(taxid string) (*string, error) { func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
taxonomy = taxonomy.OrDefault(false)
if taxonomy == nil {
return nil, fmt.Errorf("Cannot extract Id from nil Taxonomy")
}
matches := taxonomy.matcher.FindStringSubmatch(taxid) matches := taxonomy.matcher.FindStringSubmatch(taxid)
if matches == nil { if matches == nil {
@ -106,6 +112,8 @@ func (taxonomy *Taxonomy) Id(taxid string) (*string, error) {
// - A string representing the taxon node in the format "taxonomyCode:id [scientificName]", // - A string representing the taxon node in the format "taxonomyCode:id [scientificName]",
// or an error if the taxon node with the specified ID does not exist in the taxonomy. // or an error if the taxon node with the specified ID does not exist in the taxonomy.
func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) { func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
taxonomy = taxonomy.OrDefault(false)
pid, err := taxonomy.Id(id) pid, err := taxonomy.Id(id)
if err != nil { if err != nil {
@ -132,6 +140,8 @@ func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) {
// - A pointer to the Taxon instance associated with the provided taxid. // - A pointer to the Taxon instance associated with the provided taxid.
// - If the taxid is unknown, the method will log a fatal error. // - If the taxid is unknown, the method will log a fatal error.
func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon { func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
taxonomy = taxonomy.OrDefault(false)
id, err := taxonomy.Id(taxid) id, err := taxonomy.Id(taxid)
if err != nil { if err != nil {
@ -155,6 +165,12 @@ func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
// Returns: // Returns:
// - A pointer to the TaxonSet representing the collection of taxon nodes in the taxonomy. // - A pointer to the TaxonSet representing the collection of taxon nodes in the taxonomy.
func (taxonomy *Taxonomy) AsTaxonSet() *TaxonSet { func (taxonomy *Taxonomy) AsTaxonSet() *TaxonSet {
taxonomy = taxonomy.OrDefault(false)
if taxonomy == nil {
return nil
}
return taxonomy.nodes return taxonomy.nodes
} }
@ -164,6 +180,12 @@ func (taxonomy *Taxonomy) AsTaxonSet() *TaxonSet {
// Returns: // Returns:
// - An integer representing the total count of taxa in the taxonomy. // - An integer representing the total count of taxa in the taxonomy.
func (taxonomy *Taxonomy) Len() int { func (taxonomy *Taxonomy) Len() int {
taxonomy = taxonomy.OrDefault(false)
if taxonomy == nil {
return 0
}
return taxonomy.nodes.Len() return taxonomy.nodes.Len()
} }
@ -181,14 +203,14 @@ func (taxonomy *Taxonomy) Len() int {
// - A pointer to the newly created Taxon instance. // - A pointer to the newly created Taxon instance.
// - An error if the taxon cannot be added (e.g., it already exists and replace is false). // - An error if the taxon cannot be added (e.g., it already exists and replace is false).
func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) { func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) {
taxonomy = taxonomy.OrDefault(false)
parentid, perr := taxonomy.Id(parent) parentid, perr := taxonomy.Id(parent)
id, err := taxonomy.Id(taxid)
if perr != nil { if perr != nil {
return nil, fmt.Errorf("error in parsing parent taxid %s: %v", parent, perr) return nil, fmt.Errorf("error in parsing parent taxid %s: %v", parent, perr)
} }
id, err := taxonomy.Id(taxid)
if err != nil { if err != nil {
return nil, fmt.Errorf("error in parsing taxid %s: %v", taxid, err) return nil, fmt.Errorf("error in parsing taxid %s: %v", taxid, err)
} }
@ -228,6 +250,8 @@ func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot boo
// - A pointer to the Taxon associated with the oldtaxid. // - A pointer to the Taxon associated with the oldtaxid.
// - An error if the alias cannot be added (e.g., the old taxon does not exist). // - An error if the alias cannot be added (e.g., the old taxon does not exist).
func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Taxon, error) { func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Taxon, error) {
taxonomy = taxonomy.OrDefault(false)
newid, err := taxonomy.Id(newtaxid) newid, err := taxonomy.Id(newtaxid)
if err != nil { if err != nil {
@ -261,6 +285,12 @@ func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Ta
// Returns: // Returns:
// - A slice of strings containing the ranks of the taxa. // - A slice of strings containing the ranks of the taxa.
func (taxonomy *Taxonomy) RankList() []string { func (taxonomy *Taxonomy) RankList() []string {
taxonomy = taxonomy.OrDefault(false)
if taxonomy == nil {
return make([]string, 0)
}
return taxonomy.ranks.Slice() return taxonomy.ranks.Slice()
} }
@ -270,6 +300,12 @@ func (taxonomy *Taxonomy) RankList() []string {
// Returns: // Returns:
// - A pointer to the map that indexes taxa in the taxonomy. // - A pointer to the map that indexes taxa in the taxonomy.
func (taxonomy *Taxonomy) Index() *map[*string]*TaxonSet { func (taxonomy *Taxonomy) Index() *map[*string]*TaxonSet {
taxonomy = taxonomy.OrDefault(false)
if taxonomy == nil {
return nil
}
return &(taxonomy.index) return &(taxonomy.index)
} }
@ -278,6 +314,7 @@ func (taxonomy *Taxonomy) Index() *map[*string]*TaxonSet {
// Returns: // Returns:
// - A string representing the name of the taxonomy. // - A string representing the name of the taxonomy.
func (taxonomy *Taxonomy) Name() string { func (taxonomy *Taxonomy) Name() string {
taxonomy = taxonomy.OrDefault(true)
return taxonomy.name return taxonomy.name
} }
@ -286,6 +323,7 @@ func (taxonomy *Taxonomy) Name() string {
// Returns: // Returns:
// - A string representing the unique code of the taxonomy. // - A string representing the unique code of the taxonomy.
func (taxonomy *Taxonomy) Code() string { func (taxonomy *Taxonomy) Code() string {
taxonomy = taxonomy.OrDefault(true)
return taxonomy.code return taxonomy.code
} }
@ -295,6 +333,7 @@ func (taxonomy *Taxonomy) Code() string {
// Parameters: // Parameters:
// - root: A pointer to the Taxon instance to be set as the root. // - root: A pointer to the Taxon instance to be set as the root.
func (taxonomy *Taxonomy) SetRoot(root *Taxon) { func (taxonomy *Taxonomy) SetRoot(root *Taxon) {
taxonomy = taxonomy.OrDefault(true)
taxonomy.root = root.Node taxonomy.root = root.Node
} }
@ -304,6 +343,8 @@ func (taxonomy *Taxonomy) SetRoot(root *Taxon) {
// Returns: // Returns:
// - A pointer to the Taxon instance representing the root of the taxonomy. // - A pointer to the Taxon instance representing the root of the taxonomy.
func (taxonomy *Taxonomy) Root() *Taxon { func (taxonomy *Taxonomy) Root() *Taxon {
taxonomy = taxonomy.OrDefault(true)
return &Taxon{ return &Taxon{
Taxonomy: taxonomy, Taxonomy: taxonomy,
Node: taxonomy.root, Node: taxonomy.root,
@ -315,5 +356,6 @@ func (taxonomy *Taxonomy) Root() *Taxon {
// Returns: // Returns:
// - A boolean indicating whether the Taxonomy has a root node (true) or not (false). // - A boolean indicating whether the Taxonomy has a root node (true) or not (false).
func (taxonomy *Taxonomy) HasRoot() bool { func (taxonomy *Taxonomy) HasRoot() bool {
return taxonomy.root != nil taxonomy = taxonomy.OrDefault(false)
return taxonomy != nil && taxonomy.root != nil
} }

View File

@ -32,7 +32,7 @@ func (taxonomy *Taxonomy) NewTaxonSet() *TaxonSet {
return &TaxonSet{ return &TaxonSet{
set: make(map[*string]*TaxNode), set: make(map[*string]*TaxNode),
nalias: 0, nalias: 0,
taxonomy: taxonomy, taxonomy: taxonomy.OrDefault(true),
} }
} }
@ -68,6 +68,9 @@ func (set *TaxonSet) Get(id *string) *Taxon {
// Returns: // Returns:
// - An integer representing the count of unique taxa in the TaxonSet. // - An integer representing the count of unique taxa in the TaxonSet.
func (set *TaxonSet) Len() int { func (set *TaxonSet) Len() int {
if set == nil {
return 0
}
return len(set.set) - set.nalias return len(set.set) - set.nalias
} }
@ -81,11 +84,17 @@ func (set *TaxonSet) Len() int {
// Behavior: // Behavior:
// - If a taxon with the same identifier already exists and is different from the // - If a taxon with the same identifier already exists and is different from the
// new taxon, the alias count is decremented. // new taxon, the alias count is decremented.
func (set *TaxonSet) Insert(node *TaxNode) { func (set *TaxonSet) Insert(node *TaxNode) *TaxonSet {
if set == nil {
log.Panic("Cannot insert node into nil TaxonSet")
}
if old := set.set[node.id]; old != nil && old.id != node.id { if old := set.set[node.id]; old != nil && old.id != node.id {
set.nalias-- set.nalias--
} }
set.set[node.id] = node set.set[node.id] = node
return set
} }
// InsertTaxon adds a Taxon to the TaxonSet. It verifies that the Taxon belongs // InsertTaxon adds a Taxon to the TaxonSet. It verifies that the Taxon belongs
@ -94,7 +103,11 @@ func (set *TaxonSet) Insert(node *TaxNode) {
// //
// Parameters: // Parameters:
// - taxon: A pointer to the Taxon instance to be added to the TaxonSet. // - taxon: A pointer to the Taxon instance to be added to the TaxonSet.
func (set *TaxonSet) InsertTaxon(taxon *Taxon) { func (set *TaxonSet) InsertTaxon(taxon *Taxon) *TaxonSet {
if set == nil {
set = taxon.Taxonomy.NewTaxonSet()
}
if set.taxonomy != taxon.Taxonomy { if set.taxonomy != taxon.Taxonomy {
log.Fatalf( log.Fatalf(
"Cannot insert taxon %s into taxon set belonging to %s taxonomy", "Cannot insert taxon %s into taxon set belonging to %s taxonomy",
@ -102,6 +115,8 @@ func (set *TaxonSet) InsertTaxon(taxon *Taxon) {
set.taxonomy.name, set.taxonomy.name,
) )
} }
return set.Insert(taxon.Node)
} }
// Taxonomy returns a pointer to the Taxonomy instance that this TaxonSet belongs to. // Taxonomy returns a pointer to the Taxonomy instance that this TaxonSet belongs to.
@ -109,6 +124,10 @@ func (set *TaxonSet) InsertTaxon(taxon *Taxon) {
// Returns: // Returns:
// - A pointer to the Taxonomy instance that this TaxonSet belongs to. // - A pointer to the Taxonomy instance that this TaxonSet belongs to.
func (set *TaxonSet) Taxonomy() *Taxonomy { func (set *TaxonSet) Taxonomy() *Taxonomy {
if set == nil {
return nil
}
return set.taxonomy return set.taxonomy
} }
@ -124,6 +143,10 @@ func (set *TaxonSet) Taxonomy() *Taxonomy {
// - If the original taxon corresponding to the alias is not part of the taxon set, // - If the original taxon corresponding to the alias is not part of the taxon set,
// the method will log a fatal error and terminate the program. // the method will log a fatal error and terminate the program.
func (set *TaxonSet) Alias(id *string, taxon *Taxon) { func (set *TaxonSet) Alias(id *string, taxon *Taxon) {
if set == nil {
log.Panic("Cannot add alias to a nil TaxonSet")
}
original := set.Get(taxon.Node.id) original := set.Get(taxon.Node.id)
if original == nil { if original == nil {
log.Fatalf("Original taxon %v is not part of taxon set", id) log.Fatalf("Original taxon %v is not part of taxon set", id)

View File

@ -40,7 +40,7 @@ type TaxonSlice struct {
func (taxonomy *Taxonomy) NewTaxonSlice(size, capacity int) *TaxonSlice { func (taxonomy *Taxonomy) NewTaxonSlice(size, capacity int) *TaxonSlice {
return &TaxonSlice{ return &TaxonSlice{
slice: make([]*TaxNode, size, capacity), slice: make([]*TaxNode, size, capacity),
taxonomy: taxonomy, taxonomy: taxonomy.OrDefault(true),
} }
} }

View File

@ -1,7 +1,6 @@
package obifind package obifind
import ( import (
"bytes"
"fmt" "fmt"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
@ -22,11 +21,7 @@ func IFilterRankRestriction() func(*obitax.ITaxon) *obitax.ITaxon {
} }
func ITaxonNameMatcher() (func(string) *obitax.ITaxon, error) { func ITaxonNameMatcher() (func(string) *obitax.ITaxon, error) {
taxonomy, err := CLILoadSelectedTaxonomy() taxonomy := obitax.DefaultTaxonomy()
if err != nil {
return nil, err
}
fun := func(name string) *obitax.ITaxon { fun := func(name string) *obitax.ITaxon {
return taxonomy.IFilterOnName(name, __fixed_pattern__) return taxonomy.IFilterOnName(name, __fixed_pattern__)
@ -53,27 +48,25 @@ func ITaxonRestrictions() (func(*obitax.ITaxon) *obitax.ITaxon, error) {
} }
func TaxonAsString(taxon *obitax.Taxon, pattern string) string { func TaxonAsString(taxon *obitax.Taxon, pattern string) string {
text := taxon.ScientificName() // var text string
// if __with_path__ {
// var bf bytes.Buffer
// path := taxon.Path()
if __with_path__ { // bf.WriteString(path.Get(path.Len() - 1).ScientificName())
var bf bytes.Buffer
path := taxon.Path()
bf.WriteString(path.Get(path.Len() - 1).ScientificName()) // for i := path.Len() - 2; i >= 0; i-- {
// fmt.Fprintf(&bf, ":%s", path.Get(i).ScientificName())
// }
for i := path.Len() - 2; i >= 0; i-- { // text = bf.String()
fmt.Fprintf(&bf, ":%s", path.Get(i).ScientificName()) // }
}
text = bf.String() return fmt.Sprintf("%-20s | %10s | %10s | %-20s",
}
return fmt.Sprintf("%-20s | %10s | %10s | %-20s | %s",
pattern, pattern,
taxon.String(), taxon.String(),
taxon.Parent().String(), taxon.Parent().String(),
taxon.Rank(), taxon.Rank())
text)
} }
func TaxonWriter(itaxa *obitax.ITaxon, pattern string) { func TaxonWriter(itaxa *obitax.ITaxon, pattern string) {

View File

@ -1,17 +1,14 @@
package obifind package obifind
import ( import (
"errors" "fmt"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats/ncbitaxdump" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
"github.com/DavidGamba/go-getoptions" "github.com/DavidGamba/go-getoptions"
) )
var __taxdump__ = ""
var __alternative_name__ = false
var __rank_list__ = false var __rank_list__ = false
var __selected_taxonomy__ = (*obitax.Taxonomy)(nil)
var __taxonomical_restriction__ = make([]string, 0) var __taxonomical_restriction__ = make([]string, 0)
var __fixed_pattern__ = false var __fixed_pattern__ = false
@ -20,24 +17,6 @@ var __taxid_path__ = "NA"
var __taxid_sons__ = "NA" var __taxid_sons__ = "NA"
var __restrict_rank__ = "" var __restrict_rank__ = ""
func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bool) {
if required {
options.StringVar(&__taxdump__, "taxdump", "",
options.Alias("t"),
options.Required(),
options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
} else {
options.StringVar(&__taxdump__, "taxdump", "",
options.Alias("t"),
options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
}
if alternatiive {
options.BoolVar(&__alternative_name__, "alternative-names", false,
options.Alias("a"),
options.Description("Enable the search on all alternative names and not only scientific names."))
}
}
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) { func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
options.BoolVar(&__rank_list__, "rank-list", false, options.BoolVar(&__rank_list__, "rank-list", false,
options.Alias("l"), options.Alias("l"),
@ -48,31 +27,23 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
options.Description("Restrict output to some subclades.")) options.Description("Restrict output to some subclades."))
} }
func CLISelectedNCBITaxDump() string {
return __taxdump__
}
func CLIHasSelectedTaxonomy() bool {
return __taxdump__ != ""
}
func CLIAreAlternativeNamesSelected() bool {
return __alternative_name__
}
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) { func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
taxonomy, err := CLILoadSelectedTaxonomy() taxonomy := obitax.DefaultTaxonomy()
if err != nil { if taxonomy == nil {
return nil, err return nil, fmt.Errorf("no taxonomy loaded")
} }
ts := taxonomy.NewTaxonSet() ts := taxonomy.NewTaxonSet()
for _, taxid := range __taxonomical_restriction__ { for _, taxid := range __taxonomical_restriction__ {
tx := taxonomy.Taxon(taxid) tx := taxonomy.Taxon(taxid)
if err != nil { if tx == nil {
return nil, err return nil, fmt.Errorf(
"cannot find taxon %s in taxonomy %s",
taxid,
taxonomy.Name(),
)
} }
ts.InsertTaxon(tx) ts.InsertTaxon(tx)
@ -81,24 +52,8 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
return ts, nil return ts, nil
} }
func CLILoadSelectedTaxonomy() (*obitax.Taxonomy, error) {
if CLISelectedNCBITaxDump() != "" {
if __selected_taxonomy__ == nil {
var err error
__selected_taxonomy__, err = ncbitaxdump.LoadNCBITaxDump(CLISelectedNCBITaxDump(),
!CLIAreAlternativeNamesSelected())
if err != nil {
return nil, err
}
}
return __selected_taxonomy__, nil
}
return nil, errors.New("no NCBI taxdump selected using option -t|--taxdump")
}
func OptionSet(options *getoptions.GetOpt) { func OptionSet(options *getoptions.GetOpt) {
LoadTaxonomyOptionSet(options, true, true) obioptions.LoadTaxonomyOptionSet(options, true, true)
FilterTaxonomyOptionSet(options) FilterTaxonomyOptionSet(options)
options.BoolVar(&__fixed_pattern__, "fixed", false, options.BoolVar(&__fixed_pattern__, "fixed", false,
options.Alias("F"), options.Alias("F"),