diff --git a/.gitignore b/.gitignore
index 6815284..9cb3bb2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -118,3 +118,12 @@ doc/book/wolf_data/Release-253/ncbitaxo/readme.txt
 doc/book/results/toto.tasta
 sample/.DS_Store
 GO
+ncbitaxo/citations.dmp
+ncbitaxo/delnodes.dmp
+ncbitaxo/division.dmp
+ncbitaxo/gc.prt
+ncbitaxo/gencode.dmp
+ncbitaxo/merged.dmp
+ncbitaxo/names.dmp
+ncbitaxo/nodes.dmp
+ncbitaxo/readme.txt
diff --git a/pkg/obitax/default_taxonomy.go b/pkg/obitax/default_taxonomy.go
new file mode 100644
index 0000000..d963f0f
--- /dev/null
+++ b/pkg/obitax/default_taxonomy.go
@@ -0,0 +1,46 @@
+package obitax
+
+import "log"
+
+// __defaut_taxonomy__ holds the package-wide fallback taxonomy returned by
+// OrDefault for a nil receiver. It stays nil until SetAsDefault is called.
+var __defaut_taxonomy__ *Taxonomy
+
+// SetAsDefault registers the taxonomy as the package-wide default taxonomy.
+func (taxonomy *Taxonomy) SetAsDefault() {
+	__defaut_taxonomy__ = taxonomy
+}
+
+// OrDefault returns the receiver when it is not nil, and the default taxonomy
+// otherwise.
+//
+// Parameters:
+// - panicOnNil: when true, the method panics if neither the receiver nor the
+//   default taxonomy is defined.
+//
+// Returns:
+// - The receiver, the default taxonomy, or nil when both are undefined and
+//   panicOnNil is false.
+func (taxonomy *Taxonomy) OrDefault(panicOnNil bool) *Taxonomy {
+	// Substitute the default instead of returning it directly: an early
+	// return here would make the panicOnNil check below unreachable.
+	if taxonomy == nil {
+		taxonomy = __defaut_taxonomy__
+	}
+
+	if panicOnNil && taxonomy == nil {
+		log.Panic("Cannot deal with nil taxonomy")
+	}
+
+	return taxonomy
+}
+
+// IsDefaultTaxonomyDefined reports whether a default taxonomy has been set.
+func IsDefaultTaxonomyDefined() bool {
+	return __defaut_taxonomy__ != nil
+}
+
+// DefaultTaxonomy returns the default taxonomy, or nil when none has been set.
+func DefaultTaxonomy() *Taxonomy {
+	return __defaut_taxonomy__
+}
diff --git a/pkg/obitax/taxon.go b/pkg/obitax/taxon.go
index c7587cb..6b8902a 100644
--- a/pkg/obitax/taxon.go
+++ b/pkg/obitax/taxon.go
@@ -30,6 +30,12 @@ func (taxon *Taxon) String() string {
 	return taxon.Node.String(taxon.Taxonomy.code)
 }
 
+// HasScientificName reports whether the taxon is not nil and its underlying
+// TaxNode declares a scientific name.
+func (taxon *Taxon) HasScientificName() bool {
+	return taxon != nil && taxon.Node.HasScientificName()
+}
+
 // ScientificName returns the scientific name of the Taxon.
 // It retrieves the scientific name from the underlying TaxNode associated with the taxon.
 //
diff --git a/pkg/obitax/taxonnode.go b/pkg/obitax/taxonnode.go
index e7c0d8b..df18dec 100644
--- a/pkg/obitax/taxonnode.go
+++ b/pkg/obitax/taxonnode.go
@@ -35,10 +35,17 @@ type TaxNode struct {
 // Returns:
 // - A formatted string representing the TaxNode in the form "taxonomyCode:id [scientificName]".
func (node *TaxNode) String(taxonomyCode string) string { - return fmt.Sprintf("%s:%v [%s]", + if node.HasScientificName() { + return fmt.Sprintf("%s:%v [%s]", + taxonomyCode, + *node.id, + node.ScientificName()) + } + + return fmt.Sprintf("%s:%v", taxonomyCode, - *node.id, - node.ScientificName()) + *node.id) + } // Id returns the unique identifier of the TaxNode. @@ -59,6 +66,10 @@ func (node *TaxNode) ParentId() *string { return node.parent } +func (node *TaxNode) HasScientificName() bool { + return node != nil && node.scientificname != nil +} + // ScientificName returns the scientific name of the TaxNode. // It dereferences the pointer to the scientific name string associated with the taxon node. // diff --git a/pkg/obitax/taxonomy.go b/pkg/obitax/taxonomy.go index c7e628e..1cefdc4 100644 --- a/pkg/obitax/taxonomy.go +++ b/pkg/obitax/taxonomy.go @@ -86,6 +86,12 @@ func NewTaxonomy(name, code, codeCharacters string) *Taxonomy { // - The taxon identifier as a *string corresponding to the provided taxid. // - An error if the taxid is not valid or cannot be converted. func (taxonomy *Taxonomy) Id(taxid string) (*string, error) { + taxonomy = taxonomy.OrDefault(false) + + if taxonomy == nil { + return nil, fmt.Errorf("Cannot extract Id from nil Taxonomy") + } + matches := taxonomy.matcher.FindStringSubmatch(taxid) if matches == nil { @@ -106,6 +112,8 @@ func (taxonomy *Taxonomy) Id(taxid string) (*string, error) { // - A string representing the taxon node in the format "taxonomyCode:id [scientificName]", // or an error if the taxon node with the specified ID does not exist in the taxonomy. func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) { + taxonomy = taxonomy.OrDefault(false) + pid, err := taxonomy.Id(id) if err != nil { @@ -132,6 +140,8 @@ func (taxonomy *Taxonomy) TaxidSting(id string) (string, error) { // - A pointer to the Taxon instance associated with the provided taxid. // - If the taxid is unknown, the method will log a fatal error. 
func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon { + taxonomy = taxonomy.OrDefault(false) + id, err := taxonomy.Id(taxid) if err != nil { @@ -155,6 +165,12 @@ func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon { // Returns: // - A pointer to the TaxonSet representing the collection of taxon nodes in the taxonomy. func (taxonomy *Taxonomy) AsTaxonSet() *TaxonSet { + taxonomy = taxonomy.OrDefault(false) + + if taxonomy == nil { + return nil + } + return taxonomy.nodes } @@ -164,6 +180,12 @@ func (taxonomy *Taxonomy) AsTaxonSet() *TaxonSet { // Returns: // - An integer representing the total count of taxa in the taxonomy. func (taxonomy *Taxonomy) Len() int { + taxonomy = taxonomy.OrDefault(false) + + if taxonomy == nil { + return 0 + } + return taxonomy.nodes.Len() } @@ -181,14 +203,14 @@ func (taxonomy *Taxonomy) Len() int { // - A pointer to the newly created Taxon instance. // - An error if the taxon cannot be added (e.g., it already exists and replace is false). func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) { + taxonomy = taxonomy.OrDefault(false) parentid, perr := taxonomy.Id(parent) - id, err := taxonomy.Id(taxid) - if perr != nil { return nil, fmt.Errorf("error in parsing parent taxid %s: %v", parent, perr) } + id, err := taxonomy.Id(taxid) if err != nil { return nil, fmt.Errorf("error in parsing taxid %s: %v", taxid, err) } @@ -228,6 +250,8 @@ func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot boo // - A pointer to the Taxon associated with the oldtaxid. // - An error if the alias cannot be added (e.g., the old taxon does not exist). 
func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Taxon, error) { + taxonomy = taxonomy.OrDefault(false) + newid, err := taxonomy.Id(newtaxid) if err != nil { @@ -261,6 +285,12 @@ func (taxonomy *Taxonomy) AddAlias(newtaxid, oldtaxid string, replace bool) (*Ta // Returns: // - A slice of strings containing the ranks of the taxa. func (taxonomy *Taxonomy) RankList() []string { + taxonomy = taxonomy.OrDefault(false) + + if taxonomy == nil { + return make([]string, 0) + } + return taxonomy.ranks.Slice() } @@ -270,6 +300,12 @@ func (taxonomy *Taxonomy) RankList() []string { // Returns: // - A pointer to the map that indexes taxa in the taxonomy. func (taxonomy *Taxonomy) Index() *map[*string]*TaxonSet { + taxonomy = taxonomy.OrDefault(false) + + if taxonomy == nil { + return nil + } + return &(taxonomy.index) } @@ -278,6 +314,7 @@ func (taxonomy *Taxonomy) Index() *map[*string]*TaxonSet { // Returns: // - A string representing the name of the taxonomy. func (taxonomy *Taxonomy) Name() string { + taxonomy = taxonomy.OrDefault(true) return taxonomy.name } @@ -286,6 +323,7 @@ func (taxonomy *Taxonomy) Name() string { // Returns: // - A string representing the unique code of the taxonomy. func (taxonomy *Taxonomy) Code() string { + taxonomy = taxonomy.OrDefault(true) return taxonomy.code } @@ -295,6 +333,7 @@ func (taxonomy *Taxonomy) Code() string { // Parameters: // - root: A pointer to the Taxon instance to be set as the root. func (taxonomy *Taxonomy) SetRoot(root *Taxon) { + taxonomy = taxonomy.OrDefault(true) taxonomy.root = root.Node } @@ -304,6 +343,8 @@ func (taxonomy *Taxonomy) SetRoot(root *Taxon) { // Returns: // - A pointer to the Taxon instance representing the root of the taxonomy. 
func (taxonomy *Taxonomy) Root() *Taxon { + taxonomy = taxonomy.OrDefault(true) + return &Taxon{ Taxonomy: taxonomy, Node: taxonomy.root, @@ -315,5 +356,6 @@ func (taxonomy *Taxonomy) Root() *Taxon { // Returns: // - A boolean indicating whether the Taxonomy has a root node (true) or not (false). func (taxonomy *Taxonomy) HasRoot() bool { - return taxonomy.root != nil + taxonomy = taxonomy.OrDefault(false) + return taxonomy != nil && taxonomy.root != nil } diff --git a/pkg/obitax/taxonset.go b/pkg/obitax/taxonset.go index 22fd84b..9e6f7c2 100644 --- a/pkg/obitax/taxonset.go +++ b/pkg/obitax/taxonset.go @@ -32,7 +32,7 @@ func (taxonomy *Taxonomy) NewTaxonSet() *TaxonSet { return &TaxonSet{ set: make(map[*string]*TaxNode), nalias: 0, - taxonomy: taxonomy, + taxonomy: taxonomy.OrDefault(true), } } @@ -68,6 +68,9 @@ func (set *TaxonSet) Get(id *string) *Taxon { // Returns: // - An integer representing the count of unique taxa in the TaxonSet. func (set *TaxonSet) Len() int { + if set == nil { + return 0 + } return len(set.set) - set.nalias } @@ -81,11 +84,17 @@ func (set *TaxonSet) Len() int { // Behavior: // - If a taxon with the same identifier already exists and is different from the // new taxon, the alias count is decremented. -func (set *TaxonSet) Insert(node *TaxNode) { +func (set *TaxonSet) Insert(node *TaxNode) *TaxonSet { + if set == nil { + log.Panic("Cannot insert node into nil TaxonSet") + } + if old := set.set[node.id]; old != nil && old.id != node.id { set.nalias-- } set.set[node.id] = node + + return set } // InsertTaxon adds a Taxon to the TaxonSet. It verifies that the Taxon belongs @@ -94,7 +103,11 @@ func (set *TaxonSet) Insert(node *TaxNode) { // // Parameters: // - taxon: A pointer to the Taxon instance to be added to the TaxonSet. 
-func (set *TaxonSet) InsertTaxon(taxon *Taxon) { +func (set *TaxonSet) InsertTaxon(taxon *Taxon) *TaxonSet { + if set == nil { + set = taxon.Taxonomy.NewTaxonSet() + } + if set.taxonomy != taxon.Taxonomy { log.Fatalf( "Cannot insert taxon %s into taxon set belonging to %s taxonomy", @@ -102,6 +115,8 @@ func (set *TaxonSet) InsertTaxon(taxon *Taxon) { set.taxonomy.name, ) } + + return set.Insert(taxon.Node) } // Taxonomy returns a pointer to the Taxonomy instance that this TaxonSet belongs to. @@ -109,6 +124,10 @@ func (set *TaxonSet) InsertTaxon(taxon *Taxon) { // Returns: // - A pointer to the Taxonomy instance that this TaxonSet belongs to. func (set *TaxonSet) Taxonomy() *Taxonomy { + if set == nil { + return nil + } + return set.taxonomy } @@ -124,6 +143,10 @@ func (set *TaxonSet) Taxonomy() *Taxonomy { // - If the original taxon corresponding to the alias is not part of the taxon set, // the method will log a fatal error and terminate the program. func (set *TaxonSet) Alias(id *string, taxon *Taxon) { + if set == nil { + log.Panic("Cannot add alias to a nil TaxonSet") + } + original := set.Get(taxon.Node.id) if original == nil { log.Fatalf("Original taxon %v is not part of taxon set", id) diff --git a/pkg/obitax/taxonslice.go b/pkg/obitax/taxonslice.go index 89ca161..58b482d 100644 --- a/pkg/obitax/taxonslice.go +++ b/pkg/obitax/taxonslice.go @@ -40,7 +40,7 @@ type TaxonSlice struct { func (taxonomy *Taxonomy) NewTaxonSlice(size, capacity int) *TaxonSlice { return &TaxonSlice{ slice: make([]*TaxNode, size, capacity), - taxonomy: taxonomy, + taxonomy: taxonomy.OrDefault(true), } } diff --git a/pkg/obitools/obifind/iterator.go b/pkg/obitools/obifind/iterator.go index 6f261fc..c8683d4 100644 --- a/pkg/obitools/obifind/iterator.go +++ b/pkg/obitools/obifind/iterator.go @@ -1,7 +1,6 @@ package obifind import ( - "bytes" "fmt" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" @@ -22,11 +21,7 @@ func IFilterRankRestriction() func(*obitax.ITaxon) 
*obitax.ITaxon { } func ITaxonNameMatcher() (func(string) *obitax.ITaxon, error) { - taxonomy, err := CLILoadSelectedTaxonomy() - - if err != nil { - return nil, err - } + taxonomy := obitax.DefaultTaxonomy() fun := func(name string) *obitax.ITaxon { return taxonomy.IFilterOnName(name, __fixed_pattern__) @@ -53,27 +48,25 @@ func ITaxonRestrictions() (func(*obitax.ITaxon) *obitax.ITaxon, error) { } func TaxonAsString(taxon *obitax.Taxon, pattern string) string { - text := taxon.ScientificName() + // var text string + // if __with_path__ { + // var bf bytes.Buffer + // path := taxon.Path() - if __with_path__ { - var bf bytes.Buffer - path := taxon.Path() + // bf.WriteString(path.Get(path.Len() - 1).ScientificName()) - bf.WriteString(path.Get(path.Len() - 1).ScientificName()) + // for i := path.Len() - 2; i >= 0; i-- { + // fmt.Fprintf(&bf, ":%s", path.Get(i).ScientificName()) + // } - for i := path.Len() - 2; i >= 0; i-- { - fmt.Fprintf(&bf, ":%s", path.Get(i).ScientificName()) - } + // text = bf.String() + // } - text = bf.String() - } - - return fmt.Sprintf("%-20s | %10s | %10s | %-20s | %s", + return fmt.Sprintf("%-20s | %10s | %10s | %-20s", pattern, taxon.String(), taxon.Parent().String(), - taxon.Rank(), - text) + taxon.Rank()) } func TaxonWriter(itaxa *obitax.ITaxon, pattern string) { diff --git a/pkg/obitools/obifind/options.go b/pkg/obitools/obifind/options.go index 2533b23..0dddbdc 100644 --- a/pkg/obitools/obifind/options.go +++ b/pkg/obitools/obifind/options.go @@ -1,17 +1,14 @@ package obifind import ( - "errors" + "fmt" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats/ncbitaxdump" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "github.com/DavidGamba/go-getoptions" ) -var __taxdump__ = "" -var __alternative_name__ = false var __rank_list__ = false -var __selected_taxonomy__ = (*obitax.Taxonomy)(nil) var __taxonomical_restriction__ = make([]string, 
0) var __fixed_pattern__ = false @@ -20,24 +17,6 @@ var __taxid_path__ = "NA" var __taxid_sons__ = "NA" var __restrict_rank__ = "" -func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bool) { - if required { - options.StringVar(&__taxdump__, "taxdump", "", - options.Alias("t"), - options.Required(), - options.Description("Points to the directory containing the NCBI Taxonomy database dump.")) - } else { - options.StringVar(&__taxdump__, "taxdump", "", - options.Alias("t"), - options.Description("Points to the directory containing the NCBI Taxonomy database dump.")) - } - if alternatiive { - options.BoolVar(&__alternative_name__, "alternative-names", false, - options.Alias("a"), - options.Description("Enable the search on all alternative names and not only scientific names.")) - } -} - func FilterTaxonomyOptionSet(options *getoptions.GetOpt) { options.BoolVar(&__rank_list__, "rank-list", false, options.Alias("l"), @@ -48,31 +27,23 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) { options.Description("Restrict output to some subclades.")) } -func CLISelectedNCBITaxDump() string { - return __taxdump__ -} - -func CLIHasSelectedTaxonomy() bool { - return __taxdump__ != "" -} - -func CLIAreAlternativeNamesSelected() bool { - return __alternative_name__ -} - func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) { - taxonomy, err := CLILoadSelectedTaxonomy() + taxonomy := obitax.DefaultTaxonomy() - if err != nil { - return nil, err + if taxonomy == nil { + return nil, fmt.Errorf("no taxonomy loaded") } ts := taxonomy.NewTaxonSet() for _, taxid := range __taxonomical_restriction__ { tx := taxonomy.Taxon(taxid) - if err != nil { - return nil, err + if tx == nil { + return nil, fmt.Errorf( + "cannot find taxon %s in taxonomy %s", + taxid, + taxonomy.Name(), + ) } ts.InsertTaxon(tx) @@ -81,24 +52,8 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) { return ts, nil } -func CLILoadSelectedTaxonomy() (*obitax.Taxonomy, 
error) { - if CLISelectedNCBITaxDump() != "" { - if __selected_taxonomy__ == nil { - var err error - __selected_taxonomy__, err = ncbitaxdump.LoadNCBITaxDump(CLISelectedNCBITaxDump(), - !CLIAreAlternativeNamesSelected()) - if err != nil { - return nil, err - } - } - return __selected_taxonomy__, nil - } - - return nil, errors.New("no NCBI taxdump selected using option -t|--taxdump") -} - func OptionSet(options *getoptions.GetOpt) { - LoadTaxonomyOptionSet(options, true, true) + obioptions.LoadTaxonomyOptionSet(options, true, true) FilterTaxonomyOptionSet(options) options.BoolVar(&__fixed_pattern__, "fixed", false, options.Alias("F"),