mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Add management of the taxonomy alias policy
This commit is contained in:
@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"os"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
@ -52,13 +53,19 @@ func main() {
|
||||
|
||||
case obitaxonomy.CLIRequestsPathForTaxid() != "NA":
|
||||
|
||||
taxon, err := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
||||
taxon, isAlias, err := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot identify the requested taxon: %s (%v)",
|
||||
obitaxonomy.CLIRequestsPathForTaxid(), err)
|
||||
}
|
||||
|
||||
if isAlias {
|
||||
if obidefault.FailOnTaxonomy() {
|
||||
log.Fatalf("Taxon %s is an alias for %s", taxon.String(), taxon.Parent().String())
|
||||
}
|
||||
}
|
||||
|
||||
s := taxon.Path()
|
||||
|
||||
if s == nil {
|
||||
|
@ -2,6 +2,8 @@ package obidefault
|
||||
|
||||
// __taxonomy__ is the value returned by SelectedTaxonomy.
var __taxonomy__ = ""
|
||||
// __alternative_name__ is the flag written by SetAlternativeNamesSelected.
var __alternative_name__ = false
|
||||
// __fail_on_taxonomy__ backs SetFailOnTaxonomy, FailOnTaxonomy and
// FailOnTaxonomyPtr. It defaults to false.
var __fail_on_taxonomy__ = false
|
||||
// __update_taxid__ backs SetUpdateTaxid, UpdateTaxid and UpdateTaxidPtr.
// It defaults to false.
var __update_taxid__ = false
|
||||
|
||||
func SelectedTaxonomy() string {
|
||||
return __taxonomy__
|
||||
@ -30,3 +32,27 @@ func SetSelectedTaxonomy(taxonomy string) {
|
||||
// SetAlternativeNamesSelected stores alt in the package-level
// __alternative_name__ flag.
func SetAlternativeNamesSelected(alt bool) {
|
||||
__alternative_name__ = alt
|
||||
}
|
||||
|
||||
// SetFailOnTaxonomy stores fail in the package-level __fail_on_taxonomy__
// flag, which FailOnTaxonomy reports back to callers.
func SetFailOnTaxonomy(fail bool) {
|
||||
__fail_on_taxonomy__ = fail
|
||||
}
|
||||
|
||||
// SetUpdateTaxid stores update in the package-level __update_taxid__ flag,
// which UpdateTaxid reports back to callers.
func SetUpdateTaxid(update bool) {
|
||||
__update_taxid__ = update
|
||||
}
|
||||
|
||||
// FailOnTaxonomyPtr returns the address of the package-level
// __fail_on_taxonomy__ flag, so a caller can write the flag through the
// pointer instead of calling SetFailOnTaxonomy.
func FailOnTaxonomyPtr() *bool {
|
||||
return &__fail_on_taxonomy__
|
||||
}
|
||||
|
||||
// UpdateTaxidPtr returns the address of the package-level __update_taxid__
// flag, so a caller can write the flag through the pointer instead of
// calling SetUpdateTaxid.
func UpdateTaxidPtr() *bool {
|
||||
return &__update_taxid__
|
||||
}
|
||||
|
||||
// FailOnTaxonomy returns the current value of the package-level
// __fail_on_taxonomy__ flag.
func FailOnTaxonomy() bool {
|
||||
return __fail_on_taxonomy__
|
||||
}
|
||||
|
||||
// UpdateTaxid returns the current value of the package-level
// __update_taxid__ flag.
func UpdateTaxid() bool {
|
||||
return __update_taxid__
|
||||
}
|
||||
|
@ -9,7 +9,6 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
"github.com/buger/jsonparser"
|
||||
)
|
||||
@ -201,8 +200,6 @@ func _parse_json_array_interface(str []byte, sequence *obiseq.BioSequence) ([]in
|
||||
}
|
||||
|
||||
func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
||||
taxonomy := obitax.DefaultTaxonomy()
|
||||
|
||||
annotations := sequence.Annotations()
|
||||
start := -1
|
||||
stop := -1
|
||||
@ -291,13 +288,8 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
||||
|
||||
case skey == "taxid":
|
||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
||||
taxid := obiutils.UnsafeString(value)
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
if err == nil {
|
||||
sequence.SetTaxon(taxon)
|
||||
} else {
|
||||
sequence.SetTaxid(string(value))
|
||||
}
|
||||
taxid := string(value)
|
||||
sequence.SetTaxid(taxid)
|
||||
} else {
|
||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
||||
}
|
||||
@ -306,15 +298,7 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
||||
rank, _ := obiutils.SplitInTwo(skey, '_')
|
||||
|
||||
taxid := obiutils.UnsafeString(value)
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err == nil {
|
||||
taxid = taxon.String()
|
||||
} else {
|
||||
taxid = string(value)
|
||||
}
|
||||
|
||||
taxid := string(value)
|
||||
sequence.SetTaxid(taxid, rank)
|
||||
} else {
|
||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
||||
|
@ -1,6 +1,7 @@
|
||||
package obilua
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
lua "github.com/yuin/gopher-lua"
|
||||
@ -98,13 +99,18 @@ func taxonomyGetCode(luaState *lua.LState) int {
|
||||
// taxonomyGetTaxon looks up the taxon whose taxid is given as the second Lua
// argument in the taxonomy obtained from checkTaxonomy, and pushes its Lua
// representation (built by taxon2Lua) on the Lua stack.
//
// It raises a Lua error and returns 0 when the lookup fails, or when the
// taxid is reported as an alias while obidefault.FailOnTaxonomy() is true.
// On success it pushes one value and returns 1.
func taxonomyGetTaxon(luaState *lua.LState) int {
|
||||
taxo := checkTaxonomy(luaState)
|
||||
taxid := luaState.CheckString(2)
|
||||
taxon, err := taxo.Taxon(taxid)
|
||||
taxon, isAlias, err := taxo.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
luaState.RaiseError("%s : Error on taxon taxon: %v", taxid, err)
|
||||
return 0
|
||||
}
|
||||
|
||||
// An alias is only an error when the fail-on-taxonomy flag is set;
// otherwise the taxon returned by the lookup is pushed as-is.
if isAlias && obidefault.FailOnTaxonomy() {
|
||||
luaState.RaiseError("%s : Taxon is an alias of %s", taxid, taxon.String())
|
||||
return 0
|
||||
}
|
||||
|
||||
luaState.Push(taxon2Lua(luaState, taxon))
|
||||
return 1
|
||||
}
|
||||
|
@ -177,6 +177,15 @@ func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bo
|
||||
options.Alias("a"),
|
||||
options.Description("Enable the search on all alternative names and not only scientific names."))
|
||||
}
|
||||
|
||||
options.BoolVar(obidefault.FailOnTaxonomyPtr(), "fail-on-taxonomy",
|
||||
obidefault.FailOnTaxonomy(),
|
||||
options.Description("Make obitools failing on error if a used taxid is not a currently valid one"),
|
||||
)
|
||||
|
||||
options.BoolVar(obidefault.UpdateTaxidPtr(), "update-taxid", obidefault.UpdateTaxid(),
|
||||
options.Description("Make obitools automatically updating the taxid that are declared merged to a newest one."),
|
||||
)
|
||||
}
|
||||
|
||||
// CLIIsDebugMode returns whether the CLI is in debug mode.
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// committed
|
||||
|
||||
var _Commit = "f2e81ad"
|
||||
var _Commit = "e2563cd"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"math"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
@ -15,7 +16,7 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
|
||||
taxonomy = taxonomy.OrDefault(true)
|
||||
|
||||
for taxid, v := range taxids {
|
||||
t, err := taxonomy.Taxon(taxid)
|
||||
t, isAlias, err := taxonomy.Taxon(taxid)
|
||||
if err != nil {
|
||||
log.Fatalf(
|
||||
"On sequence %s taxid %s is not defined in taxonomy: %s (%v)",
|
||||
@ -25,6 +26,11 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
|
||||
err,
|
||||
)
|
||||
}
|
||||
|
||||
if isAlias && obidefault.FailOnTaxonomy() {
|
||||
log.Fatalf("On sequence %s taxid %s is an alias on %s",
|
||||
sequence.Id(), taxid, t.String())
|
||||
}
|
||||
taxons[t.Node] = v
|
||||
}
|
||||
return taxons
|
||||
|
@ -5,6 +5,7 @@ import (
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
@ -16,7 +17,7 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||
return nil
|
||||
}
|
||||
|
||||
taxon, _ := taxonomy.Taxon(taxid)
|
||||
taxon, _, _ := taxonomy.Taxon(taxid)
|
||||
|
||||
return taxon
|
||||
}
|
||||
@ -28,6 +29,8 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||
// taxid - the taxid to set.
|
||||
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||
var err error
|
||||
var isAlias bool
|
||||
|
||||
if taxid == "" {
|
||||
taxid = "NA"
|
||||
} else {
|
||||
@ -35,16 +38,39 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||
taxon := (*obitax.Taxon)(nil)
|
||||
|
||||
if taxonomy != nil {
|
||||
taxon, err = taxonomy.Taxon(taxid)
|
||||
taxon, isAlias, err = taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("%s: Taxid: %v is unknown from taxonomy (%v)",
|
||||
s.Id(), taxid, err)
|
||||
if obidefault.FailOnTaxonomy() {
|
||||
log.Fatalf("%s: Taxid: %v is unknown from taxonomy (%v)",
|
||||
s.Id(), taxid, err)
|
||||
} else {
|
||||
log.Warnf("%s: Taxid: %v is unknown from taxonomy (%v)",
|
||||
s.Id(), taxid, err)
|
||||
}
|
||||
}
|
||||
|
||||
if isAlias {
|
||||
if obidefault.FailOnTaxonomy() {
|
||||
log.Fatalf("%s: Taxid: %v is an alias from taxonomy (%v) to %s",
|
||||
s.Id(), taxid, taxonomy.Name(), taxon.String())
|
||||
} else {
|
||||
if obidefault.UpdateTaxid() {
|
||||
log.Warnf("%s: Taxid: %v is updated to %s",
|
||||
s.Id(), taxid, taxon.String())
|
||||
taxid = taxon.String()
|
||||
} else {
|
||||
log.Warnf("%s: Taxid %v has to be updated to %s",
|
||||
s.Id(), taxid, taxon.String())
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
if taxon != nil {
|
||||
taxid = taxon.String()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if taxon != nil {
|
||||
taxid = taxon.String()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -63,7 +63,7 @@ func IsSubCladeOfSlot(taxonomy *obitax.Taxonomy, key string) SequencePredicate {
|
||||
val, ok := sequence.GetStringAttribute(key)
|
||||
|
||||
if ok {
|
||||
parent, err := taxonomy.Taxon(val)
|
||||
parent, _, err := taxonomy.Taxon(val)
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("%s: %s is unkown from the taxonomy (%v)", sequence.Id(), val, err)
|
||||
|
@ -216,7 +216,7 @@ func (taxon *Taxon) ISubTaxonomy() *ITaxon {
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) ISubTaxonomy(taxid string) *ITaxon {
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
taxon, _, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
return nil
|
||||
|
@ -91,7 +91,7 @@ func loadNameTable(reader io.Reader, taxonomy *Taxonomy, onlysn bool) int {
|
||||
|
||||
if !onlysn || classname == "scientific name" {
|
||||
n++
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
taxon, _, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%s: is unknown from the taxonomy", taxid)
|
||||
@ -202,7 +202,7 @@ func LoadNCBITaxDump(directory string, onlysn bool) (*Taxonomy, error) {
|
||||
n = loadMergedTable(buffered, taxonomy)
|
||||
log.Printf("%d merged taxa read\n", n)
|
||||
|
||||
root, err := taxonomy.Taxon("1")
|
||||
root, _, err := taxonomy.Taxon("1")
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("cannot find the root taxon (1) in the NCBI tax dump")
|
||||
|
@ -134,7 +134,7 @@ func LoadNCBITarTaxDump(path string, onlysn bool) (*Taxonomy, error) {
|
||||
n = loadMergedTable(buffered, taxonomy)
|
||||
log.Printf("%d merged taxa read\n", n)
|
||||
|
||||
root, err := taxonomy.Taxon("1")
|
||||
root, _, err := taxonomy.Taxon("1")
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("cannot find the root taxon (1) in the NCBI tax dump")
|
||||
|
@ -129,28 +129,30 @@ func (taxonomy *Taxonomy) TaxidString(id string) (string, error) {
|
||||
// Returns:
|
||||
// - A pointer to the Taxon instance associated with the provided taxid.
|
||||
// - If the taxid is unknown, the method returns a nil Taxon and a non-nil error.
|
||||
func (taxonomy *Taxonomy) Taxon(taxid string) (*Taxon, error) {
|
||||
func (taxonomy *Taxonomy) Taxon(taxid string) (*Taxon, bool, error) {
|
||||
taxonomy = taxonomy.OrDefault(false)
|
||||
if taxonomy == nil {
|
||||
return nil, errors.New("cannot extract taxon from nil taxonomy")
|
||||
return nil, false, errors.New("cannot extract taxon from nil taxonomy")
|
||||
}
|
||||
|
||||
id, err := taxonomy.Id(taxid)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Taxid %s: %v", taxid, err)
|
||||
return nil, false, fmt.Errorf("Taxid %s: %v", taxid, err)
|
||||
}
|
||||
|
||||
taxon := taxonomy.nodes.Get(id)
|
||||
isAlias := taxon.Node.id != id
|
||||
|
||||
if taxon == nil {
|
||||
return nil,
|
||||
false,
|
||||
fmt.Errorf("Taxid %s is not part of the taxonomy %s",
|
||||
taxid,
|
||||
taxonomy.name)
|
||||
}
|
||||
|
||||
return taxon, nil
|
||||
return taxon, isAlias, nil
|
||||
}
|
||||
|
||||
// AsTaxonSet returns the set of taxon nodes contained within the Taxonomy.
|
||||
@ -385,7 +387,7 @@ func (taxonomy *Taxonomy) InsertPathString(path []string) (*Taxonomy, error) {
|
||||
}
|
||||
|
||||
var current *Taxon
|
||||
current, err = taxonomy.Taxon(taxid)
|
||||
current, _, err = taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -396,7 +398,7 @@ func (taxonomy *Taxonomy) InsertPathString(path []string) (*Taxonomy, error) {
|
||||
}
|
||||
|
||||
for _, id := range path[1:] {
|
||||
taxon, err := taxonomy.Taxon(id)
|
||||
taxon, _, err := taxonomy.Taxon(id)
|
||||
if err == nil {
|
||||
if !current.SameAs(taxon.Parent()) {
|
||||
return nil, errors.New("path is not consistent with the taxonomy, parent mismatch")
|
||||
|
@ -248,14 +248,14 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
|
||||
if len(_BelongTaxa) > 0 {
|
||||
taxonomy := obitax.DefaultTaxonomy()
|
||||
|
||||
taxon, err := taxonomy.Taxon(_BelongTaxa[0])
|
||||
taxon, _, err := taxonomy.Taxon(_BelongTaxa[0])
|
||||
if err != nil {
|
||||
p = obiseq.IsSubCladeOfSlot(taxonomy, _BelongTaxa[0])
|
||||
} else {
|
||||
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||
}
|
||||
for _, staxid := range _BelongTaxa[1:] {
|
||||
taxon, err := taxonomy.Taxon(staxid)
|
||||
taxon, _, err := taxonomy.Taxon(staxid)
|
||||
if err != nil {
|
||||
p2 = obiseq.IsSubCladeOfSlot(taxonomy, staxid)
|
||||
} else {
|
||||
@ -278,7 +278,7 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
|
||||
if len(_NotBelongTaxa) > 0 {
|
||||
taxonomy := obitax.DefaultTaxonomy()
|
||||
|
||||
taxon, err := taxonomy.Taxon(_NotBelongTaxa[0])
|
||||
taxon, _, err := taxonomy.Taxon(_NotBelongTaxa[0])
|
||||
if err != nil {
|
||||
p = obiseq.IsSubCladeOfSlot(taxonomy, _NotBelongTaxa[0])
|
||||
} else {
|
||||
@ -286,7 +286,7 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
|
||||
}
|
||||
|
||||
for _, taxid := range _NotBelongTaxa[1:] {
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
taxon, _, err := taxonomy.Taxon(taxid)
|
||||
if err != nil {
|
||||
p2 = obiseq.IsSubCladeOfSlot(taxonomy, taxid)
|
||||
} else {
|
||||
|
@ -43,7 +43,7 @@ func MatchDistanceIndex(taxonomy *obitax.Taxonomy, distance int, distanceIdx map
|
||||
taxon = taxonomy.Root()
|
||||
} else {
|
||||
var err error
|
||||
taxon, err = taxonomy.Taxon(distanceIdx[keys[i]])
|
||||
taxon, _, err = taxonomy.Taxon(distanceIdx[keys[i]])
|
||||
if err != nil {
|
||||
log.Panicf("Cannot identify taxon %s in %s (%v)", distanceIdx[keys[i]], taxonomy.Name(), err)
|
||||
}
|
||||
@ -197,7 +197,7 @@ func Identify(sequence *obiseq.BioSequence,
|
||||
log.Panic("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
|
||||
}
|
||||
|
||||
match_taxon, err := taxo.Taxon(identification)
|
||||
match_taxon, _, err := taxo.Taxon(identification)
|
||||
|
||||
if err == nil {
|
||||
taxon, _ = taxon.LCA(match_taxon)
|
||||
|
@ -91,7 +91,7 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
||||
|
||||
ts := taxonomy.NewTaxonSet()
|
||||
for _, taxid := range __taxonomical_restriction__ {
|
||||
tx, err := taxonomy.Taxon(taxid)
|
||||
tx, _, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(
|
||||
|
Reference in New Issue
Block a user