mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Adds possibility to extract a taxonomy from taxonomic path included in sequence files
This commit is contained in:
@ -5,6 +5,7 @@ import (
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
|
||||
@ -19,7 +20,6 @@ func main() {
|
||||
var iterator *obitax.ITaxon
|
||||
|
||||
switch {
|
||||
|
||||
case obitaxonomy.CLIDownloadNCBI():
|
||||
err := obitaxonomy.CLIDownloadNCBITaxdump()
|
||||
if err != nil {
|
||||
@ -29,16 +29,34 @@ func main() {
|
||||
|
||||
os.Exit(0)
|
||||
|
||||
case obitaxonomy.CLIExtractTaxonomy():
|
||||
iter, err := obiconvert.CLIReadBioSequences(args...)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||
}
|
||||
|
||||
taxonomy, err := iter.ExtractTaxonomy()
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||
}
|
||||
|
||||
taxonomy.SetAsDefault()
|
||||
|
||||
log.Infof("Number of extracted taxa: %d", taxonomy.Len())
|
||||
iterator = taxonomy.AsTaxonSet().Sort().Iterator()
|
||||
|
||||
case obitaxonomy.CLIDumpSubtaxonomy():
|
||||
iterator = obitaxonomy.CLISubTaxonomyIterator()
|
||||
|
||||
case obitaxonomy.CLIRequestsPathForTaxid() != "NA":
|
||||
|
||||
taxon := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
||||
taxon, err := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
||||
|
||||
if taxon == nil {
|
||||
log.Fatalf("Cannot identify the requested taxon: %s",
|
||||
obitaxonomy.CLIRequestsPathForTaxid())
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot identify the requested taxon: %s (%v)",
|
||||
obitaxonomy.CLIRequestsPathForTaxid(), err)
|
||||
}
|
||||
|
||||
s := taxon.Path()
|
||||
|
@ -292,8 +292,8 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
||||
case skey == "taxid":
|
||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
||||
taxid := obiutils.UnsafeString(value)
|
||||
taxon := taxonomy.Taxon(taxid)
|
||||
if taxon != nil {
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
if err == nil {
|
||||
sequence.SetTaxon(taxon)
|
||||
} else {
|
||||
sequence.SetTaxid(string(value))
|
||||
@ -307,9 +307,9 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
||||
rank, _ := obiutils.SplitInTwo(skey, '_')
|
||||
|
||||
taxid := obiutils.UnsafeString(value)
|
||||
taxon := taxonomy.Taxon(taxid)
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if taxon != nil {
|
||||
if err == nil {
|
||||
taxid = taxon.String()
|
||||
} else {
|
||||
taxid = string(value)
|
||||
|
18
pkg/obiiter/extract_taxonomy.go
Normal file
18
pkg/obiiter/extract_taxonomy.go
Normal file
@ -0,0 +1,18 @@
|
||||
package obiiter
|
||||
|
||||
import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
|
||||
func (iterator *IBioSequence) ExtractTaxonomy() (taxonomy *obitax.Taxonomy, err error) {
|
||||
|
||||
for iterator.Next() {
|
||||
slice := iterator.Get().Slice()
|
||||
|
||||
taxonomy, err = slice.ExtractTaxonomy(taxonomy)
|
||||
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
@ -8,7 +8,7 @@ import (
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// committed
|
||||
|
||||
var _Commit = "3379545"
|
||||
var _Commit = "2452aef"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
@ -1,6 +1,7 @@
|
||||
package obiseq
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/exp/slices"
|
||||
@ -179,3 +180,18 @@ func (s *BioSequenceSlice) SortOnLength(reverse bool) {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy) (*obitax.Taxonomy, error) {
|
||||
var err error
|
||||
|
||||
for _, s := range *s {
|
||||
taxonomy, err = taxonomy.InsertPathString(s.Path())
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return taxonomy, nil
|
||||
}
|
||||
|
@ -15,13 +15,15 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
|
||||
taxonomy = taxonomy.OrDefault(true)
|
||||
|
||||
for taxid, v := range taxids {
|
||||
t := taxonomy.Taxon(taxid)
|
||||
if t == nil {
|
||||
t, err := taxonomy.Taxon(taxid)
|
||||
if err != nil {
|
||||
log.Fatalf(
|
||||
"On sequence %s taxid %s is not defined in taxonomy: %s",
|
||||
"On sequence %s taxid %s is not defined in taxonomy: %s (%v)",
|
||||
sequence.Id(),
|
||||
taxid,
|
||||
taxonomy.Name())
|
||||
taxonomy.Name(),
|
||||
err,
|
||||
)
|
||||
}
|
||||
taxons[t.Node] = v
|
||||
}
|
||||
|
@ -6,6 +6,7 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||
@ -14,7 +15,10 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||
if taxid == "NA" {
|
||||
return nil
|
||||
}
|
||||
return taxonomy.Taxon(taxid)
|
||||
|
||||
taxon, _ := taxonomy.Taxon(taxid)
|
||||
|
||||
return taxon
|
||||
}
|
||||
|
||||
// SetTaxid sets the taxid for the BioSequence.
|
||||
@ -23,6 +27,7 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||
//
|
||||
// taxid - the taxid to set.
|
||||
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||
var err error
|
||||
if taxid == "" {
|
||||
taxid = "NA"
|
||||
} else {
|
||||
@ -30,7 +35,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||
taxon := (*obitax.Taxon)(nil)
|
||||
|
||||
if taxonomy != nil {
|
||||
taxon = taxonomy.Taxon(taxid)
|
||||
taxon, err = taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("%s: Taxid: %v is unknown from taxonomy (%v)",
|
||||
s.Id(), taxid, err)
|
||||
}
|
||||
}
|
||||
|
||||
if taxon != nil {
|
||||
@ -135,14 +145,35 @@ func (sequence *BioSequence) SetFamily(taxonomy *obitax.Taxonomy) *obitax.Taxon
|
||||
return sequence.SetTaxonAtRank(taxonomy, "family")
|
||||
}
|
||||
|
||||
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) string {
|
||||
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
|
||||
taxon := sequence.Taxon(taxonomy)
|
||||
path := taxon.Path()
|
||||
spath := make([]string, path.Len())
|
||||
lpath := path.Len() - 1
|
||||
|
||||
tpath := path.String()
|
||||
sequence.SetAttribute("taxonomic_path", tpath)
|
||||
for i := lpath; i >= 0; i-- {
|
||||
spath[lpath-i] = path.Get(i).String(taxonomy.Code())
|
||||
}
|
||||
|
||||
return tpath
|
||||
sequence.SetAttribute("taxonomic_path", spath)
|
||||
|
||||
return spath
|
||||
}
|
||||
|
||||
func (sequence *BioSequence) Path() []string {
|
||||
path, ok := sequence.GetAttribute("taxonomic_path")
|
||||
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
slice, err := obiutils.InterfaceToStringSlice(path)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%s: taxonomic_path has the wrong type (%v)", sequence.Id(), err)
|
||||
}
|
||||
|
||||
return slice
|
||||
}
|
||||
|
||||
func (sequence *BioSequence) SetScientificName(taxonomy *obitax.Taxonomy) string {
|
||||
|
@ -63,7 +63,12 @@ func IsSubCladeOfSlot(taxonomy *obitax.Taxonomy, key string) SequencePredicate {
|
||||
val, ok := sequence.GetStringAttribute(key)
|
||||
|
||||
if ok {
|
||||
parent := taxonomy.Taxon(val)
|
||||
parent, err := taxonomy.Taxon(val)
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("%s: %s is unkown from the taxonomy (%v)", sequence.Id(), val, err)
|
||||
}
|
||||
|
||||
taxon := sequence.Taxon(taxonomy)
|
||||
return parent != nil && taxon != nil && taxon.IsSubCladeOf(parent)
|
||||
}
|
||||
|
@ -218,9 +218,9 @@ func (taxon *Taxon) ISubTaxonomy() *ITaxon {
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) ISubTaxonomy(taxid string) *ITaxon {
|
||||
taxon := taxonomy.Taxon(taxid)
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if taxon == nil {
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -91,7 +91,13 @@ func loadNameTable(reader io.Reader, taxonomy *Taxonomy, onlysn bool) int {
|
||||
|
||||
if !onlysn || classname == "scientific name" {
|
||||
n++
|
||||
taxonomy.Taxon(taxid).SetName(name, classname)
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%s: is unknown from the taxonomy", taxid)
|
||||
}
|
||||
|
||||
taxon.SetName(name, classname)
|
||||
}
|
||||
}
|
||||
|
||||
@ -196,7 +202,11 @@ func LoadNCBITaxDump(directory string, onlysn bool) (*Taxonomy, error) {
|
||||
n = loadMergedTable(buffered, taxonomy)
|
||||
log.Printf("%d merged taxa read\n", n)
|
||||
|
||||
root := taxonomy.Taxon("1")
|
||||
root, err := taxonomy.Taxon("1")
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("cannot find the root taxon (1) in the NCBI tax dump")
|
||||
}
|
||||
taxonomy.SetRoot(root)
|
||||
|
||||
return taxonomy, nil
|
||||
|
@ -134,7 +134,12 @@ func LoadNCBITarTaxDump(path string, onlysn bool) (*Taxonomy, error) {
|
||||
n = loadMergedTable(buffered, taxonomy)
|
||||
log.Printf("%d merged taxa read\n", n)
|
||||
|
||||
root := taxonomy.Taxon("1")
|
||||
root, err := taxonomy.Taxon("1")
|
||||
|
||||
if err != nil {
|
||||
log.Fatal("cannot find the root taxon (1) in the NCBI tax dump")
|
||||
}
|
||||
|
||||
taxonomy.SetRoot(root)
|
||||
|
||||
return taxonomy, nil
|
||||
|
64
pkg/obitax/string_parser.go
Normal file
64
pkg/obitax/string_parser.go
Normal file
@ -0,0 +1,64 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ParseTaxonString splits a taxon description of the form
// "code:taxid [scientific name]@rank" into its four components.
//
// Whitespace surrounding the whole string and each component is ignored.
// The "@rank" suffix is optional; when no '@' is present the rank defaults to
// "no rank". The scientific name spans from the first '[' to the last ']',
// so it may itself contain brackets. Anything between the last ']' and the
// '@' is ignored.
//
// Parameters:
//   - taxonStr: the string to parse.
//
// Returns:
//   - code: the taxonomy code (text before ':').
//   - taxid: the taxon identifier (text between ':' and '[').
//   - scientificName: the text enclosed in the brackets.
//   - rank: the text after '@', or "no rank" when there is no '@'.
//   - err: non-nil when the string has more than one '@', lacks a bracketed
//     name, does not hold exactly one ':' before the name, or when code,
//     taxid or scientific name is empty. All string results are then empty.
func ParseTaxonString(taxonStr string) (code, taxid, scientificName, rank string, err error) {
	// Every failure path returns four empty strings and a descriptive error.
	fail := func(msg string) (string, string, string, string, error) {
		return "", "", "", "", errors.New(msg)
	}

	trimmed := strings.TrimSpace(taxonStr)

	if strings.Count(trimmed, "@") > 1 {
		return fail("invalid format: multiple '@' characters found")
	}

	// At most one '@' remains: the text after it, when present, is the rank.
	body, rankPart, hasRank := strings.Cut(trimmed, "@")
	body = strings.TrimSpace(body)

	parsedRank := "no rank"
	if hasRank {
		parsedRank = strings.TrimSpace(rankPart)
	}

	// First '[' and last ']' delimit the name, so nested brackets are kept.
	opening := strings.Index(body, "[")
	closing := strings.LastIndex(body, "]")

	if opening < 0 || closing < 0 || closing < opening {
		return fail("invalid format: scientific name must be enclosed in square brackets")
	}

	parsedName := strings.TrimSpace(body[opening+1 : closing])

	// The identifier part must hold exactly one ':' separating code and taxid.
	ident := strings.TrimSpace(body[:opening])
	if strings.Count(ident, ":") != 1 {
		return fail("invalid format: missing taxonomy code separator ':'")
	}

	rawCode, rawTaxid, _ := strings.Cut(ident, ":")
	parsedCode := strings.TrimSpace(rawCode)
	parsedTaxid := strings.TrimSpace(rawTaxid)

	if parsedCode == "" || parsedTaxid == "" || parsedName == "" {
		return fail("invalid format: code, taxid and scientific name cannot be empty")
	}

	return parsedCode, parsedTaxid, parsedName, parsedRank, nil
}
|
@ -1,6 +1,7 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"iter"
|
||||
"regexp"
|
||||
|
||||
@ -379,3 +380,29 @@ func (taxon *Taxon) SameAs(other *Taxon) bool {
|
||||
|
||||
return taxon.Taxonomy == other.Taxonomy && taxon.Node.id == other.Node.id
|
||||
}
|
||||
|
||||
func (taxon *Taxon) AddChild(child string, replace bool) (*Taxon, error) {
|
||||
if taxon == nil {
|
||||
return nil, errors.New("nil taxon")
|
||||
}
|
||||
|
||||
code, taxid, scientific_name, rank, err := ParseTaxonString(child)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if taxon.Taxonomy.code != code {
|
||||
return nil, errors.New("taxonomy code mismatch")
|
||||
}
|
||||
|
||||
newTaxon, err := taxon.Taxonomy.AddTaxon(taxid, *taxon.Node.id, rank, false, replace)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
newTaxon.SetName(scientific_name, "scientific name")
|
||||
|
||||
return newTaxon, nil
|
||||
}
|
||||
|
@ -12,7 +12,6 @@ import (
|
||||
"fmt"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Taxonomy represents a hierarchical classification of taxa.
|
||||
@ -130,27 +129,28 @@ func (taxonomy *Taxonomy) TaxidString(id string) (string, error) {
|
||||
// Returns:
|
||||
// - A pointer to the Taxon instance associated with the provided taxid.
|
||||
// - An error if no taxonomy is available, or if the taxid is unknown or not part of the taxonomy.
|
||||
func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
|
||||
func (taxonomy *Taxonomy) Taxon(taxid string) (*Taxon, error) {
|
||||
taxonomy = taxonomy.OrDefault(false)
|
||||
if taxonomy == nil {
|
||||
return nil
|
||||
return nil, errors.New("cannot extract taxon from nil taxonomy")
|
||||
}
|
||||
|
||||
id, err := taxonomy.Id(taxid)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Taxid %s: %v", taxid, err)
|
||||
return nil, fmt.Errorf("Taxid %s: %v", taxid, err)
|
||||
}
|
||||
|
||||
taxon := taxonomy.nodes.Get(id)
|
||||
|
||||
if taxon == nil {
|
||||
log.Fatalf("Taxid %s is not part of the taxonomy %s",
|
||||
taxid,
|
||||
taxonomy.name)
|
||||
return nil,
|
||||
fmt.Errorf("Taxid %s is not part of the taxonomy %s",
|
||||
taxid,
|
||||
taxonomy.name)
|
||||
}
|
||||
|
||||
return taxon
|
||||
return taxon, nil
|
||||
}
|
||||
|
||||
// AsTaxonSet returns the set of taxon nodes contained within the Taxonomy.
|
||||
@ -353,3 +353,63 @@ func (taxonomy *Taxonomy) HasRoot() bool {
|
||||
taxonomy = taxonomy.OrDefault(false)
|
||||
return taxonomy != nil && taxonomy.root != nil
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) InsertPathString(path []string) (*Taxonomy, error) {
|
||||
if len(path) == 0 {
|
||||
return nil, errors.New("path is empty")
|
||||
}
|
||||
|
||||
code, taxid, scientific_name, rank, err := ParseTaxonString(path[0])
|
||||
|
||||
if taxonomy == nil {
|
||||
taxonomy = NewTaxonomy(code, code, obiutils.AsciiAlphaNumSet)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if taxonomy.Len() == 0 {
|
||||
|
||||
if code != taxonomy.code {
|
||||
return nil, fmt.Errorf("cannot insert taxon %s into taxonomy %s with code %s",
|
||||
path[0], taxonomy.name, taxonomy.code)
|
||||
}
|
||||
|
||||
root, err := taxonomy.AddTaxon(taxid, taxid, rank, true, true)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
root.SetName(scientific_name, "scientificName")
|
||||
}
|
||||
|
||||
var current *Taxon
|
||||
current, err = taxonomy.Taxon(taxid)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !current.IsRoot() {
|
||||
return nil, errors.New("path does not start with a root node")
|
||||
}
|
||||
|
||||
for _, id := range path[1:] {
|
||||
taxon, err := taxonomy.Taxon(id)
|
||||
if err == nil {
|
||||
if !current.SameAs(taxon.Parent()) {
|
||||
return nil, errors.New("path is not consistent with the taxonomy, parent mismatch")
|
||||
}
|
||||
current = taxon
|
||||
} else {
|
||||
current, err = current.AddChild(id, false)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return taxonomy, nil
|
||||
}
|
||||
|
@ -212,7 +212,8 @@ func (set *TaxonSet) Sort() *TaxonSlice {
|
||||
pushed = false
|
||||
for _, node := range set.set {
|
||||
if !parent[node] && (parent[set.Get(node.parent).Node] ||
|
||||
!set.Contains(node.parent)) {
|
||||
!set.Contains(node.parent) ||
|
||||
node == taxonomy.Root().Node) {
|
||||
pushed = true
|
||||
taxa.slice = append(taxa.slice, node)
|
||||
parent[node] = true
|
||||
|
@ -248,15 +248,15 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
|
||||
if len(_BelongTaxa) > 0 {
|
||||
taxonomy := obitax.DefaultTaxonomy()
|
||||
|
||||
taxon := taxonomy.Taxon(_BelongTaxa[0])
|
||||
if taxon == nil {
|
||||
taxon, err := taxonomy.Taxon(_BelongTaxa[0])
|
||||
if err != nil {
|
||||
p = obiseq.IsSubCladeOfSlot(taxonomy, _BelongTaxa[0])
|
||||
} else {
|
||||
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||
}
|
||||
for _, staxid := range _BelongTaxa[1:] {
|
||||
taxon := taxonomy.Taxon(staxid)
|
||||
if taxon == nil {
|
||||
taxon, err := taxonomy.Taxon(staxid)
|
||||
if err != nil {
|
||||
p2 = obiseq.IsSubCladeOfSlot(taxonomy, staxid)
|
||||
} else {
|
||||
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||
@ -278,16 +278,16 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
|
||||
if len(_NotBelongTaxa) > 0 {
|
||||
taxonomy := obitax.DefaultTaxonomy()
|
||||
|
||||
taxon := taxonomy.Taxon(_NotBelongTaxa[0])
|
||||
if taxon == nil {
|
||||
taxon, err := taxonomy.Taxon(_NotBelongTaxa[0])
|
||||
if err != nil {
|
||||
p = obiseq.IsSubCladeOfSlot(taxonomy, _NotBelongTaxa[0])
|
||||
} else {
|
||||
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||
}
|
||||
|
||||
for _, taxid := range _NotBelongTaxa[1:] {
|
||||
taxon := taxonomy.Taxon(taxid)
|
||||
if taxon == nil {
|
||||
taxon, err := taxonomy.Taxon(taxid)
|
||||
if err != nil {
|
||||
p2 = obiseq.IsSubCladeOfSlot(taxonomy, taxid)
|
||||
} else {
|
||||
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||
|
@ -42,9 +42,10 @@ func MatchDistanceIndex(taxonomy *obitax.Taxonomy, distance int, distanceIdx map
|
||||
if i == len(keys) || distance > keys[len(keys)-1] {
|
||||
taxon = taxonomy.Root()
|
||||
} else {
|
||||
taxon = taxonomy.Taxon(distanceIdx[keys[i]])
|
||||
if taxon == nil {
|
||||
log.Panicf("Cannot identify taxon %s in %s", distanceIdx[keys[i]], taxonomy.Name())
|
||||
var err error
|
||||
taxon, err = taxonomy.Taxon(distanceIdx[keys[i]])
|
||||
if err != nil {
|
||||
log.Panicf("Cannot identify taxon %s in %s (%v)", distanceIdx[keys[i]], taxonomy.Name(), err)
|
||||
}
|
||||
}
|
||||
|
||||
@ -196,9 +197,9 @@ func Identify(sequence *obiseq.BioSequence,
|
||||
log.Panic("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
|
||||
}
|
||||
|
||||
match_taxon := taxo.Taxon(identification)
|
||||
match_taxon, err := taxo.Taxon(identification)
|
||||
|
||||
if taxon != nil {
|
||||
if err == nil {
|
||||
taxon, _ = taxon.LCA(match_taxon)
|
||||
} else {
|
||||
taxon = match_taxon
|
||||
|
@ -24,6 +24,7 @@ var __taxid_sons__ = "NA"
|
||||
var __restrict_rank__ = ""
|
||||
var __to_dump__ = ""
|
||||
var __download_ncbi__ = false
|
||||
var __extract_taxonomy__ = false
|
||||
|
||||
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__rank_list__, "rank-list", false,
|
||||
@ -76,7 +77,9 @@ func OptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__download_ncbi__, "download-ncbi", __download_ncbi__,
|
||||
options.Description("Download the current NCBI taxonomy taxdump"),
|
||||
)
|
||||
|
||||
options.BoolVar(&__extract_taxonomy__, "extract-taxonomy", __extract_taxonomy__,
|
||||
options.Description("Extract taxonomy from a sequence file"),
|
||||
)
|
||||
}
|
||||
|
||||
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
||||
@ -88,13 +91,14 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
||||
|
||||
ts := taxonomy.NewTaxonSet()
|
||||
for _, taxid := range __taxonomical_restriction__ {
|
||||
tx := taxonomy.Taxon(taxid)
|
||||
tx, err := taxonomy.Taxon(taxid)
|
||||
|
||||
if tx == nil {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(
|
||||
"cannot find taxon %s in taxonomy %s",
|
||||
"cannot find taxon %s in taxonomy %s (%v)",
|
||||
taxid,
|
||||
taxonomy.Name(),
|
||||
err,
|
||||
)
|
||||
}
|
||||
|
||||
@ -155,3 +159,7 @@ func CLISubTaxonomyNode() string {
|
||||
func CLIDownloadNCBI() bool {
|
||||
return __download_ncbi__
|
||||
}
|
||||
|
||||
func CLIExtractTaxonomy() bool {
|
||||
return __extract_taxonomy__
|
||||
}
|
||||
|
@ -93,3 +93,145 @@ func MapToMapInterface(m interface{}) map[string]interface{} {
|
||||
log.Panic("Invalid map type")
|
||||
return make(map[string]interface{})
|
||||
}
|
||||
|
||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
||||
// If not a "NotAnInteger" error is returned via the err
|
||||
// return value and val is set to 0.
|
||||
func InterfaceToInt(i interface{}) (val int, err error) {
|
||||
|
||||
err = nil
|
||||
val = 0
|
||||
|
||||
switch t := i.(type) {
|
||||
case int:
|
||||
val = t
|
||||
case int8:
|
||||
val = int(t) // standardizes across systems
|
||||
case int16:
|
||||
val = int(t) // standardizes across systems
|
||||
case int32:
|
||||
val = int(t) // standardizes across systems
|
||||
case int64:
|
||||
val = int(t) // standardizes across systems
|
||||
case float32:
|
||||
val = int(t) // standardizes across systems
|
||||
case float64:
|
||||
val = int(t) // standardizes across systems
|
||||
case uint8:
|
||||
val = int(t) // standardizes across systems
|
||||
case uint16:
|
||||
val = int(t) // standardizes across systems
|
||||
case uint32:
|
||||
val = int(t) // standardizes across systems
|
||||
case uint64:
|
||||
val = int(t) // standardizes across systems
|
||||
default:
|
||||
err = &NotAnInteger{"value attribute cannot be casted to an integer"}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
||||
// If not a "NotAnInteger" error is returned via the err
|
||||
// return value and val is set to 0.
|
||||
func InterfaceToFloat64(i interface{}) (val float64, err error) {
|
||||
|
||||
err = nil
|
||||
val = 0
|
||||
|
||||
switch t := i.(type) {
|
||||
case int:
|
||||
val = float64(t)
|
||||
case int8:
|
||||
val = float64(t) // standardizes across systems
|
||||
case int16:
|
||||
val = float64(t) // standardizes across systems
|
||||
case int32:
|
||||
val = float64(t) // standardizes across systems
|
||||
case int64:
|
||||
val = float64(t) // standardizes across systems
|
||||
case float32:
|
||||
val = float64(t) // standardizes across systems
|
||||
case float64:
|
||||
val = t // standardizes across systems
|
||||
case uint8:
|
||||
val = float64(t) // standardizes across systems
|
||||
case uint16:
|
||||
val = float64(t) // standardizes across systems
|
||||
case uint32:
|
||||
val = float64(t) // standardizes across systems
|
||||
case uint64:
|
||||
val = float64(t) // standardizes across systems
|
||||
default:
|
||||
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToIntMap(i interface{}) (val map[string]int, err error) {
|
||||
err = nil
|
||||
|
||||
switch i := i.(type) {
|
||||
case map[string]int:
|
||||
val = i
|
||||
case map[string]interface{}:
|
||||
val = make(map[string]int, len(i))
|
||||
for k, v := range i {
|
||||
val[k], err = InterfaceToInt(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
case map[string]float64:
|
||||
val = make(map[string]int, len(i))
|
||||
for k, v := range i {
|
||||
val[k] = int(v)
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
|
||||
err = nil
|
||||
|
||||
switch i := i.(type) {
|
||||
case map[string]string:
|
||||
val = i
|
||||
case map[string]interface{}:
|
||||
val = make(map[string]string, len(i))
|
||||
for k, v := range i {
|
||||
val[k], err = InterfaceToString(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToStringSlice(i interface{}) (val []string, err error) {
|
||||
err = nil
|
||||
|
||||
switch i := i.(type) {
|
||||
case []string:
|
||||
val = i
|
||||
case []interface{}:
|
||||
val = make([]string, len(i))
|
||||
for k, v := range i {
|
||||
val[k], err = InterfaceToString(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a []string"}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
@ -25,43 +25,6 @@ func (m *NotAnInteger) Error() string {
|
||||
return m.message
|
||||
}
|
||||
|
||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
||||
// If not a "NotAnInteger" error is returned via the err
|
||||
// return value and val is set to 0.
|
||||
func InterfaceToInt(i interface{}) (val int, err error) {
|
||||
|
||||
err = nil
|
||||
val = 0
|
||||
|
||||
switch t := i.(type) {
|
||||
case int:
|
||||
val = t
|
||||
case int8:
|
||||
val = int(t) // standardizes across systems
|
||||
case int16:
|
||||
val = int(t) // standardizes across systems
|
||||
case int32:
|
||||
val = int(t) // standardizes across systems
|
||||
case int64:
|
||||
val = int(t) // standardizes across systems
|
||||
case float32:
|
||||
val = int(t) // standardizes across systems
|
||||
case float64:
|
||||
val = int(t) // standardizes across systems
|
||||
case uint8:
|
||||
val = int(t) // standardizes across systems
|
||||
case uint16:
|
||||
val = int(t) // standardizes across systems
|
||||
case uint32:
|
||||
val = int(t) // standardizes across systems
|
||||
case uint64:
|
||||
val = int(t) // standardizes across systems
|
||||
default:
|
||||
err = &NotAnInteger{"value attribute cannot be casted to an integer"}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// NotAnFloat64 defines a new type of Error : "NotAnFloat64"
|
||||
type NotAnFloat64 struct {
|
||||
message string
|
||||
@ -74,43 +37,6 @@ func (m *NotAnFloat64) Error() string {
|
||||
return m.message
|
||||
}
|
||||
|
||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
||||
// If not a "NotAnInteger" error is returned via the err
|
||||
// return value and val is set to 0.
|
||||
func InterfaceToFloat64(i interface{}) (val float64, err error) {
|
||||
|
||||
err = nil
|
||||
val = 0
|
||||
|
||||
switch t := i.(type) {
|
||||
case int:
|
||||
val = float64(t)
|
||||
case int8:
|
||||
val = float64(t) // standardizes across systems
|
||||
case int16:
|
||||
val = float64(t) // standardizes across systems
|
||||
case int32:
|
||||
val = float64(t) // standardizes across systems
|
||||
case int64:
|
||||
val = float64(t) // standardizes across systems
|
||||
case float32:
|
||||
val = float64(t) // standardizes across systems
|
||||
case float64:
|
||||
val = t // standardizes across systems
|
||||
case uint8:
|
||||
val = float64(t) // standardizes across systems
|
||||
case uint16:
|
||||
val = float64(t) // standardizes across systems
|
||||
case uint32:
|
||||
val = float64(t) // standardizes across systems
|
||||
case uint64:
|
||||
val = float64(t) // standardizes across systems
|
||||
default:
|
||||
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// NotAMapInt defines a new type of Error : "NotAMapInt"
|
||||
type NotAMapInt struct {
|
||||
message string
|
||||
@ -123,53 +49,6 @@ func (m *NotAMapInt) Error() string {
|
||||
return m.message
|
||||
}
|
||||
|
||||
func InterfaceToIntMap(i interface{}) (val map[string]int, err error) {
|
||||
err = nil
|
||||
|
||||
switch i := i.(type) {
|
||||
case map[string]int:
|
||||
val = i
|
||||
case map[string]interface{}:
|
||||
val = make(map[string]int, len(i))
|
||||
for k, v := range i {
|
||||
val[k], err = InterfaceToInt(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
case map[string]float64:
|
||||
val = make(map[string]int, len(i))
|
||||
for k, v := range i {
|
||||
val[k] = int(v)
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
|
||||
err = nil
|
||||
|
||||
switch i := i.(type) {
|
||||
case map[string]string:
|
||||
val = i
|
||||
case map[string]interface{}:
|
||||
val = make(map[string]string, len(i))
|
||||
for k, v := range i {
|
||||
val[k], err = InterfaceToString(v)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
default:
|
||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// NotAMapFloat64 defines a new type of Error : "NotAMapFloat64"
|
||||
type NotAMapFloat64 struct {
|
||||
message string
|
||||
|
Reference in New Issue
Block a user