diff --git a/cmd/obitools/obitaxonomy/main.go b/cmd/obitools/obitaxonomy/main.go index 65fd04a..f26d596 100644 --- a/cmd/obitools/obitaxonomy/main.go +++ b/cmd/obitools/obitaxonomy/main.go @@ -5,6 +5,7 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -19,7 +20,6 @@ func main() { var iterator *obitax.ITaxon switch { - case obitaxonomy.CLIDownloadNCBI(): err := obitaxonomy.CLIDownloadNCBITaxdump() if err != nil { @@ -29,16 +29,34 @@ func main() { os.Exit(0) + case obitaxonomy.CLIExtractTaxonomy(): + iter, err := obiconvert.CLIReadBioSequences(args...) + + if err != nil { + log.Fatalf("Cannot extract taxonomy: %v", err) + } + + taxonomy, err := iter.ExtractTaxonomy() + + if err != nil { + log.Fatalf("Cannot extract taxonomy: %v", err) + } + + taxonomy.SetAsDefault() + + log.Infof("Number of extracted taxa: %d", taxonomy.Len()) + iterator = taxonomy.AsTaxonSet().Sort().Iterator() + case obitaxonomy.CLIDumpSubtaxonomy(): iterator = obitaxonomy.CLISubTaxonomyIterator() case obitaxonomy.CLIRequestsPathForTaxid() != "NA": - taxon := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid()) + taxon, err := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid()) - if taxon == nil { - log.Fatalf("Cannot identify the requested taxon: %s", - obitaxonomy.CLIRequestsPathForTaxid()) + if err != nil { + log.Fatalf("Cannot identify the requested taxon: %s (%v)", + obitaxonomy.CLIRequestsPathForTaxid(), err) } s := taxon.Path() diff --git a/pkg/obiformats/fastseq_json_header.go b/pkg/obiformats/fastseq_json_header.go index 9459b9f..0ed53e5 100644 --- a/pkg/obiformats/fastseq_json_header.go +++ b/pkg/obiformats/fastseq_json_header.go @@ -292,8 +292,8 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string { case skey == "taxid": if dataType == jsonparser.Number || dataType == jsonparser.String { taxid := obiutils.UnsafeString(value) - taxon := taxonomy.Taxon(taxid) - if taxon != nil { + taxon, err := taxonomy.Taxon(taxid) + if err == nil { sequence.SetTaxon(taxon) } else { sequence.SetTaxid(string(value)) @@ -307,9 +307,9 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string { rank, _ := obiutils.SplitInTwo(skey, '_') taxid := obiutils.UnsafeString(value) - taxon := taxonomy.Taxon(taxid) + taxon, err := taxonomy.Taxon(taxid) - if taxon != nil { + if err == nil { taxid = taxon.String() } else { taxid = string(value) diff --git a/pkg/obiiter/extract_taxonomy.go b/pkg/obiiter/extract_taxonomy.go new file mode 100644 index 0000000..868f527 --- /dev/null +++ b/pkg/obiiter/extract_taxonomy.go @@ -0,0 +1,18 @@ +package obiiter + +import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" + +func (iterator *IBioSequence) ExtractTaxonomy() (taxonomy *obitax.Taxonomy, err error) { + + for iterator.Next() { + slice := iterator.Get().Slice() + + taxonomy, err = slice.ExtractTaxonomy(taxonomy) + + if err != nil { + return + } + } + + return +} diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 6115614..48824fd 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "3379545" +var _Commit = "2452aef" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obiseq/biosequenceslice.go b/pkg/obiseq/biosequenceslice.go index 12210a6..7ba22b0 100644 --- a/pkg/obiseq/biosequenceslice.go +++ b/pkg/obiseq/biosequenceslice.go @@ -1,6 +1,7 @@ package obiseq import ( + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" log "github.com/sirupsen/logrus" "golang.org/x/exp/slices" @@ -179,3 +180,18 @@ func (s *BioSequenceSlice) SortOnLength(reverse bool) { } }) } + +func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy) (*obitax.Taxonomy, error) { + var err error + + for _, s := range *s { + taxonomy, err = taxonomy.InsertPathString(s.Path()) + + if err != nil { + return nil, err + } + + } + + return taxonomy, nil +} diff --git a/pkg/obiseq/taxonomy_lca.go b/pkg/obiseq/taxonomy_lca.go index bde85c7..0758068 100644 --- a/pkg/obiseq/taxonomy_lca.go +++ b/pkg/obiseq/taxonomy_lca.go @@ -15,13 +15,15 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma taxonomy = taxonomy.OrDefault(true) for taxid, v := range taxids { - t := taxonomy.Taxon(taxid) - if t == nil { + t, err := taxonomy.Taxon(taxid) + if err != nil { log.Fatalf( - "On sequence %s taxid %s is not defined in taxonomy: %s", + "On sequence %s taxid %s is not defined in taxonomy: %s (%v)", sequence.Id(), taxid, - taxonomy.Name()) + taxonomy.Name(), + err, + ) } taxons[t.Node] = v } diff --git a/pkg/obiseq/taxonomy_methods.go b/pkg/obiseq/taxonomy_methods.go index fa2b7c6..37edb63 100644 --- a/pkg/obiseq/taxonomy_methods.go +++ b/pkg/obiseq/taxonomy_methods.go @@ -6,6 +6,7 @@ import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon { @@ -14,7 +15,10 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon { if taxid == "NA" { return nil } - return taxonomy.Taxon(taxid) + + taxon, _ := taxonomy.Taxon(taxid) + + return taxon } // SetTaxid sets the taxid for the BioSequence. @@ -23,6 +27,7 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon { // // taxid - the taxid to set. func (s *BioSequence) SetTaxid(taxid string, rank ...string) { + var err error if taxid == "" { taxid = "NA" } else { @@ -30,7 +35,12 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) { taxon := (*obitax.Taxon)(nil) if taxonomy != nil { - taxon = taxonomy.Taxon(taxid) + taxon, err = taxonomy.Taxon(taxid) + + if err != nil { + log.Warnf("%s: Taxid: %v is unknown from taxonomy (%v)", + s.Id(), taxid, err) + } } if taxon != nil { @@ -135,14 +145,35 @@ func (sequence *BioSequence) SetFamily(taxonomy *obitax.Taxonomy) *obitax.Taxon return sequence.SetTaxonAtRank(taxonomy, "family") } -func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) string { +func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string { taxon := sequence.Taxon(taxonomy) path := taxon.Path() + spath := make([]string, path.Len()) + lpath := path.Len() - 1 - tpath := path.String() - sequence.SetAttribute("taxonomic_path", tpath) + for i := lpath; i >= 0; i-- { + spath[lpath-i] = path.Get(i).String(taxonomy.Code()) + } - return tpath + sequence.SetAttribute("taxonomic_path", spath) + + return spath +} + +func (sequence *BioSequence) Path() []string { + path, ok := sequence.GetAttribute("taxonomic_path") + + if !ok { + return nil + } + + slice, err := obiutils.InterfaceToStringSlice(path) + + if err != nil { + log.Fatalf("%s: taxonomic_path has the wrong type (%v)", sequence.Id(), err) + } + + return slice } func (sequence *BioSequence) SetScientificName(taxonomy *obitax.Taxonomy) string { diff --git a/pkg/obiseq/taxonomy_predicate.go b/pkg/obiseq/taxonomy_predicate.go index 265c1bc..25ac551 100644 --- a/pkg/obiseq/taxonomy_predicate.go +++ b/pkg/obiseq/taxonomy_predicate.go @@ -63,7 +63,12 @@ func IsSubCladeOfSlot(taxonomy *obitax.Taxonomy, key string) SequencePredicate { val, ok := sequence.GetStringAttribute(key) if ok { - parent := taxonomy.Taxon(val) + parent, err := taxonomy.Taxon(val) + + if err != nil { + log.Warnf("%s: %s is unkown from the taxonomy (%v)", sequence.Id(), val, err) + } + taxon := sequence.Taxon(taxonomy) return parent != nil && taxon != nil && taxon.IsSubCladeOf(parent) } diff --git a/pkg/obitax/iterator.go b/pkg/obitax/iterator.go index f53b465..e6887ab 100644 --- a/pkg/obitax/iterator.go +++ b/pkg/obitax/iterator.go @@ -218,9 +218,9 @@ func (taxon *Taxon) ISubTaxonomy() *ITaxon { } func (taxonomy *Taxonomy) ISubTaxonomy(taxid string) *ITaxon { - taxon := taxonomy.Taxon(taxid) + taxon, err := taxonomy.Taxon(taxid) - if taxon == nil { + if err != nil { return nil } diff --git a/pkg/obitax/ncbitaxdump_read.go b/pkg/obitax/ncbitaxdump_read.go index f359ea4..22a033e 100644 --- a/pkg/obitax/ncbitaxdump_read.go +++ b/pkg/obitax/ncbitaxdump_read.go @@ -91,7 +91,13 @@ func loadNameTable(reader io.Reader, taxonomy *Taxonomy, onlysn bool) int { if !onlysn || classname == "scientific name" { n++ - taxonomy.Taxon(taxid).SetName(name, classname) + taxon, err := taxonomy.Taxon(taxid) + + if err != nil { + log.Fatalf("%s: is unknown from the taxonomy", taxid) + } + + taxon.SetName(name, classname) } } @@ -196,7 +202,11 @@ func LoadNCBITaxDump(directory string, onlysn bool) (*Taxonomy, error) { n = loadMergedTable(buffered, taxonomy) log.Printf("%d merged taxa read\n", n) - root := taxonomy.Taxon("1") + root, err := taxonomy.Taxon("1") + + if err != nil { + log.Fatal("cannot find the root taxon (1) in the NCBI tax dump") + } taxonomy.SetRoot(root) return taxonomy, nil diff --git a/pkg/obitax/ncbitaxdump_readtar.go b/pkg/obitax/ncbitaxdump_readtar.go index a5862eb..594d879 100644 --- a/pkg/obitax/ncbitaxdump_readtar.go +++ b/pkg/obitax/ncbitaxdump_readtar.go @@ -134,7 +134,12 @@ func LoadNCBITarTaxDump(path string, onlysn bool) (*Taxonomy, error) { n = loadMergedTable(buffered, taxonomy) log.Printf("%d merged taxa read\n", n) - root := taxonomy.Taxon("1") + root, err := taxonomy.Taxon("1") + + if err != nil { + log.Fatal("cannot find the root taxon (1) in the NCBI tax dump") + } + taxonomy.SetRoot(root) return taxonomy, nil diff --git a/pkg/obitax/string_parser.go b/pkg/obitax/string_parser.go new file mode 100644 index 0000000..c18d9d9 --- /dev/null +++ b/pkg/obitax/string_parser.go @@ -0,0 +1,64 @@ +package obitax + +import ( + "errors" + "strings" +) + +// ParseTaxonString parses a string in the format "code:taxid [scientific name]@rank" +// and returns the individual components. It handles extra whitespace around components. +// +// Parameters: +// - taxonStr: The string to parse in the format "code:taxid [scientific name]@rank" +// +// Returns: +// - code: The taxonomy code +// - taxid: The taxon identifier +// - scientificName: The scientific name (without brackets) +// - rank: The rank +// - error: An error if the string format is invalid +func ParseTaxonString(taxonStr string) (code, taxid, scientificName, rank string, err error) { + // Trim any leading/trailing whitespace from the entire string + taxonStr = strings.TrimSpace(taxonStr) + + // Split by '@' to separate rank + parts := strings.Split(taxonStr, "@") + if len(parts) > 2 { + return "", "", "", "", errors.New("invalid format: multiple '@' characters found") + } + + mainPart := strings.TrimSpace(parts[0]) + if len(parts) == 2 { + rank = strings.TrimSpace(parts[1]) + } else { + rank = "no rank" + } + + // Find scientific name part (enclosed in square brackets) + startBracket := strings.Index(mainPart, "[") + endBracket := strings.LastIndex(mainPart, "]") + + if startBracket == -1 || endBracket == -1 || startBracket > endBracket { + return "", "", "", "", errors.New("invalid format: scientific name must be enclosed in square brackets") + } + + // Extract and clean scientific name + scientificName = strings.TrimSpace(mainPart[startBracket+1 : endBracket]) + + // Process code:taxid part + idPart := strings.TrimSpace(mainPart[:startBracket]) + idComponents := strings.Split(idPart, ":") + + if len(idComponents) != 2 { + return "", "", "", "", errors.New("invalid format: missing taxonomy code separator ':'") + } + + code = strings.TrimSpace(idComponents[0]) + taxid = strings.TrimSpace(idComponents[1]) + + if code == "" || taxid == "" || scientificName == "" { + return "", "", "", "", errors.New("invalid format: code, taxid and scientific name cannot be empty") + } + + return code, taxid, scientificName, rank, nil +} diff --git a/pkg/obitax/taxon.go b/pkg/obitax/taxon.go index d0817cb..e58ef26 100644 --- a/pkg/obitax/taxon.go +++ b/pkg/obitax/taxon.go @@ -1,6 +1,7 @@ package obitax import ( + "errors" "iter" "regexp" @@ -379,3 +380,29 @@ func (taxon *Taxon) SameAs(other *Taxon) bool { return taxon.Taxonomy == other.Taxonomy && taxon.Node.id == other.Node.id } + +func (taxon *Taxon) AddChild(child string, replace bool) (*Taxon, error) { + if taxon == nil { + return nil, errors.New("nil taxon") + } + + code, taxid, scientific_name, rank, err := ParseTaxonString(child) + + if err != nil { + return nil, err + } + + if taxon.Taxonomy.code != code { + return nil, errors.New("taxonomy code mismatch") + } + + newTaxon, err := taxon.Taxonomy.AddTaxon(taxid, *taxon.Node.id, rank, false, replace) + + if err != nil { + return nil, err + } + + newTaxon.SetName(scientific_name, "scientific name") + + return newTaxon, nil +} diff --git a/pkg/obitax/taxonomy.go b/pkg/obitax/taxonomy.go index 75fdadf..31ad345 100644 --- a/pkg/obitax/taxonomy.go +++ b/pkg/obitax/taxonomy.go @@ -12,7 +12,6 @@ import ( "fmt" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" - log "github.com/sirupsen/logrus" ) // Taxonomy represents a hierarchical classification of taxa. @@ -130,27 +129,28 @@ func (taxonomy *Taxonomy) TaxidString(id string) (string, error) { // Returns: // - A pointer to the Taxon instance associated with the provided taxid. // - If the taxid is unknown, the method will log a fatal error. -func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon { +func (taxonomy *Taxonomy) Taxon(taxid string) (*Taxon, error) { taxonomy = taxonomy.OrDefault(false) if taxonomy == nil { - return nil + return nil, errors.New("cannot extract taxon from nil taxonomy") } id, err := taxonomy.Id(taxid) if err != nil { - log.Fatalf("Taxid %s: %v", taxid, err) + return nil, fmt.Errorf("Taxid %s: %v", taxid, err) } taxon := taxonomy.nodes.Get(id) if taxon == nil { - log.Fatalf("Taxid %s is not part of the taxonomy %s", - taxid, - taxonomy.name) + return nil, + fmt.Errorf("Taxid %s is not part of the taxonomy %s", + taxid, + taxonomy.name) } - return taxon + return taxon, nil } // AsTaxonSet returns the set of taxon nodes contained within the Taxonomy. @@ -353,3 +353,63 @@ func (taxonomy *Taxonomy) HasRoot() bool { taxonomy = taxonomy.OrDefault(false) return taxonomy != nil && taxonomy.root != nil } + +func (taxonomy *Taxonomy) InsertPathString(path []string) (*Taxonomy, error) { + if len(path) == 0 { + return nil, errors.New("path is empty") + } + + code, taxid, scientific_name, rank, err := ParseTaxonString(path[0]) + + if taxonomy == nil { + taxonomy = NewTaxonomy(code, code, obiutils.AsciiAlphaNumSet) + } + + if err != nil { + return nil, err + } + + if taxonomy.Len() == 0 { + + if code != taxonomy.code { + return nil, fmt.Errorf("cannot insert taxon %s into taxonomy %s with code %s", + path[0], taxonomy.name, taxonomy.code) + } + + root, err := taxonomy.AddTaxon(taxid, taxid, rank, true, true) + + if err != nil { + return nil, err + } + root.SetName(scientific_name, "scientificName") + } + + var current *Taxon + current, err = taxonomy.Taxon(taxid) + + if err != nil { + return nil, err + } + + if !current.IsRoot() { + return nil, errors.New("path does not start with a root node") + } + + for _, id := range path[1:] { + taxon, err := taxonomy.Taxon(id) + if err == nil { + if !current.SameAs(taxon.Parent()) { + return nil, errors.New("path is not consistent with the taxonomy, parent mismatch") + } + current = taxon + } else { + current, err = current.AddChild(id, false) + + if err != nil { + return nil, err + } + } + } + + return taxonomy, nil +} diff --git a/pkg/obitax/taxonset.go b/pkg/obitax/taxonset.go index 9d2da84..744dc21 100644 --- a/pkg/obitax/taxonset.go +++ b/pkg/obitax/taxonset.go @@ -212,7 +212,8 @@ func (set *TaxonSet) Sort() *TaxonSlice { pushed = false for _, node := range set.set { if !parent[node] && (parent[set.Get(node.parent).Node] || - !set.Contains(node.parent)) { + !set.Contains(node.parent) || + node == taxonomy.Root().Node) { pushed = true taxa.slice = append(taxa.slice, node) parent[node] = true diff --git a/pkg/obitools/obigrep/options.go b/pkg/obitools/obigrep/options.go index 8197905..abaa812 100644 --- a/pkg/obitools/obigrep/options.go +++ b/pkg/obitools/obigrep/options.go @@ -248,15 +248,15 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate { if len(_BelongTaxa) > 0 { taxonomy := obitax.DefaultTaxonomy() - taxon := taxonomy.Taxon(_BelongTaxa[0]) - if taxon == nil { + taxon, err := taxonomy.Taxon(_BelongTaxa[0]) + if err != nil { p = obiseq.IsSubCladeOfSlot(taxonomy, _BelongTaxa[0]) } else { p = obiseq.IsSubCladeOf(taxonomy, taxon) } for _, staxid := range _BelongTaxa[1:] { - taxon := taxonomy.Taxon(staxid) - if taxon == nil { + taxon, err := taxonomy.Taxon(staxid) + if err != nil { p2 = obiseq.IsSubCladeOfSlot(taxonomy, staxid) } else { p2 = obiseq.IsSubCladeOf(taxonomy, taxon) @@ -278,16 +278,16 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate { if len(_NotBelongTaxa) > 0 { taxonomy := obitax.DefaultTaxonomy() - taxon := taxonomy.Taxon(_NotBelongTaxa[0]) - if taxon == nil { + taxon, err := taxonomy.Taxon(_NotBelongTaxa[0]) + if err != nil { p = obiseq.IsSubCladeOfSlot(taxonomy, _NotBelongTaxa[0]) } else { p = obiseq.IsSubCladeOf(taxonomy, taxon) } for _, taxid := range _NotBelongTaxa[1:] { - taxon := taxonomy.Taxon(taxid) - if taxon == nil { + taxon, err := taxonomy.Taxon(taxid) + if err != nil { p2 = obiseq.IsSubCladeOfSlot(taxonomy, taxid) } else { p2 = obiseq.IsSubCladeOf(taxonomy, taxon) diff --git a/pkg/obitools/obitag/obitag.go b/pkg/obitools/obitag/obitag.go index e5b340f..1d1b791 100644 --- a/pkg/obitools/obitag/obitag.go +++ b/pkg/obitools/obitag/obitag.go @@ -42,9 +42,10 @@ func MatchDistanceIndex(taxonomy *obitax.Taxonomy, distance int, distanceIdx map if i == len(keys) || distance > keys[len(keys)-1] { taxon = taxonomy.Root() } else { - taxon = taxonomy.Taxon(distanceIdx[keys[i]]) - if taxon == nil { - log.Panicf("Cannot identify taxon %s in %s", distanceIdx[keys[i]], taxonomy.Name()) + var err error + taxon, err = taxonomy.Taxon(distanceIdx[keys[i]]) + if err != nil { + log.Panicf("Cannot identify taxon %s in %s (%v)", distanceIdx[keys[i]], taxonomy.Name(), err) } } @@ -196,9 +197,9 @@ func Identify(sequence *obiseq.BioSequence, log.Panic("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d) } - match_taxon := taxo.Taxon(identification) + match_taxon, err := taxo.Taxon(identification) - if taxon != nil { + if err == nil { taxon, _ = taxon.LCA(match_taxon) } else { taxon = match_taxon diff --git a/pkg/obitools/obitaxonomy/options.go b/pkg/obitools/obitaxonomy/options.go index 85ef0ac..18dc552 100644 --- a/pkg/obitools/obitaxonomy/options.go +++ b/pkg/obitools/obitaxonomy/options.go @@ -24,6 +24,7 @@ var __taxid_sons__ = "NA" var __restrict_rank__ = "" var __to_dump__ = "" var __download_ncbi__ = false +var __extract_taxonomy__ = false func FilterTaxonomyOptionSet(options *getoptions.GetOpt) { options.BoolVar(&__rank_list__, "rank-list", false, @@ -76,7 +77,9 @@ func OptionSet(options *getoptions.GetOpt) { options.BoolVar(&__download_ncbi__, "download-ncbi", __download_ncbi__, options.Description("Download the current NCBI taxonomy taxdump"), ) - + options.BoolVar(&__extract_taxonomy__, "extract-taxonomy", __extract_taxonomy__, + options.Description("Extract taxonomy from a sequence file"), + ) } func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) { @@ -88,13 +91,14 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) { ts := taxonomy.NewTaxonSet() for _, taxid := range __taxonomical_restriction__ { - tx := taxonomy.Taxon(taxid) + tx, err := taxonomy.Taxon(taxid) - if tx == nil { + if err != nil { return nil, fmt.Errorf( - "cannot find taxon %s in taxonomy %s", + "cannot find taxon %s in taxonomy %s (%v)", taxid, taxonomy.Name(), + err, ) } @@ -155,3 +159,7 @@ func CLISubTaxonomyNode() string { func CLIDownloadNCBI() bool { return __download_ncbi__ } + +func CLIExtractTaxonomy() bool { + return __extract_taxonomy__ +} diff --git a/pkg/obiutils/cast_interface.go b/pkg/obiutils/cast_interface.go index 23c5d6a..690c8dc 100644 --- a/pkg/obiutils/cast_interface.go +++ b/pkg/obiutils/cast_interface.go @@ -93,3 +93,145 @@ func MapToMapInterface(m interface{}) map[string]interface{} { log.Panic("Invalid map type") return make(map[string]interface{}) } + +// InterfaceToInt converts a interface{} to an integer value if possible. +// If not a "NotAnInteger" error is returned via the err +// return value and val is set to 0. +func InterfaceToInt(i interface{}) (val int, err error) { + + err = nil + val = 0 + + switch t := i.(type) { + case int: + val = t + case int8: + val = int(t) // standardizes across systems + case int16: + val = int(t) // standardizes across systems + case int32: + val = int(t) // standardizes across systems + case int64: + val = int(t) // standardizes across systems + case float32: + val = int(t) // standardizes across systems + case float64: + val = int(t) // standardizes across systems + case uint8: + val = int(t) // standardizes across systems + case uint16: + val = int(t) // standardizes across systems + case uint32: + val = int(t) // standardizes across systems + case uint64: + val = int(t) // standardizes across systems + default: + err = &NotAnInteger{"value attribute cannot be casted to an integer"} + } + return +} + +// InterfaceToInt converts a interface{} to an integer value if possible. +// If not a "NotAnInteger" error is returned via the err +// return value and val is set to 0. +func InterfaceToFloat64(i interface{}) (val float64, err error) { + + err = nil + val = 0 + + switch t := i.(type) { + case int: + val = float64(t) + case int8: + val = float64(t) // standardizes across systems + case int16: + val = float64(t) // standardizes across systems + case int32: + val = float64(t) // standardizes across systems + case int64: + val = float64(t) // standardizes across systems + case float32: + val = float64(t) // standardizes across systems + case float64: + val = t // standardizes across systems + case uint8: + val = float64(t) // standardizes across systems + case uint16: + val = float64(t) // standardizes across systems + case uint32: + val = float64(t) // standardizes across systems + case uint64: + val = float64(t) // standardizes across systems + default: + err = &NotAnFloat64{"value attribute cannot be casted to a float value"} + } + return +} + +func InterfaceToIntMap(i interface{}) (val map[string]int, err error) { + err = nil + + switch i := i.(type) { + case map[string]int: + val = i + case map[string]interface{}: + val = make(map[string]int, len(i)) + for k, v := range i { + val[k], err = InterfaceToInt(v) + if err != nil { + return + } + } + case map[string]float64: + val = make(map[string]int, len(i)) + for k, v := range i { + val[k] = int(v) + } + default: + err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"} + } + + return +} + +func InterfaceToStringMap(i interface{}) (val map[string]string, err error) { + err = nil + + switch i := i.(type) { + case map[string]string: + val = i + case map[string]interface{}: + val = make(map[string]string, len(i)) + for k, v := range i { + val[k], err = InterfaceToString(v) + if err != nil { + return + } + } + default: + err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"} + } + + return +} + +func InterfaceToStringSlice(i interface{}) (val []string, err error) { + err = nil + + switch i := i.(type) { + case []string: + val = i + case []interface{}: + val = make([]string, len(i)) + for k, v := range i { + val[k], err = InterfaceToString(v) + if err != nil { + return + } + } + default: + err = &NotAMapInt{"value attribute cannot be casted to a []string"} + } + + return +} diff --git a/pkg/obiutils/goutils.go b/pkg/obiutils/goutils.go index e0bbefe..732d6d1 100644 --- a/pkg/obiutils/goutils.go +++ b/pkg/obiutils/goutils.go @@ -25,43 +25,6 @@ func (m *NotAnInteger) Error() string { return m.message } -// InterfaceToInt converts a interface{} to an integer value if possible. -// If not a "NotAnInteger" error is returned via the err -// return value and val is set to 0. -func InterfaceToInt(i interface{}) (val int, err error) { - - err = nil - val = 0 - - switch t := i.(type) { - case int: - val = t - case int8: - val = int(t) // standardizes across systems - case int16: - val = int(t) // standardizes across systems - case int32: - val = int(t) // standardizes across systems - case int64: - val = int(t) // standardizes across systems - case float32: - val = int(t) // standardizes across systems - case float64: - val = int(t) // standardizes across systems - case uint8: - val = int(t) // standardizes across systems - case uint16: - val = int(t) // standardizes across systems - case uint32: - val = int(t) // standardizes across systems - case uint64: - val = int(t) // standardizes across systems - default: - err = &NotAnInteger{"value attribute cannot be casted to an integer"} - } - return -} - // NotAnInteger defines a new type of Error : "NotAnInteger" type NotAnFloat64 struct { message string @@ -74,43 +37,6 @@ func (m *NotAnFloat64) Error() string { return m.message } -// InterfaceToInt converts a interface{} to an integer value if possible. -// If not a "NotAnInteger" error is returned via the err -// return value and val is set to 0. -func InterfaceToFloat64(i interface{}) (val float64, err error) { - - err = nil - val = 0 - - switch t := i.(type) { - case int: - val = float64(t) - case int8: - val = float64(t) // standardizes across systems - case int16: - val = float64(t) // standardizes across systems - case int32: - val = float64(t) // standardizes across systems - case int64: - val = float64(t) // standardizes across systems - case float32: - val = float64(t) // standardizes across systems - case float64: - val = t // standardizes across systems - case uint8: - val = float64(t) // standardizes across systems - case uint16: - val = float64(t) // standardizes across systems - case uint32: - val = float64(t) // standardizes across systems - case uint64: - val = float64(t) // standardizes across systems - default: - err = &NotAnFloat64{"value attribute cannot be casted to a float value"} - } - return -} - // NotABoolean defines a new type of Error : "NotAMapInt" type NotAMapInt struct { message string @@ -123,53 +49,6 @@ func (m *NotAMapInt) Error() string { return m.message } -func InterfaceToIntMap(i interface{}) (val map[string]int, err error) { - err = nil - - switch i := i.(type) { - case map[string]int: - val = i - case map[string]interface{}: - val = make(map[string]int, len(i)) - for k, v := range i { - val[k], err = InterfaceToInt(v) - if err != nil { - return - } - } - case map[string]float64: - val = make(map[string]int, len(i)) - for k, v := range i { - val[k] = int(v) - } - default: - err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"} - } - - return -} - -func InterfaceToStringMap(i interface{}) (val map[string]string, err error) { - err = nil - - switch i := i.(type) { - case map[string]string: - val = i - case map[string]interface{}: - val = make(map[string]string, len(i)) - for k, v := range i { - val[k], err = InterfaceToString(v) - if err != nil { - return - } - } - default: - err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"} - } - - return -} - // NotABoolean defines a new type of Error : "NotAMapInt" type NotAMapFloat64 struct { message string