mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Rename obifind obitaxonomy and introduce the new CSV format for taxonomy.
This commit is contained in:
@ -7,6 +7,8 @@
|
||||
- In `obimultiplex`, the short version of the **--tag-list** option used to specify the list
|
||||
of tags and primers to be used for the demultiplexing has been changed from `-t` to `-s`.
|
||||
|
||||
- The command `obifind` is now renamed `obitaxonomy`.
|
||||
|
||||
- The **--taxdump** option used to specify the path to the taxdump containing the NCBI taxonomy
|
||||
has been renamed to **--taxonomy**.
|
||||
|
||||
@ -21,6 +23,11 @@
|
||||
|
||||
### New features
|
||||
|
||||
- In `obitaxonomy`, a new **--dump|D** option allows for dumping a sub-taxonomy.
|
||||
|
||||
- Taxonomy dump can now be provided as a four-column CSV file to the **--taxonomy**
|
||||
option.
|
||||
|
||||
- NCBI Taxonomy dump does not need to be uncompressed and unarchived anymore. The
|
||||
path of the tar and gzipped dump file can be directly specified using the
|
||||
**--taxonomy** option.
|
||||
|
@ -6,25 +6,29 @@ import (
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obifind"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
func main() {
|
||||
optionParser := obioptions.GenerateOptionParser(obifind.OptionSet)
|
||||
optionParser := obioptions.GenerateOptionParser(obitaxonomy.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
var iterator *obitax.ITaxon
|
||||
|
||||
switch {
|
||||
case obifind.CLIRequestsPathForTaxid() != "NA":
|
||||
|
||||
taxon := obitax.DefaultTaxonomy().Taxon(obifind.CLIRequestsPathForTaxid())
|
||||
case obitaxonomy.CLIDumpSubtaxonomy():
|
||||
iterator = obitaxonomy.CLISubTaxonomyIterator()
|
||||
|
||||
case obitaxonomy.CLIRequestsPathForTaxid() != "NA":
|
||||
|
||||
taxon := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
||||
|
||||
if taxon == nil {
|
||||
log.Fatalf("Cannot identify the requested taxon: %s",
|
||||
obifind.CLIRequestsPathForTaxid())
|
||||
obitaxonomy.CLIRequestsPathForTaxid())
|
||||
}
|
||||
|
||||
s := taxon.Path()
|
||||
@ -35,7 +39,7 @@ func main() {
|
||||
|
||||
iterator = s.Iterator()
|
||||
|
||||
if obifind.CLIWithQuery() {
|
||||
if obitaxonomy.CLIWithQuery() {
|
||||
iterator = iterator.AddMetadata("query", taxon.String())
|
||||
}
|
||||
|
||||
@ -45,8 +49,8 @@ func main() {
|
||||
iters := make([]*obitax.ITaxon, len(args))
|
||||
|
||||
for i, pat := range args {
|
||||
ii := obitax.DefaultTaxonomy().IFilterOnName(pat, obifind.CLIFixedPattern(), true)
|
||||
if obifind.CLIWithQuery() {
|
||||
ii := obitax.DefaultTaxonomy().IFilterOnName(pat, obitaxonomy.CLIFixedPattern(), true)
|
||||
if obitaxonomy.CLIWithQuery() {
|
||||
ii = ii.AddMetadata("query", pat)
|
||||
}
|
||||
iters[i] = ii
|
||||
@ -59,8 +63,8 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
iterator = obifind.CLITaxonRestrictions(iterator)
|
||||
obifind.CLICSVTaxaWriter(iterator, true)
|
||||
iterator = obitaxonomy.CLITaxonRestrictions(iterator)
|
||||
obitaxonomy.CLICSVTaxaWriter(iterator, true)
|
||||
|
||||
obiutils.WaitForLastPipe()
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// committed
|
||||
|
||||
var _Commit = "7c4042d"
|
||||
var _Commit = "c50a0f4"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
@ -1,11 +1,10 @@
|
||||
package obifind
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"slices"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
)
|
||||
|
||||
type __options__ struct {
|
||||
@ -192,7 +191,7 @@ func OptionsWithMetadata(values ...string) WithOption {
|
||||
return f
|
||||
}
|
||||
|
||||
func NewCSVTaxaIterator(iterator *obitax.ITaxon, options ...WithOption) *obiitercsv.ICSVRecord {
|
||||
func (iterator *ITaxon) CSVTaxaIterator(options ...WithOption) *obiitercsv.ICSVRecord {
|
||||
|
||||
opt := MakeOptions(options)
|
||||
metakeys := make([]string, 0)
|
@ -11,6 +11,8 @@ import (
|
||||
|
||||
func LoadCSVTaxonomy(path string, onlysn bool) (*Taxonomy, error) {
|
||||
|
||||
log.Infof("Loading taxonomy from csv file: %s", path)
|
||||
|
||||
file, err := obiutils.Ropen(path)
|
||||
|
||||
if err != nil {
|
||||
@ -47,7 +49,7 @@ func LoadCSVTaxonomy(path string, onlysn bool) (*Taxonomy, error) {
|
||||
parentColIndex = i
|
||||
case "scientific_name":
|
||||
scientific_nameColIndex = i
|
||||
case "rank":
|
||||
case "taxonomic_rank":
|
||||
rankColIndex = i
|
||||
}
|
||||
}
|
||||
@ -70,31 +72,45 @@ func LoadCSVTaxonomy(path string, onlysn bool) (*Taxonomy, error) {
|
||||
|
||||
name := obiutils.RemoveAllExt(path)
|
||||
short := obiutils.Basename(path)
|
||||
taxonomy := NewTaxonomy(name, short, obiutils.AsciiAlphaNumSet)
|
||||
|
||||
line, err := csvfile.Read()
|
||||
if err == nil {
|
||||
parts := strings.Split(line[taxidColIndex], " ")
|
||||
parts = strings.Split(parts[0], ":")
|
||||
if len(parts) > 1 {
|
||||
short = parts[0]
|
||||
}
|
||||
}
|
||||
|
||||
for err != nil {
|
||||
log.Infof("Taxonomy name: %s", name)
|
||||
log.Infof("Taxon code: %s", short)
|
||||
|
||||
taxonomy := NewTaxonomy(name, short, obiutils.AsciiAlphaNumSet)
|
||||
|
||||
root := true
|
||||
var taxon *Taxon
|
||||
|
||||
for err == nil {
|
||||
taxid := line[taxidColIndex]
|
||||
parent := line[parentColIndex]
|
||||
scientific_name := line[scientific_nameColIndex]
|
||||
rank := line[rankColIndex]
|
||||
|
||||
parts := strings.Split(rank, ":")
|
||||
|
||||
rank = parts[0]
|
||||
|
||||
root := len(parts) > 1 && parts[1] == "root"
|
||||
|
||||
taxon, err := taxonomy.AddTaxon(taxid, parent, rank, false, root)
|
||||
taxon.SetName(scientific_name, "scientific name")
|
||||
taxon, err = taxonomy.AddTaxon(taxid, parent, rank, root, false)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
log.Fatalf("cannot add taxon %s: %v", taxid, err)
|
||||
}
|
||||
|
||||
root = false
|
||||
|
||||
taxon.SetName(scientific_name, "scientific name")
|
||||
|
||||
line, err = csvfile.Read()
|
||||
}
|
||||
|
||||
log.Infof("%d Taxa loaded", taxonomy.Len())
|
||||
|
||||
if !taxonomy.HasRoot() {
|
||||
return nil, errors.New("taxonomy file does not contain root node")
|
||||
}
|
||||
|
@ -1,11 +1 @@
|
||||
package obitax
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv"
|
||||
)
|
||||
|
||||
// WriteTaxonomyCSV is a placeholder: it ignores iterator, terminalAction and
// filenames and always returns nil.
// NOTE(review): the surrounding hunk header (-1,11 +1) suggests this stub is
// removed by the commit — confirm before relying on it.
func WriteTaxonomyCSV(iterator ITaxon,
|
||||
terminalAction bool, filenames ...string) *obiitercsv.ICSVRecord {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ package obitax
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// ITaxon represents an iterator for traversing Taxon instances.
|
||||
@ -189,12 +190,12 @@ func (taxon *Taxon) ISubTaxonomy() *ITaxon {
|
||||
go func() {
|
||||
for i := lpath - 1; i >= 0; i-- {
|
||||
taxon := path.Taxon(i)
|
||||
parents[taxon.Node] = true
|
||||
iter.Push(taxon)
|
||||
}
|
||||
|
||||
pushed := true
|
||||
|
||||
log.Warn(parents)
|
||||
for pushed {
|
||||
itaxo := taxo.Iterator()
|
||||
pushed = false
|
||||
@ -215,3 +216,13 @@ func (taxon *Taxon) ISubTaxonomy() *ITaxon {
|
||||
|
||||
return iter
|
||||
}
|
||||
|
||||
// ISubTaxonomy looks up the taxon identified by taxid in the taxonomy and
// returns the iterator produced by that taxon's own ISubTaxonomy method.
//
// It returns nil when taxonomy.Taxon(taxid) yields no taxon, so callers must
// check the result before using it.
func (taxonomy *Taxonomy) ISubTaxonomy(taxid string) *ITaxon {
|
||||
taxon := taxonomy.Taxon(taxid)
|
||||
|
||||
// Unknown taxid: signal it with a nil iterator rather than an error.
if taxon == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return taxon.ISubTaxonomy()
|
||||
}
|
||||
|
@ -197,7 +197,7 @@ func (taxonomy *Taxonomy) Len() int {
|
||||
// - A pointer to the newly created Taxon instance.
|
||||
// - An error if the taxon cannot be added (e.g., it already exists and replace is false).
|
||||
func (taxonomy *Taxonomy) AddTaxon(taxid, parent string, rank string, isRoot bool, replace bool) (*Taxon, error) {
|
||||
taxonomy = taxonomy.OrDefault(false)
|
||||
taxonomy = taxonomy.OrDefault(true)
|
||||
|
||||
parentid, perr := taxonomy.Id(parent)
|
||||
if perr != nil {
|
||||
|
@ -1,4 +1,4 @@
|
||||
package obifind
|
||||
package obitaxonomy
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
@ -40,24 +40,34 @@ func CLIFilterRankRestriction(iterator *obitax.ITaxon) *obitax.ITaxon {
|
||||
return iterator
|
||||
}
|
||||
|
||||
// CLISubTaxonomyIterator returns the sub-taxonomy iterator requested on the
// command line.
//
// When CLIDumpSubtaxonomy() reports true, the result is the default
// taxonomy's ISubTaxonomy for the node given by CLISubTaxonomyNode(); that
// call may itself return nil when the taxid is not found. Otherwise the
// function logs a fatal message.
func CLISubTaxonomyIterator() *obitax.ITaxon {
|
||||
|
||||
if CLIDumpSubtaxonomy() {
|
||||
return obitax.DefaultTaxonomy().ISubTaxonomy(CLISubTaxonomyNode())
|
||||
}
|
||||
|
||||
log.Fatalf("No sub-taxonomy specified use the --dump option")
|
||||
// NOTE(review): presumably unreachable if log.Fatalf terminates the
// process; kept so the function has a terminating return — confirm.
return nil
|
||||
}
|
||||
|
||||
func CLICSVTaxaIterator(iterator *obitax.ITaxon) *obiitercsv.ICSVRecord {
|
||||
if iterator == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
options := make([]WithOption, 0)
|
||||
options := make([]obitax.WithOption, 0)
|
||||
|
||||
options = append(options,
|
||||
OptionsWithPattern(CLIWithQuery()),
|
||||
OptionsWithParent(CLIWithParent()),
|
||||
OptionsWithRank(CLIWithRank()),
|
||||
OptionsWithScientificName(CLIWithScientificName()),
|
||||
OptionsWithPath(CLIWithPath()),
|
||||
OptionsRawTaxid(CLIRawTaxid()),
|
||||
OptionsSource(obidefault.SelectedTaxonomy()),
|
||||
obitax.OptionsWithPattern(CLIWithQuery()),
|
||||
obitax.OptionsWithParent(CLIWithParent()),
|
||||
obitax.OptionsWithRank(CLIWithRank()),
|
||||
obitax.OptionsWithScientificName(CLIWithScientificName()),
|
||||
obitax.OptionsWithPath(CLIWithPath()),
|
||||
obitax.OptionsRawTaxid(CLIRawTaxid()),
|
||||
obitax.OptionsSource(obidefault.SelectedTaxonomy()),
|
||||
)
|
||||
|
||||
return NewCSVTaxaIterator(iterator, options...)
|
||||
return iterator.CSVTaxaIterator(options...)
|
||||
}
|
||||
|
||||
func CLICSVTaxaWriter(iterator *obitax.ITaxon, terminalAction bool) *obiitercsv.ICSVRecord {
|
@ -1,4 +1,4 @@
|
||||
package obifind
|
||||
package obitaxonomy
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@ -16,11 +16,12 @@ var __with_path__ = false
|
||||
var __with_query__ = false
|
||||
var __without_rank__ = false
|
||||
var __without_parent__ = false
|
||||
var __with_scientific_name__ = false
|
||||
var __without_scientific_name__ = false
|
||||
var __raw_taxid__ = false
|
||||
var __taxid_path__ = "NA"
|
||||
var __taxid_sons__ = "NA"
|
||||
var __restrict_rank__ = ""
|
||||
var __to_dump__ = ""
|
||||
|
||||
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__rank_list__, "rank-list", false,
|
||||
@ -40,27 +41,35 @@ func OptionSet(options *getoptions.GetOpt) {
|
||||
options.Description("Match taxon names using a fixed pattern, not a regular expression"))
|
||||
options.StringVar(&__taxid_path__, "parents", "NA",
|
||||
options.Alias("p"),
|
||||
options.ArgName("TAXID"),
|
||||
options.Description("Displays every parental tree's information for the provided taxid."))
|
||||
options.StringVar(&__restrict_rank__, "rank", "",
|
||||
options.ArgName("RANK"),
|
||||
options.Description("Restrict to the given taxonomic rank."))
|
||||
options.BoolVar(&__without_parent__, "without-parent", __without_parent__,
|
||||
options.Description("Adds a column containing the parent's taxonid for each displayed taxon."))
|
||||
options.Description("Supress the column containing the parent's taxonid from the output."))
|
||||
options.StringVar(&__taxid_sons__, "sons", "NA",
|
||||
options.Alias("s"),
|
||||
options.ArgName("TAXID"),
|
||||
options.Description("Displays every sons' tree's information for the provided taxid."))
|
||||
options.BoolVar(&__with_path__, "with-path", false,
|
||||
options.Description("Adds a column containing the full path for each displayed taxon."))
|
||||
options.BoolVar(&__without_rank__, "without-rank", __without_rank__,
|
||||
options.Alias("R"),
|
||||
options.Description("Adds a column containing the taxonomic rank for each displayed taxon."))
|
||||
options.Description("Supress the column containing the taxonomic rank from the output."))
|
||||
options.BoolVar(&__with_query__, "with-query", false,
|
||||
options.Alias("P"),
|
||||
options.Description("Adds a column containing query used to filter taxon name for each displayed taxon."))
|
||||
options.BoolVar(&__with_scientific_name__, "with-scientific-name", false,
|
||||
options.BoolVar(&__without_scientific_name__, "without-scientific-name", __without_scientific_name__,
|
||||
options.Alias("S"),
|
||||
options.Description("Adds a column containing the scientific name for each displayed taxon."))
|
||||
options.Description("Supress the column containing the scientific name from the output."))
|
||||
options.BoolVar(&__raw_taxid__, "raw-taxid", false,
|
||||
options.Description("Displays the raw taxid for each displayed taxon."))
|
||||
options.StringVar(&__to_dump__, "dump", __to_dump__,
|
||||
options.Alias("D"),
|
||||
options.ArgName("TAXID"),
|
||||
options.Description("Dump a sub-taxonomy corresponding to the precised clade"),
|
||||
)
|
||||
}
|
||||
|
||||
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
||||
@ -109,7 +118,7 @@ func CLIWithRank() bool {
|
||||
}
|
||||
|
||||
func CLIWithScientificName() bool {
|
||||
return __with_scientific_name__
|
||||
return !__without_scientific_name__
|
||||
}
|
||||
|
||||
func CLIRawTaxid() bool {
|
||||
@ -127,3 +136,11 @@ func CLIFixedPattern() bool {
|
||||
// CLIWithQuery returns the value of the __with_query__ flag, which OptionSet
// binds to the --with-query command-line option.
func CLIWithQuery() bool {
|
||||
return __with_query__
|
||||
}
|
||||
|
||||
// CLIDumpSubtaxonomy reports whether a sub-taxonomy dump was requested, i.e.
// whether __to_dump__ (bound to the --dump option, empty by default) holds a
// non-empty value.
func CLIDumpSubtaxonomy() bool {
|
||||
return __to_dump__ != ""
|
||||
}
|
||||
|
||||
// CLISubTaxonomyNode returns the raw value of __to_dump__, the TAXID argument
// given to the --dump option. It is the empty string when the option was not
// set.
func CLISubTaxonomyNode() string {
|
||||
return __to_dump__
|
||||
}
|
Reference in New Issue
Block a user