mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-02-03 23:00:32 +00:00
Rename obifind to obitaxonomy and introduce the new CSV format for taxonomy.
This commit is contained in:
@@ -1,300 +0,0 @@
|
||||
package obifind
|
||||
|
||||
import (
|
||||
"slices"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||
)
|
||||
|
||||
// __options__ gathers every setting that drives the conversion of taxa
// into CSV records.
type __options__ struct {
	// batch_size is the number of records grouped into one batch.
	batch_size int

	// Column switches: each flag enables the corresponding CSV column.
	with_pattern, with_parent, with_path, with_rank, with_scientific_name bool

	// raw_taxid selects the raw node identifier instead of the formatted
	// taxon string for the taxid and parent columns.
	raw_taxid bool

	// with_metadata lists the metadata keys exported as extra columns.
	with_metadata []string

	// source is the label attached to every produced batch.
	source string
}
|
||||
|
||||
// Options wraps the __options__ struct to provide a pointer to the options.
|
||||
type Options struct {
|
||||
pointer *__options__ // Pointer to the underlying options
|
||||
}
|
||||
|
||||
// WithOption is a function type that takes an Options parameter and modifies it.
|
||||
type WithOption func(Options)
|
||||
|
||||
// MakeOptions creates an Options instance with default settings and applies any provided setters.
|
||||
// It returns the configured Options.
|
||||
//
|
||||
// Parameters:
|
||||
// - setters: A slice of WithOption functions to customize the options.
|
||||
//
|
||||
// Returns:
|
||||
// - An Options instance with the specified settings.
|
||||
func MakeOptions(setters []WithOption) Options {
|
||||
o := __options__{
|
||||
batch_size: obidefault.BatchSize(), // Number of items to process in a batch
|
||||
with_pattern: true,
|
||||
with_parent: false,
|
||||
with_path: false,
|
||||
with_rank: true,
|
||||
with_scientific_name: false,
|
||||
raw_taxid: false,
|
||||
source: "unknown",
|
||||
}
|
||||
opt := Options{&o}
|
||||
|
||||
for _, set := range setters {
|
||||
set(opt)
|
||||
}
|
||||
|
||||
return opt
|
||||
}
|
||||
|
||||
// BatchSize returns the size of the batch to be processed.
|
||||
// It retrieves the batch size from the underlying options.
|
||||
func (o *Options) BatchSize() int {
|
||||
return o.pointer.batch_size
|
||||
}
|
||||
|
||||
// WithPattern returns whether the pattern option is enabled.
|
||||
// It retrieves the setting from the underlying options.
|
||||
func (o *Options) WithPattern() bool {
|
||||
return o.pointer.with_pattern
|
||||
}
|
||||
|
||||
// WithParent returns whether the parent option is enabled.
|
||||
// It retrieves the setting from the underlying options.
|
||||
func (o *Options) WithParent() bool {
|
||||
return o.pointer.with_parent
|
||||
}
|
||||
|
||||
// WithPath returns whether the path option is enabled.
|
||||
// It retrieves the setting from the underlying options.
|
||||
func (o *Options) WithPath() bool {
|
||||
return o.pointer.with_path
|
||||
}
|
||||
|
||||
// WithRank returns whether the rank option is enabled.
|
||||
// It retrieves the setting from the underlying options.
|
||||
func (o *Options) WithRank() bool {
|
||||
return o.pointer.with_rank
|
||||
}
|
||||
|
||||
// WithScientificName returns whether the scientific name option is enabled.
|
||||
// It retrieves the setting from the underlying options.
|
||||
func (o *Options) WithScientificName() bool {
|
||||
return o.pointer.with_scientific_name
|
||||
}
|
||||
|
||||
// RawTaxid returns whether the raw taxid option is enabled.
|
||||
// It retrieves the setting from the underlying options.
|
||||
func (o *Options) RawTaxid() bool {
|
||||
return o.pointer.raw_taxid
|
||||
}
|
||||
|
||||
// Source returns the source of the data.
|
||||
// It retrieves the source from the underlying options.
|
||||
func (o *Options) Source() string {
|
||||
return o.pointer.source
|
||||
}
|
||||
|
||||
// WithMetadata returns a slice of strings containing the metadata
|
||||
// associated with the Options instance. It retrieves the metadata
|
||||
// from the pointer's with_metadata field.
|
||||
func (o *Options) WithMetadata() []string {
|
||||
if o.WithPattern() {
|
||||
idx := slices.Index(o.pointer.with_metadata, "query")
|
||||
if idx >= 0 {
|
||||
o.pointer.with_metadata = slices.Delete(o.pointer.with_metadata, idx, idx+1)
|
||||
}
|
||||
}
|
||||
|
||||
return o.pointer.with_metadata
|
||||
}
|
||||
|
||||
// OptionsBatchSize returns a WithOption function that sets the batch_size option.
|
||||
// Parameters:
|
||||
// - size: An integer specifying the size of the batch to be processed.
|
||||
func OptionsBatchSize(size int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.batch_size = size
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsWithPattern(value bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.with_pattern = value
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsWithParent(value bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.with_parent = value
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsWithPath(value bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.with_path = value
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsWithRank(value bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.with_rank = value
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsWithScientificName(value bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.with_scientific_name = value
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsRawTaxid(value bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.raw_taxid = value
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsSource(value string) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.source = value
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsWithMetadata(values ...string) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.with_metadata = values
|
||||
})
|
||||
return f
|
||||
}
|
||||
|
||||
func NewCSVTaxaIterator(iterator *obitax.ITaxon, options ...WithOption) *obiitercsv.ICSVRecord {
|
||||
|
||||
opt := MakeOptions(options)
|
||||
metakeys := make([]string, 0)
|
||||
|
||||
newIter := obiitercsv.NewICSVRecord()
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
batch_size := opt.BatchSize()
|
||||
|
||||
if opt.WithPattern() {
|
||||
newIter.AppendField("query")
|
||||
opt.pointer.with_metadata = append(opt.pointer.with_metadata, "query")
|
||||
}
|
||||
|
||||
newIter.AppendField("taxid")
|
||||
rawtaxid := opt.RawTaxid()
|
||||
|
||||
if opt.WithParent() {
|
||||
newIter.AppendField("parent")
|
||||
}
|
||||
|
||||
if opt.WithRank() {
|
||||
newIter.AppendField("taxonomic_rank")
|
||||
}
|
||||
|
||||
if opt.WithScientificName() {
|
||||
newIter.AppendField("scientific_name")
|
||||
}
|
||||
|
||||
if opt.WithMetadata() != nil {
|
||||
metakeys = opt.WithMetadata()
|
||||
for _, metadata := range metakeys {
|
||||
newIter.AppendField(metadata)
|
||||
}
|
||||
}
|
||||
|
||||
if opt.WithPath() {
|
||||
newIter.AppendField("path")
|
||||
}
|
||||
|
||||
go func() {
|
||||
newIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
go func() {
|
||||
o := 0
|
||||
data := make([]obiitercsv.CSVRecord, 0, batch_size)
|
||||
for iterator.Next() {
|
||||
|
||||
taxon := iterator.Get()
|
||||
record := make(obiitercsv.CSVRecord)
|
||||
|
||||
if opt.WithPattern() {
|
||||
record["query"] = taxon.MetadataAsString("query")
|
||||
}
|
||||
|
||||
if rawtaxid {
|
||||
record["taxid"] = *taxon.Node.Id()
|
||||
} else {
|
||||
record["taxid"] = taxon.String()
|
||||
}
|
||||
|
||||
if opt.WithParent() {
|
||||
if rawtaxid {
|
||||
record["parent"] = *taxon.Node.ParentId()
|
||||
} else {
|
||||
record["parent"] = taxon.Parent().String()
|
||||
}
|
||||
}
|
||||
|
||||
if opt.WithRank() {
|
||||
record["taxonomic_rank"] = taxon.Rank()
|
||||
}
|
||||
|
||||
if opt.WithScientificName() {
|
||||
record["scientific_name"] = taxon.ScientificName()
|
||||
}
|
||||
|
||||
if opt.WithPath() {
|
||||
record["path"] = taxon.Path().String()
|
||||
}
|
||||
|
||||
for _, key := range metakeys {
|
||||
record[key] = taxon.MetadataAsString(key)
|
||||
}
|
||||
|
||||
data = append(data, record)
|
||||
if len(data) >= batch_size {
|
||||
newIter.Push(obiitercsv.MakeCSVRecordBatch(opt.Source(), o, data))
|
||||
data = make([]obiitercsv.CSVRecord, 0, batch_size)
|
||||
o++
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if len(data) > 0 {
|
||||
newIter.Push(obiitercsv.MakeCSVRecordBatch(opt.Source(), o, data))
|
||||
}
|
||||
|
||||
newIter.Done()
|
||||
}()
|
||||
|
||||
return newIter
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package obifind
|
||||
package obitaxonomy
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
@@ -40,24 +40,34 @@ func CLIFilterRankRestriction(iterator *obitax.ITaxon) *obitax.ITaxon {
|
||||
return iterator
|
||||
}
|
||||
|
||||
func CLISubTaxonomyIterator() *obitax.ITaxon {
|
||||
|
||||
if CLIDumpSubtaxonomy() {
|
||||
return obitax.DefaultTaxonomy().ISubTaxonomy(CLISubTaxonomyNode())
|
||||
}
|
||||
|
||||
log.Fatalf("No sub-taxonomy specified use the --dump option")
|
||||
return nil
|
||||
}
|
||||
|
||||
func CLICSVTaxaIterator(iterator *obitax.ITaxon) *obiitercsv.ICSVRecord {
|
||||
if iterator == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
options := make([]WithOption, 0)
|
||||
options := make([]obitax.WithOption, 0)
|
||||
|
||||
options = append(options,
|
||||
OptionsWithPattern(CLIWithQuery()),
|
||||
OptionsWithParent(CLIWithParent()),
|
||||
OptionsWithRank(CLIWithRank()),
|
||||
OptionsWithScientificName(CLIWithScientificName()),
|
||||
OptionsWithPath(CLIWithPath()),
|
||||
OptionsRawTaxid(CLIRawTaxid()),
|
||||
OptionsSource(obidefault.SelectedTaxonomy()),
|
||||
obitax.OptionsWithPattern(CLIWithQuery()),
|
||||
obitax.OptionsWithParent(CLIWithParent()),
|
||||
obitax.OptionsWithRank(CLIWithRank()),
|
||||
obitax.OptionsWithScientificName(CLIWithScientificName()),
|
||||
obitax.OptionsWithPath(CLIWithPath()),
|
||||
obitax.OptionsRawTaxid(CLIRawTaxid()),
|
||||
obitax.OptionsSource(obidefault.SelectedTaxonomy()),
|
||||
)
|
||||
|
||||
return NewCSVTaxaIterator(iterator, options...)
|
||||
return iterator.CSVTaxaIterator(options...)
|
||||
}
|
||||
|
||||
func CLICSVTaxaWriter(iterator *obitax.ITaxon, terminalAction bool) *obiitercsv.ICSVRecord {
|
||||
@@ -1,4 +1,4 @@
|
||||
package obifind
|
||||
package obitaxonomy
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@@ -16,11 +16,12 @@ var __with_path__ = false
|
||||
var __with_query__ = false
|
||||
var __without_rank__ = false
|
||||
var __without_parent__ = false
|
||||
var __with_scientific_name__ = false
|
||||
var __without_scientific_name__ = false
|
||||
var __raw_taxid__ = false
|
||||
var __taxid_path__ = "NA"
|
||||
var __taxid_sons__ = "NA"
|
||||
var __restrict_rank__ = ""
|
||||
var __to_dump__ = ""
|
||||
|
||||
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__rank_list__, "rank-list", false,
|
||||
@@ -40,27 +41,35 @@ func OptionSet(options *getoptions.GetOpt) {
|
||||
options.Description("Match taxon names using a fixed pattern, not a regular expression"))
|
||||
options.StringVar(&__taxid_path__, "parents", "NA",
|
||||
options.Alias("p"),
|
||||
options.ArgName("TAXID"),
|
||||
options.Description("Displays every parental tree's information for the provided taxid."))
|
||||
options.StringVar(&__restrict_rank__, "rank", "",
|
||||
options.ArgName("RANK"),
|
||||
options.Description("Restrict to the given taxonomic rank."))
|
||||
options.BoolVar(&__without_parent__, "without-parent", __without_parent__,
|
||||
options.Description("Adds a column containing the parent's taxonid for each displayed taxon."))
|
||||
options.Description("Supress the column containing the parent's taxonid from the output."))
|
||||
options.StringVar(&__taxid_sons__, "sons", "NA",
|
||||
options.Alias("s"),
|
||||
options.ArgName("TAXID"),
|
||||
options.Description("Displays every sons' tree's information for the provided taxid."))
|
||||
options.BoolVar(&__with_path__, "with-path", false,
|
||||
options.Description("Adds a column containing the full path for each displayed taxon."))
|
||||
options.BoolVar(&__without_rank__, "without-rank", __without_rank__,
|
||||
options.Alias("R"),
|
||||
options.Description("Adds a column containing the taxonomic rank for each displayed taxon."))
|
||||
options.Description("Supress the column containing the taxonomic rank from the output."))
|
||||
options.BoolVar(&__with_query__, "with-query", false,
|
||||
options.Alias("P"),
|
||||
options.Description("Adds a column containing query used to filter taxon name for each displayed taxon."))
|
||||
options.BoolVar(&__with_scientific_name__, "with-scientific-name", false,
|
||||
options.BoolVar(&__without_scientific_name__, "without-scientific-name", __without_scientific_name__,
|
||||
options.Alias("S"),
|
||||
options.Description("Adds a column containing the scientific name for each displayed taxon."))
|
||||
options.Description("Supress the column containing the scientific name from the output."))
|
||||
options.BoolVar(&__raw_taxid__, "raw-taxid", false,
|
||||
options.Description("Displays the raw taxid for each displayed taxon."))
|
||||
options.StringVar(&__to_dump__, "dump", __to_dump__,
|
||||
options.Alias("D"),
|
||||
options.ArgName("TAXID"),
|
||||
options.Description("Dump a sub-taxonomy corresponding to the precised clade"),
|
||||
)
|
||||
}
|
||||
|
||||
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
||||
@@ -109,7 +118,7 @@ func CLIWithRank() bool {
|
||||
}
|
||||
|
||||
func CLIWithScientificName() bool {
|
||||
return __with_scientific_name__
|
||||
return !__without_scientific_name__
|
||||
}
|
||||
|
||||
func CLIRawTaxid() bool {
|
||||
@@ -127,3 +136,11 @@ func CLIFixedPattern() bool {
|
||||
func CLIWithQuery() bool {
|
||||
return __with_query__
|
||||
}
|
||||
|
||||
func CLIDumpSubtaxonomy() bool {
|
||||
return __to_dump__ != ""
|
||||
}
|
||||
|
||||
func CLISubTaxonomyNode() string {
|
||||
return __to_dump__
|
||||
}
|
||||
Reference in New Issue
Block a user