From 85349668d04d6cbc1bac930c22e5d2086c13dc87 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 16 Feb 2023 13:32:27 +0100 Subject: [PATCH] Add some options to obiannotate --- pkg/obieval/language.go | 68 ++++++++++++++++++------- pkg/obiiter/speed.go | 10 ++++ pkg/obiseq/eval.go | 62 ++++++++++++++++++++++ pkg/obiseq/predicate.go | 1 + pkg/obitools/obiannotate/obiannotate.go | 23 ++++++++- pkg/obitools/obiannotate/options.go | 18 +++++++ 6 files changed, 162 insertions(+), 20 deletions(-) create mode 100644 pkg/obiseq/eval.go diff --git a/pkg/obieval/language.go b/pkg/obieval/language.go index d7aea9a..5aa78c2 100644 --- a/pkg/obieval/language.go +++ b/pkg/obieval/language.go @@ -1,13 +1,16 @@ package obieval import ( + "fmt" + "log" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "github.com/PaesslerAG/gval" ) func maxIntVector(values []int) float64 { m := values[0] - for _,v := range values { + for _, v := range values { if v > m { m = v } @@ -16,17 +19,17 @@ func maxIntVector(values []int) float64 { return float64(m) } -func maxIntMap(values map[string]int) float64 { +func maxIntMap(values map[string]int) float64 { var m int first := true - for _,v := range values { + for _, v := range values { if first { first = false m = v } else { if v > m { m = v - } + } } } @@ -35,7 +38,7 @@ func maxIntMap(values map[string]int) float64 { func minIntVector(values []int) float64 { m := values[0] - for _,v := range values { + for _, v := range values { if v < m { m = v } @@ -44,27 +47,26 @@ func minIntVector(values []int) float64 { return float64(m) } -func minIntMap(values map[string]int) float64 { +func minIntMap(values map[string]int) float64 { var m int first := true - for _,v := range values { + for _, v := range values { if first { first = false m = v } else { if v < m { m = v - } + } } } return float64(m) } - func maxFloatVector(values []float64) float64 { m := values[0] - for _,v := range values { + for _, v := range values { if v > m { m = v } @@ 
-73,17 +75,17 @@ func maxFloatVector(values []float64) float64 { return m } -func maxFloatMap(values map[string]float64) float64 { +func maxFloatMap(values map[string]float64) float64 { var m float64 first := true - for _,v := range values { + for _, v := range values { if first { first = false m = v } else { if v > m { m = v - } + } } } @@ -92,7 +94,7 @@ func maxFloatMap(values map[string]float64) float64 { func minFloatVector(values []float64) float64 { m := values[0] - for _,v := range values { + for _, v := range values { if v < m { m = v } @@ -101,17 +103,17 @@ func minFloatVector(values []float64) float64 { return m } -func minFloatMap(values map[string]float64) float64 { +func minFloatMap(values map[string]float64) float64 { var m float64 first := true - for _,v := range values { + for _, v := range values { if first { first = false m = v } else { if v < m { m = v - } + } } } @@ -119,12 +121,12 @@ func minFloatMap(values map[string]float64) float64 { } // func maxNumeric(args ...interface{}) (interface{}, error) { -// var m float64 +// var m float64 // first := true // for _, v := range args { // switch { -// case +// case // } // } @@ -139,6 +141,34 @@ var OBILang = gval.NewLanguage( gval.Function("ismap", func(args ...interface{}) (interface{}, error) { ismap := goutils.IsAMap(args[0]) return ismap, nil + }), + gval.Function("printf", func(args ...interface{}) (interface{}, error) { + text := fmt.Sprintf(args[0].(string), args[1:]...) 
+ return text, nil + }), + gval.Function("int", func(args ...interface{}) (interface{}, error) { + val, err := goutils.InterfaceToInt(args[0]) + + if err != nil { + log.Fatalf("%v cannot be converted to an integer value", args[0]) + } + return val, nil + }), + gval.Function("numeric", func(args ...interface{}) (interface{}, error) { + val, err := goutils.InterfaceToFloat64(args[0]) + + if err != nil { + log.Fatalf("%v cannot be converted to a numeric value", args[0]) + } + return val, nil + }), + gval.Function("bool", func(args ...interface{}) (interface{}, error) { + val, err := goutils.InterfaceToBool(args[0]) + + if err != nil { + log.Fatalf("%v cannot be converted to a boolean value", args[0]) + } + return val, nil })) func Expression(expression string) (gval.Evaluable, error) { diff --git a/pkg/obiiter/speed.go b/pkg/obiiter/speed.go index cb225c1..24014df 100644 --- a/pkg/obiiter/speed.go +++ b/pkg/obiiter/speed.go @@ -1,12 +1,21 @@ package obiiter import ( + "fmt" "os" "github.com/schollz/progressbar/v3" ) func (iterator IBioSequence) Speed(message ...string) IBioSequence { + + // If the STDERR is redirected and doesn't end up to a terminal + // No progress bar is printed.
+ if o, err := os.Stderr.Stat(); err != nil || + (o.Mode() & os.ModeCharDevice) != os.ModeCharDevice { + return iterator + } + newIter := MakeIBioSequence() newIter.Add(1) @@ -44,6 +53,7 @@ func (iterator IBioSequence) Speed(message ...string) IBioSequence { bar.Add(l) } + fmt.Fprintln(os.Stderr) newIter.Done() }() diff --git a/pkg/obiseq/eval.go b/pkg/obiseq/eval.go new file mode 100644 index 0000000..8e007f6 --- /dev/null +++ b/pkg/obiseq/eval.go @@ -0,0 +1,62 @@ +package obiseq + +import ( + "context" + "fmt" + + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obieval" + log "github.com/sirupsen/logrus" +) + +func Expression(expression string) func(*BioSequence) (interface{},error) { + + exp, err := obieval.OBILang.NewEvaluable(expression) + if err != nil { + log.Fatalf("Error in the expression : %s", expression) + } + + f := func(sequence *BioSequence) (interface{},error) { + return exp(context.Background(), + map[string]interface{}{ + "annotations": sequence.Annotations(), + "sequence": sequence, + }, + ) + } + + return f +} + +func EditIdWorker(expression string) SeqWorker { + e := Expression(expression) + f := func(sequence *BioSequence) *BioSequence { + v,err := e(sequence) + + if err != nil { + log.Fatalf("Expression '%s' cannot be evaluated on sequence %s", + expression, + sequence.Id()) + } + sequence.SetId(fmt.Sprintf("%v",v)) + return sequence + } + + return f +} + +func EditAttributeWorker(key string, expression string) SeqWorker { + e := Expression(expression) + f := func(sequence *BioSequence) *BioSequence { + v,err := e(sequence) + + if err != nil { + log.Fatalf("Expression '%s' cannot be evaluated on sequence %s", + expression, + sequence.Id()) + } + sequence.SetAttribute(key,v) + return sequence + } + + return f +} \ No newline at end of file diff --git a/pkg/obiseq/predicate.go b/pkg/obiseq/predicate.go index fa3b24b..ade81cc 100644 --- a/pkg/obiseq/predicate.go +++ b/pkg/obiseq/predicate.go @@ -225,3 +225,4 @@ func ExpressionPredicat(expression string) 
SequencePredicate { return f } + diff --git a/pkg/obitools/obiannotate/obiannotate.go b/pkg/obitools/obiannotate/obiannotate.go index de6b2d5..356cd34 100644 --- a/pkg/obitools/obiannotate/obiannotate.go +++ b/pkg/obitools/obiannotate/obiannotate.go @@ -5,6 +5,7 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obicorazick" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep" @@ -65,6 +66,21 @@ func RenameAttributeWorker(toBeRenamed map[string]string) obiseq.SeqWorker { return f } +func EvalAttributeWorker(expression map[string]string) obiseq.SeqWorker { + var w obiseq.SeqWorker + w = nil + + for a,e := range expression { + if w == nil { + w = obiseq.EditAttributeWorker(a,e) + } else { + w = w.ChainWorkers(obiseq.EditAttributeWorker(a,e)) + } + } + + return w +} + func AddTaxonAtRankWorker(taxonomy *obitax.Taxonomy, ranks ...string) obiseq.SeqWorker { f := func(s *obiseq.BioSequence) *obiseq.BioSequence { for _, r := range ranks { @@ -121,6 +137,11 @@ func CLIAnnotationWorker() obiseq.SeqWorker { annotator = annotator.ChainWorkers(w) } + if CLIHasSetAttributeExpression() { + w := EvalAttributeWorker(CLISetAttributeExpression()) + annotator = annotator.ChainWorkers(w) + } + if CLIHasAhoCorasick() { patterns := CLIAhoCorazick() log.Println("Matching : ", len(patterns), " patterns on sequences") @@ -138,7 +159,7 @@ func CLIAnnotationPipeline() obiiter.Pipeable { worker := CLIAnnotationWorker() annotator := obiseq.SeqToSliceConditionalWorker(worker, predicate, true) - f := obiiter.SliceWorkerPipe(annotator) + f := obiiter.SliceWorkerPipe(annotator, obioptions.CLIParallelWorkers()) return f } diff --git a/pkg/obitools/obiannotate/options.go b/pkg/obitools/obiannotate/options.go index 63480c1..b3d61c0 100644 --- 
a/pkg/obitools/obiannotate/options.go +++ b/pkg/obitools/obiannotate/options.go @@ -16,6 +16,7 @@ var _toBeRenamed = make(map[string]string, 0) var _toBeDeleted = make([]string, 0) var _keepOnly = make([]string, 0) var _taxonAtRank = make([]string, 0) +var _evalAttribute = make(map[string]string, 0) var _tagList = "" var _clearAll = false var _setSeqLength = false @@ -41,6 +42,12 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) { // options.Description("Forces sequence record ids to be unique."), // ) + options.StringMapVar(&_evalAttribute, "set-tag", 1, 1, + options.Alias("S"), + options.ArgName("KEY=EXPRESSION"), + options.Description("Creates a new attribute named with a key KEY "+ + "sets with a value computed from EXPRESSION.")) + options.StringMapVar(&_toBeRenamed, "rename-tag", 1, 1, options.Alias("R"), options.ArgName("NEW_NAME=OLD_NAME"), @@ -140,6 +147,17 @@ func CLIHasClearAllFlag() bool { return _clearAll } +func CLIHasSetAttributeExpression() bool { + return len(_evalAttribute) > 0 +} + +func CLISetAttributeExpression() map[string]string { + return _evalAttribute +} + + + + func CLIHasAhoCorasick() bool { _, err := os.Stat(_ahoCorazick) return err == nil