Add some options to obiannotate

This commit is contained in:
2023-02-16 13:32:27 +01:00
parent 6e36b22040
commit 85349668d0
6 changed files with 162 additions and 20 deletions

View File

@@ -5,6 +5,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obicorazick"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
@@ -65,6 +66,21 @@ func RenameAttributeWorker(toBeRenamed map[string]string) obiseq.SeqWorker {
return f
}
// EvalAttributeWorker builds a single worker out of a set of attribute
// expressions. For every entry of expression, the key and the value are handed
// to obiseq.EditAttributeWorker, and the resulting workers are combined with
// ChainWorkers.
//
// The chaining order follows Go map iteration and is therefore unspecified.
// When expression is empty, the returned worker is nil.
func EvalAttributeWorker(expression map[string]string) obiseq.SeqWorker {
	var w obiseq.SeqWorker

	for a, e := range expression {
		if w == nil {
			w = obiseq.EditAttributeWorker(a, e)
			continue
		}

		// ChainWorkers hands back the combined worker (callers in this file
		// always reassign its result), so the return value must be kept;
		// otherwise every expression but the first is silently dropped.
		w = w.ChainWorkers(obiseq.EditAttributeWorker(a, e))
	}

	return w
}
func AddTaxonAtRankWorker(taxonomy *obitax.Taxonomy, ranks ...string) obiseq.SeqWorker {
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
for _, r := range ranks {
@@ -121,6 +137,11 @@ func CLIAnnotationWorker() obiseq.SeqWorker {
annotator = annotator.ChainWorkers(w)
}
if CLIHasSetAttributeExpression() {
w := EvalAttributeWorker(CLISetAttributeExpression())
annotator = annotator.ChainWorkers(w)
}
if CLIHasAhoCorasick() {
patterns := CLIAhoCorazick()
log.Println("Matching : ", len(patterns), " patterns on sequences")
@@ -138,7 +159,7 @@ func CLIAnnotationPipeline() obiiter.Pipeable {
worker := CLIAnnotationWorker()
annotator := obiseq.SeqToSliceConditionalWorker(worker, predicate, true)
f := obiiter.SliceWorkerPipe(annotator)
f := obiiter.SliceWorkerPipe(annotator, obioptions.CLIParallelWorkers())
return f
}

View File

@@ -16,6 +16,7 @@ var _toBeRenamed = make(map[string]string, 0)
// NOTE(review): these package-level variables presumably hold the parsed
// values of the annotation command-line options; only part of the list is
// visible here, confirm against the option declarations.
var _toBeDeleted = make([]string, 0)
var _keepOnly = make([]string, 0)
var _taxonAtRank = make([]string, 0)
// _evalAttribute is the map filled by the "set-tag" option (KEY=EXPRESSION).
var _evalAttribute = make(map[string]string, 0)
var _tagList = ""
// _clearAll is the value reported by CLIHasClearAllFlag.
var _clearAll = false
var _setSeqLength = false
@@ -41,6 +42,12 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
// options.Description("Forces sequence record ids to be unique."),
// )
options.StringMapVar(&_evalAttribute, "set-tag", 1, 1,
options.Alias("S"),
options.ArgName("KEY=EXPRESSION"),
options.Description("Creates a new attribute named with a key <KEY> "+
"sets with a value computed from <EXPRESSION>."))
options.StringMapVar(&_toBeRenamed, "rename-tag", 1, 1,
options.Alias("R"),
options.ArgName("NEW_NAME=OLD_NAME"),
@@ -140,6 +147,17 @@ func CLIHasClearAllFlag() bool {
return _clearAll
}
// CLIHasSetAttributeExpression reports whether at least one KEY=EXPRESSION
// pair has been stored in _evalAttribute, the map bound to the "set-tag"
// option.
func CLIHasSetAttributeExpression() bool {
	count := len(_evalAttribute)
	return count != 0
}
// CLISetAttributeExpression returns the map bound to the "set-tag" option.
// The map itself is returned, not a copy, so changes made by the caller are
// visible to later calls.
func CLISetAttributeExpression() map[string]string {
	expressions := _evalAttribute
	return expressions
}
func CLIHasAhoCorasick() bool {
_, err := os.Stat(_ahoCorazick)
return err == nil