From d46f6b06c51512c139ea4fef3355f37941b0b247 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 7 Jun 2023 17:50:10 +0200 Subject: [PATCH] several small changes Former-commit-id: c1cdb95885e44fd6ee7d1c963860d7ab41230c96 --- pkg/obioptions/options.go | 5 ++++- pkg/obiseq/attributes.go | 4 ++++ pkg/obiseq/language.go | 4 ++++ pkg/obitools/obiannotate/obiannotate.go | 6 ++++++ pkg/obitools/obiannotate/options.go | 14 ++++++++++++++ pkg/obitools/obifind/options.go | 7 ------- 6 files changed, 32 insertions(+), 8 deletions(-) diff --git a/pkg/obioptions/options.go b/pkg/obioptions/options.go index 6e810a7..f368bb2 100644 --- a/pkg/obioptions/options.go +++ b/pkg/obioptions/options.go @@ -39,6 +39,10 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser options.GetEnv("OBIMAXCPU"), options.Description("Number of parallele threads computing the result")) + options.IntVar(&_BatchSize, "batch-size", _BatchSize, + options.GetEnv("OBIBATCHSIZE"), + options.Description("Number of sequence per batch for paralelle processing")) + for _, o := range optionset { o(options) } @@ -100,7 +104,6 @@ func CLIReadParallelWorkers() int { return int(float64(_MaxAllowedCPU) * float64(_ReadWorkerPerCore)) } - // CLIParallelWorkers returns the number of parallel workers requested by // the command line option --workers|-w. func CLIMaxCPU() int { diff --git a/pkg/obiseq/attributes.go b/pkg/obiseq/attributes.go index f34287f..c9be3d6 100644 --- a/pkg/obiseq/attributes.go +++ b/pkg/obiseq/attributes.go @@ -198,3 +198,7 @@ func (s *BioSequence) OBITagRefIndex() map[int]string { return val } + +func (s *BioSequence) SetOBITagRefIndex(idx map[int]string) { + s.SetAttribute("obitag_ref_index", idx) +} \ No newline at end of file diff --git a/pkg/obiseq/language.go b/pkg/obiseq/language.go index b0abaf6..80f47eb 100644 --- a/pkg/obiseq/language.go +++ b/pkg/obiseq/language.go @@ -155,6 +155,10 @@ var OBILang = gval.NewLanguage( text := fmt.Sprintf(args[0].(string), args[1:]...) return text, nil }), + gval.Function("gsub", func(args ...interface{}) (interface{}, error) { + text := strings.ReplaceAll(args[0].(string), args[1].(string), args[2].(string)) + return text, nil + }), gval.Function("subspc", func(args ...interface{}) (interface{}, error) { text := strings.ReplaceAll(args[0].(string), " ", "_") return text, nil diff --git a/pkg/obitools/obiannotate/obiannotate.go b/pkg/obitools/obiannotate/obiannotate.go index caff95e..7f19984 100644 --- a/pkg/obitools/obiannotate/obiannotate.go +++ b/pkg/obitools/obiannotate/obiannotate.go @@ -81,6 +81,7 @@ func EvalAttributeWorker(expression map[string]string) obiseq.SeqWorker { return w } + func AddTaxonAtRankWorker(taxonomy *obitax.Taxonomy, ranks ...string) obiseq.SeqWorker { f := func(s *obiseq.BioSequence) *obiseq.BioSequence { for _, r := range ranks { @@ -111,6 +112,11 @@ func CLIAnnotationWorker() obiseq.SeqWorker { annotator = annotator.ChainWorkers(w) } + if CLIHasSetId() { + w := obiseq.EditIdWorker(CLSetIdExpression()) + annotator = annotator.ChainWorkers(w) + } + if CLIHasAttibuteToDelete() { w := DeleteAttributesWorker(CLIAttibuteToDelete()) annotator = annotator.ChainWorkers(w) diff --git a/pkg/obitools/obiannotate/options.go b/pkg/obitools/obiannotate/options.go index 3ba5fcf..8132be0 100644 --- a/pkg/obitools/obiannotate/options.go +++ b/pkg/obitools/obiannotate/options.go @@ -24,6 +24,7 @@ var _uniqueID = false var _ahoCorazick = "" var _lcaSlot = "" var _lcaError = 0.0 +var _setId = "" func SequenceAnnotationOptionSet(options *getoptions.GetOpt) { // options.BoolVar(&_addRank, "seq-rank", _addRank, @@ -47,6 +48,10 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) { "a new slot named is added with the taxid of the lowest common ancester corresponding "+ "to the current annotation.")) + options.StringVar(&_setId, "set-identifier", _setId, + options.ArgName("EXPRESSION"), + options.Description("An expression used to assigned the new id of the sequence")) + options.Float64Var(&_lcaError, "lca-error", _lcaError, options.ArgName("#.###"), options.Description("Error rate tolerated on the taxonomical discription during the lowest common "+ @@ -123,6 +128,15 @@ func OptionSet(options *getoptions.GetOpt) { // --uniq-id // Forces sequence record ids to be unique. +func CLIHasSetId() bool { + return _setId != "" +} + +func CLSetIdExpression() string { + return _setId +} + + func CLIHasAttributeToBeRenamed() bool { return len(_toBeRenamed) > 0 } diff --git a/pkg/obitools/obifind/options.go b/pkg/obitools/obifind/options.go index d250ac2..285745c 100644 --- a/pkg/obitools/obifind/options.go +++ b/pkg/obitools/obifind/options.go @@ -36,13 +36,6 @@ func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bo options.Alias("a"), options.Description("Enable the search on all alternative names and not only scientific names.")) } - options.BoolVar(&__rank_list__, "rank-list", false, - options.Alias("l"), - options.Description("List every taxonomic rank available in the taxonomy.")) - - options.IntSliceVar(&__taxonomical_restriction__, "restrict-to-taxon", 1, 1, - options.Alias("r"), - options.Description("Restrict output to some subclades.")) } func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {