Correct the number of workers

Former-commit-id: febbccfb853263e0761ecfccb0f09c8c1bf88475
This commit is contained in:
2023-11-22 09:46:30 +01:00
parent 8905a16bc0
commit 2e0c1bd801
11 changed files with 206 additions and 39 deletions

View File

@@ -1,7 +1,7 @@
package obiannotate
import (
"log"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obicorazick"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
@@ -22,6 +22,15 @@ func DeleteAttributesWorker(toBeDeleted []string) obiseq.SeqWorker {
return f
}
// func MatchPatternWorker(pattern string, errormax int, allowsIndel bool) obiseq.SeqWorker {
// pat, err := obiapat.MakeApatPattern(pattern, errormax, allowsIndel)
// f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
// apats := obiapat.MakeApatSequence(s, false)
// pat.BestMatch(apats, 0)
// return s
// }
// }
func ToBeKeptAttributesWorker(toBeKept []string) obiseq.SeqWorker {
d := make(map[string]bool, len(_keepOnly))
@@ -43,6 +52,58 @@ func ToBeKeptAttributesWorker(toBeKept []string) obiseq.SeqWorker {
return f
}
// CutSequenceWorker builds a SeqWorker that replaces each sequence by the
// subsequence selected by from and to.
//
// A positive from is decremented by one before use (presumably converting a
// 1-based position into a 0-based offset — confirm against Subsequence).
// Negative from or to values are resolved against each sequence as
// s.Len()+value+1. When both from and to are 0 the returned worker hands its
// input back unchanged.
//
// When the subsequence cannot be extracted, the worker terminates the program
// through log.Fatalf if breakOnError is true; otherwise it logs a warning and
// returns nil so that the sequence is discarded.
//
// The returned worker never writes to from or to: the bounds are recomputed
// into locals for every sequence, so one sequence cannot alter the cut applied
// to the next one and the worker does not race when shared between goroutines.
func CutSequenceWorker(from, to int, breakOnError bool) obiseq.SeqWorker {
	// Nothing to cut: return the identity worker.
	if from == 0 && to == 0 {
		return func(s *obiseq.BioSequence) *obiseq.BioSequence {
			return s
		}
	}

	// Shift a positive start once, here, rather than inside the worker.
	start := from
	if start > 0 {
		start--
	}

	return func(s *obiseq.BioSequence) *obiseq.BioSequence {
		length := s.Len()

		begin := start
		if start < 0 {
			begin = length + start + 1
		}

		end := to
		if to < 0 {
			end = length + to + 1
		}

		// Clamp the per-sequence bounds to the sequence limits. The clamping
		// applies to the locals only, never to the captured parameters.
		if begin < 0 {
			begin = 0
		}
		if end > length {
			end = length
		}

		rep, err := s.Subsequence(begin, end, false)
		if err != nil {
			if breakOnError {
				log.Fatalf("Cannot cut sequence %s (%v)", s.Id(), err)
			}
			log.Warnf("Cannot cut sequence %s (%v), sequence discarded", s.Id(), err)
			return nil
		}

		return rep
	}
}
func ClearAllAttributesWorker() obiseq.SeqWorker {
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
annot := s.Annotations()
@@ -81,7 +142,6 @@ func EvalAttributeWorker(expression map[string]string) obiseq.SeqWorker {
return w
}
func AddTaxonAtRankWorker(taxonomy *obitax.Taxonomy, ranks ...string) obiseq.SeqWorker {
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
for _, r := range ranks {
@@ -162,6 +222,13 @@ func CLIAnnotationWorker() obiseq.SeqWorker {
annotator = annotator.ChainWorkers(w)
}
if CLIHasCut() {
from, to := CLICut()
w := CutSequenceWorker(from, to, false)
annotator = annotator.ChainWorkers(w)
}
return annotator
}
@@ -170,7 +237,7 @@ func CLIAnnotationPipeline() obiiter.Pipeable {
predicate := obigrep.CLISequenceSelectionPredicate()
worker := CLIAnnotationWorker()
annotator := obiseq.SeqToSliceConditionalWorker(worker, predicate, true)
annotator := obiseq.SeqToSliceConditionalWorker(worker, predicate, true, false)
f := obiiter.SliceWorkerPipe(annotator, obioptions.CLIParallelWorkers())
return f

View File

@@ -1,11 +1,12 @@
package obiannotate
import (
"io/ioutil"
"log"
"os"
"strconv"
"strings"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
"github.com/DavidGamba/go-getoptions"
@@ -22,9 +23,11 @@ var _clearAll = false
var _setSeqLength = false
var _uniqueID = false
var _ahoCorazick = ""
var _pattern = ""
var _lcaSlot = ""
var _lcaError = 0.0
var _setId = ""
var _cut = ""
func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
// options.BoolVar(&_addRank, "seq-rank", _addRank,
@@ -42,6 +45,13 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_ahoCorazick, "aho-corasick", _ahoCorazick,
options.Description("Adds an aho-corasick attribut with the count of matches of the provided patterns."))
options.StringVar(&_pattern, "pattern", _pattern,
options.Description("Adds a pattern attribut containing the pattern, a pattern_match slot "+
"indicating the matched sequence, "+
"and a pattern_error slot indicating the number difference between the pattern and the match "+
"to the sequence.",
))
options.StringVar(&_lcaSlot, "add-lca-in", _lcaSlot,
options.ArgName("SLOT_NAME"),
options.Description("From the taxonomic annotation of the sequence (taxid slot or merged_taxid slot), "+
@@ -59,6 +69,10 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
"estimated LCA."),
)
options.StringVar(&_cut, "cut", _cut,
options.ArgName("###:###"),
options.Description("A pattern decribing how to cut the sequence"))
// options.BoolVar(&_uniqueID, "uniq-id", _uniqueID,
// options.Description("Forces sequence record ids to be unique."),
// )
@@ -133,10 +147,9 @@ func CLIHasSetId() bool {
}
// CLSetIdExpression returns the current value of the package-level _setId
// variable.
//
// NOTE(review): the two identical return statements below look like the
// old/new pair of a whitespace-only change in a diff view; the second one is
// unreachable — confirm against the real source file.
func CLSetIdExpression() string {
return _setId
return _setId
}
func CLIHasAttributeToBeRenamed() bool {
return len(_toBeRenamed) > 0
}
@@ -191,7 +204,7 @@ func CLIHasAhoCorasick() bool {
}
func CLIAhoCorazick() []string {
content, err := ioutil.ReadFile(_ahoCorazick)
content, err := os.ReadFile(_ahoCorazick)
if err != nil {
log.Fatalln("Cannot open file ", _ahoCorazick)
}
@@ -221,3 +234,33 @@ func CLIHasAddLCA() bool {
// CLILCAThreshold returns one minus the package-level _lcaError value.
func CLILCAThreshold() float64 {
	threshold := 1 - _lcaError
	return threshold
}
// CLICut parses the package-level _cut option, expected as "start:end", and
// returns the two bounds as integers.
//
// An empty _cut yields (0, 0). A value that does not split into exactly two
// ":"-separated fields, or whose fields are not integers, terminates the
// program through log.Fatalf.
func CLICut() (int, int) {
	if len(_cut) == 0 {
		return 0, 0
	}

	bounds := strings.Split(_cut, ":")
	if len(bounds) != 2 {
		log.Fatalf("Invalid cut value %s. value should be of the form start:end", _cut)
	}

	// Both fields follow the same rule, so they share one parser; the start
	// field is parsed (and reported on failure) before the end field.
	parseBound := func(field string) int {
		n, err := strconv.Atoi(field)
		if err != nil {
			log.Fatalf("Invalid cut value %s. value %s should be an integer", _cut, field)
		}
		return n
	}

	start := parseBound(bounds[0])
	end := parseBound(bounds[1])

	return start, end
}
// CLIHasCut reports whether both bounds returned by CLICut are non-zero.
func CLIHasCut() bool {
	start, end := CLICut()
	if start == 0 {
		return false
	}
	return end != 0
}