mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Correct the number of workers
Former-commit-id: febbccfb853263e0761ecfccb0f09c8c1bf88475
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
package obiannotate
|
||||
|
||||
import (
|
||||
"log"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obicorazick"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
@@ -22,6 +22,15 @@ func DeleteAttributesWorker(toBeDeleted []string) obiseq.SeqWorker {
|
||||
return f
|
||||
}
|
||||
|
||||
// func MatchPatternWorker(pattern string, errormax int, allowsIndel bool) obiseq.SeqWorker {
|
||||
// pat, err := obiapat.MakeApatPattern(pattern, errormax, allowsIndel)
|
||||
// f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
// apats := obiapat.MakeApatSequence(s, false)
|
||||
// pat.BestMatch(apats, 0)
|
||||
// return s
|
||||
// }
|
||||
// }
|
||||
|
||||
func ToBeKeptAttributesWorker(toBeKept []string) obiseq.SeqWorker {
|
||||
|
||||
d := make(map[string]bool, len(_keepOnly))
|
||||
@@ -43,6 +52,58 @@ func ToBeKeptAttributesWorker(toBeKept []string) obiseq.SeqWorker {
|
||||
return f
|
||||
}
|
||||
|
||||
func CutSequenceWorker(from, to int, breakOnError bool) obiseq.SeqWorker {
|
||||
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
var f, t int
|
||||
|
||||
switch {
|
||||
case from < 0:
|
||||
f = s.Len() + from + 1
|
||||
case from > 0:
|
||||
f = from
|
||||
}
|
||||
|
||||
switch {
|
||||
case to < 0:
|
||||
t = s.Len() + to + 1
|
||||
case to > 0:
|
||||
t = to
|
||||
}
|
||||
|
||||
if from < 0 {
|
||||
from = 0
|
||||
}
|
||||
|
||||
if to >= s.Len() {
|
||||
to = s.Len()
|
||||
}
|
||||
|
||||
rep, err := s.Subsequence(f, t, false)
|
||||
if err != nil {
|
||||
if breakOnError {
|
||||
log.Fatalf("Cannot cut sequence %s (%v)", s.Id(), err)
|
||||
} else {
|
||||
log.Warnf("Cannot cut sequence %s (%v), sequence discarded", s.Id(), err)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return rep
|
||||
}
|
||||
|
||||
if from == 0 && to == 0 {
|
||||
f = func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
return s
|
||||
}
|
||||
}
|
||||
|
||||
if from > 0 {
|
||||
from--
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func ClearAllAttributesWorker() obiseq.SeqWorker {
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
annot := s.Annotations()
|
||||
@@ -81,7 +142,6 @@ func EvalAttributeWorker(expression map[string]string) obiseq.SeqWorker {
|
||||
return w
|
||||
}
|
||||
|
||||
|
||||
func AddTaxonAtRankWorker(taxonomy *obitax.Taxonomy, ranks ...string) obiseq.SeqWorker {
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
for _, r := range ranks {
|
||||
@@ -162,6 +222,13 @@ func CLIAnnotationWorker() obiseq.SeqWorker {
|
||||
annotator = annotator.ChainWorkers(w)
|
||||
}
|
||||
|
||||
if CLIHasCut() {
|
||||
from, to := CLICut()
|
||||
w := CutSequenceWorker(from, to, false)
|
||||
|
||||
annotator = annotator.ChainWorkers(w)
|
||||
}
|
||||
|
||||
return annotator
|
||||
}
|
||||
|
||||
@@ -170,7 +237,7 @@ func CLIAnnotationPipeline() obiiter.Pipeable {
|
||||
predicate := obigrep.CLISequenceSelectionPredicate()
|
||||
worker := CLIAnnotationWorker()
|
||||
|
||||
annotator := obiseq.SeqToSliceConditionalWorker(worker, predicate, true)
|
||||
annotator := obiseq.SeqToSliceConditionalWorker(worker, predicate, true, false)
|
||||
f := obiiter.SliceWorkerPipe(annotator, obioptions.CLIParallelWorkers())
|
||||
|
||||
return f
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
package obiannotate
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
@@ -22,9 +23,11 @@ var _clearAll = false
|
||||
var _setSeqLength = false
|
||||
var _uniqueID = false
|
||||
var _ahoCorazick = ""
|
||||
var _pattern = ""
|
||||
var _lcaSlot = ""
|
||||
var _lcaError = 0.0
|
||||
var _setId = ""
|
||||
var _cut = ""
|
||||
|
||||
func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
|
||||
// options.BoolVar(&_addRank, "seq-rank", _addRank,
|
||||
@@ -42,6 +45,13 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringVar(&_ahoCorazick, "aho-corasick", _ahoCorazick,
|
||||
options.Description("Adds an aho-corasick attribut with the count of matches of the provided patterns."))
|
||||
|
||||
options.StringVar(&_pattern, "pattern", _pattern,
|
||||
options.Description("Adds a pattern attribut containing the pattern, a pattern_match slot "+
|
||||
"indicating the matched sequence, "+
|
||||
"and a pattern_error slot indicating the number difference between the pattern and the match "+
|
||||
"to the sequence.",
|
||||
))
|
||||
|
||||
options.StringVar(&_lcaSlot, "add-lca-in", _lcaSlot,
|
||||
options.ArgName("SLOT_NAME"),
|
||||
options.Description("From the taxonomic annotation of the sequence (taxid slot or merged_taxid slot), "+
|
||||
@@ -59,6 +69,10 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
|
||||
"estimated LCA."),
|
||||
)
|
||||
|
||||
options.StringVar(&_cut, "cut", _cut,
|
||||
options.ArgName("###:###"),
|
||||
options.Description("A pattern decribing how to cut the sequence"))
|
||||
|
||||
// options.BoolVar(&_uniqueID, "uniq-id", _uniqueID,
|
||||
// options.Description("Forces sequence record ids to be unique."),
|
||||
// )
|
||||
@@ -133,10 +147,9 @@ func CLIHasSetId() bool {
|
||||
}
|
||||
|
||||
func CLSetIdExpression() string {
|
||||
return _setId
|
||||
return _setId
|
||||
}
|
||||
|
||||
|
||||
func CLIHasAttributeToBeRenamed() bool {
|
||||
return len(_toBeRenamed) > 0
|
||||
}
|
||||
@@ -191,7 +204,7 @@ func CLIHasAhoCorasick() bool {
|
||||
}
|
||||
|
||||
func CLIAhoCorazick() []string {
|
||||
content, err := ioutil.ReadFile(_ahoCorazick)
|
||||
content, err := os.ReadFile(_ahoCorazick)
|
||||
if err != nil {
|
||||
log.Fatalln("Cannot open file ", _ahoCorazick)
|
||||
}
|
||||
@@ -221,3 +234,33 @@ func CLIHasAddLCA() bool {
|
||||
func CLILCAThreshold() float64 {
|
||||
return 1 - _lcaError
|
||||
}
|
||||
|
||||
func CLICut() (int, int) {
|
||||
if _cut == "" {
|
||||
return 0, 0
|
||||
}
|
||||
values := strings.Split(_cut, ":")
|
||||
|
||||
if len(values) != 2 {
|
||||
log.Fatalf("Invalid cut value %s. value should be of the form start:end", _cut)
|
||||
}
|
||||
|
||||
start, err := strconv.Atoi(values[0])
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid cut value %s. value %s should be an integer", _cut, values[0])
|
||||
}
|
||||
end, err := strconv.Atoi(values[1])
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid cut value %s. value %s should be an integer", _cut, values[1])
|
||||
}
|
||||
|
||||
return start, end
|
||||
}
|
||||
|
||||
func CLIHasCut() bool {
|
||||
f, t := CLICut()
|
||||
|
||||
return f != 0 && t != 0
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user