Change the API of workers

Former-commit-id: 9b07306edd8cf28266f86f95823948fa99d39ea9
This commit is contained in:
2024-03-02 16:03:46 -04:00
parent 4a0b20484f
commit 0f3871d203
19 changed files with 194 additions and 120 deletions

View File

@@ -15,11 +15,11 @@ import (
)
func DeleteAttributesWorker(toBeDeleted []string) obiseq.SeqWorker {
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
for _, k := range toBeDeleted {
s.DeleteAttribute(k)
}
return s
return obiseq.BioSequenceSlice{s}, nil
}
return f
@@ -48,7 +48,7 @@ func MatchPatternWorker(pattern, name string, errormax int, allowsIndel bool) ob
slot_error := fmt.Sprintf("%s_error", name)
slot_location := fmt.Sprintf("%s_location", name)
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
apats, err := obiapat.MakeApatSequence(s, false)
if err != nil {
log.Fatalf("error in preparing sequence %s : %v", s.Id(), err)
@@ -59,6 +59,11 @@ func MatchPatternWorker(pattern, name string, errormax int, allowsIndel bool) ob
if matched {
annot := s.Annotations()
annot[slot] = pattern
if start < 0 {
start = 0
}
match, err := s.Subsequence(start, end, false)
if err != nil {
log.Fatalf("Error in extracting pattern of sequence %s [%d;%d[ : %v",
@@ -83,7 +88,7 @@ func MatchPatternWorker(pattern, name string, errormax int, allowsIndel bool) ob
annot[slot_location] = fmt.Sprintf("complement(%d..%d)", start+1, end)
}
}
return s
return obiseq.BioSequenceSlice{s}, nil
}
return f
@@ -97,14 +102,14 @@ func ToBeKeptAttributesWorker(toBeKept []string) obiseq.SeqWorker {
d[v] = true
}
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
annot := s.Annotations()
for key := range annot {
if _, ok := d[key]; !ok {
s.DeleteAttribute(key)
}
}
return s
return obiseq.BioSequenceSlice{s}, nil
}
return f
@@ -112,7 +117,7 @@ func ToBeKeptAttributesWorker(toBeKept []string) obiseq.SeqWorker {
func CutSequenceWorker(from, to int, breakOnError bool) obiseq.SeqWorker {
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
var f, t int
switch {
@@ -142,16 +147,15 @@ func CutSequenceWorker(from, to int, breakOnError bool) obiseq.SeqWorker {
if breakOnError {
log.Fatalf("Cannot cut sequence %s (%v)", s.Id(), err)
} else {
log.Warnf("Cannot cut sequence %s (%v), sequence discarded", s.Id(), err)
return nil
err = fmt.Errorf("Cannot cut sequence %s (%v), sequence discarded", s.Id(), err)
}
}
return rep
return obiseq.BioSequenceSlice{rep}, err
}
if from == 0 && to == 0 {
f = func(s *obiseq.BioSequence) *obiseq.BioSequence {
return s
f = func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
return obiseq.BioSequenceSlice{s}, nil
}
}
@@ -163,23 +167,23 @@ func CutSequenceWorker(from, to int, breakOnError bool) obiseq.SeqWorker {
}
// ClearAllAttributesWorker builds a SeqWorker that strips every annotation
// from the sequence it receives: the closure ranges over s.Annotations() and
// calls s.DeleteAttribute on each key, then hands the same sequence back.
//
// NOTE(review): this text is a rendered commit diff without +/- markers, so
// the closure header and its return statement each appear twice. Presumably
// the first line of each pair is the removed form (returning the sequence
// itself) and the second is the added form (returning a one-element
// BioSequenceSlice and a nil error) — confirm against the repository.
func ClearAllAttributesWorker() obiseq.SeqWorker {
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
annot := s.Annotations()
// Remove every key found in the annotation map.
for key := range annot {
s.DeleteAttribute(key)
}
return s
return obiseq.BioSequenceSlice{s}, nil
}
return f
}
func RenameAttributeWorker(toBeRenamed map[string]string) obiseq.SeqWorker {
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
for newName, oldName := range toBeRenamed {
s.RenameAttribute(newName, oldName)
}
return s
return obiseq.BioSequenceSlice{s}, nil
}
return f
@@ -201,20 +205,20 @@ func EvalAttributeWorker(expression map[string]string) obiseq.SeqWorker {
}
// AddTaxonAtRankWorker builds a SeqWorker that, for each rank name in ranks,
// calls taxonomy.SetTaxonAtRank(s, r) on the sequence it receives and then
// hands the same sequence back.
//
// NOTE(review): this text is a rendered commit diff without +/- markers, so
// the closure header and its return statement each appear twice. Presumably
// the first line of each pair is the removed form (returning the sequence
// itself) and the second is the added form (returning a one-element
// BioSequenceSlice and a nil error) — confirm against the repository.
func AddTaxonAtRankWorker(taxonomy *obitax.Taxonomy, ranks ...string) obiseq.SeqWorker {
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
// Apply the taxonomy annotation once per requested rank, in argument order.
for _, r := range ranks {
taxonomy.SetTaxonAtRank(s, r)
}
return s
return obiseq.BioSequenceSlice{s}, nil
}
return f
}
func AddSeqLengthWorker() obiseq.SeqWorker {
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
s.SetAttribute("seq_length", s.Len())
return s
return obiseq.BioSequenceSlice{s}, nil
}
return f
@@ -309,8 +313,8 @@ func CLIAnnotationPipeline() obiiter.Pipeable {
predicate := obigrep.CLISequenceSelectionPredicate()
worker := CLIAnnotationWorker()
annotator := obiseq.SeqToSliceConditionalWorker(worker, predicate, true, false)
f := obiiter.SliceWorkerPipe(annotator, obioptions.CLIParallelWorkers())
annotator := obiseq.SeqToSliceConditionalWorker(predicate, worker, true, false)
f := obiiter.SliceWorkerPipe(annotator, false, obioptions.CLIParallelWorkers())
return f
}

View File

@@ -60,7 +60,7 @@ func annotateOBIClean(dataset obiseq.BioSequenceSlice,
sample map[string]*([]*seqPCR),
tag, NAValue string) obiiter.IBioSequence {
batchsize := 1000
var annot = func(data obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
var annot = func(data obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {
for _, s := range data {
status := Status(s)
@@ -87,11 +87,11 @@ func annotateOBIClean(dataset obiseq.BioSequenceSlice,
annotation["obiclean_samplecount"] = head + internal + singleton
}
return data
return data, nil
}
iter := obiiter.IBatchOver(dataset, batchsize)
riter := iter.MakeISliceWorker(annot)
riter := iter.MakeISliceWorker(annot, false)
return riter
}

View File

@@ -50,10 +50,13 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
obioptions.CLIParallelWorkers())
annotated := usable.MakeIWorker(taxonomy.MakeSetSpeciesWorker(),
false,
obioptions.CLIParallelWorkers(),
).MakeIWorker(taxonomy.MakeSetGenusWorker(),
false,
obioptions.CLIParallelWorkers(),
).MakeIWorker(taxonomy.MakeSetFamilyWorker(),
false,
obioptions.CLIParallelWorkers(),
)

View File

@@ -30,7 +30,7 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error
worker := obingslibrary.ExtractBarcodeSliceWorker(ngsfilter, opts...)
newIter := iterator.MakeISliceWorker(worker)
newIter := iterator.MakeISliceWorker(worker, false)
if !CLIConservedErrors() {
log.Println("Discards unassigned sequences")

View File

@@ -60,5 +60,5 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
iterator = iterator.Pipe(frags)
}
return iterator.MakeISliceWorker(worker, obioptions.CLIParallelWorkers(), 0), nil
return iterator.MakeISliceWorker(worker, false, obioptions.CLIParallelWorkers(), 0), nil
}

View File

@@ -1,9 +1,10 @@
package obitag
import (
log "github.com/sirupsen/logrus"
"math"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
@@ -190,9 +191,10 @@ func GeomIdentifySeqWorker(references *obiseq.BioSequenceSlice,
landmarks := ExtractLandmarkSeqs(references)
taxa := ExtractTaxonSet(references, taxo)
return func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
return func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
buffer := make([]uint64, 100)
return GeomIdentify(sequence, landmarks, references, taxa, taxo, &buffer)
return obiseq.BioSequenceSlice{GeomIdentify(sequence, landmarks, references, taxa, taxo, &buffer)},
nil
}
}
@@ -202,5 +204,5 @@ func CLIGeomAssignTaxonomy(iterator obiiter.IBioSequence,
) obiiter.IBioSequence {
worker := GeomIdentifySeqWorker(&references, taxo)
return iterator.MakeIWorker(worker, obioptions.CLIParallelWorkers(), 0)
return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0)
}

View File

@@ -259,8 +259,8 @@ func IdentifySeqWorker(references obiseq.BioSequenceSlice,
taxa obitax.TaxonSet,
taxo *obitax.Taxonomy,
runExact bool) obiseq.SeqWorker {
return func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
return Identify(sequence, references, refcounts, taxa, taxo, runExact)
return func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
return obiseq.BioSequenceSlice{Identify(sequence, references, refcounts, taxa, taxo, runExact)}, nil
}
}
@@ -285,5 +285,5 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
worker := IdentifySeqWorker(references, refcounts, taxa, taxo, CLIRunExact())
return iterator.MakeIWorker(worker, obioptions.CLIParallelWorkers(), 0)
return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0)
}