mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Change the API of workers
Former-commit-id: 9b07306edd8cf28266f86f95823948fa99d39ea9
This commit is contained in:
@ -2,13 +2,26 @@
|
||||
|
||||
## Latest changes
|
||||
|
||||
### API Changes
|
||||
- Two of the main types, `obiseq.SeqWorker` and `obiseq.SeqSliceWorker`, have had their declarations changed.
|
||||
Both now return two values: an `obiseq.BioSequenceSlice` and an `error`. This allows a worker to
|
||||
return potentially several sequences as the result of the processing of a single sequence, or
|
||||
zero, which is equivalent to filtering out the input sequence.
|
||||
|
||||
### Enhancement
|
||||
- The bug corrected in the parsing of EMBL and Genbank files as implemented in version 4.1.2 of OBITools4,
|
||||
potentially induced some reduction in the performance of the parsing. This should now be fixed.
|
||||
- In the same idea, parsing of genbank and EMBL files were reading and storing in memory not only the sequence
|
||||
- In the same vein, the parsing of Genbank and EMBL files was reading and storing in memory not only
|
||||
the sequence
|
||||
but also the annotations (features table). Up to now none of the obitools are using this information, but
|
||||
with large complete genomes, it is occupying a lot of memory. To reduce this impact, the new version of the
|
||||
parser doesn't any more store in memory the annotations by default.
|
||||
with large complete genomes, it is occupying a lot of memory. To reduce this impact, the new version of
|
||||
the parser no longer stores the annotations in memory by default.
|
||||
- Add a **--taxonomic-path** option to `obiannotate`. The option adds a `taxonomic_path` tag to sequences describing
|
||||
the taxonomic classification of the sequence according to its taxid. The path is a string. Each level of the
|
||||
path is delimited by a `|` character. A level consists of three parts separated by a `@`. The first part is the
|
||||
taxid, the second the scientific name and the last the taxonomic rank. The first level described is always the
|
||||
root of the taxonomy. The last one corresponds to the taxid of the sequence. If a sequence is not annotated with
|
||||
a taxid, as usual the sequence is assumed to have the taxid 1 (the root of the taxonomy).
|
||||
|
||||
## February 16th, 2024. Release 4.1.2
|
||||
|
||||
|
@ -24,7 +24,7 @@ func main() {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
comp := fs.MakeIWorker(obiseq.ReverseComplementWorker(true))
|
||||
comp := fs.MakeIWorker(obiseq.ReverseComplementWorker(true), true)
|
||||
obiconvert.CLIWriteBioSequences(comp, true)
|
||||
|
||||
obiiter.WaitForLastPipe()
|
||||
|
@ -524,10 +524,10 @@ func PCRSlice(sequences obiseq.BioSequenceSlice,
|
||||
func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
|
||||
|
||||
opt := MakeOptions(options)
|
||||
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||
worker := func(sequences obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {
|
||||
result := _PCRSlice(sequences, opt)
|
||||
sequences.Recycle(true)
|
||||
return result
|
||||
return result, nil
|
||||
}
|
||||
|
||||
return worker
|
||||
|
@ -14,7 +14,7 @@ func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
|
||||
fslot := slot + "_Fwd"
|
||||
rslot := slot + "_Rev"
|
||||
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
matchesF := len(matcher.FindAllByteSlice(s.Sequence()))
|
||||
matchesR := len(matcher.FindAllByteSlice(s.ReverseComplement(false).Sequence()))
|
||||
|
||||
@ -26,7 +26,7 @@ func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
|
||||
s.SetAttribute(rslot, matchesR)
|
||||
}
|
||||
|
||||
return s
|
||||
return obiseq.BioSequenceSlice{s}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
|
@ -31,5 +31,6 @@ func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence,
|
||||
options ...WithOption) obiiter.IBioSequence {
|
||||
opt := MakeOptions(options)
|
||||
return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
|
||||
false,
|
||||
opt.ParallelWorkers())
|
||||
}
|
||||
|
@ -15,7 +15,9 @@ import (
|
||||
// Besides the SeqWorker function, the method accepts two optional integer parameters.
|
||||
// - The first indicates the number of workers running in parallel (default 4)
|
||||
// - The second is the size of the channel buffer. By default it is set to the same value as the input buffer.
|
||||
func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int) IBioSequence {
|
||||
func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker,
|
||||
breakOnError bool,
|
||||
sizes ...int) IBioSequence {
|
||||
nworkers := obioptions.CLIParallelWorkers()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
@ -32,11 +34,15 @@ func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int)
|
||||
|
||||
}()
|
||||
|
||||
sw := obiseq.SeqToSliceWorker(worker, true, breakOnError)
|
||||
|
||||
f := func(iterator IBioSequence) {
|
||||
var err error
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
for i, seq := range batch.slice {
|
||||
batch.slice[i] = worker(seq)
|
||||
batch.slice, err = sw(batch.slice)
|
||||
if err != nil && breakOnError {
|
||||
log.Fatalf("Error on sequence processing : %v", err)
|
||||
}
|
||||
newIter.Push(batch)
|
||||
}
|
||||
@ -67,7 +73,7 @@ func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int)
|
||||
// Return:
|
||||
// - newIter: A new IBioSequence iterator with the modified sequences.
|
||||
func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
|
||||
worker obiseq.SeqWorker, sizes ...int) IBioSequence {
|
||||
worker obiseq.SeqWorker, breakOnError bool, sizes ...int) IBioSequence {
|
||||
nworkers := obioptions.CLIReadParallelWorkers()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
@ -84,13 +90,15 @@ func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePre
|
||||
|
||||
}()
|
||||
|
||||
sw := obiseq.SeqToSliceConditionalWorker(predicate, worker, true, breakOnError)
|
||||
|
||||
f := func(iterator IBioSequence) {
|
||||
var err error
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
for i, seq := range batch.slice {
|
||||
if predicate(batch.slice[i]) {
|
||||
batch.slice[i] = worker(seq)
|
||||
}
|
||||
batch.slice, err = sw(batch.slice)
|
||||
if err != nil && breakOnError {
|
||||
log.Fatalf("Error on sequence processing : %v", err)
|
||||
}
|
||||
newIter.Push(batch)
|
||||
}
|
||||
@ -120,7 +128,7 @@ func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePre
|
||||
// provided, the default number of workers is used.
|
||||
//
|
||||
// The function returns a new IBioSequence containing the modified slices.
|
||||
func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, sizes ...int) IBioSequence {
|
||||
func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, breakOnError bool, sizes ...int) IBioSequence {
|
||||
nworkers := obioptions.CLIParallelWorkers()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
@ -137,9 +145,13 @@ func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, size
|
||||
}()
|
||||
|
||||
f := func(iterator IBioSequence) {
|
||||
var err error
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
batch.slice = worker(batch.slice)
|
||||
batch.slice, err = worker(batch.slice)
|
||||
if err != nil && breakOnError {
|
||||
log.Fatalf("Error on sequence processing : %v", err)
|
||||
}
|
||||
newIter.Push(batch)
|
||||
}
|
||||
newIter.Done()
|
||||
@ -169,9 +181,9 @@ func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, size
|
||||
//
|
||||
// Return:
|
||||
// - f: A Pipeable object that represents the closure created by the WorkerPipe function.
|
||||
func WorkerPipe(worker obiseq.SeqWorker, sizes ...int) Pipeable {
|
||||
func WorkerPipe(worker obiseq.SeqWorker, breakOnError bool, sizes ...int) Pipeable {
|
||||
f := func(iterator IBioSequence) IBioSequence {
|
||||
return iterator.MakeIWorker(worker, sizes...)
|
||||
return iterator.MakeIWorker(worker, breakOnError, sizes...)
|
||||
}
|
||||
|
||||
return f
|
||||
@ -182,9 +194,9 @@ func WorkerPipe(worker obiseq.SeqWorker, sizes ...int) Pipeable {
|
||||
// The worker parameter is the SeqSliceWorker to be applied.
|
||||
// The sizes parameter is a variadic parameter representing the sizes of the slices.
|
||||
// The function returns a Pipeable function that applies the SeqSliceWorker to the iterator.
|
||||
func SliceWorkerPipe(worker obiseq.SeqSliceWorker, sizes ...int) Pipeable {
|
||||
func SliceWorkerPipe(worker obiseq.SeqSliceWorker, breakOnError bool, sizes ...int) Pipeable {
|
||||
f := func(iterator IBioSequence) IBioSequence {
|
||||
return iterator.MakeISliceWorker(worker, sizes...)
|
||||
return iterator.MakeISliceWorker(worker, breakOnError, sizes...)
|
||||
}
|
||||
|
||||
return f
|
||||
|
@ -172,8 +172,8 @@ func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
|
||||
|
||||
ngslibrary.Compile(opt.AllowedMismatch(), opt.AllowsIndel())
|
||||
|
||||
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||
return _ExtractBarcodeSlice(ngslibrary, sequences, opt)
|
||||
worker := func(sequences obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {
|
||||
return _ExtractBarcodeSlice(ngslibrary, sequences, opt), nil
|
||||
}
|
||||
|
||||
return worker
|
||||
|
@ -28,16 +28,18 @@ func Expression(expression string) func(*BioSequence) (interface{}, error) {
|
||||
|
||||
func EditIdWorker(expression string) SeqWorker {
|
||||
e := Expression(expression)
|
||||
f := func(sequence *BioSequence) *BioSequence {
|
||||
f := func(sequence *BioSequence) (BioSequenceSlice, error) {
|
||||
v, err := e(sequence)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
|
||||
if err == nil {
|
||||
sequence.SetId(fmt.Sprintf("%v", v))
|
||||
} else {
|
||||
err = fmt.Errorf("Expression '%s' cannot be evaluated on sequence %s : %v",
|
||||
expression,
|
||||
sequence.Id())
|
||||
sequence.Id(),
|
||||
err)
|
||||
}
|
||||
sequence.SetId(fmt.Sprintf("%v", v))
|
||||
return sequence
|
||||
|
||||
return BioSequenceSlice{sequence}, err
|
||||
}
|
||||
|
||||
return f
|
||||
@ -45,16 +47,18 @@ func EditIdWorker(expression string) SeqWorker {
|
||||
|
||||
func EditAttributeWorker(key string, expression string) SeqWorker {
|
||||
e := Expression(expression)
|
||||
f := func(sequence *BioSequence) *BioSequence {
|
||||
f := func(sequence *BioSequence) (BioSequenceSlice, error) {
|
||||
v, err := e(sequence)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
|
||||
if err == nil {
|
||||
sequence.SetAttribute(key, v)
|
||||
} else {
|
||||
err = fmt.Errorf("Expression '%s' cannot be evaluated on sequence %s : %v",
|
||||
expression,
|
||||
sequence.Id())
|
||||
sequence.Id(),
|
||||
err)
|
||||
}
|
||||
sequence.SetAttribute(key, v)
|
||||
return sequence
|
||||
|
||||
return BioSequenceSlice{sequence}, err
|
||||
}
|
||||
|
||||
return f
|
||||
|
@ -105,8 +105,8 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
|
||||
* @returns {SeqWorker} A function that accepts *BioSequence and returns its reversed-complement form.
|
||||
*/
|
||||
func ReverseComplementWorker(inplace bool) SeqWorker {
|
||||
f := func(input *BioSequence) *BioSequence {
|
||||
return input.ReverseComplement(inplace)
|
||||
f := func(input *BioSequence) (BioSequenceSlice, error) {
|
||||
return BioSequenceSlice{input.ReverseComplement(inplace)}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
|
@ -1,20 +1,25 @@
|
||||
package obiseq
|
||||
|
||||
import log "github.com/sirupsen/logrus"
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type SeqAnnotator func(*BioSequence)
|
||||
|
||||
type SeqWorker func(*BioSequence) *BioSequence
|
||||
type SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice
|
||||
type SeqWorker func(*BioSequence) (BioSequenceSlice, error)
|
||||
type SeqSliceWorker func(BioSequenceSlice) (BioSequenceSlice, error)
|
||||
|
||||
func NilSeqWorker(seq *BioSequence) *BioSequence {
|
||||
return seq
|
||||
func NilSeqWorker(seq *BioSequence) (BioSequenceSlice, error) {
|
||||
return BioSequenceSlice{seq}, nil
|
||||
}
|
||||
|
||||
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
f := func(seq *BioSequence) *BioSequence {
|
||||
f := func(seq *BioSequence) (BioSequenceSlice, error) {
|
||||
function(seq)
|
||||
return seq
|
||||
return BioSequenceSlice{seq}, nil
|
||||
}
|
||||
return f
|
||||
}
|
||||
@ -25,35 +30,47 @@ func SeqToSliceWorker(worker SeqWorker,
|
||||
|
||||
if worker == nil {
|
||||
if inplace {
|
||||
f = func(input BioSequenceSlice) BioSequenceSlice {
|
||||
return input
|
||||
f = func(input BioSequenceSlice) (BioSequenceSlice, error) {
|
||||
return input, nil
|
||||
}
|
||||
} else {
|
||||
f = func(input BioSequenceSlice) BioSequenceSlice {
|
||||
f = func(input BioSequenceSlice) (BioSequenceSlice, error) {
|
||||
output := MakeBioSequenceSlice(len(input))
|
||||
copy(output, input)
|
||||
return output
|
||||
return output, nil
|
||||
}
|
||||
}
|
||||
} else {
|
||||
f = func(input BioSequenceSlice) BioSequenceSlice {
|
||||
f = func(input BioSequenceSlice) (BioSequenceSlice, error) {
|
||||
output := input
|
||||
if !inplace {
|
||||
output = MakeBioSequenceSlice(len(input))
|
||||
}
|
||||
i := 0
|
||||
for _, s := range input {
|
||||
r := worker(s)
|
||||
if r != nil {
|
||||
output[i] = r
|
||||
i++
|
||||
} else if breakOnError {
|
||||
log.Fatalf("got an error on sequence %s processing",
|
||||
r.Id())
|
||||
r, err := worker(s)
|
||||
if err == nil {
|
||||
for _, rs := range r {
|
||||
output[i] = rs
|
||||
i++
|
||||
if i == cap(output) {
|
||||
slices.Grow(output, cap(output))
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
if breakOnError {
|
||||
err = fmt.Errorf("got an error on sequence %s processing : %v",
|
||||
s.Id(), err)
|
||||
return BioSequenceSlice{}, err
|
||||
} else {
|
||||
log.Warnf("got an error on sequence %s processing",
|
||||
s.Id())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output[0:i]
|
||||
return output[0:i], nil
|
||||
}
|
||||
|
||||
}
|
||||
@ -61,15 +78,16 @@ func SeqToSliceWorker(worker SeqWorker,
|
||||
return f
|
||||
}
|
||||
|
||||
func SeqToSliceConditionalWorker(worker SeqWorker,
|
||||
func SeqToSliceConditionalWorker(
|
||||
condition SequencePredicate,
|
||||
worker SeqWorker,
|
||||
inplace, breakOnError bool) SeqSliceWorker {
|
||||
|
||||
if condition == nil {
|
||||
return SeqToSliceWorker(worker, inplace, breakOnError)
|
||||
}
|
||||
|
||||
f := func(input BioSequenceSlice) BioSequenceSlice {
|
||||
f := func(input BioSequenceSlice) (BioSequenceSlice, error) {
|
||||
output := input
|
||||
if !inplace {
|
||||
output = MakeBioSequenceSlice(len(input))
|
||||
@ -79,18 +97,29 @@ func SeqToSliceConditionalWorker(worker SeqWorker,
|
||||
|
||||
for _, s := range input {
|
||||
if condition(s) {
|
||||
r := worker(s)
|
||||
if r != nil {
|
||||
output[i] = r
|
||||
i++
|
||||
} else if breakOnError {
|
||||
log.Fatalf("got an error on sequence %s processing",
|
||||
r.Id())
|
||||
r, err := worker(s)
|
||||
if err == nil {
|
||||
for _, rs := range r {
|
||||
output[i] = rs
|
||||
i++
|
||||
if i == cap(output) {
|
||||
slices.Grow(output, cap(output))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if breakOnError {
|
||||
err = fmt.Errorf("got an error on sequence %s processing : %v",
|
||||
s.Id(), err)
|
||||
return BioSequenceSlice{}, err
|
||||
} else {
|
||||
log.Warnf("got an error on sequence %s processing",
|
||||
s.Id())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output[0:i]
|
||||
return output[0:i], nil
|
||||
}
|
||||
|
||||
return f
|
||||
@ -105,11 +134,17 @@ func (worker SeqWorker) ChainWorkers(next SeqWorker) SeqWorker {
|
||||
}
|
||||
}
|
||||
|
||||
f := func(seq *BioSequence) *BioSequence {
|
||||
sw := SeqToSliceWorker(next, true, false)
|
||||
|
||||
f := func(seq *BioSequence) (BioSequenceSlice, error) {
|
||||
if seq == nil {
|
||||
return nil
|
||||
return BioSequenceSlice{}, nil
|
||||
}
|
||||
return next(worker(seq))
|
||||
slice, err := worker(seq)
|
||||
if err == nil {
|
||||
slice, err = sw(slice)
|
||||
}
|
||||
return slice, err
|
||||
}
|
||||
|
||||
return f
|
||||
|
@ -147,14 +147,14 @@ func AddLCAWorker(taxonomy *Taxonomy, slot_name string, threshold float64) obise
|
||||
lca_name = "scientific_name"
|
||||
}
|
||||
|
||||
f := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
lca, rans, _ := taxonomy.LCA(sequence, threshold)
|
||||
|
||||
sequence.SetAttribute(slot_name, lca.Taxid())
|
||||
sequence.SetAttribute(lca_name, lca.ScientificName())
|
||||
sequence.SetAttribute(lca_error, math.Round((1-rans)*1000)/1000)
|
||||
|
||||
return sequence
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
|
@ -14,9 +14,9 @@ func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiseq.SeqWorker
|
||||
taxonomy.RankList())
|
||||
}
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetTaxonAtRank(sequence, rank)
|
||||
return sequence
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
@ -24,9 +24,9 @@ func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiseq.SeqWorker
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetSpecies(sequence)
|
||||
return sequence
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
@ -34,9 +34,9 @@ func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiseq.SeqWorker {
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetGenusWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetGenus(sequence)
|
||||
return sequence
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
@ -44,9 +44,9 @@ func (taxonomy *Taxonomy) MakeSetGenusWorker() obiseq.SeqWorker {
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetFamily(sequence)
|
||||
return sequence
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
@ -54,9 +54,9 @@ func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiseq.SeqWorker {
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetPathWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
taxonomy.SetPath(s)
|
||||
return s
|
||||
w := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
taxonomy.SetPath(sequence)
|
||||
return obiseq.BioSequenceSlice{sequence}, nil
|
||||
}
|
||||
|
||||
return w
|
||||
|
@ -15,11 +15,11 @@ import (
|
||||
)
|
||||
|
||||
func DeleteAttributesWorker(toBeDeleted []string) obiseq.SeqWorker {
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
for _, k := range toBeDeleted {
|
||||
s.DeleteAttribute(k)
|
||||
}
|
||||
return s
|
||||
return obiseq.BioSequenceSlice{s}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
@ -48,7 +48,7 @@ func MatchPatternWorker(pattern, name string, errormax int, allowsIndel bool) ob
|
||||
slot_error := fmt.Sprintf("%s_error", name)
|
||||
slot_location := fmt.Sprintf("%s_location", name)
|
||||
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
apats, err := obiapat.MakeApatSequence(s, false)
|
||||
if err != nil {
|
||||
log.Fatalf("error in preparing sequence %s : %v", s.Id(), err)
|
||||
@ -59,6 +59,11 @@ func MatchPatternWorker(pattern, name string, errormax int, allowsIndel bool) ob
|
||||
if matched {
|
||||
annot := s.Annotations()
|
||||
annot[slot] = pattern
|
||||
|
||||
if start < 0 {
|
||||
start = 0
|
||||
}
|
||||
|
||||
match, err := s.Subsequence(start, end, false)
|
||||
if err != nil {
|
||||
log.Fatalf("Error in extracting pattern of sequence %s [%d;%d[ : %v",
|
||||
@ -83,7 +88,7 @@ func MatchPatternWorker(pattern, name string, errormax int, allowsIndel bool) ob
|
||||
annot[slot_location] = fmt.Sprintf("complement(%d..%d)", start+1, end)
|
||||
}
|
||||
}
|
||||
return s
|
||||
return obiseq.BioSequenceSlice{s}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
@ -97,14 +102,14 @@ func ToBeKeptAttributesWorker(toBeKept []string) obiseq.SeqWorker {
|
||||
d[v] = true
|
||||
}
|
||||
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
annot := s.Annotations()
|
||||
for key := range annot {
|
||||
if _, ok := d[key]; !ok {
|
||||
s.DeleteAttribute(key)
|
||||
}
|
||||
}
|
||||
return s
|
||||
return obiseq.BioSequenceSlice{s}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
@ -112,7 +117,7 @@ func ToBeKeptAttributesWorker(toBeKept []string) obiseq.SeqWorker {
|
||||
|
||||
func CutSequenceWorker(from, to int, breakOnError bool) obiseq.SeqWorker {
|
||||
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
var f, t int
|
||||
|
||||
switch {
|
||||
@ -142,16 +147,15 @@ func CutSequenceWorker(from, to int, breakOnError bool) obiseq.SeqWorker {
|
||||
if breakOnError {
|
||||
log.Fatalf("Cannot cut sequence %s (%v)", s.Id(), err)
|
||||
} else {
|
||||
log.Warnf("Cannot cut sequence %s (%v), sequence discarded", s.Id(), err)
|
||||
return nil
|
||||
err = fmt.Errorf("Cannot cut sequence %s (%v), sequence discarded", s.Id(), err)
|
||||
}
|
||||
}
|
||||
return rep
|
||||
return obiseq.BioSequenceSlice{rep}, err
|
||||
}
|
||||
|
||||
if from == 0 && to == 0 {
|
||||
f = func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
return s
|
||||
f = func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
return obiseq.BioSequenceSlice{s}, nil
|
||||
}
|
||||
}
|
||||
|
||||
@ -163,23 +167,23 @@ func CutSequenceWorker(from, to int, breakOnError bool) obiseq.SeqWorker {
|
||||
}
|
||||
|
||||
func ClearAllAttributesWorker() obiseq.SeqWorker {
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
annot := s.Annotations()
|
||||
for key := range annot {
|
||||
s.DeleteAttribute(key)
|
||||
}
|
||||
return s
|
||||
return obiseq.BioSequenceSlice{s}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func RenameAttributeWorker(toBeRenamed map[string]string) obiseq.SeqWorker {
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
for newName, oldName := range toBeRenamed {
|
||||
s.RenameAttribute(newName, oldName)
|
||||
}
|
||||
return s
|
||||
return obiseq.BioSequenceSlice{s}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
@ -201,20 +205,20 @@ func EvalAttributeWorker(expression map[string]string) obiseq.SeqWorker {
|
||||
}
|
||||
|
||||
func AddTaxonAtRankWorker(taxonomy *obitax.Taxonomy, ranks ...string) obiseq.SeqWorker {
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
for _, r := range ranks {
|
||||
taxonomy.SetTaxonAtRank(s, r)
|
||||
}
|
||||
return s
|
||||
return obiseq.BioSequenceSlice{s}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func AddSeqLengthWorker() obiseq.SeqWorker {
|
||||
f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
s.SetAttribute("seq_length", s.Len())
|
||||
return s
|
||||
return obiseq.BioSequenceSlice{s}, nil
|
||||
}
|
||||
|
||||
return f
|
||||
@ -309,8 +313,8 @@ func CLIAnnotationPipeline() obiiter.Pipeable {
|
||||
predicate := obigrep.CLISequenceSelectionPredicate()
|
||||
worker := CLIAnnotationWorker()
|
||||
|
||||
annotator := obiseq.SeqToSliceConditionalWorker(worker, predicate, true, false)
|
||||
f := obiiter.SliceWorkerPipe(annotator, obioptions.CLIParallelWorkers())
|
||||
annotator := obiseq.SeqToSliceConditionalWorker(predicate, worker, true, false)
|
||||
f := obiiter.SliceWorkerPipe(annotator, false, obioptions.CLIParallelWorkers())
|
||||
|
||||
return f
|
||||
}
|
||||
|
@ -60,7 +60,7 @@ func annotateOBIClean(dataset obiseq.BioSequenceSlice,
|
||||
sample map[string]*([]*seqPCR),
|
||||
tag, NAValue string) obiiter.IBioSequence {
|
||||
batchsize := 1000
|
||||
var annot = func(data obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||
var annot = func(data obiseq.BioSequenceSlice) (obiseq.BioSequenceSlice, error) {
|
||||
|
||||
for _, s := range data {
|
||||
status := Status(s)
|
||||
@ -87,11 +87,11 @@ func annotateOBIClean(dataset obiseq.BioSequenceSlice,
|
||||
annotation["obiclean_samplecount"] = head + internal + singleton
|
||||
|
||||
}
|
||||
return data
|
||||
return data, nil
|
||||
}
|
||||
|
||||
iter := obiiter.IBatchOver(dataset, batchsize)
|
||||
riter := iter.MakeISliceWorker(annot)
|
||||
riter := iter.MakeISliceWorker(annot, false)
|
||||
|
||||
return riter
|
||||
}
|
||||
|
@ -50,10 +50,13 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
obioptions.CLIParallelWorkers())
|
||||
|
||||
annotated := usable.MakeIWorker(taxonomy.MakeSetSpeciesWorker(),
|
||||
false,
|
||||
obioptions.CLIParallelWorkers(),
|
||||
).MakeIWorker(taxonomy.MakeSetGenusWorker(),
|
||||
false,
|
||||
obioptions.CLIParallelWorkers(),
|
||||
).MakeIWorker(taxonomy.MakeSetFamilyWorker(),
|
||||
false,
|
||||
obioptions.CLIParallelWorkers(),
|
||||
)
|
||||
|
||||
|
@ -30,7 +30,7 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error
|
||||
|
||||
worker := obingslibrary.ExtractBarcodeSliceWorker(ngsfilter, opts...)
|
||||
|
||||
newIter := iterator.MakeISliceWorker(worker)
|
||||
newIter := iterator.MakeISliceWorker(worker, false)
|
||||
|
||||
if !CLIConservedErrors() {
|
||||
log.Println("Discards unassigned sequences")
|
||||
|
@ -60,5 +60,5 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
|
||||
iterator = iterator.Pipe(frags)
|
||||
}
|
||||
|
||||
return iterator.MakeISliceWorker(worker, obioptions.CLIParallelWorkers(), 0), nil
|
||||
return iterator.MakeISliceWorker(worker, false, obioptions.CLIParallelWorkers(), 0), nil
|
||||
}
|
||||
|
@ -1,9 +1,10 @@
|
||||
package obitag
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
"math"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
@ -190,9 +191,10 @@ func GeomIdentifySeqWorker(references *obiseq.BioSequenceSlice,
|
||||
|
||||
landmarks := ExtractLandmarkSeqs(references)
|
||||
taxa := ExtractTaxonSet(references, taxo)
|
||||
return func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
return func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
buffer := make([]uint64, 100)
|
||||
return GeomIdentify(sequence, landmarks, references, taxa, taxo, &buffer)
|
||||
return obiseq.BioSequenceSlice{GeomIdentify(sequence, landmarks, references, taxa, taxo, &buffer)},
|
||||
nil
|
||||
}
|
||||
}
|
||||
|
||||
@ -202,5 +204,5 @@ func CLIGeomAssignTaxonomy(iterator obiiter.IBioSequence,
|
||||
) obiiter.IBioSequence {
|
||||
|
||||
worker := GeomIdentifySeqWorker(&references, taxo)
|
||||
return iterator.MakeIWorker(worker, obioptions.CLIParallelWorkers(), 0)
|
||||
return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0)
|
||||
}
|
||||
|
@ -259,8 +259,8 @@ func IdentifySeqWorker(references obiseq.BioSequenceSlice,
|
||||
taxa obitax.TaxonSet,
|
||||
taxo *obitax.Taxonomy,
|
||||
runExact bool) obiseq.SeqWorker {
|
||||
return func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
return Identify(sequence, references, refcounts, taxa, taxo, runExact)
|
||||
return func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
return obiseq.BioSequenceSlice{Identify(sequence, references, refcounts, taxa, taxo, runExact)}, nil
|
||||
}
|
||||
}
|
||||
|
||||
@ -285,5 +285,5 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence,
|
||||
|
||||
worker := IdentifySeqWorker(references, refcounts, taxa, taxo, CLIRunExact())
|
||||
|
||||
return iterator.MakeIWorker(worker, obioptions.CLIParallelWorkers(), 0)
|
||||
return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0)
|
||||
}
|
||||
|
Reference in New Issue
Block a user