2023-01-22 22:39:13 +01:00
|
|
|
package obiseq
|
|
|
|
|
2024-03-02 16:03:46 -04:00
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"slices"
|
|
|
|
|
|
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
)
|
2023-11-22 09:46:30 +01:00
|
|
|
|
2023-01-22 22:39:13 +01:00
|
|
|
// SeqAnnotator is the signature of a function that receives a pointer to a
// BioSequence and returns nothing: whatever it does to the sequence has to be
// done through that pointer.
type SeqAnnotator func(*BioSequence)
|
|
|
|
|
2024-03-02 16:03:46 -04:00
|
|
|
// SeqWorker is the signature of a function that processes a single
// BioSequence and returns the resulting sequences as a BioSequenceSlice,
// together with an error when the processing failed.
type SeqWorker func(*BioSequence) (BioSequenceSlice, error)
|
|
|
|
// SeqSliceWorker is the signature of a function that processes a whole
// BioSequenceSlice and returns the resulting BioSequenceSlice, together with
// an error when the processing failed.
type SeqSliceWorker func(BioSequenceSlice) (BioSequenceSlice, error)
|
2023-01-22 22:39:13 +01:00
|
|
|
|
2024-03-02 16:03:46 -04:00
|
|
|
func NilSeqWorker(seq *BioSequence) (BioSequenceSlice, error) {
|
|
|
|
return BioSequenceSlice{seq}, nil
|
2023-01-25 13:22:56 +01:00
|
|
|
}
|
|
|
|
|
2023-01-22 22:39:13 +01:00
|
|
|
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
2024-03-02 16:03:46 -04:00
|
|
|
f := func(seq *BioSequence) (BioSequenceSlice, error) {
|
2023-01-22 22:39:13 +01:00
|
|
|
function(seq)
|
2024-03-02 16:03:46 -04:00
|
|
|
return BioSequenceSlice{seq}, nil
|
2023-01-22 22:39:13 +01:00
|
|
|
}
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
2023-11-22 09:46:30 +01:00
|
|
|
func SeqToSliceWorker(worker SeqWorker,
|
|
|
|
inplace, breakOnError bool) SeqSliceWorker {
|
2023-01-25 13:22:56 +01:00
|
|
|
var f SeqSliceWorker
|
|
|
|
|
|
|
|
if worker == nil {
|
|
|
|
if inplace {
|
2024-03-02 16:03:46 -04:00
|
|
|
f = func(input BioSequenceSlice) (BioSequenceSlice, error) {
|
|
|
|
return input, nil
|
2023-01-25 13:22:56 +01:00
|
|
|
}
|
2023-11-22 09:46:30 +01:00
|
|
|
} else {
|
2024-03-02 16:03:46 -04:00
|
|
|
f = func(input BioSequenceSlice) (BioSequenceSlice, error) {
|
2023-01-25 13:22:56 +01:00
|
|
|
output := MakeBioSequenceSlice(len(input))
|
2023-11-22 09:46:30 +01:00
|
|
|
copy(output, input)
|
2024-03-02 16:03:46 -04:00
|
|
|
return output, nil
|
2023-11-22 09:46:30 +01:00
|
|
|
}
|
2023-01-25 13:22:56 +01:00
|
|
|
}
|
|
|
|
} else {
|
2024-03-02 16:03:46 -04:00
|
|
|
f = func(input BioSequenceSlice) (BioSequenceSlice, error) {
|
2023-01-25 13:22:56 +01:00
|
|
|
output := input
|
|
|
|
if !inplace {
|
|
|
|
output = MakeBioSequenceSlice(len(input))
|
|
|
|
}
|
2023-11-22 09:46:30 +01:00
|
|
|
i := 0
|
|
|
|
for _, s := range input {
|
2024-03-02 16:03:46 -04:00
|
|
|
r, err := worker(s)
|
|
|
|
if err == nil {
|
|
|
|
for _, rs := range r {
|
|
|
|
output[i] = rs
|
|
|
|
i++
|
|
|
|
if i == cap(output) {
|
|
|
|
slices.Grow(output, cap(output))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
if breakOnError {
|
|
|
|
err = fmt.Errorf("got an error on sequence %s processing : %v",
|
|
|
|
s.Id(), err)
|
|
|
|
return BioSequenceSlice{}, err
|
|
|
|
} else {
|
|
|
|
log.Warnf("got an error on sequence %s processing",
|
|
|
|
s.Id())
|
|
|
|
}
|
2023-11-22 09:46:30 +01:00
|
|
|
}
|
2023-01-25 13:22:56 +01:00
|
|
|
}
|
2023-11-22 09:46:30 +01:00
|
|
|
|
2024-03-02 16:03:46 -04:00
|
|
|
return output[0:i], nil
|
2023-11-22 09:46:30 +01:00
|
|
|
}
|
|
|
|
|
2023-01-25 13:22:56 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
2024-03-02 16:03:46 -04:00
|
|
|
func SeqToSliceConditionalWorker(
|
2023-01-25 13:22:56 +01:00
|
|
|
condition SequencePredicate,
|
2024-03-02 16:03:46 -04:00
|
|
|
worker SeqWorker,
|
2023-11-22 09:46:30 +01:00
|
|
|
inplace, breakOnError bool) SeqSliceWorker {
|
2023-01-25 13:22:56 +01:00
|
|
|
|
|
|
|
if condition == nil {
|
2023-11-22 09:46:30 +01:00
|
|
|
return SeqToSliceWorker(worker, inplace, breakOnError)
|
2023-01-25 13:22:56 +01:00
|
|
|
}
|
|
|
|
|
2024-03-02 16:03:46 -04:00
|
|
|
f := func(input BioSequenceSlice) (BioSequenceSlice, error) {
|
2023-01-22 22:39:13 +01:00
|
|
|
output := input
|
2023-01-25 13:22:56 +01:00
|
|
|
if !inplace {
|
|
|
|
output = MakeBioSequenceSlice(len(input))
|
2023-01-22 22:39:13 +01:00
|
|
|
}
|
2023-11-22 09:46:30 +01:00
|
|
|
|
|
|
|
i := 0
|
|
|
|
|
|
|
|
for _, s := range input {
|
2023-01-25 13:22:56 +01:00
|
|
|
if condition(s) {
|
2024-03-02 16:03:46 -04:00
|
|
|
r, err := worker(s)
|
|
|
|
if err == nil {
|
|
|
|
for _, rs := range r {
|
|
|
|
output[i] = rs
|
|
|
|
i++
|
|
|
|
if i == cap(output) {
|
|
|
|
slices.Grow(output, cap(output))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if breakOnError {
|
|
|
|
err = fmt.Errorf("got an error on sequence %s processing : %v",
|
|
|
|
s.Id(), err)
|
|
|
|
return BioSequenceSlice{}, err
|
|
|
|
} else {
|
|
|
|
log.Warnf("got an error on sequence %s processing",
|
|
|
|
s.Id())
|
|
|
|
}
|
2023-11-22 09:46:30 +01:00
|
|
|
}
|
2023-01-22 22:39:13 +01:00
|
|
|
}
|
2023-01-25 13:22:56 +01:00
|
|
|
}
|
2023-01-22 22:39:13 +01:00
|
|
|
|
2024-03-02 16:03:46 -04:00
|
|
|
return output[0:i], nil
|
2023-01-22 22:39:13 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
2023-01-25 13:22:56 +01:00
|
|
|
func (worker SeqWorker) ChainWorkers(next SeqWorker) SeqWorker {
|
|
|
|
if worker == nil {
|
|
|
|
return next
|
|
|
|
} else {
|
|
|
|
if next == nil {
|
|
|
|
return worker
|
|
|
|
}
|
2023-11-22 09:46:30 +01:00
|
|
|
}
|
2023-01-25 13:22:56 +01:00
|
|
|
|
2024-03-02 16:03:46 -04:00
|
|
|
sw := SeqToSliceWorker(next, true, false)
|
|
|
|
|
|
|
|
f := func(seq *BioSequence) (BioSequenceSlice, error) {
|
2023-11-22 09:46:30 +01:00
|
|
|
if seq == nil {
|
2024-03-02 16:03:46 -04:00
|
|
|
return BioSequenceSlice{}, nil
|
|
|
|
}
|
|
|
|
slice, err := worker(seq)
|
|
|
|
if err == nil {
|
|
|
|
slice, err = sw(slice)
|
2023-11-22 09:46:30 +01:00
|
|
|
}
|
2024-03-02 16:03:46 -04:00
|
|
|
return slice, err
|
2023-01-25 13:22:56 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|