2022-02-01 23:25:19 +01:00
|
|
|
package obiseq
|
|
|
|
|
2022-02-24 07:08:40 +01:00
|
|
|
import (
|
|
|
|
"context"
|
2022-02-25 07:29:52 +01:00
|
|
|
"fmt"
|
|
|
|
"regexp"
|
|
|
|
|
2022-11-17 11:09:58 +01:00
|
|
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obieval"
|
2022-02-24 12:14:52 +01:00
|
|
|
log "github.com/sirupsen/logrus"
|
2022-02-24 07:08:40 +01:00
|
|
|
)
|
|
|
|
|
2022-02-21 19:00:23 +01:00
|
|
|
// SequencePredicate is a function that receives a *BioSequence and reports
// a boolean verdict about it. The And, Or, Xor and Not methods declared in
// this file accept a nil SequencePredicate and handle it explicitly instead
// of calling it.
type SequencePredicate func(*BioSequence) bool
|
2022-02-01 23:25:19 +01:00
|
|
|
|
|
|
|
func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate {
|
2022-02-25 07:29:52 +01:00
|
|
|
switch {
|
|
|
|
case predicate1 == nil:
|
|
|
|
return predicate2
|
|
|
|
case predicate2 == nil:
|
|
|
|
return predicate1
|
|
|
|
default:
|
|
|
|
return func(sequence *BioSequence) bool {
|
2022-09-28 16:13:20 +02:00
|
|
|
if !predicate1(sequence) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return predicate2(sequence)
|
2022-02-25 07:29:52 +01:00
|
|
|
}
|
2022-02-01 23:25:19 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (predicate1 SequencePredicate) Or(predicate2 SequencePredicate) SequencePredicate {
|
2022-02-25 07:29:52 +01:00
|
|
|
switch {
|
|
|
|
case predicate1 == nil:
|
|
|
|
return predicate2
|
|
|
|
case predicate2 == nil:
|
|
|
|
return predicate1
|
|
|
|
default:
|
|
|
|
return func(sequence *BioSequence) bool {
|
2022-09-28 16:13:20 +02:00
|
|
|
if predicate1(sequence) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return predicate2(sequence)
|
2022-02-25 07:29:52 +01:00
|
|
|
}
|
2022-02-01 23:25:19 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (predicate1 SequencePredicate) Xor(predicate2 SequencePredicate) SequencePredicate {
|
2022-02-25 07:29:52 +01:00
|
|
|
switch {
|
|
|
|
case predicate1 == nil:
|
|
|
|
return predicate2
|
|
|
|
case predicate2 == nil:
|
|
|
|
return predicate1
|
|
|
|
default:
|
|
|
|
return func(sequence *BioSequence) bool {
|
|
|
|
p1 := predicate1(sequence)
|
|
|
|
p2 := predicate2(sequence)
|
|
|
|
return (p1 && !p2) || (p2 && !p1)
|
|
|
|
}
|
2022-02-01 23:25:19 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (predicate1 SequencePredicate) Not() SequencePredicate {
|
2022-02-25 07:29:52 +01:00
|
|
|
switch {
|
|
|
|
case predicate1 == nil:
|
|
|
|
return nil
|
|
|
|
default:
|
|
|
|
return func(sequence *BioSequence) bool {
|
|
|
|
return !predicate1(sequence)
|
|
|
|
}
|
2022-02-01 23:25:19 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func HasAttribute(name string) SequencePredicate {
|
|
|
|
|
2022-02-21 19:00:23 +01:00
|
|
|
f := func(sequence *BioSequence) bool {
|
2022-02-01 23:25:19 +01:00
|
|
|
if sequence.HasAnnotation() {
|
|
|
|
_, ok := (sequence.Annotations())[name]
|
|
|
|
return ok
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
2022-02-25 07:29:52 +01:00
|
|
|
func IsAttributeMatch(name string, pattern string) SequencePredicate {
|
|
|
|
pat, err := regexp.Compile(pattern)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("error in atribute %s regular pattern syntax : %v", name, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
f := func(sequence *BioSequence) bool {
|
|
|
|
if sequence.HasAnnotation() {
|
|
|
|
val, ok := (sequence.Annotations())[name]
|
|
|
|
if ok {
|
|
|
|
switch val := val.(type) {
|
|
|
|
case string:
|
|
|
|
return pat.MatchString(val)
|
|
|
|
default:
|
|
|
|
return pat.MatchString(fmt.Sprint(val))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func IsMoreAbundantOrEqualTo(count int) SequencePredicate {
|
2022-02-21 19:00:23 +01:00
|
|
|
f := func(sequence *BioSequence) bool {
|
2022-02-25 07:29:52 +01:00
|
|
|
return sequence.Count() >= count
|
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func IsLessAbundantOrEqualTo(count int) SequencePredicate {
|
|
|
|
f := func(sequence *BioSequence) bool {
|
|
|
|
return sequence.Count() <= count
|
2022-02-01 23:25:19 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func IsLongerOrEqualTo(length int) SequencePredicate {
|
2022-02-21 19:00:23 +01:00
|
|
|
f := func(sequence *BioSequence) bool {
|
2022-11-17 11:09:58 +01:00
|
|
|
return sequence.Len() >= length
|
2022-02-01 23:25:19 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func IsShorterOrEqualTo(length int) SequencePredicate {
|
2022-02-21 19:00:23 +01:00
|
|
|
f := func(sequence *BioSequence) bool {
|
2022-11-17 11:09:58 +01:00
|
|
|
return sequence.Len() <= length
|
2022-02-01 23:25:19 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
2022-02-24 07:08:40 +01:00
|
|
|
|
2022-02-25 07:29:52 +01:00
|
|
|
func IsSequenceMatch(pattern string) SequencePredicate {
|
|
|
|
pat, err := regexp.Compile("(?i)" + pattern)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("error in sequence regular pattern syntax : %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
f := func(sequence *BioSequence) bool {
|
|
|
|
return pat.Match(sequence.Sequence())
|
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func IsDefinitionMatch(pattern string) SequencePredicate {
|
|
|
|
pat, err := regexp.Compile(pattern)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("error in definition regular pattern syntax : %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
f := func(sequence *BioSequence) bool {
|
|
|
|
return pat.MatchString(sequence.Definition())
|
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func IsIdMatch(pattern string) SequencePredicate {
|
|
|
|
pat, err := regexp.Compile(pattern)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("error in identifier regular pattern syntax : %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
f := func(sequence *BioSequence) bool {
|
|
|
|
return pat.MatchString(sequence.Id())
|
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func IsIdIn(ids ...string) SequencePredicate {
|
|
|
|
idset := make(map[string]bool)
|
|
|
|
|
|
|
|
for _, v := range ids {
|
|
|
|
idset[v] = true
|
|
|
|
}
|
|
|
|
|
|
|
|
f := func(sequence *BioSequence) bool {
|
|
|
|
_, ok := idset[sequence.Id()]
|
|
|
|
return ok
|
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func ExpressionPredicat(expression string) SequencePredicate {
|
2022-02-24 07:08:40 +01:00
|
|
|
|
2022-11-17 11:09:58 +01:00
|
|
|
exp, err := obieval.OBILang.NewEvaluable(expression)
|
2022-02-24 07:08:40 +01:00
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("Error in the expression : %s", expression)
|
|
|
|
}
|
|
|
|
|
|
|
|
f := func(sequence *BioSequence) bool {
|
|
|
|
value, err := exp.EvalBool(context.Background(),
|
|
|
|
map[string]interface{}{
|
2022-09-28 16:13:20 +02:00
|
|
|
"annot": sequence.Annotations(),
|
|
|
|
"count": sequence.Count(),
|
2022-11-17 11:09:58 +01:00
|
|
|
"seqlength": sequence.Len(),
|
2022-09-28 16:13:20 +02:00
|
|
|
"sequence": sequence,
|
2022-02-24 07:08:40 +01:00
|
|
|
},
|
|
|
|
)
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
|
|
|
|
expression,
|
|
|
|
sequence.Id())
|
|
|
|
}
|
|
|
|
|
|
|
|
return value
|
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|