Files
obitools4/pkg/obicorazick/worker.go

46 lines
1.1 KiB
Go
Raw Normal View History

package obicorazick
import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"github.com/rrethy/ahocorasick"
)
// AhoCorazickWorker builds a SeqWorker that counts the occurrences of patterns
// in a sequence and in its reverse complement.
//
// The patterns are compiled once into a single matcher that is captured by the
// returned worker. For each sequence the worker counts the matches reported on
// the sequence itself and on its reverse complement. When at least one match is
// found, three attributes are set on the sequence:
//
//   - slot: total number of matches (forward + reverse),
//   - slot + "_Fwd": number of matches on the sequence as given,
//   - slot + "_Rev": number of matches on its reverse complement.
//
// Sequences without any match are left unannotated. The worker always returns
// the input sequence as a single-element slice together with a nil error.
func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
	matcher := ahocorasick.CompileStrings(patterns)

	fslot := slot + "_Fwd"
	rslot := slot + "_Rev"

	f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
		matchesF := len(matcher.FindAllByteSlice(s.Sequence()))
		// NOTE(review): the false argument presumably requests a copy rather
		// than an in-place reverse complement, leaving s untouched — confirm.
		matchesR := len(matcher.FindAllByteSlice(s.ReverseComplement(false).Sequence()))

		log.Debugln("Matches = ", matchesF, matchesR)

		matches := matchesF + matchesR

		// Only annotate sequences that actually contain a pattern.
		if matches > 0 {
			s.SetAttribute(slot, matches)
			s.SetAttribute(fslot, matchesF)
			s.SetAttribute(rslot, matchesR)
		}

		return obiseq.BioSequenceSlice{s}, nil
	}

	return f
}
// AhoCorazickPredicate builds a SequencePredicate that accepts a sequence when
// the matcher compiled from patterns reports at least minMatches matches on it.
//
// Only the sequence as given is scanned; its reverse complement is not
// considered here. The patterns are compiled once and the resulting matcher is
// captured by the returned predicate.
func AhoCorazickPredicate(minMatches int, patterns []string) obiseq.SequencePredicate {
	automaton := ahocorasick.CompileStrings(patterns)

	return func(sequence *obiseq.BioSequence) bool {
		hits := len(automaton.FindAllByteSlice(sequence.Sequence()))
		return hits >= minMatches
	}
}