mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
46 lines
1.1 KiB
Go
46 lines
1.1 KiB
Go
package obicorazick
|
|
|
|
import (
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
|
"github.com/rrethy/ahocorasick"
|
|
)
|
|
|
|
func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
|
|
|
|
matcher := ahocorasick.CompileStrings(patterns)
|
|
|
|
fslot := slot + "_Fwd"
|
|
rslot := slot + "_Rev"
|
|
|
|
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
|
matchesF := len(matcher.FindAllByteSlice(s.Sequence()))
|
|
matchesR := len(matcher.FindAllByteSlice(s.ReverseComplement(false).Sequence()))
|
|
|
|
log.Debugln("Macthes = ", matchesF, matchesR)
|
|
matches := matchesF + matchesR
|
|
if matches > 0 {
|
|
s.SetAttribute(slot, matches)
|
|
s.SetAttribute(fslot, matchesF)
|
|
s.SetAttribute(rslot, matchesR)
|
|
}
|
|
|
|
return obiseq.BioSequenceSlice{s}, nil
|
|
}
|
|
|
|
return f
|
|
}
|
|
|
|
func AhoCorazickPredicate(minMatches int, patterns []string) obiseq.SequencePredicate {
|
|
|
|
matcher := ahocorasick.CompileStrings(patterns)
|
|
|
|
f := func(s *obiseq.BioSequence) bool {
|
|
matches := matcher.FindAllByteSlice(s.Sequence())
|
|
return len(matches) >= minMatches
|
|
}
|
|
|
|
return f
|
|
}
|