mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Make the replace function of the eval language accept regex
This commit is contained in:
@ -2,21 +2,75 @@ package obicorazick
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"sync"
|
||||
"os"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"github.com/rrethy/ahocorasick"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
|
||||
|
||||
matcher := ahocorasick.CompileStrings(patterns)
|
||||
sizebatch:=10000000
|
||||
nmatcher := len(patterns) / sizebatch + 1
|
||||
log.Infof("Building AhoCorasick %d matcher for %d patterns in slot %s",
|
||||
nmatcher, len(patterns), slot)
|
||||
|
||||
if nmatcher == 0 {
|
||||
log.Errorln("No patterns provided")
|
||||
}
|
||||
|
||||
matchers := make([]*ahocorasick.Matcher, nmatcher)
|
||||
ieme := make(chan int)
|
||||
mutex := &sync.WaitGroup{}
|
||||
npar := min(obidefault.ParallelWorkers(), nmatcher)
|
||||
mutex.Add(npar)
|
||||
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowCount(),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetDescription("Building AhoCorasick matcher..."),
|
||||
)
|
||||
|
||||
bar := progressbar.NewOptions(nmatcher, pbopt...)
|
||||
bar.Add(0)
|
||||
|
||||
builder := func() {
|
||||
for i := range ieme {
|
||||
matchers[i] = ahocorasick.CompileStrings(patterns[i*sizebatch:min((i+1)*sizebatch,len(patterns))])
|
||||
bar.Add(1)
|
||||
}
|
||||
mutex.Done()
|
||||
}
|
||||
|
||||
for i := 0; i < npar; i++ {
|
||||
go builder()
|
||||
}
|
||||
|
||||
for i := 0; i < nmatcher; i++ {
|
||||
ieme <- i
|
||||
}
|
||||
|
||||
close(ieme)
|
||||
mutex.Wait()
|
||||
|
||||
fslot := slot + "_Fwd"
|
||||
rslot := slot + "_Rev"
|
||||
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
matchesF := len(matcher.FindAllByteSlice(s.Sequence()))
|
||||
matchesR := len(matcher.FindAllByteSlice(s.ReverseComplement(false).Sequence()))
|
||||
matchesF := 0
|
||||
matchesR := 0
|
||||
b := s.Sequence()
|
||||
bc := s.ReverseComplement(false).Sequence()
|
||||
|
||||
for _, matcher := range matchers {
|
||||
matchesF += len(matcher.FindAllByteSlice(b))
|
||||
matchesR += len(matcher.FindAllByteSlice(bc))
|
||||
}
|
||||
|
||||
log.Debugln("Macthes = ", matchesF, matchesR)
|
||||
matches := matchesF + matchesR
|
||||
|
Reference in New Issue
Block a user