mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Make the replace function of the eval language accept regular expressions
This commit is contained in:
@ -2,21 +2,75 @@ package obicorazick
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"sync"
|
||||
"os"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||
"github.com/rrethy/ahocorasick"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
|
||||
|
||||
matcher := ahocorasick.CompileStrings(patterns)
|
||||
sizebatch:=10000000
|
||||
nmatcher := len(patterns) / sizebatch + 1
|
||||
log.Infof("Building AhoCorasick %d matcher for %d patterns in slot %s",
|
||||
nmatcher, len(patterns), slot)
|
||||
|
||||
if nmatcher == 0 {
|
||||
log.Errorln("No patterns provided")
|
||||
}
|
||||
|
||||
matchers := make([]*ahocorasick.Matcher, nmatcher)
|
||||
ieme := make(chan int)
|
||||
mutex := &sync.WaitGroup{}
|
||||
npar := min(obidefault.ParallelWorkers(), nmatcher)
|
||||
mutex.Add(npar)
|
||||
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
pbopt = append(pbopt,
|
||||
progressbar.OptionSetWriter(os.Stderr),
|
||||
progressbar.OptionSetWidth(15),
|
||||
progressbar.OptionShowCount(),
|
||||
progressbar.OptionShowIts(),
|
||||
progressbar.OptionSetDescription("Building AhoCorasick matcher..."),
|
||||
)
|
||||
|
||||
bar := progressbar.NewOptions(nmatcher, pbopt...)
|
||||
bar.Add(0)
|
||||
|
||||
builder := func() {
|
||||
for i := range ieme {
|
||||
matchers[i] = ahocorasick.CompileStrings(patterns[i*sizebatch:min((i+1)*sizebatch,len(patterns))])
|
||||
bar.Add(1)
|
||||
}
|
||||
mutex.Done()
|
||||
}
|
||||
|
||||
for i := 0; i < npar; i++ {
|
||||
go builder()
|
||||
}
|
||||
|
||||
for i := 0; i < nmatcher; i++ {
|
||||
ieme <- i
|
||||
}
|
||||
|
||||
close(ieme)
|
||||
mutex.Wait()
|
||||
|
||||
fslot := slot + "_Fwd"
|
||||
rslot := slot + "_Rev"
|
||||
|
||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
matchesF := len(matcher.FindAllByteSlice(s.Sequence()))
|
||||
matchesR := len(matcher.FindAllByteSlice(s.ReverseComplement(false).Sequence()))
|
||||
matchesF := 0
|
||||
matchesR := 0
|
||||
b := s.Sequence()
|
||||
bc := s.ReverseComplement(false).Sequence()
|
||||
|
||||
for _, matcher := range matchers {
|
||||
matchesF += len(matcher.FindAllByteSlice(b))
|
||||
matchesR += len(matcher.FindAllByteSlice(bc))
|
||||
}
|
||||
|
||||
log.Debugln("Macthes = ", matchesF, matchesR)
|
||||
matches := matchesF + matchesR
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// committed
|
||||
|
||||
var _Commit = "67e5b6e"
|
||||
var _Commit = "0aec5ba"
|
||||
var _Version = "Release 4.4.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
@ -3,6 +3,7 @@ package obiseq
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
@ -204,6 +205,8 @@ var OBILang = gval.NewLanguage(
|
||||
return scomp, nil
|
||||
}),
|
||||
gval.Function("replace", func(args ...interface{}) (interface{}, error) {
|
||||
return strings.ReplaceAll(args[0].(string), args[1].(string), args[2].(string)), nil
|
||||
pattern := regexp.MustCompile(args[1].(string))
|
||||
results := pattern.ReplaceAllString(args[0].(string), args[2].(string))
|
||||
return results, nil
|
||||
}),
|
||||
)
|
||||
|
@ -3,6 +3,7 @@ package obiutils
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strconv"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
@ -125,6 +126,12 @@ func InterfaceToInt(i interface{}) (val int, err error) {
|
||||
val = int(t) // standardizes across systems
|
||||
case uint64:
|
||||
val = int(t) // standardizes across systems
|
||||
case string:
|
||||
rep, err := strconv.ParseInt(t, 10, 64)
|
||||
if err != nil {
|
||||
err = &NotAnFloat64{"value attribute cannot be casted to an int value"}
|
||||
}
|
||||
val = int(rep)
|
||||
default:
|
||||
err = &NotAnInteger{"value attribute cannot be casted to an integer"}
|
||||
}
|
||||
@ -162,6 +169,11 @@ func InterfaceToFloat64(i interface{}) (val float64, err error) {
|
||||
val = float64(t) // standardizes across systems
|
||||
case uint64:
|
||||
val = float64(t) // standardizes across systems
|
||||
case string:
|
||||
val, err = strconv.ParseFloat(t, 10)
|
||||
if err != nil {
|
||||
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
||||
}
|
||||
default:
|
||||
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
||||
}
|
||||
|
Reference in New Issue
Block a user