Make the replace function of the eval language accept regular expressions

This commit is contained in:
Eric Coissac
2025-04-10 15:17:15 +02:00
parent c2f38e737b
commit a57cfda675
4 changed files with 75 additions and 6 deletions

View File

@ -2,21 +2,75 @@ package obicorazick
import ( import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"sync"
"os"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
"github.com/rrethy/ahocorasick" "github.com/rrethy/ahocorasick"
"github.com/schollz/progressbar/v3"
) )
func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker { func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
matcher := ahocorasick.CompileStrings(patterns) sizebatch:=10000000
nmatcher := len(patterns) / sizebatch + 1
log.Infof("Building AhoCorasick %d matcher for %d patterns in slot %s",
nmatcher, len(patterns), slot)
if nmatcher == 0 {
log.Errorln("No patterns provided")
}
matchers := make([]*ahocorasick.Matcher, nmatcher)
ieme := make(chan int)
mutex := &sync.WaitGroup{}
npar := min(obidefault.ParallelWorkers(), nmatcher)
mutex.Add(npar)
pbopt := make([]progressbar.Option, 0, 5)
pbopt = append(pbopt,
progressbar.OptionSetWriter(os.Stderr),
progressbar.OptionSetWidth(15),
progressbar.OptionShowCount(),
progressbar.OptionShowIts(),
progressbar.OptionSetDescription("Building AhoCorasick matcher..."),
)
bar := progressbar.NewOptions(nmatcher, pbopt...)
bar.Add(0)
builder := func() {
for i := range ieme {
matchers[i] = ahocorasick.CompileStrings(patterns[i*sizebatch:min((i+1)*sizebatch,len(patterns))])
bar.Add(1)
}
mutex.Done()
}
for i := 0; i < npar; i++ {
go builder()
}
for i := 0; i < nmatcher; i++ {
ieme <- i
}
close(ieme)
mutex.Wait()
fslot := slot + "_Fwd" fslot := slot + "_Fwd"
rslot := slot + "_Rev" rslot := slot + "_Rev"
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) { f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
matchesF := len(matcher.FindAllByteSlice(s.Sequence())) matchesF := 0
matchesR := len(matcher.FindAllByteSlice(s.ReverseComplement(false).Sequence())) matchesR := 0
b := s.Sequence()
bc := s.ReverseComplement(false).Sequence()
for _, matcher := range matchers {
matchesF += len(matcher.FindAllByteSlice(b))
matchesR += len(matcher.FindAllByteSlice(bc))
}
log.Debugln("Macthes = ", matchesF, matchesR) log.Debugln("Macthes = ", matchesF, matchesR)
matches := matchesF + matchesR matches := matchesF + matchesR

View File

@ -8,7 +8,7 @@ import (
// corresponds to the last commit, and not the one when the file will be // corresponds to the last commit, and not the one when the file will be
// commited // commited
var _Commit = "67e5b6e" var _Commit = "0aec5ba"
var _Version = "Release 4.4.0" var _Version = "Release 4.4.0"
// Version returns the version of the obitools package. // Version returns the version of the obitools package.

View File

@ -3,6 +3,7 @@ package obiseq
import ( import (
"fmt" "fmt"
"reflect" "reflect"
"regexp"
"strings" "strings"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
@ -204,6 +205,8 @@ var OBILang = gval.NewLanguage(
return scomp, nil return scomp, nil
}), }),
gval.Function("replace", func(args ...interface{}) (interface{}, error) { gval.Function("replace", func(args ...interface{}) (interface{}, error) {
return strings.ReplaceAll(args[0].(string), args[1].(string), args[2].(string)), nil pattern := regexp.MustCompile(args[1].(string))
results := pattern.ReplaceAllString(args[0].(string), args[2].(string))
return results, nil
}), }),
) )

View File

@ -3,6 +3,7 @@ package obiutils
import ( import (
"fmt" "fmt"
"reflect" "reflect"
"strconv"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
@ -125,6 +126,12 @@ func InterfaceToInt(i interface{}) (val int, err error) {
val = int(t) // standardizes across systems val = int(t) // standardizes across systems
case uint64: case uint64:
val = int(t) // standardizes across systems val = int(t) // standardizes across systems
case string:
rep, err := strconv.ParseInt(t, 10, 64)
if err != nil {
err = &NotAnFloat64{"value attribute cannot be casted to an int value"}
}
val = int(rep)
default: default:
err = &NotAnInteger{"value attribute cannot be casted to an integer"} err = &NotAnInteger{"value attribute cannot be casted to an integer"}
} }
@ -162,6 +169,11 @@ func InterfaceToFloat64(i interface{}) (val float64, err error) {
val = float64(t) // standardizes across systems val = float64(t) // standardizes across systems
case uint64: case uint64:
val = float64(t) // standardizes across systems val = float64(t) // standardizes across systems
case string:
val, err = strconv.ParseFloat(t, 10)
if err != nil {
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
}
default: default:
err = &NotAnFloat64{"value attribute cannot be casted to a float value"} err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
} }