mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Make the replace function of the eval language accept regular expressions
This commit is contained in:
@ -2,21 +2,75 @@ package obicorazick
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
"sync"
|
||||||
|
"os"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"github.com/rrethy/ahocorasick"
|
"github.com/rrethy/ahocorasick"
|
||||||
|
"github.com/schollz/progressbar/v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
|
func AhoCorazickWorker(slot string, patterns []string) obiseq.SeqWorker {
|
||||||
|
|
||||||
matcher := ahocorasick.CompileStrings(patterns)
|
sizebatch:=10000000
|
||||||
|
nmatcher := len(patterns) / sizebatch + 1
|
||||||
|
log.Infof("Building AhoCorasick %d matcher for %d patterns in slot %s",
|
||||||
|
nmatcher, len(patterns), slot)
|
||||||
|
|
||||||
|
if nmatcher == 0 {
|
||||||
|
log.Errorln("No patterns provided")
|
||||||
|
}
|
||||||
|
|
||||||
|
matchers := make([]*ahocorasick.Matcher, nmatcher)
|
||||||
|
ieme := make(chan int)
|
||||||
|
mutex := &sync.WaitGroup{}
|
||||||
|
npar := min(obidefault.ParallelWorkers(), nmatcher)
|
||||||
|
mutex.Add(npar)
|
||||||
|
|
||||||
|
pbopt := make([]progressbar.Option, 0, 5)
|
||||||
|
pbopt = append(pbopt,
|
||||||
|
progressbar.OptionSetWriter(os.Stderr),
|
||||||
|
progressbar.OptionSetWidth(15),
|
||||||
|
progressbar.OptionShowCount(),
|
||||||
|
progressbar.OptionShowIts(),
|
||||||
|
progressbar.OptionSetDescription("Building AhoCorasick matcher..."),
|
||||||
|
)
|
||||||
|
|
||||||
|
bar := progressbar.NewOptions(nmatcher, pbopt...)
|
||||||
|
bar.Add(0)
|
||||||
|
|
||||||
|
builder := func() {
|
||||||
|
for i := range ieme {
|
||||||
|
matchers[i] = ahocorasick.CompileStrings(patterns[i*sizebatch:min((i+1)*sizebatch,len(patterns))])
|
||||||
|
bar.Add(1)
|
||||||
|
}
|
||||||
|
mutex.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < npar; i++ {
|
||||||
|
go builder()
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < nmatcher; i++ {
|
||||||
|
ieme <- i
|
||||||
|
}
|
||||||
|
|
||||||
|
close(ieme)
|
||||||
|
mutex.Wait()
|
||||||
|
|
||||||
fslot := slot + "_Fwd"
|
fslot := slot + "_Fwd"
|
||||||
rslot := slot + "_Rev"
|
rslot := slot + "_Rev"
|
||||||
|
|
||||||
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
f := func(s *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||||
matchesF := len(matcher.FindAllByteSlice(s.Sequence()))
|
matchesF := 0
|
||||||
matchesR := len(matcher.FindAllByteSlice(s.ReverseComplement(false).Sequence()))
|
matchesR := 0
|
||||||
|
b := s.Sequence()
|
||||||
|
bc := s.ReverseComplement(false).Sequence()
|
||||||
|
|
||||||
|
for _, matcher := range matchers {
|
||||||
|
matchesF += len(matcher.FindAllByteSlice(b))
|
||||||
|
matchesR += len(matcher.FindAllByteSlice(bc))
|
||||||
|
}
|
||||||
|
|
||||||
log.Debugln("Macthes = ", matchesF, matchesR)
|
log.Debugln("Macthes = ", matchesF, matchesR)
|
||||||
matches := matchesF + matchesR
|
matches := matchesF + matchesR
|
||||||
|
@ -8,7 +8,7 @@ import (
|
|||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// committed
|
// committed
|
||||||
|
|
||||||
var _Commit = "67e5b6e"
|
var _Commit = "0aec5ba"
|
||||||
var _Version = "Release 4.4.0"
|
var _Version = "Release 4.4.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
@ -3,6 +3,7 @@ package obiseq
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@ -204,6 +205,8 @@ var OBILang = gval.NewLanguage(
|
|||||||
return scomp, nil
|
return scomp, nil
|
||||||
}),
|
}),
|
||||||
gval.Function("replace", func(args ...interface{}) (interface{}, error) {
|
gval.Function("replace", func(args ...interface{}) (interface{}, error) {
|
||||||
return strings.ReplaceAll(args[0].(string), args[1].(string), args[2].(string)), nil
|
pattern := regexp.MustCompile(args[1].(string))
|
||||||
|
results := pattern.ReplaceAllString(args[0].(string), args[2].(string))
|
||||||
|
return results, nil
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
|
@ -3,6 +3,7 @@ package obiutils
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
@ -125,6 +126,12 @@ func InterfaceToInt(i interface{}) (val int, err error) {
|
|||||||
val = int(t) // standardizes across systems
|
val = int(t) // standardizes across systems
|
||||||
case uint64:
|
case uint64:
|
||||||
val = int(t) // standardizes across systems
|
val = int(t) // standardizes across systems
|
||||||
|
case string:
|
||||||
|
rep, err := strconv.ParseInt(t, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
err = &NotAnFloat64{"value attribute cannot be casted to an int value"}
|
||||||
|
}
|
||||||
|
val = int(rep)
|
||||||
default:
|
default:
|
||||||
err = &NotAnInteger{"value attribute cannot be casted to an integer"}
|
err = &NotAnInteger{"value attribute cannot be casted to an integer"}
|
||||||
}
|
}
|
||||||
@ -162,6 +169,11 @@ func InterfaceToFloat64(i interface{}) (val float64, err error) {
|
|||||||
val = float64(t) // standardizes across systems
|
val = float64(t) // standardizes across systems
|
||||||
case uint64:
|
case uint64:
|
||||||
val = float64(t) // standardizes across systems
|
val = float64(t) // standardizes across systems
|
||||||
|
case string:
|
||||||
|
val, err = strconv.ParseFloat(t, 10)
|
||||||
|
if err != nil {
|
||||||
|
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user