mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Adds Aho-Corasick matching to obiannotate
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
package obiannotate
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obicorazick"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
|
||||
@@ -118,6 +121,14 @@ func CLIAnnotationWorker() obiseq.SeqWorker {
|
||||
annotator = annotator.ChainWorkers(w)
|
||||
}
|
||||
|
||||
if CLIHasAhoCorasick() {
|
||||
patterns := CLIAhoCorazick()
|
||||
log.Println("Matching : ", len(patterns), " patterns on sequences")
|
||||
w := obicorazick.AhoCorazickWorker("aho_corasick", patterns)
|
||||
log.Println("Automata built")
|
||||
annotator = annotator.ChainWorkers(w)
|
||||
}
|
||||
|
||||
return annotator
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
package obiannotate
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
@@ -15,6 +20,7 @@ var _tagList = ""
|
||||
var _clearAll = false
|
||||
var _setSeqLength = false
|
||||
var _uniqueID = false
|
||||
// _ahoCorazick receives the value of the "aho-corasick" command-line option;
// it is used as the path of the pattern file read by CLIAhoCorazick.
var _ahoCorazick = ""
|
||||
|
||||
func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
|
||||
// options.BoolVar(&_addRank, "seq-rank", _addRank,
|
||||
@@ -29,6 +35,8 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
|
||||
options.Description("Adds attribute with seq_length as a key and sequence length as a value."),
|
||||
)
|
||||
|
||||
options.StringVar(&_ahoCorazick, "aho-corasick", _ahoCorazick,
|
||||
options.Description("Adds an aho-corasick attribut with the count of matches of the provided patterns."))
|
||||
// options.BoolVar(&_uniqueID, "uniq-id", _uniqueID,
|
||||
// options.Description("Forces sequence record ids to be unique."),
|
||||
// )
|
||||
@@ -130,4 +138,29 @@ func CLIHasSetLengthFlag() bool {
|
||||
|
||||
// CLIHasClearAllFlag returns the current value of the package-level _clearAll flag.
func CLIHasClearAllFlag() bool {
|
||||
return _clearAll
|
||||
}
|
||||
}
|
||||
|
||||
func CLIHasAhoCorasick() bool {
|
||||
_, err := os.Stat(_ahoCorazick)
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func CLIAhoCorazick() []string {
|
||||
content, err := ioutil.ReadFile(_ahoCorazick)
|
||||
if err != nil {
|
||||
log.Fatalln("Cannot open file ", _ahoCorazick)
|
||||
}
|
||||
lines := strings.Split(string(content), "\n")
|
||||
|
||||
j := 0
|
||||
for _, s := range lines {
|
||||
if len(s) > 0 {
|
||||
lines[j] = strings.ToLower(s)
|
||||
j++
|
||||
}
|
||||
}
|
||||
|
||||
lines = lines[0:j]
|
||||
|
||||
return lines
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user