Change path of the obitools pkg

Former-commit-id: 311cbf8df3b990b393c6f4885d62e74564423b65
This commit is contained in:
2023-11-29 12:14:37 +01:00
parent 72ad84c2d4
commit 8d77cc4133
122 changed files with 1004 additions and 544 deletions

View File

@@ -1,14 +1,17 @@
package obiannotate
import (
"fmt"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obicorazick"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitax"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obicorazick"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obigrep"
)
func DeleteAttributesWorker(toBeDeleted []string) obiseq.SeqWorker {
@@ -22,14 +25,69 @@ func DeleteAttributesWorker(toBeDeleted []string) obiseq.SeqWorker {
return f
}
// func MatchPatternWorker(pattern string, errormax int, allowsIndel bool) obiseq.SeqWorker {
// pat, err := obiapat.MakeApatPattern(pattern, errormax, allowsIndel)
// f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
// apats := obiapat.MakeApatSequence(s, false)
// pat.BestMatch(apats, 0)
// return s
// }
// }
// MatchPatternWorker builds a SeqWorker that searches every sequence for
// pattern and stores the best hit in the sequence annotations.
//
// The pattern is compiled once with obiapat.MakeApatPattern, using errormax
// and allowsIndel, and its reverse complement is derived from it. For each
// sequence the direct pattern is tried first; the reverse-complemented
// pattern is tried only when the direct one does not match.
//
// On a hit, four annotations are written:
//   - the pattern itself,
//   - the matched subsequence (reverse-complemented for a reverse hit),
//   - the error count returned by BestMatch,
//   - the location, written "start+1..end" and wrapped in "complement(...)"
//     for a reverse hit.
//
// When name is empty or "pattern" the keys are "pattern", "pattern_match",
// "pattern_error" and "pattern_location"; otherwise they are
// "<name>_pattern", "<name>_match", "<name>_error" and "<name>_location".
//
// The worker always returns the sequence it received; a sequence without any
// hit gets no annotation. Compilation, preparation and extraction failures
// are reported through log.Fatalf.
func MatchPatternWorker(pattern, name string, errormax int, allowsIndel bool) obiseq.SeqWorker {
	direct, err := obiapat.MakeApatPattern(pattern, errormax, allowsIndel)
	if err != nil {
		log.Fatalf("error in compiling pattern (%s) : %v", pattern, err)
	}

	reverse, err := direct.ReverseComplement()
	if err != nil {
		log.Fatalf("error in reverse-complementing pattern (%s) : %v", pattern, err)
	}

	// An empty name falls back to the default prefix "pattern".
	prefix := name
	if prefix == "" {
		prefix = "pattern"
	}

	// With the default prefix the pattern slot is plain "pattern" rather
	// than "pattern_pattern".
	patternSlot := "pattern"
	if prefix != "pattern" {
		patternSlot = prefix + "_pattern"
	}
	matchSlot := prefix + "_match"
	errorSlot := prefix + "_error"
	locationSlot := prefix + "_location"

	return func(s *obiseq.BioSequence) *obiseq.BioSequence {
		prepared, err := obiapat.MakeApatSequence(s, false)
		if err != nil {
			log.Fatalf("error in preparing sequence %s : %v", s.Id(), err)
		}

		// Direct strand first, reverse strand only as a fallback.
		onReverse := false
		start, end, nerr, matched := direct.BestMatch(prepared, 0, s.Len())
		if !matched {
			onReverse = true
			start, end, nerr, matched = reverse.BestMatch(prepared, 0, s.Len())
		}
		if !matched {
			return s
		}

		annot := s.Annotations()
		annot[patternSlot] = pattern

		hit, err := s.Subsequence(start, end, false)
		if err != nil {
			log.Fatalf("Error in extracting pattern of sequence %s [%d;%d[ : %v",
				s.Id(), start, end, err)
		}

		if onReverse {
			// Report the hit in the orientation of the pattern.
			annot[matchSlot] = hit.ReverseComplement(true).String()
			annot[locationSlot] = fmt.Sprintf("complement(%d..%d)", start+1, end)
		} else {
			annot[matchSlot] = hit.String()
			annot[locationSlot] = fmt.Sprintf("%d..%d", start+1, end)
		}
		annot[errorSlot] = nerr

		return s
	}
}
func ToBeKeptAttributesWorker(toBeKept []string) obiseq.SeqWorker {
@@ -229,6 +287,14 @@ func CLIAnnotationWorker() obiseq.SeqWorker {
annotator = annotator.ChainWorkers(w)
}
if CLIHasPattern() {
log.Infof("Match pattern %s with %d error", CLIPattern(), CLIPatternError())
w := MatchPatternWorker(CLIPattern(), CLIHasPatternName(),
CLIPatternError(), CLIPatternInDels())
annotator = annotator.ChainWorkers(w)
}
return annotator
}

View File

@@ -7,8 +7,8 @@ import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obigrep"
"github.com/DavidGamba/go-getoptions"
)
@@ -24,6 +24,9 @@ var _setSeqLength = false
var _uniqueID = false
var _ahoCorazick = ""
// Settings used by the pattern-matching annotation.
var (
	// _pattern is the pattern to look for; the empty string means that no
	// pattern matching was requested.
	_pattern = ""
	// _pattern_error is the maximum number of errors allowed during
	// pattern matching (--pattern-error).
	_pattern_error = 0
	// _pattern_indel tells whether indels are allowed during pattern
	// matching (--allows-indels).
	_pattern_indel = false
	// _pattern_name is the prefix of the slots reporting the match
	// (--pattern-name).
	_pattern_name = "pattern"
)
var _lcaSlot = ""
var _lcaError = 0.0
var _setId = ""
@@ -52,6 +55,18 @@ func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
"to the sequence.",
))
options.StringVar(&_pattern_name, "pattern-name", _pattern_name,
options.Description("specify the name to use as prefix for the slots reporting the match"),
)
options.IntVar(&_pattern_error, "pattern-error", _pattern_error,
options.Description("Maximum number of allowed error during pattern matching"),
)
options.BoolVar(&_pattern_indel, "allows-indels", _pattern_indel,
options.Description("Allows for indel during pattern matching"),
)
options.StringVar(&_lcaSlot, "add-lca-in", _lcaSlot,
options.ArgName("SLOT_NAME"),
options.Description("From the taxonomic annotation of the sequence (taxid slot or merged_taxid slot), "+
@@ -264,3 +279,23 @@ func CLIHasCut() bool {
return f != 0 && t != 0
}
// CLIPattern returns the pattern currently stored in _pattern.
// The empty string means that no pattern was provided.
func CLIPattern() string {
	return _pattern
}
// CLIHasPattern reports whether a non-empty pattern is stored in _pattern.
func CLIHasPattern() bool {
	return len(_pattern) > 0
}
// CLIHasPatternName returns the name used as prefix for the slots reporting
// a pattern match (option --pattern-name).
//
// Despite the "Has" in its name, it returns the name itself, not a boolean.
func CLIHasPatternName() string {
	return _pattern_name
}
// CLIPatternError returns the maximum number of errors allowed during
// pattern matching (option --pattern-error).
func CLIPatternError() int {
	return _pattern_error
}
// CLIPatternInDels reports whether indels are allowed during pattern
// matching (option --allows-indels).
func CLIPatternInDels() bool {
	return _pattern_indel
}