From f74c0bd517fadfc48ce4092dbb4e807d6268ba59 Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Fri, 27 Jan 2023 11:35:39 +0100
Subject: [PATCH] First prelease of obiannotate

---
Reviewer notes (free-form text placed after the "---" separator, ahead of
the diffstat; none of it is part of the diff hunks below).

What the patch does
  * cmd/obitools/obiannotate/main.go (new file): command entry point.
    It defers obiseq.LogBioSeqStatus(), starts a CPU profile written to
    "cpu.pprof" (relative path), builds an option parser from
    obiannotate.OptionSet, reads the sequences named by the remaining
    arguments with obiconvert.ReadBioSequences, pipes them through
    obiannotate.CLIAnnotationPipeline() and passes the result to
    obiconvert.WriteBioSequences.
  * pkg/obitools/obiannotate/obiannotate.go: adds three SeqWorker builders
      - DeleteAttributesWorker: calls s.DeleteAttribute(k) for every listed
        key and returns the same sequence;
      - ToBeKeptAttributesWorker: ranges over the keys of s.Annotations()
        and deletes every key that is absent from toBeKept (only key
        presence is tested, the bool value is not consulted);
      - RenameAttributeWorker: calls s.RenameAttribute(key, value) for each
        map entry, the key being named newName and the value oldName;
    plus CLIAnnotationWorker, which chains the enabled workers in the order
    rename, delete, keep-only, and CLIAnnotationPipeline, which wraps that
    worker with obigrep.CLISequenceSelectionPredicate() through
    obiseq.SeqToSliceConditionalWorker(worker, predicate, true) and
    obiiter.SliceWorkerPipe.
  * pkg/obitools/obiannotate/options.go: renames
    SequenceSelectionOptionSet to SequenceAnnotationOptionSet (and updates
    the call in OptionSet), removes the "C" alias of the "clear" option,
    and adds six accessors over _toBeRenamed, _toBeDeleted and _keepOnly.
    CLIToBeKeptAttributes builds a fresh map[string]bool from _keepOnly on
    every call.

Points to confirm before merging
  * main.go discards the third value returned by optionParser, the second
    value returned by obiconvert.ReadBioSequences and the error returned
    by pprof.StartCPUProfile; the file opened for the profile is never
    closed explicitly.
  * CPU profiling is unconditional: every run creates/truncates cpu.pprof.
    The usage comment mentions ./obipairing, presumably carried over from
    another command - TODO confirm.
  * CLIAnnotationWorker starts from a nil SeqWorker and calls
    annotator.ChainWorkers(w) on it, and returns nil when no option is
    set. Whether ChainWorkers tolerates a nil receiver and whether
    SeqToSliceConditionalWorker tolerates a nil worker is not visible in
    this patch - verify against pkg/obiseq.
  * RenameAttributeWorker treats map keys as the new names and values as
    the old names; verify this matches how _toBeRenamed is filled (the
    option parsing code is outside the visible hunks).
  * The three statements inside CLIToBeKeptAttributes are not gofmt
    formatted ("bool,len(", "_,v", "d[v]=true").
  * CLIHasAttibuteToDelete / CLIAttibuteToDelete are spelled "Attibute";
    they are exported, so a rename has to be coordinated with callers.
  * Subject line: "prelease" is presumably meant to read "prerelease".

 cmd/obitools/obiannotate/main.go        | 42 ++++++++++++++
 pkg/obitools/obiannotate/obiannotate.go | 75 +++++++++++++++++++++++++
 pkg/obitools/obiannotate/options.go     | 36 +++++++++++-
 3 files changed, 150 insertions(+), 3 deletions(-)
 create mode 100644 cmd/obitools/obiannotate/main.go

diff --git a/cmd/obitools/obiannotate/main.go b/cmd/obitools/obiannotate/main.go
new file mode 100644
index 0000000..a7d72c3
--- /dev/null
+++ b/cmd/obitools/obiannotate/main.go
@@ -0,0 +1,42 @@
+package main
+
+import (
+	"os"
+	"runtime/pprof"
+
+	log "github.com/sirupsen/logrus"
+
+	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
+	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
+	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiannotate"
+	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
+)
+
+func main() {
+
+	defer obiseq.LogBioSeqStatus()
+
+	// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
+	f, err := os.Create("cpu.pprof")
+	if err != nil {
+		log.Fatal(err)
+	}
+	pprof.StartCPUProfile(f)
+	defer pprof.StopCPUProfile()
+
+	// go tool trace cpu.trace
+	// ftrace, err := os.Create("cpu.trace")
+	// if err != nil {
+	// 	log.Fatal(err)
+	// }
+	// trace.Start(ftrace)
+	// defer trace.Stop()
+
+	optionParser := obioptions.GenerateOptionParser(obiannotate.OptionSet)
+
+	_, args, _ := optionParser(os.Args)
+
+	sequences, _ := obiconvert.ReadBioSequences(args...)
+	annotator := obiannotate.CLIAnnotationPipeline()
+	obiconvert.WriteBioSequences(sequences.Pipe(annotator), true)
+}
diff --git a/pkg/obitools/obiannotate/obiannotate.go b/pkg/obitools/obiannotate/obiannotate.go
index a760842..a1f5c37 100644
--- a/pkg/obitools/obiannotate/obiannotate.go
+++ b/pkg/obitools/obiannotate/obiannotate.go
@@ -1,2 +1,77 @@
 package obiannotate
 
+import (
+	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
+	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
+	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
+)
+
+func DeleteAttributesWorker(toBeDeleted []string) obiseq.SeqWorker {
+	f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
+		for _, k := range toBeDeleted {
+			s.DeleteAttribute(k)
+		}
+		return s
+	}
+
+	return f
+}
+
+func ToBeKeptAttributesWorker(toBeKept map[string]bool) obiseq.SeqWorker {
+
+	f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
+		annot := s.Annotations()
+		for key := range annot {
+			if _, ok := toBeKept[key]; !ok {
+				s.DeleteAttribute(key)
+			}
+		}
+		return s
+	}
+
+	return f
+}
+
+func RenameAttributeWorker(toBeRenamed map[string]string) obiseq.SeqWorker {
+	f := func(s *obiseq.BioSequence) *obiseq.BioSequence {
+		for newName, oldName := range toBeRenamed {
+			s.RenameAttribute(newName, oldName)
+		}
+		return s
+	}
+
+	return f
+}
+
+func CLIAnnotationWorker() obiseq.SeqWorker {
+	var annotator obiseq.SeqWorker
+	annotator = nil
+
+	if CLIHasAttributeToBeRenamed() {
+		w := RenameAttributeWorker(CLIAttributeToBeRenamed())
+		annotator = annotator.ChainWorkers(w)
+	}
+
+	if CLIHasAttibuteToDelete() {
+		w := DeleteAttributesWorker(CLIAttibuteToDelete())
+		annotator = annotator.ChainWorkers(w)
+	}
+
+	if CLIHasToBeKeptAttributes() {
+		w := ToBeKeptAttributesWorker(CLIToBeKeptAttributes())
+		annotator = annotator.ChainWorkers(w)
+	}
+
+	return annotator
+}
+
+func CLIAnnotationPipeline() obiiter.Pipeable {
+
+	predicate := obigrep.CLISequenceSelectionPredicate()
+	worker := CLIAnnotationWorker()
+
+	annotator := obiseq.SeqToSliceConditionalWorker(worker, predicate, true)
+	f := obiiter.SliceWorkerPipe(annotator)
+
+	return f
+}
diff --git a/pkg/obitools/obiannotate/options.go b/pkg/obitools/obiannotate/options.go
index 7c8fd20..fb0018f 100644
--- a/pkg/obitools/obiannotate/options.go
+++ b/pkg/obitools/obiannotate/options.go
@@ -16,13 +16,12 @@ var _clearAll = false
 var _setSeqLength = false
 var _uniqueID = false
 
-func SequenceSelectionOptionSet(options *getoptions.GetOpt) {
+func SequenceAnnotationOptionSet(options *getoptions.GetOpt) {
 	options.BoolVar(&_addRank, "seq-rank", _addRank,
 		options.Description("Adds a new attribute named seq_rank to the sequence record indicating its entry number in the sequence file."),
 	)
 
 	options.BoolVar(&_clearAll, "clear", _clearAll,
-		options.Alias("C"),
 		options.Description("Clears all attributes associated to the sequence records."),
 	)
 
@@ -65,7 +64,7 @@ func SequenceSelectionOptionSet(options *getoptions.GetOpt) {
 func OptionSet(options *getoptions.GetOpt) {
 	obiconvert.OptionSet(options)
 	obigrep.SequenceSelectionOptionSet(options)
-	SequenceSelectionOptionSet(options)
+	SequenceAnnotationOptionSet(options)
 }
 
 // -S :, --set-tag=:
@@ -91,3 +90,34 @@ func OptionSet(options *getoptions.GetOpt) {
 
 // --uniq-id
 // Forces sequence record ids to be unique.
+
+func CLIHasAttributeToBeRenamed() bool {
+	return len(_toBeRenamed) > 0
+}
+
+func CLIAttributeToBeRenamed() map[string]string {
+	return _toBeRenamed
+}
+
+func CLIHasAttibuteToDelete() bool {
+	return len(_toBeDeleted) > 0
+}
+
+func CLIAttibuteToDelete() []string {
+	return _toBeDeleted
+}
+
+func CLIHasToBeKeptAttributes() bool {
+	return len(_keepOnly) > 0
+}
+
+func CLIToBeKeptAttributes() map[string]bool {
+	d := make(map[string]bool,len(_keepOnly))
+
+	for _,v := range _keepOnly {
+		d[v]=true
+	}
+
+	return d
+}
+