From 2d375df94f3a33eacb00aefcb6aec1d3b3291f4f Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sun, 22 Jan 2023 22:39:13 +0100 Subject: [PATCH] move the worker class to the obiseq package --- cmd/obitools/obicomplement/main.go | 4 +- pkg/obiapat/pcr.go | 3 +- pkg/obiformats/fastseq_header.go | 2 +- pkg/obiformats/options.go | 7 +-- pkg/obiiter/batchiterator.go | 2 + pkg/obiiter/workers.go | 29 ++------- pkg/obingslibrary/worker.go | 3 +- pkg/obiseq/revcomp.go | 9 +++ pkg/obiseq/worker.go | 31 ++++++++++ pkg/obitax/sequence_workers.go | 11 ++-- pkg/obitools/obiannotate/options.go | 93 +++++++++++++++++++++++++++++ pkg/obitools/obitag/obitag.go | 2 +- 12 files changed, 153 insertions(+), 43 deletions(-) create mode 100644 pkg/obiseq/worker.go create mode 100644 pkg/obitools/obiannotate/options.go diff --git a/cmd/obitools/obicomplement/main.go b/cmd/obitools/obicomplement/main.go index 3645d98..3036599 100644 --- a/cmd/obitools/obicomplement/main.go +++ b/cmd/obitools/obicomplement/main.go @@ -3,7 +3,7 @@ package main import ( "os" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" @@ -16,6 +16,6 @@ func main() { fs, _ := obiconvert.ReadBioSequences(args...) - comp := fs.MakeIWorker(obiiter.ReverseComplementWorker(true)) + comp := fs.MakeIWorker(obiseq.ReverseComplementWorker(true)) obiconvert.WriteBioSequences(comp, true) } diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index 8239a9d..e446997 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -4,7 +4,6 @@ import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) @@ -441,7 +440,7 @@ func PCRSlice(sequences obiseq.BioSequenceSlice, // PCRSliceWorker is a worker function builder which produce // job function usable by the obiseq.MakeISliceWorker function. -func PCRSliceWorker(options ...WithOption) obiiter.SeqSliceWorker { +func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker { opt := MakeOptions(options) worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { diff --git a/pkg/obiformats/fastseq_header.go b/pkg/obiformats/fastseq_header.go index 7554f59..fca2896 100644 --- a/pkg/obiformats/fastseq_header.go +++ b/pkg/obiformats/fastseq_header.go @@ -18,7 +18,7 @@ func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) { func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence, options ...WithOption) obiiter.IBioSequence { opt := MakeOptions(options) - return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()), + return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()), opt.ParallelWorkers(), opt.BufferSize()) } diff --git a/pkg/obiformats/options.go b/pkg/obiformats/options.go index 19cc540..9790a0c 100644 --- a/pkg/obiformats/options.go +++ b/pkg/obiformats/options.go @@ -1,12 +1,11 @@ package obiformats import ( - "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) type __options__ struct { - fastseq_header_parser obiiter.SeqAnnotator + fastseq_header_parser obiseq.SeqAnnotator fastseq_header_writer func(*obiseq.BioSequence) string with_progress_bar bool buffer_size int @@ -59,7 +58,7 @@ func (opt Options) ParallelWorkers() int { return opt.pointer.parallel_workers } -func (opt Options) ParseFastSeqHeader() obiiter.SeqAnnotator { +func (opt Options) ParseFastSeqHeader() obiseq.SeqAnnotator { return opt.pointer.fastseq_header_parser } @@ -124,7 +123,7 @@ func OptionsQualitySolexa() WithOption { return OptionsQualityShift(64) } -func OptionsFastSeqHeaderParser(parser obiiter.SeqAnnotator) WithOption { +func OptionsFastSeqHeaderParser(parser obiseq.SeqAnnotator) WithOption { f := WithOption(func(opt Options) { opt.pointer.fastseq_header_parser = parser }) diff --git a/pkg/obiiter/batchiterator.go b/pkg/obiiter/batchiterator.go index f24b46a..5ca2967 100644 --- a/pkg/obiiter/batchiterator.go +++ b/pkg/obiiter/batchiterator.go @@ -1,3 +1,5 @@ +// It takes a slice of BioSequence objects, and returns an iterator that will return batches of +// BioSequence objects package obiiter import ( diff --git a/pkg/obiiter/workers.go b/pkg/obiiter/workers.go index 27132f2..b5eedf5 100644 --- a/pkg/obiiter/workers.go +++ b/pkg/obiiter/workers.go @@ -6,18 +6,6 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) -type SeqAnnotator func(*obiseq.BioSequence) - -type SeqWorker func(*obiseq.BioSequence) *obiseq.BioSequence -type SeqSliceWorker func(obiseq.BioSequenceSlice) obiseq.BioSequenceSlice - -func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker { - f := func(seq *obiseq.BioSequence) *obiseq.BioSequence { - function(seq) - return seq - } - return f -} // That method allows for applying a SeqWorker function on every sequences. // @@ -27,7 +15,7 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker { // Moreover the SeqWorker function, the method accepted two optional integer parameters. // - First is allowing to indicates the number of workers running in parallele (default 4) // - The second the size of the chanel buffer. By default set to the same value than the input buffer. -func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence { +func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int) IBioSequence { nworkers := 4 buffsize := iterator.BufferSize() @@ -70,7 +58,7 @@ func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSeq } func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate, - worker SeqWorker, sizes ...int) IBioSequence { + worker obiseq.SeqWorker, sizes ...int) IBioSequence { nworkers := 4 buffsize := iterator.BufferSize() @@ -114,7 +102,7 @@ func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePre return newIter } -func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequence { +func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, sizes ...int) IBioSequence { nworkers := 4 buffsize := iterator.BufferSize() @@ -153,7 +141,7 @@ func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...in return newIter } -func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable { +func WorkerPipe(worker obiseq.SeqWorker, sizes ...int) Pipeable { f := func(iterator IBioSequence) IBioSequence { return iterator.MakeIWorker(worker, sizes...) } @@ -161,7 +149,7 @@ func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable { return f } -func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable { +func SliceWorkerPipe(worker obiseq.SeqSliceWorker, sizes ...int) Pipeable { f := func(iterator IBioSequence) IBioSequence { return iterator.MakeISliceWorker(worker, sizes...) } @@ -169,10 +157,3 @@ func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable { return f } -func ReverseComplementWorker(inplace bool) SeqWorker { - f := func(input *obiseq.BioSequence) *obiseq.BioSequence { - return input.ReverseComplement(inplace) - } - - return f -} diff --git a/pkg/obingslibrary/worker.go b/pkg/obingslibrary/worker.go index 515f9df..fb57f0f 100644 --- a/pkg/obingslibrary/worker.go +++ b/pkg/obingslibrary/worker.go @@ -1,7 +1,6 @@ package obingslibrary import ( - "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) @@ -170,7 +169,7 @@ func ExtractBarcodeSlice(ngslibrary NGSLibrary, } func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary, - options ...WithOption) obiiter.SeqSliceWorker { + options ...WithOption) obiseq.SeqSliceWorker { opt := MakeOptions(options) diff --git a/pkg/obiseq/revcomp.go b/pkg/obiseq/revcomp.go index ec65488..e23bd50 100644 --- a/pkg/obiseq/revcomp.go +++ b/pkg/obiseq/revcomp.go @@ -64,3 +64,12 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence { return sequence } + + +func ReverseComplementWorker(inplace bool) SeqWorker { + f := func(input *BioSequence) *BioSequence { + return input.ReverseComplement(inplace) + } + + return f +} diff --git a/pkg/obiseq/worker.go b/pkg/obiseq/worker.go new file mode 100644 index 0000000..adecc0f --- /dev/null +++ b/pkg/obiseq/worker.go @@ -0,0 +1,31 @@ +package obiseq + +type SeqAnnotator func(*BioSequence) + +type SeqWorker func(*BioSequence) *BioSequence +type SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice + +func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker { + f := func(seq *BioSequence) *BioSequence { + function(seq) + return seq + } + return f +} + +func SeqToSliceWorker(worker SeqWorker, inplace bool) SeqSliceWorker { + f := func(input BioSequenceSlice) BioSequenceSlice { + output := input + if (! inplace) { + output = MakeBioSequenceSlice() + } + for i,s := range(input) { + output[i] = worker(s) + } + + return output + } + + return f +} + diff --git a/pkg/obitax/sequence_workers.go b/pkg/obitax/sequence_workers.go index bfa591b..6ca6186 100644 --- a/pkg/obitax/sequence_workers.go +++ b/pkg/obitax/sequence_workers.go @@ -2,12 +2,11 @@ package obitax import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" log "github.com/sirupsen/logrus" ) -func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiiter.SeqWorker { +func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiseq.SeqWorker { if !goutils.Contains(taxonomy.RankList(), rank) { log.Fatalf("%s is not a valid rank (allowed ranks are %v)", @@ -23,7 +22,7 @@ func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiiter.SeqWorke return w } -func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiiter.SeqWorker { +func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiseq.SeqWorker { w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence { taxonomy.SetSpecies(sequence) @@ -33,7 +32,7 @@ func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiiter.SeqWorker { return w } -func (taxonomy *Taxonomy) MakeSetGenusWorker() obiiter.SeqWorker { +func (taxonomy *Taxonomy) MakeSetGenusWorker() obiseq.SeqWorker { w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence { taxonomy.SetGenus(sequence) @@ -43,7 +42,7 @@ func (taxonomy *Taxonomy) MakeSetGenusWorker() obiiter.SeqWorker { return w } -func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiiter.SeqWorker { +func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiseq.SeqWorker { w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence { taxonomy.SetFamily(sequence) @@ -52,5 +51,3 @@ func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiiter.SeqWorker { return w } - - diff --git a/pkg/obitools/obiannotate/options.go b/pkg/obitools/obiannotate/options.go new file mode 100644 index 0000000..7c8fd20 --- /dev/null +++ b/pkg/obitools/obiannotate/options.go @@ -0,0 +1,93 @@ +package obiannotate + +import ( + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep" + "github.com/DavidGamba/go-getoptions" +) + +var _addRank = false +var _toBeRenamed = make(map[string]string, 0) +var _toBeDeleted = make([]string, 0) +var _keepOnly = make([]string, 0) +var _taxonAtRank = make([]string, 0) +var _tagList = "" +var _clearAll = false +var _setSeqLength = false +var _uniqueID = false + +func SequenceSelectionOptionSet(options *getoptions.GetOpt) { + options.BoolVar(&_addRank, "seq-rank", _addRank, + options.Description("Adds a new attribute named seq_rank to the sequence record indicating its entry number in the sequence file."), + ) + + options.BoolVar(&_clearAll, "clear", _clearAll, + options.Alias("C"), + options.Description("Clears all attributes associated to the sequence records."), + ) + + options.BoolVar(&_setSeqLength, "length", _setSeqLength, + options.Description("Adds attribute with seq_length as a key and sequence length as a value."), + ) + + options.BoolVar(&_uniqueID, "uniq-id", _uniqueID, + options.Description("Forces sequence record ids to be unique."), + ) + options.StringMapVar(&_toBeRenamed, "rename-tag", 1, 1, + options.Alias("R"), + options.ArgName("NEW_NAME=OLD_NAME"), + options.Description("Changes attribute name to . When attribute named "+ + " is missing, the sequence record is skipped and the next one is examined.")) + + options.StringSliceVar(&_toBeDeleted, "delete-tag", 1, 1, + options.ArgName("KEY"), + options.Description(" Deletes attribute named .When this attribute is missing,"+ + " the sequence record is skipped and the next one is examined.")) + + options.StringSliceVar(&_taxonAtRank, "with-taxon-at-rank", 1, 1, + options.ArgName("RANK_NAME"), + options.Description("Adds taxonomic annotation at taxonomic rank .")) + + options.StringVar(&_tagList, "tag-list", _tagList, + options.ArgName("FILENAME"), + options.Description(" points to a file containing attribute names"+ + " and values to modify for specified sequence records.")) + + options.StringSliceVar(&_keepOnly, "keep", 1, 1, + options.Alias("k"), + options.ArgName("KEY"), + options.Description("Keeps only attribute with key . Several -k options can be combined.")) + +} + +// OptionSet adds to the basic option set every options declared for +// the obipcr command +func OptionSet(options *getoptions.GetOpt) { + obiconvert.OptionSet(options) + obigrep.SequenceSelectionOptionSet(options) + SequenceSelectionOptionSet(options) +} + +// -S :, --set-tag=: +// Creates a new attribute named with a key and a value computed from . + +// --set-identifier= +// Sets sequence record identifier with a value computed from . + +// --run= +// Runs a python expression on each selected sequence. + +// --set-sequence= +// Changes the sequence itself with a value computed from . + +// -T, --set-definition= +// Sets sequence definition with a value computed from . + +// -O, --only-valid-python +// Allows only valid python expressions. + +// -m , --mcl= +// Creates a new attribute containing the number of the cluster the sequence record was assigned to, as indicated in file . + +// --uniq-id +// Forces sequence record ids to be unique. diff --git a/pkg/obitools/obitag/obitag.go b/pkg/obitools/obitag/obitag.go index 62a10cc..fe249d7 100644 --- a/pkg/obitools/obitag/obitag.go +++ b/pkg/obitools/obitag/obitag.go @@ -165,7 +165,7 @@ func Identify(sequence *obiseq.BioSequence, func IdentifySeqWorker(references obiseq.BioSequenceSlice, refcounts []*obikmer.Table4mer, taxo *obitax.Taxonomy, - runExact bool) obiiter.SeqWorker { + runExact bool) obiseq.SeqWorker { return func(sequence *obiseq.BioSequence) *obiseq.BioSequence { return Identify(sequence, references, refcounts, taxo, runExact) }