mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
move the worker class to the obiseq package
This commit is contained in:
@ -3,7 +3,7 @@ package main
|
||||
import (
|
||||
"os"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
@ -16,6 +16,6 @@ func main() {
|
||||
|
||||
fs, _ := obiconvert.ReadBioSequences(args...)
|
||||
|
||||
comp := fs.MakeIWorker(obiiter.ReverseComplementWorker(true))
|
||||
comp := fs.MakeIWorker(obiseq.ReverseComplementWorker(true))
|
||||
obiconvert.WriteBioSequences(comp, true)
|
||||
}
|
||||
|
@ -4,7 +4,6 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
@ -441,7 +440,7 @@ func PCRSlice(sequences obiseq.BioSequenceSlice,
|
||||
|
||||
// PCRSliceWorker is a worker function builder which produces a
|
||||
// job function usable by the IBioSequence.MakeISliceWorker method.
|
||||
func PCRSliceWorker(options ...WithOption) obiiter.SeqSliceWorker {
|
||||
func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
|
||||
|
||||
opt := MakeOptions(options)
|
||||
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||
|
@ -18,7 +18,7 @@ func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
|
||||
func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence,
|
||||
options ...WithOption) obiiter.IBioSequence {
|
||||
opt := MakeOptions(options)
|
||||
return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
|
||||
return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
|
||||
opt.ParallelWorkers(),
|
||||
opt.BufferSize())
|
||||
}
|
||||
|
@ -1,12 +1,11 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
type __options__ struct {
|
||||
fastseq_header_parser obiiter.SeqAnnotator
|
||||
fastseq_header_parser obiseq.SeqAnnotator
|
||||
fastseq_header_writer func(*obiseq.BioSequence) string
|
||||
with_progress_bar bool
|
||||
buffer_size int
|
||||
@ -59,7 +58,7 @@ func (opt Options) ParallelWorkers() int {
|
||||
return opt.pointer.parallel_workers
|
||||
}
|
||||
|
||||
func (opt Options) ParseFastSeqHeader() obiiter.SeqAnnotator {
|
||||
func (opt Options) ParseFastSeqHeader() obiseq.SeqAnnotator {
|
||||
return opt.pointer.fastseq_header_parser
|
||||
}
|
||||
|
||||
@ -124,7 +123,7 @@ func OptionsQualitySolexa() WithOption {
|
||||
return OptionsQualityShift(64)
|
||||
}
|
||||
|
||||
func OptionsFastSeqHeaderParser(parser obiiter.SeqAnnotator) WithOption {
|
||||
func OptionsFastSeqHeaderParser(parser obiseq.SeqAnnotator) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.fastseq_header_parser = parser
|
||||
})
|
||||
|
@ -1,3 +1,5 @@
|
||||
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of
|
||||
// BioSequence objects
|
||||
package obiiter
|
||||
|
||||
import (
|
||||
|
@ -6,18 +6,6 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
type SeqAnnotator func(*obiseq.BioSequence)
|
||||
|
||||
type SeqWorker func(*obiseq.BioSequence) *obiseq.BioSequence
|
||||
type SeqSliceWorker func(obiseq.BioSequenceSlice) obiseq.BioSequenceSlice
|
||||
|
||||
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
f := func(seq *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
function(seq)
|
||||
return seq
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
// That method applies a SeqWorker function to every sequence.
|
||||
//
|
||||
@ -27,7 +15,7 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
// In addition to the SeqWorker function, the method accepts two optional integer parameters.
|
||||
// - The first sets the number of workers running in parallel (default 4).
|
||||
// - The second sets the size of the channel buffer. By default it is the same as the input iterator's buffer size.
|
||||
func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
|
||||
func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int) IBioSequence {
|
||||
nworkers := 4
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
@ -70,7 +58,7 @@ func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSeq
|
||||
}
|
||||
|
||||
func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
|
||||
worker SeqWorker, sizes ...int) IBioSequence {
|
||||
worker obiseq.SeqWorker, sizes ...int) IBioSequence {
|
||||
nworkers := 4
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
@ -114,7 +102,7 @@ func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePre
|
||||
return newIter
|
||||
}
|
||||
|
||||
func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequence {
|
||||
func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, sizes ...int) IBioSequence {
|
||||
nworkers := 4
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
@ -153,7 +141,7 @@ func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...in
|
||||
return newIter
|
||||
}
|
||||
|
||||
func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
|
||||
func WorkerPipe(worker obiseq.SeqWorker, sizes ...int) Pipeable {
|
||||
f := func(iterator IBioSequence) IBioSequence {
|
||||
return iterator.MakeIWorker(worker, sizes...)
|
||||
}
|
||||
@ -161,7 +149,7 @@ func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
|
||||
return f
|
||||
}
|
||||
|
||||
func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
|
||||
func SliceWorkerPipe(worker obiseq.SeqSliceWorker, sizes ...int) Pipeable {
|
||||
f := func(iterator IBioSequence) IBioSequence {
|
||||
return iterator.MakeISliceWorker(worker, sizes...)
|
||||
}
|
||||
@ -169,10 +157,3 @@ func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
|
||||
return f
|
||||
}
|
||||
|
||||
func ReverseComplementWorker(inplace bool) SeqWorker {
|
||||
f := func(input *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
return input.ReverseComplement(inplace)
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
package obingslibrary
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
@ -170,7 +169,7 @@ func ExtractBarcodeSlice(ngslibrary NGSLibrary,
|
||||
}
|
||||
|
||||
func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
|
||||
options ...WithOption) obiiter.SeqSliceWorker {
|
||||
options ...WithOption) obiseq.SeqSliceWorker {
|
||||
|
||||
opt := MakeOptions(options)
|
||||
|
||||
|
@ -64,3 +64,12 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
|
||||
|
||||
return sequence
|
||||
}
|
||||
|
||||
|
||||
func ReverseComplementWorker(inplace bool) SeqWorker {
|
||||
f := func(input *BioSequence) *BioSequence {
|
||||
return input.ReverseComplement(inplace)
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
31
pkg/obiseq/worker.go
Normal file
31
pkg/obiseq/worker.go
Normal file
@ -0,0 +1,31 @@
|
||||
package obiseq
|
||||
|
||||
type SeqAnnotator func(*BioSequence)
|
||||
|
||||
type SeqWorker func(*BioSequence) *BioSequence
|
||||
type SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice
|
||||
|
||||
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
f := func(seq *BioSequence) *BioSequence {
|
||||
function(seq)
|
||||
return seq
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
func SeqToSliceWorker(worker SeqWorker, inplace bool) SeqSliceWorker {
|
||||
f := func(input BioSequenceSlice) BioSequenceSlice {
|
||||
output := input
|
||||
if (! inplace) {
|
||||
output = MakeBioSequenceSlice()
|
||||
}
|
||||
for i,s := range(input) {
|
||||
output[i] = worker(s)
|
||||
}
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
@ -2,12 +2,11 @@ package obitax
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiiter.SeqWorker {
|
||||
func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiseq.SeqWorker {
|
||||
|
||||
if !goutils.Contains(taxonomy.RankList(), rank) {
|
||||
log.Fatalf("%s is not a valid rank (allowed ranks are %v)",
|
||||
@ -23,7 +22,7 @@ func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiiter.SeqWorke
|
||||
return w
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiiter.SeqWorker {
|
||||
func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
taxonomy.SetSpecies(sequence)
|
||||
@ -33,7 +32,7 @@ func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiiter.SeqWorker {
|
||||
return w
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetGenusWorker() obiiter.SeqWorker {
|
||||
func (taxonomy *Taxonomy) MakeSetGenusWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
taxonomy.SetGenus(sequence)
|
||||
@ -43,7 +42,7 @@ func (taxonomy *Taxonomy) MakeSetGenusWorker() obiiter.SeqWorker {
|
||||
return w
|
||||
}
|
||||
|
||||
func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiiter.SeqWorker {
|
||||
func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiseq.SeqWorker {
|
||||
|
||||
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
taxonomy.SetFamily(sequence)
|
||||
@ -52,5 +51,3 @@ func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiiter.SeqWorker {
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
|
||||
|
93
pkg/obitools/obiannotate/options.go
Normal file
93
pkg/obitools/obiannotate/options.go
Normal file
@ -0,0 +1,93 @@
|
||||
package obiannotate
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
// Storage for the obiannotate command line options. Each variable is
// bound to its option by SequenceSelectionOptionSet.
var (
	// _addRank is set by --seq-rank.
	_addRank = false
	// _toBeRenamed is filled by --rename-tag.
	_toBeRenamed = make(map[string]string, 0)
	// _toBeDeleted is filled by --delete-tag.
	_toBeDeleted = make([]string, 0)
	// _keepOnly is filled by --keep.
	_keepOnly = make([]string, 0)
	// _taxonAtRank is filled by --with-taxon-at-rank.
	_taxonAtRank = make([]string, 0)
	// _tagList is set by --tag-list.
	_tagList = ""
	// _clearAll is set by --clear.
	_clearAll = false
	// _setSeqLength is set by --length.
	_setSeqLength = false
	// _uniqueID is set by --uniq-id.
	_uniqueID = false
)
|
||||
|
||||
func SequenceSelectionOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&_addRank, "seq-rank", _addRank,
|
||||
options.Description("Adds a new attribute named seq_rank to the sequence record indicating its entry number in the sequence file."),
|
||||
)
|
||||
|
||||
options.BoolVar(&_clearAll, "clear", _clearAll,
|
||||
options.Alias("C"),
|
||||
options.Description("Clears all attributes associated to the sequence records."),
|
||||
)
|
||||
|
||||
options.BoolVar(&_setSeqLength, "length", _setSeqLength,
|
||||
options.Description("Adds attribute with seq_length as a key and sequence length as a value."),
|
||||
)
|
||||
|
||||
options.BoolVar(&_uniqueID, "uniq-id", _uniqueID,
|
||||
options.Description("Forces sequence record ids to be unique."),
|
||||
)
|
||||
options.StringMapVar(&_toBeRenamed, "rename-tag", 1, 1,
|
||||
options.Alias("R"),
|
||||
options.ArgName("NEW_NAME=OLD_NAME"),
|
||||
options.Description("Changes attribute name <OLD_NAME> to <NEW_NAME>. When attribute named <OLD_NAME>"+
|
||||
" is missing, the sequence record is skipped and the next one is examined."))
|
||||
|
||||
options.StringSliceVar(&_toBeDeleted, "delete-tag", 1, 1,
|
||||
options.ArgName("KEY"),
|
||||
options.Description(" Deletes attribute named <KEY>.When this attribute is missing,"+
|
||||
" the sequence record is skipped and the next one is examined."))
|
||||
|
||||
options.StringSliceVar(&_taxonAtRank, "with-taxon-at-rank", 1, 1,
|
||||
options.ArgName("RANK_NAME"),
|
||||
options.Description("Adds taxonomic annotation at taxonomic rank <RANK_NAME>."))
|
||||
|
||||
options.StringVar(&_tagList, "tag-list", _tagList,
|
||||
options.ArgName("FILENAME"),
|
||||
options.Description("<FILENAME> points to a file containing attribute names"+
|
||||
" and values to modify for specified sequence records."))
|
||||
|
||||
options.StringSliceVar(&_keepOnly, "keep", 1, 1,
|
||||
options.Alias("k"),
|
||||
options.ArgName("KEY"),
|
||||
options.Description("Keeps only attribute with key <KEY>. Several -k options can be combined."))
|
||||
|
||||
}
|
||||
|
||||
// OptionSet adds to the basic option set every options declared for
|
||||
// the obipcr command
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
obigrep.SequenceSelectionOptionSet(options)
|
||||
SequenceSelectionOptionSet(options)
|
||||
}
|
||||
|
||||
// -S <KEY>:<PYTHON_EXPRESSION>, --set-tag=<KEY>:<PYTHON_EXPRESSION>
|
||||
// Creates a new attribute named with a key <KEY> and a value computed from <PYTHON_EXPRESSION>.
|
||||
|
||||
// --set-identifier=<PYTHON_EXPRESSION>
|
||||
// Sets sequence record identifier with a value computed from <PYTHON_EXPRESSION>.
|
||||
|
||||
// --run=<PYTHON_EXPRESSION>
|
||||
// Runs a python expression on each selected sequence.
|
||||
|
||||
// --set-sequence=<PYTHON_EXPRESSION>
|
||||
// Changes the sequence itself with a value computed from <PYTHON_EXPRESSION>.
|
||||
|
||||
// -T, --set-definition=<PYTHON_EXPRESSION>
|
||||
// Sets sequence definition with a value computed from <PYTHON_EXPRESSION>.
|
||||
|
||||
// -O, --only-valid-python
|
||||
// Allows only valid python expressions.
|
||||
|
||||
// -m <MCLFILE>, --mcl=<MCLFILE>
|
||||
// Creates a new attribute containing the number of the cluster the sequence record was assigned to, as indicated in file <MCLFILE>.
|
||||
|
||||
// --uniq-id
|
||||
// Forces sequence record ids to be unique.
|
@ -165,7 +165,7 @@ func Identify(sequence *obiseq.BioSequence,
|
||||
func IdentifySeqWorker(references obiseq.BioSequenceSlice,
|
||||
refcounts []*obikmer.Table4mer,
|
||||
taxo *obitax.Taxonomy,
|
||||
runExact bool) obiiter.SeqWorker {
|
||||
runExact bool) obiseq.SeqWorker {
|
||||
return func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||
return Identify(sequence, references, refcounts, taxo, runExact)
|
||||
}
|
||||
|
Reference in New Issue
Block a user