move the worker class to the obiseq package

This commit is contained in:
2023-01-22 22:39:13 +01:00
parent f97f92df72
commit 2d375df94f
12 changed files with 153 additions and 43 deletions

View File

@ -3,7 +3,7 @@ package main
import ( import (
"os" "os"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
@ -16,6 +16,6 @@ func main() {
fs, _ := obiconvert.ReadBioSequences(args...) fs, _ := obiconvert.ReadBioSequences(args...)
comp := fs.MakeIWorker(obiiter.ReverseComplementWorker(true)) comp := fs.MakeIWorker(obiseq.ReverseComplementWorker(true))
obiconvert.WriteBioSequences(comp, true) obiconvert.WriteBioSequences(comp, true)
} }

View File

@ -4,7 +4,6 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -441,7 +440,7 @@ func PCRSlice(sequences obiseq.BioSequenceSlice,
// PCRSliceWorker is a worker function builder which produces // PCRSliceWorker is a worker function builder which produces
// a job function usable by the obiiter IBioSequence.MakeISliceWorker method. // a job function usable by the obiiter IBioSequence.MakeISliceWorker method.
func PCRSliceWorker(options ...WithOption) obiiter.SeqSliceWorker { func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
opt := MakeOptions(options) opt := MakeOptions(options)
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {

View File

@ -18,7 +18,7 @@ func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence, func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence,
options ...WithOption) obiiter.IBioSequence { options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()), return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
opt.ParallelWorkers(), opt.ParallelWorkers(),
opt.BufferSize()) opt.BufferSize())
} }

View File

@ -1,12 +1,11 @@
package obiformats package obiformats
import ( import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
type __options__ struct { type __options__ struct {
fastseq_header_parser obiiter.SeqAnnotator fastseq_header_parser obiseq.SeqAnnotator
fastseq_header_writer func(*obiseq.BioSequence) string fastseq_header_writer func(*obiseq.BioSequence) string
with_progress_bar bool with_progress_bar bool
buffer_size int buffer_size int
@ -59,7 +58,7 @@ func (opt Options) ParallelWorkers() int {
return opt.pointer.parallel_workers return opt.pointer.parallel_workers
} }
func (opt Options) ParseFastSeqHeader() obiiter.SeqAnnotator { func (opt Options) ParseFastSeqHeader() obiseq.SeqAnnotator {
return opt.pointer.fastseq_header_parser return opt.pointer.fastseq_header_parser
} }
@ -124,7 +123,7 @@ func OptionsQualitySolexa() WithOption {
return OptionsQualityShift(64) return OptionsQualityShift(64)
} }
func OptionsFastSeqHeaderParser(parser obiiter.SeqAnnotator) WithOption { func OptionsFastSeqHeaderParser(parser obiseq.SeqAnnotator) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.fastseq_header_parser = parser opt.pointer.fastseq_header_parser = parser
}) })

View File

@ -1,3 +1,5 @@
// Package obiiter provides IBioSequence, an iterator over batches of BioSequence
// objects, together with helpers for applying workers to the iterated sequences.
package obiiter package obiiter
import ( import (

View File

@ -6,18 +6,6 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
type SeqAnnotator func(*obiseq.BioSequence)
type SeqWorker func(*obiseq.BioSequence) *obiseq.BioSequence
type SeqSliceWorker func(obiseq.BioSequenceSlice) obiseq.BioSequenceSlice
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
f := func(seq *obiseq.BioSequence) *obiseq.BioSequence {
function(seq)
return seq
}
return f
}
// That method allows for applying a SeqWorker function on every sequences. // That method allows for applying a SeqWorker function on every sequences.
// //
@ -27,7 +15,7 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
// Besides the SeqWorker function, the method accepts two optional integer parameters. // Besides the SeqWorker function, the method accepts two optional integer parameters.
// - The first sets the number of workers running in parallel (default 4). // - The first sets the number of workers running in parallel (default 4).
// - The second sets the size of the channel buffer (by default, the same as the input iterator's buffer). // - The second sets the size of the channel buffer (by default, the same as the input iterator's buffer).
func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence { func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int) IBioSequence {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -70,7 +58,7 @@ func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSeq
} }
func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate, func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
worker SeqWorker, sizes ...int) IBioSequence { worker obiseq.SeqWorker, sizes ...int) IBioSequence {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -114,7 +102,7 @@ func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePre
return newIter return newIter
} }
func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequence { func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, sizes ...int) IBioSequence {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -153,7 +141,7 @@ func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...in
return newIter return newIter
} }
func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable { func WorkerPipe(worker obiseq.SeqWorker, sizes ...int) Pipeable {
f := func(iterator IBioSequence) IBioSequence { f := func(iterator IBioSequence) IBioSequence {
return iterator.MakeIWorker(worker, sizes...) return iterator.MakeIWorker(worker, sizes...)
} }
@ -161,7 +149,7 @@ func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
return f return f
} }
func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable { func SliceWorkerPipe(worker obiseq.SeqSliceWorker, sizes ...int) Pipeable {
f := func(iterator IBioSequence) IBioSequence { f := func(iterator IBioSequence) IBioSequence {
return iterator.MakeISliceWorker(worker, sizes...) return iterator.MakeISliceWorker(worker, sizes...)
} }
@ -169,10 +157,3 @@ func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
return f return f
} }
func ReverseComplementWorker(inplace bool) SeqWorker {
f := func(input *obiseq.BioSequence) *obiseq.BioSequence {
return input.ReverseComplement(inplace)
}
return f
}

View File

@ -1,7 +1,6 @@
package obingslibrary package obingslibrary
import ( import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -170,7 +169,7 @@ func ExtractBarcodeSlice(ngslibrary NGSLibrary,
} }
func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary, func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
options ...WithOption) obiiter.SeqSliceWorker { options ...WithOption) obiseq.SeqSliceWorker {
opt := MakeOptions(options) opt := MakeOptions(options)

View File

@ -64,3 +64,12 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
return sequence return sequence
} }
// ReverseComplementWorker builds a SeqWorker that calls ReverseComplement on
// every sequence it receives, forwarding the inplace flag unchanged, and
// returns whatever that method returns.
func ReverseComplementWorker(inplace bool) SeqWorker {
	return func(sequence *BioSequence) *BioSequence {
		return sequence.ReverseComplement(inplace)
	}
}

31
pkg/obiseq/worker.go Normal file
View File

@ -0,0 +1,31 @@
package obiseq
// Function types describing the processing steps that can be applied to
// sequences.
type (
	// SeqAnnotator is a function called on a sequence for its side effects
	// only; it returns nothing.
	SeqAnnotator func(*BioSequence)

	// SeqWorker is a function that receives a sequence and returns a
	// sequence (which may or may not be the one it received).
	SeqWorker func(*BioSequence) *BioSequence

	// SeqSliceWorker is a function that receives a slice of sequences and
	// returns a slice of sequences.
	SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice
)
// AnnotatorToSeqWorker adapts a SeqAnnotator to the SeqWorker signature.
// The returned worker calls function on the sequence it receives and then
// returns that very same sequence pointer.
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
	return func(sequence *BioSequence) *BioSequence {
		function(sequence)
		return sequence
	}
}
// SeqToSliceWorker lifts a SeqWorker, which processes one sequence, into a
// SeqSliceWorker, which processes a whole slice by applying worker to each
// element in order.
//
// When inplace is true, each element of the input slice is overwritten with
// the worker's result and the input slice itself is returned. When inplace is
// false, the input slice is left untouched and the results are collected in a
// slice obtained from MakeBioSequenceSlice.
func SeqToSliceWorker(worker SeqWorker, inplace bool) SeqSliceWorker {
	return func(input BioSequenceSlice) BioSequenceSlice {
		if inplace {
			for i, s := range input {
				input[i] = worker(s)
			}
			return input
		}

		// The results must be appended: indexing into a freshly made,
		// empty slice panics with "index out of range".
		// NOTE(review): assumes MakeBioSequenceSlice returns a slice of
		// length zero - confirm against its definition.
		output := MakeBioSequenceSlice()
		for _, s := range input {
			output = append(output, worker(s))
		}
		return output
	}
}

View File

@ -2,12 +2,11 @@ package obitax
import ( import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiiter.SeqWorker { func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiseq.SeqWorker {
if !goutils.Contains(taxonomy.RankList(), rank) { if !goutils.Contains(taxonomy.RankList(), rank) {
log.Fatalf("%s is not a valid rank (allowed ranks are %v)", log.Fatalf("%s is not a valid rank (allowed ranks are %v)",
@ -23,7 +22,7 @@ func (taxonomy *Taxonomy) MakeSetTaxonAtRankWorker(rank string) obiiter.SeqWorke
return w return w
} }
func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiiter.SeqWorker { func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiseq.SeqWorker {
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence { w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
taxonomy.SetSpecies(sequence) taxonomy.SetSpecies(sequence)
@ -33,7 +32,7 @@ func (taxonomy *Taxonomy) MakeSetSpeciesWorker() obiiter.SeqWorker {
return w return w
} }
func (taxonomy *Taxonomy) MakeSetGenusWorker() obiiter.SeqWorker { func (taxonomy *Taxonomy) MakeSetGenusWorker() obiseq.SeqWorker {
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence { w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
taxonomy.SetGenus(sequence) taxonomy.SetGenus(sequence)
@ -43,7 +42,7 @@ func (taxonomy *Taxonomy) MakeSetGenusWorker() obiiter.SeqWorker {
return w return w
} }
func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiiter.SeqWorker { func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiseq.SeqWorker {
w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence { w := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
taxonomy.SetFamily(sequence) taxonomy.SetFamily(sequence)
@ -52,5 +51,3 @@ func (taxonomy *Taxonomy) MakeSetFamilyWorker() obiiter.SeqWorker {
return w return w
} }

View File

@ -0,0 +1,93 @@
package obiannotate
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
"github.com/DavidGamba/go-getoptions"
)
// Package-level storage for the values of the command-line options registered
// by SequenceSelectionOptionSet. Every option starts disabled or empty.
var (
	// Boolean switches.
	_addRank      = false // --seq-rank
	_clearAll     = false // --clear / -C
	_setSeqLength = false // --length
	_uniqueID     = false // --uniq-id

	// Single-valued option.
	_tagList = "" // --tag-list

	// Repeatable options; all start as empty, non-nil containers.
	_toBeRenamed = map[string]string{} // --rename-tag / -R
	_toBeDeleted = []string{}          // --delete-tag
	_keepOnly    = []string{}          // --keep / -k
	_taxonAtRank = []string{}          // --with-taxon-at-rank
)
// SequenceSelectionOptionSet registers on options the command-line flags
// declared by this package and binds each of them to the package-level
// variable that stores its value.
//
// NOTE(review): the name mirrors obigrep.SequenceSelectionOptionSet, but the
// flags registered here describe edits to sequence attributes rather than
// sequence selection - consider whether a more specific name is wanted.
func SequenceSelectionOptionSet(options *getoptions.GetOpt) {
// --seq-rank: boolean switch stored in _addRank.
options.BoolVar(&_addRank, "seq-rank", _addRank,
options.Description("Adds a new attribute named seq_rank to the sequence record indicating its entry number in the sequence file."),
)
// --clear / -C: boolean switch stored in _clearAll.
options.BoolVar(&_clearAll, "clear", _clearAll,
options.Alias("C"),
options.Description("Clears all attributes associated to the sequence records."),
)
// --length: boolean switch stored in _setSeqLength.
options.BoolVar(&_setSeqLength, "length", _setSeqLength,
options.Description("Adds attribute with seq_length as a key and sequence length as a value."),
)
// --uniq-id: boolean switch stored in _uniqueID.
options.BoolVar(&_uniqueID, "uniq-id", _uniqueID,
options.Description("Forces sequence record ids to be unique."),
)
// --rename-tag / -R: NEW_NAME=OLD_NAME pairs collected in the _toBeRenamed map.
// NOTE(review): the two integer arguments are presumably the minimum and
// maximum number of values per occurrence - confirm against go-getoptions.
options.StringMapVar(&_toBeRenamed, "rename-tag", 1, 1,
options.Alias("R"),
options.ArgName("NEW_NAME=OLD_NAME"),
options.Description("Changes attribute name <OLD_NAME> to <NEW_NAME>. When attribute named <OLD_NAME>"+
" is missing, the sequence record is skipped and the next one is examined."))
// --delete-tag: attribute keys collected in the _toBeDeleted slice.
// NOTE(review): the help text starts with a stray space and lacks a space
// after "<KEY>." - left untouched here because it is user-facing output.
options.StringSliceVar(&_toBeDeleted, "delete-tag", 1, 1,
options.ArgName("KEY"),
options.Description(" Deletes attribute named <KEY>.When this attribute is missing,"+
" the sequence record is skipped and the next one is examined."))
// --with-taxon-at-rank: rank names collected in the _taxonAtRank slice.
options.StringSliceVar(&_taxonAtRank, "with-taxon-at-rank", 1, 1,
options.ArgName("RANK_NAME"),
options.Description("Adds taxonomic annotation at taxonomic rank <RANK_NAME>."))
// --tag-list: a file name stored in _tagList.
options.StringVar(&_tagList, "tag-list", _tagList,
options.ArgName("FILENAME"),
options.Description("<FILENAME> points to a file containing attribute names"+
" and values to modify for specified sequence records."))
// --keep / -k: attribute keys collected in the _keepOnly slice.
options.StringSliceVar(&_keepOnly, "keep", 1, 1,
options.Alias("k"),
options.ArgName("KEY"),
options.Description("Keeps only attribute with key <KEY>. Several -k options can be combined."))
}
// OptionSet registers on options every option group used by this package:
// the obiconvert option set, the obigrep sequence selection options, and the
// options declared by this package's own SequenceSelectionOptionSet.
func OptionSet(options *getoptions.GetOpt) {
obiconvert.OptionSet(options)
obigrep.SequenceSelectionOptionSet(options)
SequenceSelectionOptionSet(options)
}
// -S <KEY>:<PYTHON_EXPRESSION>, --set-tag=<KEY>:<PYTHON_EXPRESSION>
// Creates a new attribute named with a key <KEY> and a value computed from <PYTHON_EXPRESSION>.
// --set-identifier=<PYTHON_EXPRESSION>
// Sets sequence record identifier with a value computed from <PYTHON_EXPRESSION>.
// --run=<PYTHON_EXPRESSION>
// Runs a python expression on each selected sequence.
// --set-sequence=<PYTHON_EXPRESSION>
// Changes the sequence itself with a value computed from <PYTHON_EXPRESSION>.
// -T, --set-definition=<PYTHON_EXPRESSION>
// Sets sequence definition with a value computed from <PYTHON_EXPRESSION>.
// -O, --only-valid-python
// Allows only valid python expressions.
// -m <MCLFILE>, --mcl=<MCLFILE>
// Creates a new attribute containing the number of the cluster the sequence record was assigned to, as indicated in file <MCLFILE>.
// --uniq-id
// Forces sequence record ids to be unique.

View File

@ -165,7 +165,7 @@ func Identify(sequence *obiseq.BioSequence,
func IdentifySeqWorker(references obiseq.BioSequenceSlice, func IdentifySeqWorker(references obiseq.BioSequenceSlice,
refcounts []*obikmer.Table4mer, refcounts []*obikmer.Table4mer,
taxo *obitax.Taxonomy, taxo *obitax.Taxonomy,
runExact bool) obiiter.SeqWorker { runExact bool) obiseq.SeqWorker {
return func(sequence *obiseq.BioSequence) *obiseq.BioSequence { return func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
return Identify(sequence, references, refcounts, taxo, runExact) return Identify(sequence, references, refcounts, taxo, runExact)
} }