From 5753723618b118faab89a361eed07483de2f707b Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 14 Jan 2022 17:17:54 +0100 Subject: [PATCH] Code refactoring --- pkg/obiapat/pattern.go | 57 ++++++++++++- pkg/obiapat/pcr.go | 181 +++++++++++++++++++++++++++-------------- 2 files changed, 175 insertions(+), 63 deletions(-) diff --git a/pkg/obiapat/pattern.go b/pkg/obiapat/pattern.go index f915c6c..0d02b7f 100644 --- a/pkg/obiapat/pattern.go +++ b/pkg/obiapat/pattern.go @@ -13,19 +13,44 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) -var MAX_PAT_LEN = int(C.MAX_PAT_LEN) +var _MaxPatLen = int(C.MAX_PAT_LEN) +// ApatPattern stores a regular pattern usable by the +// Apat algorithm functions and methods. type ApatPattern struct { pointer *C.Pattern } +// ApatSequence stores a sequence in a structure usable by the +// Apat algorithm functions and methods. type ApatSequence struct { pointer *C.Seq } +// NilApatPattern is the nil instance of the ApatPattern +// type. var NilApatPattern = ApatPattern{nil} + +// NilApatSequence is the nil instance of the ApatSequence +// type. var NilApatSequence = ApatSequence{nil} +// MakeApatPattern builds a new ApatPattern. +// The created object wraps a C allocated structure. +// Do not forget to free it when it is no longer needed, +// to avoid memory leaks, using the Free method of the +// ApatPattern. +// The pattern is a short DNA sequence (up to 64 symbols). +// Ambiguities can be represented either using IUPAC symbols, +// or using the [...] notation classical in regular pattern grammar. +// For example, the ambiguity A/T can be indicated using W +// or [AT]. A nucleotide can be negated by preceding it with +// a '!'. The APAT algorithm allows for errors during the +// matching process. The maximum number of tolerated errors +// is indicated at the construction of the pattern using +// the errormax parameter. Some positions can be marked as not +// allowed for mismatches. 
They have to be signaled using a '#' +// sign after the corresponding nucleotide. func MakeApatPattern(pattern string, errormax int) (ApatPattern, error) { cpattern := C.CString(pattern) defer C.free(unsafe.Pointer(cpattern)) @@ -44,6 +69,9 @@ func MakeApatPattern(pattern string, errormax int) (ApatPattern, error) { return ApatPattern{pointer: ap}, nil } +// ReverseComplement method builds a new ApatPattern +// matching the reverse complemented sequence of the original +// pattern. func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) { var errno C.int32_t var errmsg *C.char @@ -58,22 +86,35 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) { return ApatPattern{pointer: ap}, nil } +// String method casts the ApatPattern to a Go string. func (pattern ApatPattern) String() string { return C.GoString(pattern.pointer.cpat) } +// Length method returns the length of the matched pattern. func (pattern ApatPattern) Length() int { return int(pattern.pointer.patlen) } +// Free method ensures that the wrapped C structure is +// deallocated. NOTE(review): the receiver is a value, so setting pointer to nil below only changes the local copy. func (pattern ApatPattern) Free() { C.free(unsafe.Pointer(pattern.pointer)) + pattern.pointer = nil } +// Print method prints the ApatPattern to the standard output. +// This is mainly a debug method. func (pattern ApatPattern) Print() { C.PrintDebugPattern(C.PatternPtr(pattern.pointer)) } +// MakeApatSequence casts an obiseq.BioSequence to an ApatSequence. +// The circular parameter indicates the topology of the sequence. +// If the sequence is circular (circular = true), the match can occur +// at the junction. To limit memory allocation, it is possible to provide +// an already allocated ApatSequence to recycle its allocated memory. +// The provided sequence is no longer usable after the call. 
func MakeApatSequence(sequence obiseq.BioSequence, circular bool, recycle ...ApatSequence) (ApatSequence, error) { var errno C.int32_t var errmsg *C.char @@ -115,10 +156,13 @@ func MakeApatSequence(sequence obiseq.BioSequence, circular bool, recycle ...Apa return seq, nil } +// Length method returns the length of the ApatSequence. func (sequence ApatSequence) Length() int { return int(sequence.pointer.seqlen) } +// Free method ensures that the wrapped C structure is +// deallocated. NOTE(review): the receiver is a value, so setting pointer to nil below only changes the local copy. func (sequence ApatSequence) Free() { var errno C.int32_t var errmsg *C.char @@ -129,6 +173,17 @@ func (sequence ApatSequence) Free() { sequence.pointer = nil } +// FindAllIndex method returns the position of every occurrence of the +// pattern on the provided sequence. The search can be limited +// to a portion of the sequence by adding one or two integer parameters +// when calling the FindAllIndex method. The first optional argument indicates +// the starting point of the search. The first nucleotide of the sequence is +// indexed as 0. The second optional argument indicates the length of the region +// where the pattern is looked for. +// The FindAllIndex method returns a slice of [3]int. The first two +// values of the [3]int indicate respectively the start and the end position of +// the match. Following the Go convention, the end position is not included in the +// match. The third value indicates the number of errors detected for this occurrence. 
func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, limits ...int) (loc [][3]int) { begin := 0 length := sequence.Length() diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index dd9c865..37eea50 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -5,65 +5,91 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) -type __options__ struct { - min_length int - max_length int - circular bool - forward_error int - reverse_error int - buffer_size int - batch_size int - parallel_workers int +type _Options struct { + minLength int + maxLength int + circular bool + forwardError int + reverseError int + bufferSize int + batchSize int + parallelWorkers int } +// Options stores a set of options usable by the +// PCR simulation algorithm. type Options struct { - pointer *__options__ + pointer *_Options } +// WithOption is the standard type for functions +// declaring options. type WithOption func(Options) +// MinLength method returns the minimum length of +// the searched amplicon (length of the primers +// excluded). func (options Options) MinLength() int { - return options.pointer.min_length + return options.pointer.minLength } +// MaxLength method returns the maximum length of +// the searched amplicon (length of the primers +// excluded). func (options Options) MaxLength() int { - return options.pointer.max_length + return options.pointer.maxLength } +// ForwardError method returns the number of +// errors allowed when matching the forward +// primer. func (options Options) ForwardError() int { - return options.pointer.forward_error + return options.pointer.forwardError } +// ReverseError method returns the number of +// errors allowed when matching the reverse +// primer. func (options Options) ReverseError() int { - return options.pointer.reverse_error + return options.pointer.reverseError } +// Circular method returns the topology option. 
+// true for circular, false for linear func (options Options) Circular() bool { return options.pointer.circular } -func (opt Options) BufferSize() int { - return opt.pointer.buffer_size +// BufferSize returns the size of the channel +// buffer specified by the options +func (options Options) BufferSize() int { + return options.pointer.bufferSize } -func (opt Options) BatchSize() int { - return opt.pointer.batch_size +// BatchSize returns the size of the +// sequence batch used by the PCR algorithm +func (options Options) BatchSize() int { + return options.pointer.batchSize } -func (opt Options) ParallelWorkers() int { - return opt.pointer.parallel_workers +// ParallelWorkers returns how many search +// jobs will be run in parallel. +func (options Options) ParallelWorkers() int { + return options.pointer.parallelWorkers } +// MakeOptions builds a new default option set for +// the PCR simulation algorithm. func MakeOptions(setters []WithOption) Options { - o := __options__{ - min_length: 0, - max_length: 0, - forward_error: 0, - reverse_error: 0, - circular: false, - parallel_workers: 4, - batch_size: 100, - buffer_size: 100, + o := _Options{ + minLength: 0, + maxLength: 0, + forwardError: 0, + reverseError: 0, + circular: false, + parallelWorkers: 4, + batchSize: 100, + bufferSize: 100, } opt := Options{&o} @@ -75,38 +101,52 @@ func MakeOptions(setters []WithOption) Options { return opt } -func OptionMinLength(min_length int) WithOption { +// OptionMinLength sets the minimum length of +// the searched amplicon (length of the primers +// excluded) +func OptionMinLength(minLength int) WithOption { f := WithOption(func(opt Options) { - opt.pointer.min_length = min_length + opt.pointer.minLength = minLength }) return f } -func OptionMaxLength(max_length int) WithOption { +// OptionMaxLength sets the maximum length of +// the searched amplicon (length of the primers +// excluded) +func OptionMaxLength(maxLength int) WithOption { f := WithOption(func(opt Options) { - 
opt.pointer.max_length = max_length + opt.pointer.maxLength = maxLength }) return f } +// OptionForwardError sets the number of +// error allowed when matching the forward +// primer. func OptionForwardError(max int) WithOption { f := WithOption(func(opt Options) { - opt.pointer.forward_error = max + opt.pointer.forwardError = max }) return f } +// OptionReverseError sets the number of +// error allowed when matching the reverse +// primer. func OptionReverseError(max int) WithOption { f := WithOption(func(opt Options) { - opt.pointer.reverse_error = max + opt.pointer.reverseError = max }) return f } +// OptionCircular sets the topology option. +// true for circular, false for linear func OptionCircular(circular bool) WithOption { f := WithOption(func(opt Options) { opt.pointer.circular = circular @@ -115,55 +155,61 @@ func OptionCircular(circular bool) WithOption { return f } +// OptionBufferSize sets the requested channel +// buffer size. func OptionBufferSize(size int) WithOption { f := WithOption(func(opt Options) { - opt.pointer.buffer_size = size + opt.pointer.bufferSize = size }) return f } +// OptionParallelWorkers sets how many search +// jobs will be run in parallel. func OptionParallelWorkers(nworkers int) WithOption { f := WithOption(func(opt Options) { - opt.pointer.parallel_workers = nworkers + opt.pointer.parallelWorkers = nworkers }) return f } +// OptionBatchSize sets the requested sequence +// batch size. 
func OptionBatchSize(size int) WithOption { f := WithOption(func(opt Options) { - opt.pointer.batch_size = size + opt.pointer.batchSize = size }) return f } -func __pcr__(seq ApatSequence, sequence obiseq.BioSequence, +func _Pcr(seq ApatSequence, sequence obiseq.BioSequence, forward, cfwd, reverse, crev ApatPattern, opt Options) obiseq.BioSequenceSlice { results := make(obiseq.BioSequenceSlice, 0, 10) - forward_matches := forward.FindAllIndex(seq) + forwardMatches := forward.FindAllIndex(seq) - if forward_matches != nil { + if forwardMatches != nil { - begin := forward_matches[0][0] + begin := forwardMatches[0][0] length := seq.Length() - begin - if opt.pointer.max_length > 0 { - length = forward_matches[len(forward_matches)-1][2] - begin + opt.MaxLength() + reverse.Length() + if opt.pointer.maxLength > 0 { + length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Length() } if opt.Circular() { begin = 0 - length = seq.Length() + MAX_PAT_LEN + length = seq.Length() + _MaxPatLen } - reverse_matches := crev.FindAllIndex(seq, begin, length) + reverseMatches := crev.FindAllIndex(seq, begin, length) - if reverse_matches != nil { - for _, fm := range forward_matches { + if reverseMatches != nil { + for _, fm := range forwardMatches { posi := fm[0] @@ -171,7 +217,7 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence, erri := fm[2] - for _, rm := range reverse_matches { + for _, rm := range reverseMatches { posj := rm[0] if posj < seq.Length() { posj := rm[1] @@ -215,26 +261,26 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence, } } - forward_matches = reverse.FindAllIndex(seq) + forwardMatches = reverse.FindAllIndex(seq) - if forward_matches != nil { + if forwardMatches != nil { - begin := forward_matches[0][0] + begin := forwardMatches[0][0] length := seq.Length() - begin - if opt.pointer.max_length > 0 { - length = forward_matches[len(forward_matches)-1][2] - begin + opt.MaxLength() + reverse.Length() + if 
opt.pointer.maxLength > 0 { + length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Length() } if opt.Circular() { begin = 0 - length = seq.Length() + MAX_PAT_LEN + length = seq.Length() + _MaxPatLen } - reverse_matches := cfwd.FindAllIndex(seq, begin, length) + reverseMatches := cfwd.FindAllIndex(seq, begin, length) - if reverse_matches != nil { - for _, fm := range forward_matches { + if reverseMatches != nil { + for _, fm := range forwardMatches { posi := fm[0] @@ -242,7 +288,7 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence, erri := fm[2] - for _, rm := range reverse_matches { + for _, rm := range reverseMatches { posj := rm[0] if posj < seq.Length() { posj := rm[1] @@ -290,6 +336,10 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence, return results } +// PCR runs the PCR simulation algorithm on a single +// obiseq.BioSequence instance. PCR parameters are +// specified using the corresponding Option functions +// defined for the PCR algorithm. func PCR(sequence obiseq.BioSequence, forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice { @@ -302,7 +352,7 @@ func PCR(sequence obiseq.BioSequence, cfwd, _ := fwd.ReverseComplement() crev, _ := rev.ReverseComplement() - results := __pcr__(seq, sequence, + results := _Pcr(seq, sequence, fwd, cfwd, rev, crev, opt) @@ -316,6 +366,11 @@ func PCR(sequence obiseq.BioSequence, return results } +// PCRSlice runs the PCR simulation algorithm on a set of +// obiseq.BioSequence instances grouped in a obiseq.BioSequenceSlice. +// PCR parameters are +// specified using the corresponding Option functions +// defined for the PCR algorithm. 
func PCRSlice(sequences obiseq.BioSequenceSlice, forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice { @@ -330,7 +385,7 @@ func PCRSlice(sequences obiseq.BioSequenceSlice, if len(sequences) > 0 { seq, _ := MakeApatSequence(sequences[0], opt.Circular()) - amplicons := __pcr__(seq, sequences[0], + amplicons := _Pcr(seq, sequences[0], fwd, cfwd, rev, crev, opt) @@ -340,7 +395,7 @@ func PCRSlice(sequences obiseq.BioSequenceSlice, for _, sequence := range sequences[1:] { seq, _ := MakeApatSequence(sequence, opt.Circular(), seq) - amplicons = __pcr__(seq, sequence, + amplicons = _Pcr(seq, sequence, fwd, cfwd, rev, crev, opt) if len(amplicons) > 0 { @@ -359,6 +414,8 @@ func PCRSlice(sequences obiseq.BioSequenceSlice, return results } +// PCRSliceWorker is a worker function builder which produces +// a job function usable by the obiseq.MakeISliceWorker function. func PCRSliceWorker(forward, reverse string, options ...WithOption) obiseq.SeqSliceWorker {