Code refactoring

This commit is contained in:
2022-01-14 17:17:54 +01:00
parent ff40222902
commit 5753723618
2 changed files with 175 additions and 63 deletions

View File

@ -13,19 +13,44 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
var MAX_PAT_LEN = int(C.MAX_PAT_LEN) var _MaxPatLen = int(C.MAX_PAT_LEN)
// ApatPattern stores a regular pattern usable by the
// Apat algorithm functions and methods
type ApatPattern struct { type ApatPattern struct {
pointer *C.Pattern pointer *C.Pattern
} }
// ApatSequence stores a sequence in a structure usable by the
// Apat algorithm functions and methods.
type ApatSequence struct { type ApatSequence struct {
pointer *C.Seq pointer *C.Seq
} }
// NilApatPattern is the nil instance of the ApatPattern
// type.
var NilApatPattern = ApatPattern{nil} var NilApatPattern = ApatPattern{nil}
// NilApatSequence is the nil instance of the ApatSequence
// type.
var NilApatSequence = ApatSequence{nil} var NilApatSequence = ApatSequence{nil}
// MakeApatPattern builds a new ApatPattern.
// The created object wraps a C-allocated structure.
// Do not forget to free it when it is no longer needed,
// using the Free method of the ApatPattern, to avoid
// memory leaks.
// The pattern is a short DNA sequence (up to 64 symbols).
// Ambiguities can be represented either using IUPAC symbols
// or using the [...] notation classical in regular pattern grammar.
// For example, the ambiguity A/T can be indicated using W
// or [AT]. A nucleotide can be negated by preceding it with
// a '!'. The APAT algorithm allows for errors during the
// matching process. The maximum number of tolerated errors
// is indicated at the construction of the pattern using
// the errormax parameter. Some positions can be marked as not
// allowed for mismatches. They have to be signaled using a '#'
// sign after the corresponding nucleotide.
func MakeApatPattern(pattern string, errormax int) (ApatPattern, error) { func MakeApatPattern(pattern string, errormax int) (ApatPattern, error) {
cpattern := C.CString(pattern) cpattern := C.CString(pattern)
defer C.free(unsafe.Pointer(cpattern)) defer C.free(unsafe.Pointer(cpattern))
@ -44,6 +69,9 @@ func MakeApatPattern(pattern string, errormax int) (ApatPattern, error) {
return ApatPattern{pointer: ap}, nil return ApatPattern{pointer: ap}, nil
} }
// ReverseComplement method builds a new ApatPattern
// matching the reverse complemented sequence of the original
// pattern.
func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) { func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
var errno C.int32_t var errno C.int32_t
var errmsg *C.char var errmsg *C.char
@ -58,22 +86,35 @@ func (pattern ApatPattern) ReverseComplement() (ApatPattern, error) {
return ApatPattern{pointer: ap}, nil return ApatPattern{pointer: ap}, nil
} }
// String method casts the ApatPattern to a Go String.
func (pattern ApatPattern) String() string { func (pattern ApatPattern) String() string {
return C.GoString(pattern.pointer.cpat) return C.GoString(pattern.pointer.cpat)
} }
// Length method returns the length of the matched pattern.
func (pattern ApatPattern) Length() int { func (pattern ApatPattern) Length() int {
return int(pattern.pointer.patlen) return int(pattern.pointer.patlen)
} }
// Free method ensures that the wrapped C structure is
// deallocated.
func (pattern ApatPattern) Free() { func (pattern ApatPattern) Free() {
C.free(unsafe.Pointer(pattern.pointer)) C.free(unsafe.Pointer(pattern.pointer))
pattern.pointer = nil
} }
// Print method prints the ApatPattern to the standard output.
// This is mainly a debug method.
func (pattern ApatPattern) Print() { func (pattern ApatPattern) Print() {
C.PrintDebugPattern(C.PatternPtr(pattern.pointer)) C.PrintDebugPattern(C.PatternPtr(pattern.pointer))
} }
// MakeApatSequence casts an obiseq.BioSequence to an ApatSequence.
// The circular parameter indicates the topology of the sequence.
// If the sequence is circular (circular = true), a match can occur
// at the junction. To limit memory allocation, it is possible to provide
// an already allocated ApatSequence to recycle its allocated memory.
// The provided sequence is no longer usable after the call.
func MakeApatSequence(sequence obiseq.BioSequence, circular bool, recycle ...ApatSequence) (ApatSequence, error) { func MakeApatSequence(sequence obiseq.BioSequence, circular bool, recycle ...ApatSequence) (ApatSequence, error) {
var errno C.int32_t var errno C.int32_t
var errmsg *C.char var errmsg *C.char
@ -115,10 +156,13 @@ func MakeApatSequence(sequence obiseq.BioSequence, circular bool, recycle ...Apa
return seq, nil return seq, nil
} }
// Length method returns the length of the ApatSequence.
func (sequence ApatSequence) Length() int { func (sequence ApatSequence) Length() int {
return int(sequence.pointer.seqlen) return int(sequence.pointer.seqlen)
} }
// Free method ensures that the wrapped C structure is
// deallocated.
func (sequence ApatSequence) Free() { func (sequence ApatSequence) Free() {
var errno C.int32_t var errno C.int32_t
var errmsg *C.char var errmsg *C.char
@ -129,6 +173,17 @@ func (sequence ApatSequence) Free() {
sequence.pointer = nil sequence.pointer = nil
} }
// FindAllIndex method returns the position of every occurrence of the
// pattern on the provided sequence. The search can be limited
// to a portion of the sequence by adding one or two integer parameters
// when calling the FindAllIndex method. The first optional argument indicates
// the starting point of the search. The first nucleotide of the sequence is
// indexed as 0. The second optional argument indicates the length of the region
// where the pattern is looked for.
// The FindAllIndex method returns a slice of [3]int. The first two
// values of the [3]int indicate respectively the start and the end position of
// the match. Following the Go convention, the end position is not included in the
// match. The third value indicates the number of errors detected for this occurrence.
func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, limits ...int) (loc [][3]int) { func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, limits ...int) (loc [][3]int) {
begin := 0 begin := 0
length := sequence.Length() length := sequence.Length()

View File

@ -5,65 +5,91 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
type __options__ struct { type _Options struct {
min_length int minLength int
max_length int maxLength int
circular bool circular bool
forward_error int forwardError int
reverse_error int reverseError int
buffer_size int bufferSize int
batch_size int batchSize int
parallel_workers int parallelWorkers int
} }
// Options stores a set of options usable by the
// PCR simulation algorithm.
type Options struct { type Options struct {
pointer *__options__ pointer *_Options
} }
// WithOption is the standard type for function
// declaring options.
type WithOption func(Options) type WithOption func(Options)
// MinLength method returns minimum length of
// the searched amplicon (length of the primers
// excluded)
func (options Options) MinLength() int { func (options Options) MinLength() int {
return options.pointer.min_length return options.pointer.minLength
} }
// MaxLength method returns maximum length of
// the searched amplicon (length of the primers
// excluded)
func (options Options) MaxLength() int { func (options Options) MaxLength() int {
return options.pointer.max_length return options.pointer.maxLength
} }
// ForwardError method returns the number of
// errors allowed when matching the forward
// primer.
func (options Options) ForwardError() int { func (options Options) ForwardError() int {
return options.pointer.forward_error return options.pointer.forwardError
} }
// ReverseError method returns the number of
// errors allowed when matching the reverse
// primer.
func (options Options) ReverseError() int { func (options Options) ReverseError() int {
return options.pointer.reverse_error return options.pointer.reverseError
} }
// Circular method returns the topology option.
// true for circular, false for linear
func (options Options) Circular() bool { func (options Options) Circular() bool {
return options.pointer.circular return options.pointer.circular
} }
func (opt Options) BufferSize() int { // BufferSize returns the size of the channel
return opt.pointer.buffer_size // buffer specified by the options
func (options Options) BufferSize() int {
return options.pointer.bufferSize
} }
func (opt Options) BatchSize() int { // BatchSize returns the size of the
return opt.pointer.batch_size // sequence batch used by the PCR algorithm
func (options Options) BatchSize() int {
return options.pointer.batchSize
} }
func (opt Options) ParallelWorkers() int { // ParallelWorkers returns how many search
return opt.pointer.parallel_workers // jobs will be run in parallel.
func (options Options) ParallelWorkers() int {
return options.pointer.parallelWorkers
} }
// MakeOptions builds a new default option set for
// the PCR simulation algorithm.
func MakeOptions(setters []WithOption) Options { func MakeOptions(setters []WithOption) Options {
o := __options__{ o := _Options{
min_length: 0, minLength: 0,
max_length: 0, maxLength: 0,
forward_error: 0, forwardError: 0,
reverse_error: 0, reverseError: 0,
circular: false, circular: false,
parallel_workers: 4, parallelWorkers: 4,
batch_size: 100, batchSize: 100,
buffer_size: 100, bufferSize: 100,
} }
opt := Options{&o} opt := Options{&o}
@ -75,38 +101,52 @@ func MakeOptions(setters []WithOption) Options {
return opt return opt
} }
func OptionMinLength(min_length int) WithOption { // OptionMinLength sets the minimum length of
// the searched amplicon (length of the primers
// excluded)
func OptionMinLength(minLength int) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.min_length = min_length opt.pointer.minLength = minLength
}) })
return f return f
} }
func OptionMaxLength(max_length int) WithOption { // OptionMaxLength sets the maximum length of
// the searched amplicon (length of the primers
// excluded)
func OptionMaxLength(maxLength int) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.max_length = max_length opt.pointer.maxLength = maxLength
}) })
return f return f
} }
// OptionForwardError sets the number of
// errors allowed when matching the forward
// primer.
func OptionForwardError(max int) WithOption { func OptionForwardError(max int) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.forward_error = max opt.pointer.forwardError = max
}) })
return f return f
} }
// OptionReverseError sets the number of
// errors allowed when matching the reverse
// primer.
func OptionReverseError(max int) WithOption { func OptionReverseError(max int) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.reverse_error = max opt.pointer.reverseError = max
}) })
return f return f
} }
// OptionCircular sets the topology option.
// true for circular, false for linear
func OptionCircular(circular bool) WithOption { func OptionCircular(circular bool) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.circular = circular opt.pointer.circular = circular
@ -115,55 +155,61 @@ func OptionCircular(circular bool) WithOption {
return f return f
} }
// OptionBufferSize sets the requested channel
// buffer size.
func OptionBufferSize(size int) WithOption { func OptionBufferSize(size int) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.buffer_size = size opt.pointer.bufferSize = size
}) })
return f return f
} }
// OptionParallelWorkers sets how many search
// jobs will be run in parallel.
func OptionParallelWorkers(nworkers int) WithOption { func OptionParallelWorkers(nworkers int) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.parallel_workers = nworkers opt.pointer.parallelWorkers = nworkers
}) })
return f return f
} }
// OptionBatchSize sets the requested sequence
// batch size.
func OptionBatchSize(size int) WithOption { func OptionBatchSize(size int) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.batch_size = size opt.pointer.batchSize = size
}) })
return f return f
} }
func __pcr__(seq ApatSequence, sequence obiseq.BioSequence, func _Pcr(seq ApatSequence, sequence obiseq.BioSequence,
forward, cfwd, reverse, crev ApatPattern, forward, cfwd, reverse, crev ApatPattern,
opt Options) obiseq.BioSequenceSlice { opt Options) obiseq.BioSequenceSlice {
results := make(obiseq.BioSequenceSlice, 0, 10) results := make(obiseq.BioSequenceSlice, 0, 10)
forward_matches := forward.FindAllIndex(seq) forwardMatches := forward.FindAllIndex(seq)
if forward_matches != nil { if forwardMatches != nil {
begin := forward_matches[0][0] begin := forwardMatches[0][0]
length := seq.Length() - begin length := seq.Length() - begin
if opt.pointer.max_length > 0 { if opt.pointer.maxLength > 0 {
length = forward_matches[len(forward_matches)-1][2] - begin + opt.MaxLength() + reverse.Length() length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Length()
} }
if opt.Circular() { if opt.Circular() {
begin = 0 begin = 0
length = seq.Length() + MAX_PAT_LEN length = seq.Length() + _MaxPatLen
} }
reverse_matches := crev.FindAllIndex(seq, begin, length) reverseMatches := crev.FindAllIndex(seq, begin, length)
if reverse_matches != nil { if reverseMatches != nil {
for _, fm := range forward_matches { for _, fm := range forwardMatches {
posi := fm[0] posi := fm[0]
@ -171,7 +217,7 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence,
erri := fm[2] erri := fm[2]
for _, rm := range reverse_matches { for _, rm := range reverseMatches {
posj := rm[0] posj := rm[0]
if posj < seq.Length() { if posj < seq.Length() {
posj := rm[1] posj := rm[1]
@ -215,26 +261,26 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence,
} }
} }
forward_matches = reverse.FindAllIndex(seq) forwardMatches = reverse.FindAllIndex(seq)
if forward_matches != nil { if forwardMatches != nil {
begin := forward_matches[0][0] begin := forwardMatches[0][0]
length := seq.Length() - begin length := seq.Length() - begin
if opt.pointer.max_length > 0 { if opt.pointer.maxLength > 0 {
length = forward_matches[len(forward_matches)-1][2] - begin + opt.MaxLength() + reverse.Length() length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Length()
} }
if opt.Circular() { if opt.Circular() {
begin = 0 begin = 0
length = seq.Length() + MAX_PAT_LEN length = seq.Length() + _MaxPatLen
} }
reverse_matches := cfwd.FindAllIndex(seq, begin, length) reverseMatches := cfwd.FindAllIndex(seq, begin, length)
if reverse_matches != nil { if reverseMatches != nil {
for _, fm := range forward_matches { for _, fm := range forwardMatches {
posi := fm[0] posi := fm[0]
@ -242,7 +288,7 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence,
erri := fm[2] erri := fm[2]
for _, rm := range reverse_matches { for _, rm := range reverseMatches {
posj := rm[0] posj := rm[0]
if posj < seq.Length() { if posj < seq.Length() {
posj := rm[1] posj := rm[1]
@ -290,6 +336,10 @@ func __pcr__(seq ApatSequence, sequence obiseq.BioSequence,
return results return results
} }
// PCR runs the PCR simulation algorithm on a single
// obiseq.BioSequence instance. PCR parameters are
// specified using the corresponding Option functions
// defined for the PCR algorithm.
func PCR(sequence obiseq.BioSequence, func PCR(sequence obiseq.BioSequence,
forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice { forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice {
@ -302,7 +352,7 @@ func PCR(sequence obiseq.BioSequence,
cfwd, _ := fwd.ReverseComplement() cfwd, _ := fwd.ReverseComplement()
crev, _ := rev.ReverseComplement() crev, _ := rev.ReverseComplement()
results := __pcr__(seq, sequence, results := _Pcr(seq, sequence,
fwd, cfwd, rev, crev, fwd, cfwd, rev, crev,
opt) opt)
@ -316,6 +366,11 @@ func PCR(sequence obiseq.BioSequence,
return results return results
} }
// PCRSlice runs the PCR simulation algorithm on a set of
// obiseq.BioSequence instances grouped in an obiseq.BioSequenceSlice.
// PCR parameters are
// specified using the corresponding Option functions
// defined for the PCR algorithm.
func PCRSlice(sequences obiseq.BioSequenceSlice, func PCRSlice(sequences obiseq.BioSequenceSlice,
forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice { forward, reverse string, options ...WithOption) obiseq.BioSequenceSlice {
@ -330,7 +385,7 @@ func PCRSlice(sequences obiseq.BioSequenceSlice,
if len(sequences) > 0 { if len(sequences) > 0 {
seq, _ := MakeApatSequence(sequences[0], opt.Circular()) seq, _ := MakeApatSequence(sequences[0], opt.Circular())
amplicons := __pcr__(seq, sequences[0], amplicons := _Pcr(seq, sequences[0],
fwd, cfwd, rev, crev, fwd, cfwd, rev, crev,
opt) opt)
@ -340,7 +395,7 @@ func PCRSlice(sequences obiseq.BioSequenceSlice,
for _, sequence := range sequences[1:] { for _, sequence := range sequences[1:] {
seq, _ := MakeApatSequence(sequence, opt.Circular(), seq) seq, _ := MakeApatSequence(sequence, opt.Circular(), seq)
amplicons = __pcr__(seq, sequence, amplicons = _Pcr(seq, sequence,
fwd, cfwd, rev, crev, fwd, cfwd, rev, crev,
opt) opt)
if len(amplicons) > 0 { if len(amplicons) > 0 {
@ -359,6 +414,8 @@ func PCRSlice(sequences obiseq.BioSequenceSlice,
return results return results
} }
// PCRSliceWorker is a worker function builder which produces
// a job function usable by the obiseq.MakeISliceWorker function.
func PCRSliceWorker(forward, reverse string, func PCRSliceWorker(forward, reverse string,
options ...WithOption) obiseq.SeqSliceWorker { options ...WithOption) obiseq.SeqSliceWorker {