Files
obitools4/pkg/obiapat/pcr.go

441 lines
11 KiB
Go
Raw Normal View History

2022-01-13 23:27:39 +01:00
package obiapat
import (
"log"
2022-01-13 23:43:01 +01:00
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
2022-01-13 23:27:39 +01:00
)
2022-01-14 17:17:54 +01:00
type _Options struct {
minLength int
maxLength int
circular bool
forwardError int
reverseError int
bufferSize int
batchSize int
parallelWorkers int
forward ApatPattern
cfwd ApatPattern
reverse ApatPattern
crev ApatPattern
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
// Options stores a set of option usable by the
// PCR simulation algotithm.
2022-01-13 23:27:39 +01:00
type Options struct {
2022-01-14 17:17:54 +01:00
pointer *_Options
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
// WithOption is the standard type for function
// declaring options.
2022-01-13 23:27:39 +01:00
type WithOption func(Options)
2022-01-14 17:17:54 +01:00
// MinLength method returns minimum length of
// the searched amplicon (length of the primers
// excluded)
2022-01-13 23:27:39 +01:00
func (options Options) MinLength() int {
2022-01-14 17:17:54 +01:00
return options.pointer.minLength
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
// MaxLength method returns maximum length of
// the searched amplicon (length of the primers
// excluded)
2022-01-13 23:27:39 +01:00
func (options Options) MaxLength() int {
2022-01-14 17:17:54 +01:00
return options.pointer.maxLength
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
// ForwardError method returns the number of
// error allowed when matching the forward
// primer.
2022-01-13 23:27:39 +01:00
func (options Options) ForwardError() int {
2022-01-14 17:17:54 +01:00
return options.pointer.forwardError
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
// ReverseError method returns the number of
// error allowed when matching the reverse
// primer.
2022-01-13 23:27:39 +01:00
func (options Options) ReverseError() int {
2022-01-14 17:17:54 +01:00
return options.pointer.reverseError
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
// Circular method returns the topology option.
// true for circular, false for linear
2022-01-13 23:27:39 +01:00
func (options Options) Circular() bool {
return options.pointer.circular
}
2022-01-14 17:17:54 +01:00
// BufferSize returns the size of the channel
// buffer specified by the options
func (options Options) BufferSize() int {
return options.pointer.bufferSize
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
// BatchSize returns the size of the
// sequence batch used by the PCR algorithm
func (options Options) BatchSize() int {
return options.pointer.batchSize
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
// ParallelWorkers returns how many search
// jobs will be run in parallel.
func (options Options) ParallelWorkers() int {
return options.pointer.parallelWorkers
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
// MakeOptions buils a new default option set for
// the PCR simulation algoithm.
2022-01-13 23:27:39 +01:00
func MakeOptions(setters []WithOption) Options {
2022-01-14 17:17:54 +01:00
o := _Options{
minLength: 0,
maxLength: 0,
forwardError: 0,
reverseError: 0,
circular: false,
parallelWorkers: 4,
batchSize: 100,
bufferSize: 100,
forward: NilApatPattern,
cfwd: NilApatPattern,
reverse: NilApatPattern,
crev: NilApatPattern,
2022-01-13 23:27:39 +01:00
}
opt := Options{&o}
for _, set := range setters {
set(opt)
}
return opt
}
2022-01-14 17:17:54 +01:00
// OptionMinLength sets the minimum length of
// the searched amplicon (length of the primers
// excluded)
func OptionMinLength(minLength int) WithOption {
2022-01-13 23:27:39 +01:00
f := WithOption(func(opt Options) {
2022-01-14 17:17:54 +01:00
opt.pointer.minLength = minLength
2022-01-13 23:27:39 +01:00
})
return f
}
2022-01-14 17:17:54 +01:00
// OptionMaxLength sets the maximum length of
// the searched amplicon (length of the primers
// excluded)
func OptionMaxLength(maxLength int) WithOption {
2022-01-13 23:27:39 +01:00
f := WithOption(func(opt Options) {
2022-01-14 17:17:54 +01:00
opt.pointer.maxLength = maxLength
2022-01-13 23:27:39 +01:00
})
return f
}
2022-01-14 17:17:54 +01:00
// OptionForwardError sets the number of
// error allowed when matching the forward
// primer.
func OptionForwardPrimer(primer string, max int) WithOption {
2022-01-13 23:27:39 +01:00
f := WithOption(func(opt Options) {
var err error
opt.pointer.forward, err = MakeApatPattern(primer, max)
if err != nil {
log.Fatalf("error : %v\n", err)
}
opt.pointer.cfwd, err = opt.pointer.forward.ReverseComplement()
if err != nil {
log.Fatalf("error : %v\n", err)
}
2022-01-14 17:17:54 +01:00
opt.pointer.forwardError = max
2022-01-13 23:27:39 +01:00
})
return f
}
// OptionForwardError sets the number of
// error allowed when matching the forward
2022-01-14 17:17:54 +01:00
// primer.
func OptionReversePrimer(primer string, max int) WithOption {
2022-01-13 23:27:39 +01:00
f := WithOption(func(opt Options) {
var err error
opt.pointer.reverse, err = MakeApatPattern(primer, max)
if err != nil {
log.Fatalf("error : %v\n", err)
}
opt.pointer.crev, err = opt.pointer.reverse.ReverseComplement()
if err != nil {
log.Fatalf("error : %v\n", err)
}
2022-01-14 17:17:54 +01:00
opt.pointer.reverseError = max
2022-01-13 23:27:39 +01:00
})
return f
}
2022-01-14 17:17:54 +01:00
// OptionCircular sets the topology option.
// true for circular, false for linear
2022-01-13 23:27:39 +01:00
func OptionCircular(circular bool) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.circular = circular
})
return f
}
2022-01-14 17:17:54 +01:00
// OptionBufferSize sets the requested channel
// buffer size.
2022-01-13 23:27:39 +01:00
func OptionBufferSize(size int) WithOption {
f := WithOption(func(opt Options) {
2022-01-14 17:17:54 +01:00
opt.pointer.bufferSize = size
2022-01-13 23:27:39 +01:00
})
return f
}
2022-01-14 17:17:54 +01:00
// OptionParallelWorkers sets how many search
// jobs will be run in parallel.
2022-01-13 23:27:39 +01:00
func OptionParallelWorkers(nworkers int) WithOption {
f := WithOption(func(opt Options) {
2022-01-14 17:17:54 +01:00
opt.pointer.parallelWorkers = nworkers
2022-01-13 23:27:39 +01:00
})
return f
}
2022-01-14 17:17:54 +01:00
// OptionBatchSize sets the requested sequence
// batch size.
2022-01-13 23:27:39 +01:00
func OptionBatchSize(size int) WithOption {
f := WithOption(func(opt Options) {
2022-01-14 17:17:54 +01:00
opt.pointer.batchSize = size
2022-01-13 23:27:39 +01:00
})
return f
}
func _Pcr(seq ApatSequence,
sequence obiseq.BioSequence,
2022-01-13 23:27:39 +01:00
opt Options) obiseq.BioSequenceSlice {
results := make(obiseq.BioSequenceSlice, 0, 10)
forward := opt.pointer.forward
cfwd := opt.pointer.cfwd
reverse := opt.pointer.reverse
crev := opt.pointer.crev
2022-01-14 17:17:54 +01:00
forwardMatches := forward.FindAllIndex(seq)
2022-01-13 23:27:39 +01:00
if len(forwardMatches) > 0 {
2022-01-13 23:27:39 +01:00
2022-01-14 17:17:54 +01:00
begin := forwardMatches[0][0]
2022-01-13 23:27:39 +01:00
length := seq.Length() - begin
2022-01-14 17:17:54 +01:00
if opt.pointer.maxLength > 0 {
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Length()
2022-01-13 23:27:39 +01:00
}
if opt.Circular() {
begin = 0
2022-01-14 17:17:54 +01:00
length = seq.Length() + _MaxPatLen
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
reverseMatches := crev.FindAllIndex(seq, begin, length)
2022-01-13 23:27:39 +01:00
2022-01-14 17:17:54 +01:00
if reverseMatches != nil {
for _, fm := range forwardMatches {
2022-01-13 23:27:39 +01:00
posi := fm[0]
if posi < seq.Length() {
erri := fm[2]
2022-01-14 17:17:54 +01:00
for _, rm := range reverseMatches {
2022-01-13 23:27:39 +01:00
posj := rm[0]
if posj < seq.Length() {
posj := rm[1]
errj := rm[2]
length = 0
if posj > posi {
length = rm[0] - fm[1]
} else {
if opt.Circular() {
length = rm[0] + seq.Length() - posi - forward.Length()
}
}
if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
annot := amplicon.Annotations()
goutils.CopyMap(annot, sequence.Annotations())
annot["forward_primer"] = forward.String()
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
annot["forward_match"] = match.String()
(&match).Recycle()
2022-01-13 23:27:39 +01:00
annot["forward_error"] = erri
annot["reverse_primer"] = reverse.String()
match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
match = match.ReverseComplement(true)
annot["reverse_match"] = match.String()
(&match).Recycle()
2022-01-13 23:27:39 +01:00
annot["reverse_error"] = errj
results = append(results, amplicon)
}
}
}
}
}
}
}
2022-01-14 17:17:54 +01:00
forwardMatches = reverse.FindAllIndex(seq)
if forwardMatches != nil {
2022-01-13 23:27:39 +01:00
2022-01-14 17:17:54 +01:00
begin := forwardMatches[0][0]
2022-01-13 23:27:39 +01:00
length := seq.Length() - begin
2022-01-14 17:17:54 +01:00
if opt.pointer.maxLength > 0 {
length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Length()
2022-01-13 23:27:39 +01:00
}
if opt.Circular() {
begin = 0
2022-01-14 17:17:54 +01:00
length = seq.Length() + _MaxPatLen
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:17:54 +01:00
reverseMatches := cfwd.FindAllIndex(seq, begin, length)
2022-01-13 23:27:39 +01:00
2022-01-14 17:17:54 +01:00
if reverseMatches != nil {
for _, fm := range forwardMatches {
2022-01-13 23:27:39 +01:00
posi := fm[0]
if posi < seq.Length() {
erri := fm[2]
2022-01-14 17:17:54 +01:00
for _, rm := range reverseMatches {
2022-01-13 23:27:39 +01:00
posj := rm[0]
if posj < seq.Length() {
posj := rm[1]
errj := rm[2]
length = 0
if posj > posi {
length = rm[0] - fm[1]
} else {
if opt.Circular() {
length = rm[0] + seq.Length() - posi - forward.Length()
}
}
if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
amplicon = amplicon.ReverseComplement(true)
annot := amplicon.Annotations()
goutils.CopyMap(annot, sequence.Annotations())
annot["forward_primer"] = forward.String()
match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
match.ReverseComplement(true)
annot["forward_match"] = match.String()
(&match).Recycle()
2022-01-13 23:27:39 +01:00
annot["forward_error"] = errj
annot["reverse_primer"] = reverse.String()
match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
annot["reverse_match"] = match.String()
(&match).Recycle()
2022-01-13 23:27:39 +01:00
annot["reverse_error"] = erri
results = append(results, amplicon)
}
}
}
}
}
}
}
return results
}
2022-01-14 17:17:54 +01:00
// PCR runs the PCR simulation algorithm on a single
// obiseq.BioSequence instance. PCR parameters are
// specified using the corresponding Option functions
// defined for the PCR algorithm.
2022-01-27 12:13:27 +01:00
func PCRSim(sequence obiseq.BioSequence, options ...WithOption) obiseq.BioSequenceSlice {
2022-01-13 23:27:39 +01:00
opt := MakeOptions(options)
seq, _ := MakeApatSequence(sequence, opt.Circular())
defer seq.Free()
2022-01-13 23:27:39 +01:00
results := _Pcr(seq, sequence, opt)
2022-01-13 23:27:39 +01:00
return results
}
func _PCRSlice(sequences obiseq.BioSequenceSlice,
options Options) obiseq.BioSequenceSlice {
2022-01-13 23:27:39 +01:00
results := make(obiseq.BioSequenceSlice, 0, len(sequences))
if len(sequences) > 0 {
seq, _ := MakeApatSequence(sequences[0], options.Circular())
amplicons := _Pcr(seq, sequences[0], options)
2022-01-13 23:27:39 +01:00
if len(amplicons) > 0 {
results = append(results, amplicons...)
}
for _, sequence := range sequences[1:] {
seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
amplicons = _Pcr(seq, sequence, options)
2022-01-13 23:27:39 +01:00
if len(amplicons) > 0 {
results = append(results, amplicons...)
}
}
2022-01-24 22:54:41 +01:00
// seq.Free()
2022-01-13 23:27:39 +01:00
}
return results
}
// PCRSlice runs the PCR simulation algorithm on a set of
// obiseq.BioSequence instances grouped in a obiseq.BioSequenceSlice.
// PCR parameters are
// specified using the corresponding Option functions
// defined for the PCR algorithm.
func PCRSlice(sequences obiseq.BioSequenceSlice,
options ...WithOption) obiseq.BioSequenceSlice {
opt := MakeOptions(options)
return _PCRSlice(sequences, opt)
}
2022-01-14 17:17:54 +01:00
// PCRSliceWorker is a worker function builder which produce
// job function usable by the obiseq.MakeISliceWorker function.
func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
2022-01-13 23:27:39 +01:00
opt := MakeOptions(options)
2022-01-13 23:27:39 +01:00
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
return _PCRSlice(sequences, opt)
2022-01-13 23:27:39 +01:00
}
return worker
}