package obiapat import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) type _Options struct { minLength int maxLength int circular bool forwardError int reverseError int batchSize int parallelWorkers int forward ApatPattern cfwd ApatPattern reverse ApatPattern crev ApatPattern } // Options stores a set of option usable by the // PCR simulation algotithm. type Options struct { pointer *_Options } // WithOption is the standard type for function // declaring options. type WithOption func(Options) // MinLength method returns minimum length of // the searched amplicon (length of the primers // excluded) func (options Options) MinLength() int { return options.pointer.minLength } // MaxLength method returns maximum length of // the searched amplicon (length of the primers // excluded) func (options Options) MaxLength() int { return options.pointer.maxLength } // ForwardError method returns the number of // error allowed when matching the forward // primer. func (options Options) ForwardError() int { return options.pointer.forwardError } // ReverseError method returns the number of // error allowed when matching the reverse // primer. func (options Options) ReverseError() int { return options.pointer.reverseError } // Circular method returns the topology option. // true for circular, false for linear func (options Options) Circular() bool { return options.pointer.circular } // BatchSize returns the size of the // sequence batch used by the PCR algorithm func (options Options) BatchSize() int { return options.pointer.batchSize } // ParallelWorkers returns how many search // jobs will be run in parallel. func (options Options) ParallelWorkers() int { return options.pointer.parallelWorkers } // MakeOptions buils a new default option set for // the PCR simulation algoithm. func MakeOptions(setters []WithOption) Options { o := _Options{ minLength: 0, maxLength: 0, forwardError: 0, reverseError: 0, circular: false, parallelWorkers: 4, batchSize: 100, forward: NilApatPattern, cfwd: NilApatPattern, reverse: NilApatPattern, crev: NilApatPattern, } opt := Options{&o} for _, set := range setters { set(opt) } return opt } // OptionMinLength sets the minimum length of // the searched amplicon (length of the primers // excluded) func OptionMinLength(minLength int) WithOption { f := WithOption(func(opt Options) { opt.pointer.minLength = minLength }) return f } // OptionMaxLength sets the maximum length of // the searched amplicon (length of the primers // excluded) func OptionMaxLength(maxLength int) WithOption { f := WithOption(func(opt Options) { opt.pointer.maxLength = maxLength }) return f } // OptionForwardError sets the number of // error allowed when matching the forward // primer. func OptionForwardPrimer(primer string, max int) WithOption { f := WithOption(func(opt Options) { var err error opt.pointer.forward, err = MakeApatPattern(primer, max) if err != nil { log.Fatalf("error : %v\n", err) } opt.pointer.cfwd, err = opt.pointer.forward.ReverseComplement() if err != nil { log.Fatalf("error : %v\n", err) } opt.pointer.forwardError = max }) return f } // OptionForwardError sets the number of // error allowed when matching the forward // primer. func OptionReversePrimer(primer string, max int) WithOption { f := WithOption(func(opt Options) { var err error opt.pointer.reverse, err = MakeApatPattern(primer, max) if err != nil { log.Fatalf("error : %v\n", err) } opt.pointer.crev, err = opt.pointer.reverse.ReverseComplement() if err != nil { log.Fatalf("error : %v\n", err) } opt.pointer.reverseError = max }) return f } // OptionCircular sets the topology option. // true for circular, false for linear func OptionCircular(circular bool) WithOption { f := WithOption(func(opt Options) { opt.pointer.circular = circular }) return f } // OptionParallelWorkers sets how many search // jobs will be run in parallel. func OptionParallelWorkers(nworkers int) WithOption { f := WithOption(func(opt Options) { opt.pointer.parallelWorkers = nworkers }) return f } // OptionBatchSize sets the requested sequence // batch size. func OptionBatchSize(size int) WithOption { f := WithOption(func(opt Options) { opt.pointer.batchSize = size }) return f } func _Pcr(seq ApatSequence, sequence *obiseq.BioSequence, opt Options) obiseq.BioSequenceSlice { results := make(obiseq.BioSequenceSlice, 0, 10) forward := opt.pointer.forward cfwd := opt.pointer.cfwd reverse := opt.pointer.reverse crev := opt.pointer.crev forwardMatches := forward.FindAllIndex(seq) if len(forwardMatches) > 0 { begin := forwardMatches[0][0] length := seq.Len() - begin if opt.pointer.maxLength > 0 { length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len() } if opt.Circular() { begin = 0 length = seq.Len() + _MaxPatLen } reverseMatches := crev.FindAllIndex(seq, begin, length) if reverseMatches != nil { for _, fm := range forwardMatches { posi := fm[0] if posi < seq.Len() { erri := fm[2] for _, rm := range reverseMatches { posj := rm[0] if posj < seq.Len() { posj := rm[1] errj := rm[2] length = 0 if posj > posi { length = rm[0] - fm[1] } else { if opt.Circular() { length = rm[0] + seq.Len() - posi - forward.Len() } } if length > 0 && // For when primers touch or overlap (opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MaxLength() == 0 || length <= opt.MaxLength()) { amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular) annot := amplicon.Annotations() goutils.MustFillMap(annot, sequence.Annotations()) annot["forward_primer"] = forward.String() match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) annot["forward_match"] = match.String() match.Recycle() annot["forward_error"] = erri annot["reverse_primer"] = reverse.String() match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) match = match.ReverseComplement(true) annot["reverse_match"] = match.String() match.Recycle() annot["reverse_error"] = errj // log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence())) results = append(results, amplicon) } } } } } } } forwardMatches = reverse.FindAllIndex(seq) if forwardMatches != nil { begin := forwardMatches[0][0] length := seq.Len() - begin if opt.pointer.maxLength > 0 { length = forwardMatches[len(forwardMatches)-1][2] - begin + opt.MaxLength() + reverse.Len() } if opt.Circular() { begin = 0 length = seq.Len() + _MaxPatLen } reverseMatches := cfwd.FindAllIndex(seq, begin, length) if reverseMatches != nil { for _, fm := range forwardMatches { posi := fm[0] if posi < seq.Len() { erri := fm[2] for _, rm := range reverseMatches { posj := rm[0] if posj < seq.Len() { posj := rm[1] errj := rm[2] length = 0 if posj > posi { length = rm[0] - fm[1] } else { if opt.Circular() { length = rm[0] + seq.Len() - posi - forward.Len() } } if length > 0 && // For when primers touch or overlap (opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MaxLength() == 0 || length <= opt.MaxLength()) { amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular) amplicon = amplicon.ReverseComplement(true) annot := amplicon.Annotations() goutils.MustFillMap(annot, sequence.Annotations()) annot["forward_primer"] = forward.String() match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) match.ReverseComplement(true) annot["forward_match"] = match.String() match.Recycle() annot["forward_error"] = errj annot["reverse_primer"] = reverse.String() match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) annot["reverse_match"] = match.String() match.Recycle() annot["reverse_error"] = erri results = append(results, amplicon) // log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence())) } } } } } } } return results } // PCR runs the PCR simulation algorithm on a single // obiseq.BioSequence instance. PCR parameters are // specified using the corresponding Option functions // defined for the PCR algorithm. func PCRSim(sequence *obiseq.BioSequence, options ...WithOption) obiseq.BioSequenceSlice { opt := MakeOptions(options) seq, _ := MakeApatSequence(sequence, opt.Circular()) defer seq.Free() results := _Pcr(seq, sequence, opt) return results } func _PCRSlice(sequences obiseq.BioSequenceSlice, options Options) obiseq.BioSequenceSlice { results := make(obiseq.BioSequenceSlice, 0, len(sequences)) if len(sequences) > 0 { seq, _ := MakeApatSequence(sequences[0], options.Circular()) // if AllocatedApaSequences() == 0 { // log.Panicln("Bizarre....") // } amplicons := _Pcr(seq, sequences[0], options) if len(amplicons) > 0 { results = append(results, amplicons...) } for _, sequence := range sequences[1:] { seq, _ = MakeApatSequence(sequence, options.Circular(), seq) amplicons = _Pcr(seq, sequence, options) if len(amplicons) > 0 { results = append(results, amplicons...) } } // log.Println(AllocatedApaSequences()) // seq.Free() } return results } // PCRSlice runs the PCR simulation algorithm on a set of // obiseq.BioSequence instances grouped in a obiseq.BioSequenceSlice. // PCR parameters are // specified using the corresponding Option functions // defined for the PCR algorithm. func PCRSlice(sequences obiseq.BioSequenceSlice, options ...WithOption) obiseq.BioSequenceSlice { opt := MakeOptions(options) return _PCRSlice(sequences, opt) } // PCRSliceWorker is a worker function builder which produce // job function usable by the obiseq.MakeISliceWorker function. func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker { opt := MakeOptions(options) worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { return _PCRSlice(sequences, opt) } return worker }