mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Add the -D (--delta) option to obipcr
Former-commit-id: 753079807faaddb23e03fd6350125bfa67f98c7f
This commit is contained in:
@ -2,6 +2,17 @@
|
||||
|
||||
## Latest changes
|
||||
|
||||
### Enhancement
|
||||
|
||||
- A new, completely rewritten Go version of the fastq and fasta parser is now used instead of the original C version.
|
||||
- A new file format guesser is now implemented. This is a first step towards allowing new formats to be managed by obitools.
|
||||
- New way of handling header definitions of fasta and fastq formats with JSON headers.
|
||||
The sequence definition is now printed in new files as an attribute of the json header named "definition".
|
||||
- The -D (--delta) option has been added to `obipcr`. It allows extracting the flanking sequences of the barcode.
|
||||
+ If -D is not set, the output sequence is the barcode itself without the priming sites.
|
||||
+ If -D is set to 0, the output sequence is the barcode with the priming sites.
|
||||
+ When -D is set to ### (where ### is an integer), the output sequence is the barcode with the priming sites
|
||||
and ### base pairs of flanking sequences.
|
||||
### Be careful
|
||||
|
||||
Go 1.21.0 is out, and it includes new functionalities which are used in the OBITools4 code.
|
||||
|
@ -13,6 +13,7 @@ type _Options struct {
|
||||
circular bool
|
||||
forwardError int
|
||||
reverseError int
|
||||
extension int
|
||||
batchSize int
|
||||
parallelWorkers int
|
||||
forward ApatPattern
|
||||
@ -31,6 +32,15 @@ type Options struct {
|
||||
// declaring options.
|
||||
type WithOption func(Options)
|
||||
|
||||
// HasExtension reports whether an extension length is set, that is,
// whether the stored extension value is zero or greater. A negative
// value means that no extension is requested.
func (options Options) HasExtension() bool {
|
||||
return options.pointer.extension > -1
|
||||
|
||||
}
|
||||
|
||||
// Extension returns the extension length stored in the options.
// The value is returned as is; use HasExtension to check that it
// is not negative before relying on it.
func (options Options) Extension() int {
|
||||
return options.pointer.extension
|
||||
}
|
||||
|
||||
// MinLength method returns minimum length of
|
||||
// the searched amplicon (length of the primers
|
||||
// excluded)
|
||||
@ -85,6 +95,7 @@ func MakeOptions(setters []WithOption) Options {
|
||||
maxLength: 0,
|
||||
forwardError: 0,
|
||||
reverseError: 0,
|
||||
extension: -1,
|
||||
circular: false,
|
||||
parallelWorkers: 4,
|
||||
batchSize: 100,
|
||||
@ -148,6 +159,19 @@ func OptionForwardPrimer(primer string, max int) WithOption {
|
||||
return f
|
||||
}
|
||||
|
||||
// OptionWithExtension sets the length of the extension to be added to the sequence.
|
||||
//
|
||||
// A negative value indicates that no extension is added.
|
||||
// The extension parameter is the extension length, stored unchanged in the options.
|
||||
// The returned function is of type WithOption and performs the assignment when called.
|
||||
func OptionWithExtension(extension int) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
// NOTE(review): opt is received by value; the write goes through
// opt.pointer, presumably shared with the caller's Options - confirm.
opt.pointer.extension = extension
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
// OptionForwardError sets the number of
|
||||
// error allowed when matching the forward
|
||||
// primer.
|
||||
@ -253,8 +277,16 @@ func _Pcr(seq ApatSequence,
|
||||
if length > 0 && // For when primers touch or overlap
|
||||
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence()))
|
||||
var from, to int
|
||||
if opt.HasExtension() {
|
||||
from = fm[0] - opt.Extension()
|
||||
to = rm[1] + opt.Extension()
|
||||
} else {
|
||||
from = fm[1]
|
||||
to = rm[0]
|
||||
}
|
||||
amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular)
|
||||
log.Debugf("seq length : %d capacity : %d", amplicon.Len(), cap(amplicon.Sequence()))
|
||||
annot := amplicon.Annotations()
|
||||
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
|
||||
|
||||
@ -287,7 +319,7 @@ func _Pcr(seq ApatSequence,
|
||||
}
|
||||
|
||||
forwardMatches = reverse.FindAllIndex(seq, 0, -1)
|
||||
|
||||
|
||||
if forwardMatches != nil {
|
||||
|
||||
begin := forwardMatches[0][0]
|
||||
@ -303,7 +335,6 @@ func _Pcr(seq ApatSequence,
|
||||
}
|
||||
|
||||
reverseMatches := cfwd.FindAllIndex(seq, begin, length)
|
||||
|
||||
|
||||
if reverseMatches != nil {
|
||||
for _, fm := range forwardMatches {
|
||||
@ -331,7 +362,15 @@ func _Pcr(seq ApatSequence,
|
||||
if length > 0 && // For when primers touch or overlap
|
||||
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
|
||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
var from, to int
|
||||
if opt.HasExtension() {
|
||||
from = fm[0] - opt.Extension()
|
||||
to = rm[1] + opt.Extension()
|
||||
} else {
|
||||
from = fm[1]
|
||||
to = rm[0]
|
||||
}
|
||||
amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular)
|
||||
amplicon = amplicon.ReverseComplement(true)
|
||||
|
||||
annot := amplicon.Annotations()
|
||||
@ -401,13 +440,13 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
|
||||
for _, sequence := range sequences[1:] {
|
||||
seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
|
||||
amplicons = _Pcr(seq, options)
|
||||
|
||||
|
||||
if len(amplicons) > 0 {
|
||||
results = append(results, amplicons...)
|
||||
}
|
||||
}
|
||||
|
||||
//log.Debugln(AllocatedApaSequences())
|
||||
//log.Debugln(AllocatedApaSequences())
|
||||
|
||||
// seq.Free()
|
||||
}
|
||||
@ -433,7 +472,7 @@ func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
|
||||
|
||||
opt := MakeOptions(options)
|
||||
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||
result := _PCRSlice(sequences, opt)
|
||||
result := _PCRSlice(sequences, opt)
|
||||
sequences.Recycle(true)
|
||||
return result
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ var _AllowedMismatch = 0
|
||||
var _MinimumLength = 0
|
||||
var _MaximumLength = -1
|
||||
var _Fragmented = false
|
||||
// _Delta holds the value of the --delta (-D) option. It defaults to -1;
// CLIWithExtension treats any negative value as "no extension requested".
var _Delta = -1
|
||||
|
||||
// PCROptionSet defines every options related to a simulated PCR.
|
||||
//
|
||||
@ -54,6 +55,9 @@ func PCROptionSet(options *getoptions.GetOpt) {
|
||||
options.Alias("L"),
|
||||
options.Required("You must indicate the maximum size of the amplicon (excluded primer length)"),
|
||||
options.Description("Maximum length of the barcode (primers excluded)."))
|
||||
options.IntVar(&_Delta, "delta", -1,
|
||||
options.Alias("D"),
|
||||
options.Description("Lenght of the sequence fragment to be added to the barcode extremities."))
|
||||
}
|
||||
|
||||
// OptionSet adds to the basic option set every options declared for
|
||||
@ -119,3 +123,11 @@ func CLIMaxLength() int {
|
||||
func CLIFragmented() bool {
|
||||
return _Fragmented
|
||||
}
|
||||
|
||||
// CLIWithExtension reports whether the --delta (-D) value is zero or
// greater. The default value of -1 makes it return false.
func CLIWithExtension() bool {
|
||||
return _Delta >= 0
|
||||
}
|
||||
|
||||
// CLIExtension returns the raw --delta (-D) value, including the
// negative default; check CLIWithExtension before using it as a length.
func CLIExtension() int {
|
||||
return _Delta
|
||||
}
|
||||
|
@ -30,6 +30,10 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
|
||||
opts = append(opts, obiapat.OptionMinLength(CLIMinLength()))
|
||||
}
|
||||
|
||||
if CLIWithExtension() {
|
||||
opts = append(opts, obiapat.OptionWithExtension(CLIExtension()))
|
||||
}
|
||||
|
||||
opts = append(opts, obiapat.OptionMaxLength(CLIMaxLength()))
|
||||
|
||||
if CLICircular() {
|
||||
|
Reference in New Issue
Block a user