Add the -D (--delta) option to obipcr

Former-commit-id: 753079807faaddb23e03fd6350125bfa67f98c7f
This commit is contained in:
2023-10-13 17:04:10 +02:00
parent 157c26cdc7
commit ddf0f2cede
4 changed files with 74 additions and 8 deletions

View File

@ -2,6 +2,17 @@
## Latest changes
### Enhancement
- A new completely rewritten GO version of the fastq and fasta parser is now used instead of the original C version.
- A new file format guesser is now implemented. This is a first step towards allowing new formats to be managed by obitools.
- New way of handling header definitions of fasta and fastq formats with JSON headers.
The sequence definition is now printed in new files as an attribute of the json header named "definition".
- The -D (--delta) option has been added to `obipcr`. It allows extracting the flanking sequences of the barcode.
+ If -D is not set, the output sequence is the barcode itself without the priming sites.
+ If -D is set to 0, the output sequence is the barcode with the priming sites.
+ When -D is set to ### (where ### is a positive integer), the output sequence is the barcode with the priming sites
and ### base pairs of flanking sequence on each side.
### Be careful
GO 1.21.0 is out, and it includes new functionalities which are used in the OBITools4 code.

View File

@ -13,6 +13,7 @@ type _Options struct {
circular bool
forwardError int
reverseError int
extension int
batchSize int
parallelWorkers int
forward ApatPattern
@ -31,6 +32,15 @@ type Options struct {
// declaring options.
type WithOption func(Options)
// HasExtension reports whether an extension length has been set on the
// options, i.e. whether the stored extension is zero or positive.
// Any negative value (MakeOptions starts with -1) means "no extension".
func (options Options) HasExtension() bool {
	return options.pointer.extension >= 0
}
// Extension returns the extension length stored in the options.
//
// _Pcr subtracts this value from the start of the forward match and adds
// it to the end of the reverse match when HasExtension is true. The raw
// value is returned unchanged, so it is negative (-1 after MakeOptions)
// when no extension has been requested.
func (options Options) Extension() int {
return options.pointer.extension
}
// MinLength method returns minimum length of
// the searched amplicon (length of the primers
// excluded)
@ -85,6 +95,7 @@ func MakeOptions(setters []WithOption) Options {
maxLength: 0,
forwardError: 0,
reverseError: 0,
extension: -1,
circular: false,
parallelWorkers: 4,
batchSize: 100,
@ -148,6 +159,19 @@ func OptionForwardPrimer(primer string, max int) WithOption {
return f
}
// OptionWithExtension builds a WithOption that stores extension as the
// extension length of the Options it is applied to.
//
// A negative value means that no extension is requested: HasExtension
// only reports true for values greater than or equal to zero.
func OptionWithExtension(extension int) WithOption {
	return func(opt Options) {
		opt.pointer.extension = extension
	}
}
// OptionForwardError sets the number of
// error allowed when matching the forward
// primer.
@ -253,8 +277,16 @@ func _Pcr(seq ApatSequence,
if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence()))
var from, to int
if opt.HasExtension() {
from = fm[0] - opt.Extension()
to = rm[1] + opt.Extension()
} else {
from = fm[1]
to = rm[0]
}
amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular)
log.Debugf("seq length : %d capacity : %d", amplicon.Len(), cap(amplicon.Sequence()))
annot := amplicon.Annotations()
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
@ -287,7 +319,7 @@ func _Pcr(seq ApatSequence,
}
forwardMatches = reverse.FindAllIndex(seq, 0, -1)
if forwardMatches != nil {
begin := forwardMatches[0][0]
@ -303,7 +335,6 @@ func _Pcr(seq ApatSequence,
}
reverseMatches := cfwd.FindAllIndex(seq, begin, length)
if reverseMatches != nil {
for _, fm := range forwardMatches {
@ -331,7 +362,15 @@ func _Pcr(seq ApatSequence,
if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular)
var from, to int
if opt.HasExtension() {
from = fm[0] - opt.Extension()
to = rm[1] + opt.Extension()
} else {
from = fm[1]
to = rm[0]
}
amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular)
amplicon = amplicon.ReverseComplement(true)
annot := amplicon.Annotations()
@ -401,13 +440,13 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
for _, sequence := range sequences[1:] {
seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
amplicons = _Pcr(seq, options)
if len(amplicons) > 0 {
results = append(results, amplicons...)
}
}
//log.Debugln(AllocatedApaSequences())
//log.Debugln(AllocatedApaSequences())
// seq.Free()
}
@ -433,7 +472,7 @@ func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
opt := MakeOptions(options)
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
result := _PCRSlice(sequences, opt)
result := _PCRSlice(sequences, opt)
sequences.Recycle(true)
return result
}

View File

@ -17,6 +17,7 @@ var _AllowedMismatch = 0
var _MinimumLength = 0
var _MaximumLength = -1
var _Fragmented = false
// _Delta receives the value of the -D/--delta command line option.
// The default of -1 marks the option as not set (see CLIWithExtension).
var _Delta = -1
// PCROptionSet defines every options related to a simulated PCR.
//
@ -54,6 +55,9 @@ func PCROptionSet(options *getoptions.GetOpt) {
options.Alias("L"),
options.Required("You must indicate the maximum size of the amplicon (excluded primer length)"),
options.Description("Maximum length of the barcode (primers excluded)."))
options.IntVar(&_Delta, "delta", -1,
options.Alias("D"),
options.Description("Lenght of the sequence fragment to be added to the barcode extremities."))
}
// OptionSet adds to the basic option set every options declared for
@ -119,3 +123,11 @@ func CLIMaxLength() int {
// CLIFragmented returns the current value of the package-level
// _Fragmented flag (false by default).
func CLIFragmented() bool {
return _Fragmented
}
// CLIWithExtension reports whether the -D/--delta option holds a usable
// value, i.e. zero or more. The option defaults to -1, so this is false
// when the option has not been given on the command line.
func CLIWithExtension() bool {
	return _Delta > -1
}
// CLIExtension returns the raw value of the -D/--delta option.
// The value is -1 when the option was left at its default; callers are
// expected to check CLIWithExtension before using it.
func CLIExtension() int {
return _Delta
}

View File

@ -30,6 +30,10 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
opts = append(opts, obiapat.OptionMinLength(CLIMinLength()))
}
if CLIWithExtension() {
opts = append(opts, obiapat.OptionWithExtension(CLIExtension()))
}
opts = append(opts, obiapat.OptionMaxLength(CLIMaxLength()))
if CLICircular() {