Add the -D (--delta) option to obipcr

Former-commit-id: 753079807faaddb23e03fd6350125bfa67f98c7f
This commit is contained in:
2023-10-13 17:04:10 +02:00
parent 157c26cdc7
commit ddf0f2cede
4 changed files with 74 additions and 8 deletions

View File

@ -2,6 +2,17 @@
## Latest changes ## Latest changes
### Enhancement
- A new completely rewritten GO version of the fastq and fasta parser is now used instead of the original C version.
- A new file format guesser is now implemented. This is a first step towards allowing new formats to be managed by obitools.
- New way of handling header definitions of fasta and fastq formats with JSON headers.
The sequence definition is now printed in new files as an attribute of the json header named "definition".
- The -D (--delta) option has been added to `obipcr`. It allows extracting the flanking sequences of the barcode.
+ If -D is not set, the output sequence is the barcode itself without the priming sites.
+ If -D is set to 0, the output sequence is the barcode with the priming sites.
+ When -D is set to ### (where ### is a positive integer), the output sequence is the barcode with the priming sites
and ### base pairs of flanking sequence on each side.
### Be careful ### Be careful
GO 1.21.0 is out, and it includes new functionalities which are used in the OBITools4 code. GO 1.21.0 is out, and it includes new functionalities which are used in the OBITools4 code.

View File

@ -13,6 +13,7 @@ type _Options struct {
circular bool circular bool
forwardError int forwardError int
reverseError int reverseError int
extension int
batchSize int batchSize int
parallelWorkers int parallelWorkers int
forward ApatPattern forward ApatPattern
@ -31,6 +32,15 @@ type Options struct {
// declaring options. // declaring options.
type WithOption func(Options) type WithOption func(Options)
// HasExtension reports whether an extension length has been set,
// i.e. whether the stored extension value is zero or positive.
// A negative value (MakeOptions initialises the field to -1)
// means that no extension was requested.
func (options Options) HasExtension() bool {
	return options.pointer.extension >= 0
}
// Extension returns the extension length stored in the options.
// The value is negative when no extension has been requested;
// use HasExtension to test for that case before relying on it.
func (options Options) Extension() int {
	settings := options.pointer
	return settings.extension
}
// MinLength method returns minimum length of // MinLength method returns minimum length of
// the searched amplicon (length of the primers // the searched amplicon (length of the primers
// excluded) // excluded)
@ -85,6 +95,7 @@ func MakeOptions(setters []WithOption) Options {
maxLength: 0, maxLength: 0,
forwardError: 0, forwardError: 0,
reverseError: 0, reverseError: 0,
extension: -1,
circular: false, circular: false,
parallelWorkers: 4, parallelWorkers: 4,
batchSize: 100, batchSize: 100,
@ -148,6 +159,19 @@ func OptionForwardPrimer(primer string, max int) WithOption {
return f return f
} }
// OptionWithExtension returns a WithOption that records the length of
// the extension to be added to the extracted sequence.
//
// The extension parameter is stored as is in the options. A negative
// value indicates that no extension is added (HasExtension then
// reports false).
func OptionWithExtension(extension int) WithOption {
	return func(opt Options) {
		opt.pointer.extension = extension
	}
}
// OptionForwardError sets the number of // OptionForwardError sets the number of
// error allowed when matching the forward // error allowed when matching the forward
// primer. // primer.
@ -253,8 +277,16 @@ func _Pcr(seq ApatSequence,
if length > 0 && // For when primers touch or overlap if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) { (opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular) var from, to int
log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence())) if opt.HasExtension() {
from = fm[0] - opt.Extension()
to = rm[1] + opt.Extension()
} else {
from = fm[1]
to = rm[0]
}
amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular)
log.Debugf("seq length : %d capacity : %d", amplicon.Len(), cap(amplicon.Sequence()))
annot := amplicon.Annotations() annot := amplicon.Annotations()
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations()) obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
@ -287,7 +319,7 @@ func _Pcr(seq ApatSequence,
} }
forwardMatches = reverse.FindAllIndex(seq, 0, -1) forwardMatches = reverse.FindAllIndex(seq, 0, -1)
if forwardMatches != nil { if forwardMatches != nil {
begin := forwardMatches[0][0] begin := forwardMatches[0][0]
@ -303,7 +335,6 @@ func _Pcr(seq ApatSequence,
} }
reverseMatches := cfwd.FindAllIndex(seq, begin, length) reverseMatches := cfwd.FindAllIndex(seq, begin, length)
if reverseMatches != nil { if reverseMatches != nil {
for _, fm := range forwardMatches { for _, fm := range forwardMatches {
@ -331,7 +362,15 @@ func _Pcr(seq ApatSequence,
if length > 0 && // For when primers touch or overlap if length > 0 && // For when primers touch or overlap
(opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MinLength() == 0 || length >= opt.MinLength()) &&
(opt.MaxLength() == 0 || length <= opt.MaxLength()) { (opt.MaxLength() == 0 || length <= opt.MaxLength()) {
amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular) var from, to int
if opt.HasExtension() {
from = fm[0] - opt.Extension()
to = rm[1] + opt.Extension()
} else {
from = fm[1]
to = rm[0]
}
amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular)
amplicon = amplicon.ReverseComplement(true) amplicon = amplicon.ReverseComplement(true)
annot := amplicon.Annotations() annot := amplicon.Annotations()
@ -401,13 +440,13 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice,
for _, sequence := range sequences[1:] { for _, sequence := range sequences[1:] {
seq, _ = MakeApatSequence(sequence, options.Circular(), seq) seq, _ = MakeApatSequence(sequence, options.Circular(), seq)
amplicons = _Pcr(seq, options) amplicons = _Pcr(seq, options)
if len(amplicons) > 0 { if len(amplicons) > 0 {
results = append(results, amplicons...) results = append(results, amplicons...)
} }
} }
//log.Debugln(AllocatedApaSequences()) //log.Debugln(AllocatedApaSequences())
// seq.Free() // seq.Free()
} }
@ -433,7 +472,7 @@ func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
opt := MakeOptions(options) opt := MakeOptions(options)
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
result := _PCRSlice(sequences, opt) result := _PCRSlice(sequences, opt)
sequences.Recycle(true) sequences.Recycle(true)
return result return result
} }

View File

@ -17,6 +17,7 @@ var _AllowedMismatch = 0
var _MinimumLength = 0 var _MinimumLength = 0
var _MaximumLength = -1 var _MaximumLength = -1
var _Fragmented = false var _Fragmented = false
var _Delta = -1
// PCROptionSet defines every options related to a simulated PCR. // PCROptionSet defines every options related to a simulated PCR.
// //
@ -54,6 +55,9 @@ func PCROptionSet(options *getoptions.GetOpt) {
options.Alias("L"), options.Alias("L"),
options.Required("You must indicate the maximum size of the amplicon (excluded primer length)"), options.Required("You must indicate the maximum size of the amplicon (excluded primer length)"),
options.Description("Maximum length of the barcode (primers excluded).")) options.Description("Maximum length of the barcode (primers excluded)."))
options.IntVar(&_Delta, "delta", -1,
options.Alias("D"),
options.Description("Lenght of the sequence fragment to be added to the barcode extremities."))
} }
// OptionSet adds to the basic option set every options declared for // OptionSet adds to the basic option set every options declared for
@ -119,3 +123,11 @@ func CLIMaxLength() int {
func CLIFragmented() bool { func CLIFragmented() bool {
return _Fragmented return _Fragmented
} }
// CLIWithExtension reports whether the value bound to the
// -D (--delta) command line option is zero or positive.
// The option defaults to -1, in which case false is returned.
func CLIWithExtension() bool {
	return _Delta > -1
}
// CLIExtension returns the value bound to the -D (--delta)
// command line option. It is -1 when the option keeps its default.
func CLIExtension() int {
	delta := _Delta
	return delta
}

View File

@ -30,6 +30,10 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
opts = append(opts, obiapat.OptionMinLength(CLIMinLength())) opts = append(opts, obiapat.OptionMinLength(CLIMinLength()))
} }
if CLIWithExtension() {
opts = append(opts, obiapat.OptionWithExtension(CLIExtension()))
}
opts = append(opts, obiapat.OptionMaxLength(CLIMaxLength())) opts = append(opts, obiapat.OptionMaxLength(CLIMaxLength()))
if CLICircular() { if CLICircular() {