diff --git a/Release-notes.md b/Release-notes.md index 2e174cc..a8312ac 100644 --- a/Release-notes.md +++ b/Release-notes.md @@ -2,6 +2,17 @@ ## Latest changes +### Enhancement + +- A new, completely rewritten Go version of the fastq and fasta parser is now used instead of the original C version. +- A new file format guesser is now implemented. This is a first step towards allowing new formats to be managed by obitools. +- New way of handling header definitions of fasta and fastq formats with JSON headers. + The sequence definition is now printed in new files as an attribute of the JSON header named "definition". +- The -D (--delta) option has been added to `obipcr`. It allows the extraction of the sequences flanking the barcode. + + If -D is not set, the output sequence is the barcode itself without the priming sites. + + If -D is set to 0, the output sequence is the barcode with the priming sites. + + When -D is set to ### (where ### is an integer), the output sequence is the barcode with the priming sites + and ### base pairs of flanking sequences. ### Becareful GO 1.21.0 is out, and it includes new functionalities which are used in the OBITools4 code. diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index f090482..3585037 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -13,6 +13,7 @@ type _Options struct { circular bool forwardError int reverseError int + extension int batchSize int parallelWorkers int forward ApatPattern @@ -31,6 +32,15 @@ type Options struct { // declaring options. 
type WithOption func(Options) +func (options Options) HasExtension() bool { + return options.pointer.extension > -1 + +} + +func (options Options) Extension() int { + return options.pointer.extension +} + // MinLength method returns minimum length of // the searched amplicon (length of the primers // excluded) @@ -85,6 +95,7 @@ func MakeOptions(setters []WithOption) Options { maxLength: 0, forwardError: 0, reverseError: 0, + extension: -1, circular: false, parallelWorkers: 4, batchSize: 100, @@ -148,6 +159,19 @@ func OptionForwardPrimer(primer string, max int) WithOption { return f } +// OptionWithExtension sets the length of the extension to be added to the sequence. +// +// A negative value indicates that no extension is added. +// The extension parameter is an integer that represents the extension value to be set. +// The returned function is of type WithOption. +func OptionWithExtension(extension int) WithOption { + f := WithOption(func(opt Options) { + opt.pointer.extension = extension + }) + + return f +} + // OptionForwardError sets the number of // error allowed when matching the forward // primer. 
@@ -253,8 +277,16 @@ func _Pcr(seq ApatSequence, if length > 0 && // For when primers touch or overlap (opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MaxLength() == 0 || length <= opt.MaxLength()) { - amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular) - log.Debugf("seq length : %d capacity : %d",amplicon.Len(),cap(amplicon.Sequence())) + var from, to int + if opt.HasExtension() { + from = fm[0] - opt.Extension() + to = rm[1] + opt.Extension() + } else { + from = fm[1] + to = rm[0] + } + amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular) + log.Debugf("seq length : %d capacity : %d", amplicon.Len(), cap(amplicon.Sequence())) annot := amplicon.Annotations() obiutils.MustFillMap(annot, seq.pointer.reference.Annotations()) @@ -287,7 +319,7 @@ func _Pcr(seq ApatSequence, } forwardMatches = reverse.FindAllIndex(seq, 0, -1) - + if forwardMatches != nil { begin := forwardMatches[0][0] @@ -303,7 +335,6 @@ func _Pcr(seq ApatSequence, } reverseMatches := cfwd.FindAllIndex(seq, begin, length) - if reverseMatches != nil { for _, fm := range forwardMatches { @@ -331,7 +362,15 @@ func _Pcr(seq ApatSequence, if length > 0 && // For when primers touch or overlap (opt.MinLength() == 0 || length >= opt.MinLength()) && (opt.MaxLength() == 0 || length <= opt.MaxLength()) { - amplicon, _ := seq.pointer.reference.Subsequence(fm[1], rm[0], opt.pointer.circular) + var from, to int + if opt.HasExtension() { + from = fm[0] - opt.Extension() + to = rm[1] + opt.Extension() + } else { + from = fm[1] + to = rm[0] + } + amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular) amplicon = amplicon.ReverseComplement(true) annot := amplicon.Annotations() @@ -401,13 +440,13 @@ func _PCRSlice(sequences obiseq.BioSequenceSlice, for _, sequence := range sequences[1:] { seq, _ = MakeApatSequence(sequence, options.Circular(), seq) amplicons = _Pcr(seq, options) - + if len(amplicons) > 0 { results = 
append(results, amplicons...) } } - //log.Debugln(AllocatedApaSequences()) + //log.Debugln(AllocatedApaSequences()) // seq.Free() } @@ -433,7 +472,7 @@ func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker { opt := MakeOptions(options) worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { - result := _PCRSlice(sequences, opt) + result := _PCRSlice(sequences, opt) sequences.Recycle(true) return result } diff --git a/pkg/obitools/obipcr/options.go b/pkg/obitools/obipcr/options.go index 5814ba1..a2f0b44 100644 --- a/pkg/obitools/obipcr/options.go +++ b/pkg/obitools/obipcr/options.go @@ -17,6 +17,7 @@ var _AllowedMismatch = 0 var _MinimumLength = 0 var _MaximumLength = -1 var _Fragmented = false +var _Delta = -1 // PCROptionSet defines every options related to a simulated PCR. // @@ -54,6 +55,9 @@ func PCROptionSet(options *getoptions.GetOpt) { options.Alias("L"), options.Required("You must indicate the maximum size of the amplicon (excluded primer length)"), options.Description("Maximum length of the barcode (primers excluded).")) + options.IntVar(&_Delta, "delta", -1, + options.Alias("D"), + options.Description("Lenght of the sequence fragment to be added to the barcode extremities.")) } // OptionSet adds to the basic option set every options declared for @@ -119,3 +123,11 @@ func CLIMaxLength() int { func CLIFragmented() bool { return _Fragmented } + +func CLIWithExtension() bool { + return _Delta >= 0 +} + +func CLIExtension() int { + return _Delta +} diff --git a/pkg/obitools/obipcr/pcr.go b/pkg/obitools/obipcr/pcr.go index 2ec4288..e290540 100644 --- a/pkg/obitools/obipcr/pcr.go +++ b/pkg/obitools/obipcr/pcr.go @@ -30,6 +30,10 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) { opts = append(opts, obiapat.OptionMinLength(CLIMinLength())) } + if CLIWithExtension() { + opts = append(opts, obiapat.OptionWithExtension(CLIExtension())) + } + opts = append(opts, obiapat.OptionMaxLength(CLIMaxLength())) if 
CLICircular() {