mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
optimize sequence readers and patch a bug in the format guesser
Former-commit-id: 9dce1e96c57ae9a88c26fac5c8e1bdcdc2c0c7a5
This commit is contained in:
@ -14,6 +14,7 @@ type _Options struct {
|
|||||||
forwardError int
|
forwardError int
|
||||||
reverseError int
|
reverseError int
|
||||||
extension int
|
extension int
|
||||||
|
fullExtension bool
|
||||||
batchSize int
|
batchSize int
|
||||||
parallelWorkers int
|
parallelWorkers int
|
||||||
forward ApatPattern
|
forward ApatPattern
|
||||||
@ -41,6 +42,10 @@ func (options Options) Extension() int {
|
|||||||
return options.pointer.extension
|
return options.pointer.extension
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (options Options) OnlyFullExtension() bool {
|
||||||
|
return options.pointer.fullExtension
|
||||||
|
}
|
||||||
|
|
||||||
// MinLength method returns minimum length of
|
// MinLength method returns minimum length of
|
||||||
// the searched amplicon (length of the primers
|
// the searched amplicon (length of the primers
|
||||||
// excluded)
|
// excluded)
|
||||||
@ -96,6 +101,7 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
forwardError: 0,
|
forwardError: 0,
|
||||||
reverseError: 0,
|
reverseError: 0,
|
||||||
extension: -1,
|
extension: -1,
|
||||||
|
fullExtension: false,
|
||||||
circular: false,
|
circular: false,
|
||||||
parallelWorkers: 4,
|
parallelWorkers: 4,
|
||||||
batchSize: 100,
|
batchSize: 100,
|
||||||
@ -172,6 +178,14 @@ func OptionWithExtension(extension int) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func OptionOnlyFullExtension(full bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.fullExtension = full
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
// OptionForwardError sets the number of
|
// OptionForwardError sets the number of
|
||||||
// error allowed when matching the forward
|
// error allowed when matching the forward
|
||||||
// primer.
|
// primer.
|
||||||
@ -285,31 +299,51 @@ func _Pcr(seq ApatSequence,
|
|||||||
from = fm[1]
|
from = fm[1]
|
||||||
to = rm[0]
|
to = rm[0]
|
||||||
}
|
}
|
||||||
amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular)
|
|
||||||
log.Debugf("seq length : %d capacity : %d", amplicon.Len(), cap(amplicon.Sequence()))
|
|
||||||
annot := amplicon.Annotations()
|
|
||||||
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
|
|
||||||
|
|
||||||
annot["forward_primer"] = forward.String()
|
if opt.HasExtension() && !opt.OnlyFullExtension() && !opt.Circular() {
|
||||||
|
if from < 0 {
|
||||||
|
from = 0
|
||||||
|
}
|
||||||
|
if to > seq.Len() {
|
||||||
|
to = seq.Len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
match, _ := seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
if (opt.HasExtension() && ((from >= 0 && to <= seq.Len()) || opt.Circular())) ||
|
||||||
annot["forward_match"] = match.String()
|
!opt.HasExtension() {
|
||||||
match.Recycle()
|
|
||||||
|
|
||||||
annot["forward_error"] = erri
|
amplicon, error := seq.pointer.reference.Subsequence(from, to, opt.Circular())
|
||||||
|
|
||||||
annot["reverse_primer"] = reverse.String()
|
if error != nil {
|
||||||
match, _ = seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
log.Fatalf("error : %v\n", error)
|
||||||
match = match.ReverseComplement(true)
|
}
|
||||||
annot["reverse_match"] = match.String()
|
|
||||||
match.Recycle()
|
|
||||||
|
|
||||||
annot["reverse_error"] = errj
|
log.Debugf("seq length : %d capacity : %d", amplicon.Len(), cap(amplicon.Sequence()))
|
||||||
annot["direction"] = "forward"
|
annot := amplicon.Annotations()
|
||||||
|
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
|
||||||
|
|
||||||
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
annot["forward_primer"] = forward.String()
|
||||||
|
|
||||||
results = append(results, amplicon)
|
match, _ := seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||||
|
annot["forward_match"] = match.String()
|
||||||
|
match.Recycle()
|
||||||
|
|
||||||
|
annot["forward_error"] = erri
|
||||||
|
|
||||||
|
annot["reverse_primer"] = reverse.String()
|
||||||
|
match, _ = seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||||
|
match = match.ReverseComplement(true)
|
||||||
|
annot["reverse_match"] = match.String()
|
||||||
|
match.Recycle()
|
||||||
|
|
||||||
|
annot["reverse_error"] = errj
|
||||||
|
annot["direction"] = "forward"
|
||||||
|
|
||||||
|
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
||||||
|
|
||||||
|
results = append(results, amplicon)
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -370,30 +404,48 @@ func _Pcr(seq ApatSequence,
|
|||||||
from = fm[1]
|
from = fm[1]
|
||||||
to = rm[0]
|
to = rm[0]
|
||||||
}
|
}
|
||||||
amplicon, _ := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular)
|
|
||||||
amplicon = amplicon.ReverseComplement(true)
|
|
||||||
|
|
||||||
annot := amplicon.Annotations()
|
if opt.HasExtension() && !opt.OnlyFullExtension() && !opt.Circular() {
|
||||||
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
|
if from < 0 {
|
||||||
annot["forward_primer"] = forward.String()
|
from = 0
|
||||||
|
}
|
||||||
|
if to > seq.Len() {
|
||||||
|
to = seq.Len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
match, _ := seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
if (opt.HasExtension() && ((from >= 0 && to <= seq.Len()) || opt.Circular())) ||
|
||||||
match.ReverseComplement(true)
|
!opt.HasExtension() {
|
||||||
annot["forward_match"] = match.String()
|
amplicon, error := seq.pointer.reference.Subsequence(from, to, opt.pointer.circular)
|
||||||
match.Recycle()
|
|
||||||
|
|
||||||
annot["forward_error"] = errj
|
if error != nil {
|
||||||
|
log.Fatalf("error : %v\n", error)
|
||||||
|
}
|
||||||
|
|
||||||
annot["reverse_primer"] = reverse.String()
|
amplicon = amplicon.ReverseComplement(true)
|
||||||
match, _ = seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
|
||||||
annot["reverse_match"] = match.String()
|
|
||||||
match.Recycle()
|
|
||||||
|
|
||||||
annot["reverse_error"] = erri
|
annot := amplicon.Annotations()
|
||||||
annot["direction"] = "reverse"
|
obiutils.MustFillMap(annot, seq.pointer.reference.Annotations())
|
||||||
|
annot["forward_primer"] = forward.String()
|
||||||
|
|
||||||
results = append(results, amplicon)
|
match, _ := seq.pointer.reference.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||||
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
match.ReverseComplement(true)
|
||||||
|
annot["forward_match"] = match.String()
|
||||||
|
match.Recycle()
|
||||||
|
|
||||||
|
annot["forward_error"] = errj
|
||||||
|
|
||||||
|
annot["reverse_primer"] = reverse.String()
|
||||||
|
match, _ = seq.pointer.reference.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||||
|
annot["reverse_match"] = match.String()
|
||||||
|
match.Recycle()
|
||||||
|
|
||||||
|
annot["reverse_error"] = erri
|
||||||
|
annot["direction"] = "reverse"
|
||||||
|
|
||||||
|
results = append(results, amplicon)
|
||||||
|
// log.Debugf("amplicon sequence capacity : %d", cap(amplicon.Sequence()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -192,8 +192,10 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
|||||||
for err == nil {
|
for err == nil {
|
||||||
|
|
||||||
// Read from the reader until the buffer is full or the end of the file is reached
|
// Read from the reader until the buffer is full or the end of the file is reached
|
||||||
for ; err == nil && l < len(buff); l += size {
|
l, err = io.ReadFull(reader, buff)
|
||||||
size, err = reader.Read(buff[l:])
|
|
||||||
|
if err == io.ErrUnexpectedEOF {
|
||||||
|
err = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create an extended buffer to read from if the end of the last entry is not found in the current buffer
|
// Create an extended buffer to read from if the end of the last entry is not found in the current buffer
|
||||||
@ -205,7 +207,7 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
|||||||
// Read from the reader in 1 MB increments until the end of the last entry is found
|
// Read from the reader in 1 MB increments until the end of the last entry is found
|
||||||
for end = _EndOfLastEntry(buff); err == nil && end < 0; end = _EndOfLastEntry(extbuff[:size]) {
|
for end = _EndOfLastEntry(buff); err == nil && end < 0; end = _EndOfLastEntry(extbuff[:size]) {
|
||||||
ic++
|
ic++
|
||||||
size, err = reader.Read(extbuff)
|
size, err = io.ReadFull(reader, extbuff)
|
||||||
buff = append(buff, extbuff[:size]...)
|
buff = append(buff, extbuff[:size]...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -92,7 +92,12 @@ func FastaChunkReader(r io.Reader, size int, cutHead bool) (chan FastxChunk, err
|
|||||||
out := make(chan FastxChunk)
|
out := make(chan FastxChunk)
|
||||||
buff := make([]byte, size)
|
buff := make([]byte, size)
|
||||||
|
|
||||||
n, err := r.Read(buff)
|
n, err := io.ReadFull(r, buff)
|
||||||
|
|
||||||
|
if err == io.ErrUnexpectedEOF {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
|
||||||
if n > 0 && err == nil {
|
if n > 0 && err == nil {
|
||||||
if n < size {
|
if n < size {
|
||||||
buff = buff[:n]
|
buff = buff[:n]
|
||||||
@ -128,13 +133,20 @@ func FastaChunkReader(r io.Reader, size int, cutHead bool) (chan FastxChunk, err
|
|||||||
index: idx,
|
index: idx,
|
||||||
}
|
}
|
||||||
idx++
|
idx++
|
||||||
|
} else {
|
||||||
|
size = size * 2
|
||||||
}
|
}
|
||||||
|
|
||||||
buff = slices.Grow(buff[:0], size)[0:size]
|
buff = slices.Grow(buff[:0], size)[0:size]
|
||||||
n, err = r.Read(buff)
|
n, err = io.ReadFull(r, buff)
|
||||||
if n < size {
|
if n < size {
|
||||||
buff = buff[:n]
|
buff = buff[:n]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err == io.ErrUnexpectedEOF {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
|
||||||
// fmt.Printf("n = %d, err = %v\n", n, err)
|
// fmt.Printf("n = %d, err = %v\n", n, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,7 +99,11 @@ func FastqChunkReader(r io.Reader, size int) (chan FastxChunk, error) {
|
|||||||
out := make(chan FastxChunk)
|
out := make(chan FastxChunk)
|
||||||
buff := make([]byte, size)
|
buff := make([]byte, size)
|
||||||
|
|
||||||
n, err := r.Read(buff)
|
n, err := io.ReadFull(r, buff)
|
||||||
|
|
||||||
|
if err == io.ErrUnexpectedEOF {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
|
||||||
if n > 0 && err == nil {
|
if n > 0 && err == nil {
|
||||||
if n < size {
|
if n < size {
|
||||||
@ -130,13 +134,19 @@ func FastqChunkReader(r io.Reader, size int) (chan FastxChunk, error) {
|
|||||||
index: idx,
|
index: idx,
|
||||||
}
|
}
|
||||||
idx++
|
idx++
|
||||||
|
} else {
|
||||||
|
size = size * 2
|
||||||
}
|
}
|
||||||
|
|
||||||
buff = slices.Grow(buff[:0], size)[0:size]
|
buff = slices.Grow(buff[:0], size)[0:size]
|
||||||
n, err = r.Read(buff)
|
n, err = io.ReadFull(r, buff)
|
||||||
if n < size {
|
if n < size {
|
||||||
buff = buff[:n]
|
buff = buff[:n]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err == io.ErrUnexpectedEOF {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
// fmt.Printf("n = %d, err = %v\n", n, err)
|
// fmt.Printf("n = %d, err = %v\n", n, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,11 +69,17 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
|||||||
mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq")
|
mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq")
|
||||||
mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat")
|
mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat")
|
||||||
|
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(fastaDetector, "text/fasta", ".fasta")
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(fastqDetector, "text/fastq", ".fastq")
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(genbankDetector, "text/genbank", ".seq")
|
||||||
|
mimetype.Lookup("application/octet-stream").Extend(emblDetector, "text/embl", ".dat")
|
||||||
|
|
||||||
// Create a buffer to store the read data
|
// Create a buffer to store the read data
|
||||||
buf := make([]byte, 1024*128)
|
buf := make([]byte, 1024*128)
|
||||||
n, err := stream.Read(buf)
|
n, err := io.ReadFull(stream, buf)
|
||||||
|
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.ErrUnexpectedEOF {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,7 +90,11 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create a new reader based on the read data
|
// Create a new reader based on the read data
|
||||||
newReader := io.MultiReader(bytes.NewReader(buf[:n]), stream)
|
newReader := io.Reader(bytes.NewReader(buf[:n]))
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
newReader = io.MultiReader(newReader, stream)
|
||||||
|
}
|
||||||
|
|
||||||
return mimeType, newReader, nil
|
return mimeType, newReader, nil
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@ var _MinimumLength = 0
|
|||||||
var _MaximumLength = -1
|
var _MaximumLength = -1
|
||||||
var _Fragmented = false
|
var _Fragmented = false
|
||||||
var _Delta = -1
|
var _Delta = -1
|
||||||
|
var _OnlyFull = false
|
||||||
|
|
||||||
// PCROptionSet defines every options related to a simulated PCR.
|
// PCROptionSet defines every options related to a simulated PCR.
|
||||||
//
|
//
|
||||||
@ -58,6 +59,9 @@ func PCROptionSet(options *getoptions.GetOpt) {
|
|||||||
options.IntVar(&_Delta, "delta", -1,
|
options.IntVar(&_Delta, "delta", -1,
|
||||||
options.Alias("D"),
|
options.Alias("D"),
|
||||||
options.Description("Lenght of the sequence fragment to be added to the barcode extremities."))
|
options.Description("Lenght of the sequence fragment to be added to the barcode extremities."))
|
||||||
|
options.BoolVar(&_OnlyFull, "only-complete-flanking", false,
|
||||||
|
options.Description("Only fragments with complete flanking sequences are printed."))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// OptionSet adds to the basic option set every options declared for
|
// OptionSet adds to the basic option set every options declared for
|
||||||
@ -131,3 +135,7 @@ func CLIWithExtension() bool {
|
|||||||
func CLIExtension() int {
|
func CLIExtension() int {
|
||||||
return _Delta
|
return _Delta
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIOnlyFull() bool {
|
||||||
|
return _OnlyFull
|
||||||
|
}
|
||||||
|
@ -24,6 +24,7 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
|
|||||||
CLIReversePrimer(),
|
CLIReversePrimer(),
|
||||||
CLIAllowedMismatch(),
|
CLIAllowedMismatch(),
|
||||||
),
|
),
|
||||||
|
obiapat.OptionOnlyFullExtension(CLIOnlyFull()),
|
||||||
)
|
)
|
||||||
|
|
||||||
if CLIMinLength() > 0 {
|
if CLIMinLength() > 0 {
|
||||||
|
Reference in New Issue
Block a user