mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 08:40:26 +00:00
optimize sequence readers and patch a bug in the format guesser
Former-commit-id: 9dce1e96c57ae9a88c26fac5c8e1bdcdc2c0c7a5
This commit is contained in:
@@ -192,8 +192,10 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
||||
for err == nil {
|
||||
|
||||
// Read from the reader until the buffer is full or the end of the file is reached
|
||||
for ; err == nil && l < len(buff); l += size {
|
||||
size, err = reader.Read(buff[l:])
|
||||
l, err = io.ReadFull(reader, buff)
|
||||
|
||||
if err == io.ErrUnexpectedEOF {
|
||||
err = nil
|
||||
}
|
||||
|
||||
// Create an extended buffer to read from if the end of the last entry is not found in the current buffer
|
||||
@@ -205,7 +207,7 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
||||
// Read from the reader in 1 MB increments until the end of the last entry is found
|
||||
for end = _EndOfLastEntry(buff); err == nil && end < 0; end = _EndOfLastEntry(extbuff[:size]) {
|
||||
ic++
|
||||
size, err = reader.Read(extbuff)
|
||||
size, err = io.ReadFull(reader, extbuff)
|
||||
buff = append(buff, extbuff[:size]...)
|
||||
}
|
||||
|
||||
|
||||
@@ -92,7 +92,12 @@ func FastaChunkReader(r io.Reader, size int, cutHead bool) (chan FastxChunk, err
|
||||
out := make(chan FastxChunk)
|
||||
buff := make([]byte, size)
|
||||
|
||||
n, err := r.Read(buff)
|
||||
n, err := io.ReadFull(r, buff)
|
||||
|
||||
if err == io.ErrUnexpectedEOF {
|
||||
err = nil
|
||||
}
|
||||
|
||||
if n > 0 && err == nil {
|
||||
if n < size {
|
||||
buff = buff[:n]
|
||||
@@ -128,13 +133,20 @@ func FastaChunkReader(r io.Reader, size int, cutHead bool) (chan FastxChunk, err
|
||||
index: idx,
|
||||
}
|
||||
idx++
|
||||
} else {
|
||||
size = size * 2
|
||||
}
|
||||
|
||||
buff = slices.Grow(buff[:0], size)[0:size]
|
||||
n, err = r.Read(buff)
|
||||
n, err = io.ReadFull(r, buff)
|
||||
if n < size {
|
||||
buff = buff[:n]
|
||||
}
|
||||
|
||||
if err == io.ErrUnexpectedEOF {
|
||||
err = nil
|
||||
}
|
||||
|
||||
// fmt.Printf("n = %d, err = %v\n", n, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -99,7 +99,11 @@ func FastqChunkReader(r io.Reader, size int) (chan FastxChunk, error) {
|
||||
out := make(chan FastxChunk)
|
||||
buff := make([]byte, size)
|
||||
|
||||
n, err := r.Read(buff)
|
||||
n, err := io.ReadFull(r, buff)
|
||||
|
||||
if err == io.ErrUnexpectedEOF {
|
||||
err = nil
|
||||
}
|
||||
|
||||
if n > 0 && err == nil {
|
||||
if n < size {
|
||||
@@ -130,13 +134,19 @@ func FastqChunkReader(r io.Reader, size int) (chan FastxChunk, error) {
|
||||
index: idx,
|
||||
}
|
||||
idx++
|
||||
} else {
|
||||
size = size * 2
|
||||
}
|
||||
|
||||
buff = slices.Grow(buff[:0], size)[0:size]
|
||||
n, err = r.Read(buff)
|
||||
n, err = io.ReadFull(r, buff)
|
||||
if n < size {
|
||||
buff = buff[:n]
|
||||
}
|
||||
|
||||
if err == io.ErrUnexpectedEOF {
|
||||
err = nil
|
||||
}
|
||||
// fmt.Printf("n = %d, err = %v\n", n, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -69,11 +69,17 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
||||
mimetype.Lookup("text/plain").Extend(genbankDetector, "text/genbank", ".seq")
|
||||
mimetype.Lookup("text/plain").Extend(emblDetector, "text/embl", ".dat")
|
||||
|
||||
mimetype.Lookup("application/octet-stream").Extend(fastaDetector, "text/fasta", ".fasta")
|
||||
mimetype.Lookup("application/octet-stream").Extend(fastqDetector, "text/fastq", ".fastq")
|
||||
mimetype.Lookup("application/octet-stream").Extend(ecoPCR2Detector, "text/ecopcr2", ".ecopcr")
|
||||
mimetype.Lookup("application/octet-stream").Extend(genbankDetector, "text/genbank", ".seq")
|
||||
mimetype.Lookup("application/octet-stream").Extend(emblDetector, "text/embl", ".dat")
|
||||
|
||||
// Create a buffer to store the read data
|
||||
buf := make([]byte, 1024*128)
|
||||
n, err := stream.Read(buf)
|
||||
n, err := io.ReadFull(stream, buf)
|
||||
|
||||
if err != nil && err != io.EOF {
|
||||
if err != nil && err != io.ErrUnexpectedEOF {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
@@ -84,7 +90,11 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
||||
}
|
||||
|
||||
// Create a new reader based on the read data
|
||||
newReader := io.MultiReader(bytes.NewReader(buf[:n]), stream)
|
||||
newReader := io.Reader(bytes.NewReader(buf[:n]))
|
||||
|
||||
if err == nil {
|
||||
newReader = io.MultiReader(newReader, stream)
|
||||
}
|
||||
|
||||
return mimeType, newReader, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user