Reduce memory allocation events

Former-commit-id: fbdb2afc857b02adc2593e2278d3bd838e99b0b2
This commit is contained in:
Eric Coissac
2024-06-22 21:01:53 +02:00
parent 54a138196c
commit e6b87ecd02
19 changed files with 166 additions and 75 deletions

View File

@@ -169,7 +169,9 @@ func _ParseEmblFile(source string, input ChannelSeqFileChunk,
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
entry_channel := ReadSeqFileChunk(reader, _EndOfLastEntry)
buff := make([]byte, 1024*1024*1024*256)
entry_channel := ReadSeqFileChunk(reader, buff, _EndOfLastEntry)
newIter := obiiter.MakeIBioSequence()
nworkers := opt.ParallelWorkers()
@@ -179,7 +181,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
newIter.Add(1)
go _ParseEmblFile(opt.Source(), entry_channel, newIter,
opt.WithFeatureTable(),
opt.BatchSize(),
opt.BatchSize(),
opt.TotalSeqSize())
}

View File

@@ -228,7 +228,9 @@ func ReadFasta(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
nworker := opt.ParallelWorkers()
chkchan := ReadSeqFileChunk(reader, _EndOfLastFastaEntry)
buff := make([]byte, 1024*1024*1024)
chkchan := ReadSeqFileChunk(reader, buff, _EndOfLastFastaEntry)
chunck_order := obiutils.AtomicCounter()
for i := 0; i < nworker; i++ {

View File

@@ -112,7 +112,7 @@ func _storeSequenceQuality(bytes *bytes.Buffer, out *obiseq.BioSequence, quality
}
for i := 0; i < len(q); i++ {
q[i] = q[i] - quality_shift
q[i] -= quality_shift
}
out.SetQualities(q)
}
@@ -309,7 +309,9 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
nworker := opt.ParallelWorkers()
chunkorder := obiutils.AtomicCounter()
chkchan := ReadSeqFileChunk(reader, _EndOfLastFastqEntry)
buff := make([]byte, 1024*1024*1024)
chkchan := ReadSeqFileChunk(reader, buff, _EndOfLastFastqEntry)
for i := 0; i < nworker; i++ {
out.Add(1)

View File

@@ -46,17 +46,8 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch,
for _, seq := range batch.Slice() {
if seq.Len() > 0 {
fs := FormatFastq(seq, formater)
lb := bs.Len()
n, _ := bs.WriteString(fs)
if n < len(fs) {
log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences")
}
bs.WriteString(fs)
bs.WriteString("\n")
if bs.Len()-lb < len(fs)+1 {
log.Panicln("FormatFastqBatch: Cannot write all FASTQ sequences correctly")
}
} else {
if skipEmpty {
log.Warnf("Sequence %s is empty and skiped in output", seq.Id())
@@ -69,12 +60,6 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch,
chunk := bs.Bytes()
chunk = chunk[:bs.Len()]
if chunk[0] != '@' {
log.Panicln("FormatFastqBatch: FASTQ format error")
}
return chunk
}

View File

@@ -233,7 +233,9 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
// entry_channel := make(chan _FileChunk)
entry_channel := ReadSeqFileChunk(reader, _EndOfLastEntry)
buff := make([]byte, 1024*1024*1024*256)
entry_channel := ReadSeqFileChunk(reader, buff, _EndOfLastEntry)
newIter := obiiter.MakeIBioSequence()
nworkers := opt.ParallelWorkers()

View File

@@ -33,10 +33,10 @@ type LastSeqRecord func([]byte) int
// Returns:
// A ChannelSeqFileChunk (the function's declared result type).
func ReadSeqFileChunk(reader io.Reader,
buff []byte,
splitter LastSeqRecord) ChannelSeqFileChunk {
var err error
var fullbuff []byte
var buff []byte
chunk_channel := make(ChannelSeqFileChunk)
@@ -46,8 +46,7 @@ func ReadSeqFileChunk(reader io.Reader,
i := 0
// Initialize the buffer to the size of a chunk of data
fullbuff = make([]byte, _FileChunkSize, _FileChunkSize*2)
buff = fullbuff
fullbuff = buff
// Read from the reader until the buffer is full or the end of the file is reached
l, err = io.ReadFull(reader, buff)