Reducing memory allocation events

Former-commit-id: c94e79ba116464504580fc397270ead154063971
This commit is contained in:
Eric Coissac
2024-06-22 22:32:31 +02:00
parent e6b87ecd02
commit 93f9dcb95f
8 changed files with 98 additions and 46 deletions

View File

@@ -1,8 +1,10 @@
package obiformats
import (
"bytes"
"math"
"strings"
"unsafe"
log "github.com/sirupsen/logrus"
@@ -85,17 +87,26 @@ func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {
}
}
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
func WriteFastSeqJsonHeader(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
annotations := sequence.Annotations()
if len(annotations) > 0 {
text, err := obiutils.JsonMarshal(sequence.Annotations())
err := obiutils.JsonMarshalByteBuffer(buffer, sequence.Annotations())
if err != nil {
log.Fatal(err)
}
}
}
return string(text)
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
annotations := sequence.Annotations()
buffer := bytes.Buffer{}
if len(annotations) > 0 {
obiutils.JsonMarshalByteBuffer(&buffer, sequence.Annotations())
return unsafe.String(unsafe.SliceData(buffer.Bytes()), len(buffer.Bytes()))
}
return ""

View File

@@ -7,6 +7,7 @@ import (
"regexp"
"strconv"
"strings"
"unsafe"
log "github.com/sirupsen/logrus"
@@ -298,17 +299,18 @@ func ParseFastSeqOBIHeader(sequence *obiseq.BioSequence) {
}
}
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
annotations := sequence.Annotations()
if annotations != nil {
var text strings.Builder
if len(annotations) > 0 {
for key, value := range annotations {
if key != "definition" {
switch t := value.(type) {
case string:
text.WriteString(fmt.Sprintf("%s=%s; ", key, t))
buffer.WriteString(fmt.Sprintf("%s=%s; ", key, t))
case map[string]int,
map[string]string,
map[string]interface{},
@@ -318,16 +320,30 @@ func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
log.Fatalf("Cannot convert %v value", value)
}
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
text.WriteString(fmt.Sprintf("%s=", key))
text.Write(tv)
text.WriteString("; ")
buffer.WriteString(fmt.Sprintf("%s=", key))
buffer.Write(tv)
buffer.WriteString("; ")
default:
text.WriteString(fmt.Sprintf("%s=%v; ", key, value))
buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
}
}
}
return text.String() + " " + sequence.Definition()
if sequence.HasDefinition() {
buffer.WriteByte(' ')
buffer.WriteString(sequence.Definition())
}
}
}
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
annotations := sequence.Annotations()
if annotations != nil {
var text bytes.Buffer
WriteFastSeqOBIHeade(&text, sequence)
return unsafe.String(unsafe.SliceData(text.Bytes()), len(text.String()))
}
return ""

View File

@@ -81,12 +81,16 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, ski
var bs bytes.Buffer
// Iterate over each sequence in the batch
for _, seq := range batch.Slice() {
for i, seq := range batch.Slice() {
// Check if the sequence is empty
if seq.Len() > 0 {
// Format the sequence using the provided formater function
formattedSeq := FormatFasta(seq, formater)
if i == 0 {
bs.Grow(len(formattedSeq) * len(batch.Slice()) * 5 / 4)
}
// Append the formatted sequence to the buffer
bs.WriteString(formattedSeq)
bs.WriteByte('\n')

View File

@@ -2,7 +2,6 @@ package obiformats
import (
"bytes"
"fmt"
"io"
"os"
"sync"
@@ -15,39 +14,52 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
// The function FormatFastq takes a BioSequence object and a header formatter
// function as input, and returns a formatted string in FASTQ format.
func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
q := seq.QualitiesString()
func _formatFastq(buff *bytes.Buffer, seq *obiseq.BioSequence, formater FormatHeader) {
info := ""
if formater != nil {
info = formater(seq)
}
f := fmt.Sprintf("@%s %s\n%s\n+\n%s",
seq.Id(), info,
seq.String(),
q,
)
buff.WriteByte('@')
buff.WriteString(seq.Id())
buff.WriteByte(' ')
if f[0] != '@' {
log.Panicln("FormatFastq: FASTQ format error")
}
buff.WriteString(info)
buff.WriteByte('\n')
return f
buff.Write(seq.Sequence())
buff.WriteString("\n+\n")
q := seq.QualitiesString()
buff.WriteString(q)
buff.WriteByte('\n')
}
// FormatFastq returns the FASTQ text for a single sequence as a string.
//
// The record is written by _formatFastq into a temporary buffer that is then
// converted to a string. seq is the sequence to render and formater is passed
// through unchanged to _formatFastq, which uses it to build the header
// annotations.
func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
	record := new(bytes.Buffer)
	_formatFastq(record, seq, formater)
	return record.String()
}
func FormatFastqBatch(batch obiiter.BioSequenceBatch,
formater FormatHeader, skipEmpty bool) []byte {
var bs bytes.Buffer
for _, seq := range batch.Slice() {
for i, seq := range batch.Slice() {
if seq.Len() > 0 {
fs := FormatFastq(seq, formater)
bs.WriteString(fs)
bs.WriteString("\n")
_formatFastq(&bs, seq, formater)
if i == 0 {
bs.Grow(len(bs.Bytes()) * len(batch.Slice()) * 5 / 4)
}
} else {
if skipEmpty {
log.Warnf("Sequence %s is empty and skiped in output", seq.Id())