From 93f9dcb95f89a83a5b146722d2163ee59c07a790 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Sat, 22 Jun 2024 22:32:31 +0200 Subject: [PATCH] Reducing memory allocation events Former-commit-id: c94e79ba116464504580fc397270ead154063971 --- pkg/obialign/backtracking.go | 11 +++--- pkg/obiformats/fastseq_json_header.go | 17 +++++++-- pkg/obiformats/fastseq_obi_header.go | 34 +++++++++++++----- pkg/obiformats/fastseq_write_fasta.go | 6 +++- pkg/obiformats/fastseq_write_fastq.go | 50 +++++++++++++++++---------- pkg/obioptions/version.go | 2 +- pkg/obiseq/biosequence.go | 8 +++-- pkg/obiutils/goutils.go | 16 ++++++--- 8 files changed, 98 insertions(+), 46 deletions(-) diff --git a/pkg/obialign/backtracking.go b/pkg/obialign/backtracking.go index 9f4a030..9ab913f 100644 --- a/pkg/obialign/backtracking.go +++ b/pkg/obialign/backtracking.go @@ -1,15 +1,14 @@ package obialign +import "slices" + func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int { needed := (lseqA + lseqB) * 2 - - if needed > cap(*path) { - *path = make([]int, 0, needed) - } - - *path = (*path)[:cap(*path)] + (*path) = (*path)[:0] + (*path) = slices.Grow((*path), needed) p := cap(*path) + *path = (*path)[:p] i := lseqA - 1 j := lseqB - 1 diff --git a/pkg/obiformats/fastseq_json_header.go b/pkg/obiformats/fastseq_json_header.go index e4f0410..00b3886 100644 --- a/pkg/obiformats/fastseq_json_header.go +++ b/pkg/obiformats/fastseq_json_header.go @@ -1,8 +1,10 @@ package obiformats import ( + "bytes" "math" "strings" + "unsafe" log "github.com/sirupsen/logrus" @@ -85,17 +87,26 @@ func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) { } } -func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string { +func WriteFastSeqJsonHeader(buffer *bytes.Buffer, sequence *obiseq.BioSequence) { + annotations := sequence.Annotations() if len(annotations) > 0 { - text, err := obiutils.JsonMarshal(sequence.Annotations()) + err := obiutils.JsonMarshalByteBuffer(buffer, sequence.Annotations()) if err != nil { log.Fatal(err) } + } +} - return string(text) +func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string { + annotations := sequence.Annotations() + buffer := bytes.Buffer{} + + if len(annotations) > 0 { + obiutils.JsonMarshalByteBuffer(&buffer, sequence.Annotations()) + return unsafe.String(unsafe.SliceData(buffer.Bytes()), len(buffer.Bytes())) } return "" diff --git a/pkg/obiformats/fastseq_obi_header.go b/pkg/obiformats/fastseq_obi_header.go index d7ac4d5..05d76bd 100644 --- a/pkg/obiformats/fastseq_obi_header.go +++ b/pkg/obiformats/fastseq_obi_header.go @@ -7,6 +7,7 @@ import ( "regexp" "strconv" "strings" + "unsafe" log "github.com/sirupsen/logrus" @@ -298,17 +299,18 @@ func ParseFastSeqOBIHeader(sequence *obiseq.BioSequence) { } } -func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string { +func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) { + annotations := sequence.Annotations() - if annotations != nil { - var text strings.Builder + if len(annotations) > 0 { for key, value := range annotations { if key != "definition" { + switch t := value.(type) { case string: - text.WriteString(fmt.Sprintf("%s=%s; ", key, t)) + buffer.WriteString(fmt.Sprintf("%s=%s; ", key, t)) case map[string]int, map[string]string, map[string]interface{}, @@ -318,16 +320,30 @@ func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string { log.Fatalf("Cannot convert %v value", value) } tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'")) - text.WriteString(fmt.Sprintf("%s=", key)) - text.Write(tv) - text.WriteString("; ") + buffer.WriteString(fmt.Sprintf("%s=", key)) + buffer.Write(tv) + buffer.WriteString("; ") default: - text.WriteString(fmt.Sprintf("%s=%v; ", key, value)) + buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value)) } } } - return text.String() + " " + sequence.Definition() + if sequence.HasDefinition() { + buffer.WriteByte(' ') + buffer.WriteString(sequence.Definition()) + } + } + +} + +func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string { + annotations := sequence.Annotations() + + if annotations != nil { + var text bytes.Buffer + WriteFastSeqOBIHeade(&text, sequence) + return unsafe.String(unsafe.SliceData(text.Bytes()), len(text.String())) } return "" diff --git a/pkg/obiformats/fastseq_write_fasta.go b/pkg/obiformats/fastseq_write_fasta.go index b50af65..165d7c1 100644 --- a/pkg/obiformats/fastseq_write_fasta.go +++ b/pkg/obiformats/fastseq_write_fasta.go @@ -81,12 +81,16 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, ski var bs bytes.Buffer // Iterate over each sequence in the batch - for _, seq := range batch.Slice() { + for i, seq := range batch.Slice() { // Check if the sequence is empty if seq.Len() > 0 { // Format the sequence using the provided formater function formattedSeq := FormatFasta(seq, formater) + if i == 0 { + bs.Grow(len(formattedSeq) * len(batch.Slice()) * 5 / 4) + } + // Append the formatted sequence to the buffer bs.WriteString(formattedSeq) bs.WriteByte('\n') diff --git a/pkg/obiformats/fastseq_write_fastq.go b/pkg/obiformats/fastseq_write_fastq.go index aa946bf..9d8842f 100644 --- a/pkg/obiformats/fastseq_write_fastq.go +++ b/pkg/obiformats/fastseq_write_fastq.go @@ -2,7 +2,6 @@ package obiformats import ( "bytes" - "fmt" "io" "os" "sync" @@ -15,39 +14,52 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) -// The function FormatFastq takes a BioSequence object, a quality shift value, and a header formatter -// function as input, and returns a formatted string in FASTQ format. -func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string { - - q := seq.QualitiesString() +func _formatFastq(buff *bytes.Buffer, seq *obiseq.BioSequence, formater FormatHeader) { info := "" if formater != nil { info = formater(seq) } - f := fmt.Sprintf("@%s %s\n%s\n+\n%s", - seq.Id(), info, - seq.String(), - q, - ) + buff.WriteByte('@') + buff.WriteString(seq.Id()) + buff.WriteByte(' ') - if f[0] != '@' { - log.Panicln("FormatFastq: FASTQ format error") - } + buff.WriteString(info) + buff.WriteByte('\n') - return f + buff.Write(seq.Sequence()) + buff.WriteString("\n+\n") + + q := seq.QualitiesString() + buff.WriteString(q) + buff.WriteByte('\n') + +} + +// The function FormatFastq takes a BioSequence object, a quality shift value, and a header formatter +// function as input, and returns a formatted string in FASTQ format. +func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string { + + var buff bytes.Buffer + + _formatFastq(&buff, seq, formater) + + return buff.String() } func FormatFastqBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, skipEmpty bool) []byte { var bs bytes.Buffer - for _, seq := range batch.Slice() { + for i, seq := range batch.Slice() { if seq.Len() > 0 { - fs := FormatFastq(seq, formater) - bs.WriteString(fs) - bs.WriteString("\n") + _formatFastq(&bs, seq, formater) + + if i == 0 { + + bs.Grow(len(bs.Bytes()) * len(batch.Slice()) * 5 / 4) + } } else { if skipEmpty { log.Warnf("Sequence %s is empty and skiped in output", seq.Id()) diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 2989ca6..b4a591e 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -7,7 +7,7 @@ import ( // TODO: The version number is extracted from git. This induces that the version // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "bcaa264" +var _Commit = "fbdb2af" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index 92ae400..ee17212 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -15,6 +15,7 @@ import ( "slices" "sync" "sync/atomic" + "unsafe" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -279,7 +280,8 @@ func (s *BioSequence) QualitiesString() string { quality_shift := obioptions.OutputQualityShift() qual := s.Qualities() - qual_ascii := GetSlice(len(qual))[0:len(qual)] + qual_ascii := make([]byte, len(qual)) + for i := 0; i < len(qual); i++ { quality := qual[i] if quality > 93 { @@ -287,8 +289,8 @@ func (s *BioSequence) QualitiesString() string { } qual_ascii[i] = quality + quality_shift } - qual_sting := string(qual_ascii) - RecycleSlice(&qual_ascii) + + qual_sting := unsafe.String(unsafe.SliceData(qual_ascii), len(qual)) return qual_sting } diff --git a/pkg/obiutils/goutils.go b/pkg/obiutils/goutils.go index 374573b..50dbd1e 100644 --- a/pkg/obiutils/goutils.go +++ b/pkg/obiutils/goutils.go @@ -363,6 +363,16 @@ func AtomicCounter(initial ...int) func() int { return nextCounter } +func JsonMarshalByteBuffer(buffer *bytes.Buffer, i interface{}) error { + encoder := json.NewEncoder(buffer) + encoder.SetEscapeHTML(false) + err := encoder.Encode(i) + b := buffer.Bytes() + b = bytes.TrimRight(b, "\n") + buffer.Truncate(len(b)) + return err +} + // JsonMarshal marshals an interface into JSON format. // // JsonMarshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8 @@ -375,10 +385,8 @@ func AtomicCounter(initial ...int) func() int { // It takes an interface as a parameter and returns a byte slice and an error. func JsonMarshal(i interface{}) ([]byte, error) { buffer := &bytes.Buffer{} - encoder := json.NewEncoder(buffer) - encoder.SetEscapeHTML(false) - err := encoder.Encode(i) - return bytes.TrimRight(buffer.Bytes(), "\n"), err + err := JsonMarshalByteBuffer(buffer, i) + return buffer.Bytes(), err } // IsAMap checks if the given value is a map.