Reducing memory allocation events

Former-commit-id: c94e79ba116464504580fc397270ead154063971
This commit is contained in:
Eric Coissac
2024-06-22 22:32:31 +02:00
parent e6b87ecd02
commit 93f9dcb95f
8 changed files with 98 additions and 46 deletions

View File

@ -1,15 +1,14 @@
package obialign package obialign
import "slices"
func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int { func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int {
needed := (lseqA + lseqB) * 2 needed := (lseqA + lseqB) * 2
(*path) = (*path)[:0]
if needed > cap(*path) { (*path) = slices.Grow((*path), needed)
*path = make([]int, 0, needed)
}
*path = (*path)[:cap(*path)]
p := cap(*path) p := cap(*path)
*path = (*path)[:p]
i := lseqA - 1 i := lseqA - 1
j := lseqB - 1 j := lseqB - 1

View File

@ -1,8 +1,10 @@
package obiformats package obiformats
import ( import (
"bytes"
"math" "math"
"strings" "strings"
"unsafe"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
@ -85,17 +87,26 @@ func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {
} }
} }
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string { func WriteFastSeqJsonHeader(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
annotations := sequence.Annotations() annotations := sequence.Annotations()
if len(annotations) > 0 { if len(annotations) > 0 {
text, err := obiutils.JsonMarshal(sequence.Annotations()) err := obiutils.JsonMarshalByteBuffer(buffer, sequence.Annotations())
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
}
}
return string(text) func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
annotations := sequence.Annotations()
buffer := bytes.Buffer{}
if len(annotations) > 0 {
obiutils.JsonMarshalByteBuffer(&buffer, sequence.Annotations())
return unsafe.String(unsafe.SliceData(buffer.Bytes()), len(buffer.Bytes()))
} }
return "" return ""

View File

@ -7,6 +7,7 @@ import (
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"unsafe"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
@ -298,17 +299,18 @@ func ParseFastSeqOBIHeader(sequence *obiseq.BioSequence) {
} }
} }
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string { func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
annotations := sequence.Annotations() annotations := sequence.Annotations()
if annotations != nil { if len(annotations) > 0 {
var text strings.Builder
for key, value := range annotations { for key, value := range annotations {
if key != "definition" { if key != "definition" {
switch t := value.(type) { switch t := value.(type) {
case string: case string:
text.WriteString(fmt.Sprintf("%s=%s; ", key, t)) buffer.WriteString(fmt.Sprintf("%s=%s; ", key, t))
case map[string]int, case map[string]int,
map[string]string, map[string]string,
map[string]interface{}, map[string]interface{},
@ -318,16 +320,30 @@ func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
log.Fatalf("Cannot convert %v value", value) log.Fatalf("Cannot convert %v value", value)
} }
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'")) tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
text.WriteString(fmt.Sprintf("%s=", key)) buffer.WriteString(fmt.Sprintf("%s=", key))
text.Write(tv) buffer.Write(tv)
text.WriteString("; ") buffer.WriteString("; ")
default: default:
text.WriteString(fmt.Sprintf("%s=%v; ", key, value)) buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
} }
} }
} }
return text.String() + " " + sequence.Definition() if sequence.HasDefinition() {
buffer.WriteByte(' ')
buffer.WriteString(sequence.Definition())
}
}
}
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
annotations := sequence.Annotations()
if annotations != nil {
var text bytes.Buffer
WriteFastSeqOBIHeade(&text, sequence)
return unsafe.String(unsafe.SliceData(text.Bytes()), len(text.String()))
} }
return "" return ""

View File

@ -81,12 +81,16 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, ski
var bs bytes.Buffer var bs bytes.Buffer
// Iterate over each sequence in the batch // Iterate over each sequence in the batch
for _, seq := range batch.Slice() { for i, seq := range batch.Slice() {
// Check if the sequence is empty // Check if the sequence is empty
if seq.Len() > 0 { if seq.Len() > 0 {
// Format the sequence using the provided formater function // Format the sequence using the provided formater function
formattedSeq := FormatFasta(seq, formater) formattedSeq := FormatFasta(seq, formater)
if i == 0 {
bs.Grow(len(formattedSeq) * len(batch.Slice()) * 5 / 4)
}
// Append the formatted sequence to the buffer // Append the formatted sequence to the buffer
bs.WriteString(formattedSeq) bs.WriteString(formattedSeq)
bs.WriteByte('\n') bs.WriteByte('\n')

View File

@ -2,7 +2,6 @@ package obiformats
import ( import (
"bytes" "bytes"
"fmt"
"io" "io"
"os" "os"
"sync" "sync"
@ -15,39 +14,52 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
) )
// The function FormatFastq takes a BioSequence object, a quality shift value, and a header formatter func _formatFastq(buff *bytes.Buffer, seq *obiseq.BioSequence, formater FormatHeader) {
// function as input, and returns a formatted string in FASTQ format.
func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
q := seq.QualitiesString()
info := "" info := ""
if formater != nil { if formater != nil {
info = formater(seq) info = formater(seq)
} }
f := fmt.Sprintf("@%s %s\n%s\n+\n%s", buff.WriteByte('@')
seq.Id(), info, buff.WriteString(seq.Id())
seq.String(), buff.WriteByte(' ')
q,
)
if f[0] != '@' { buff.WriteString(info)
log.Panicln("FormatFastq: FASTQ format error") buff.WriteByte('\n')
}
return f buff.Write(seq.Sequence())
buff.WriteString("\n+\n")
q := seq.QualitiesString()
buff.WriteString(q)
buff.WriteByte('\n')
}
// The function FormatFastq takes a BioSequence object and a header formatter
// function as input, and returns a formatted string in FASTQ format.
func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
var buff bytes.Buffer
_formatFastq(&buff, seq, formater)
return buff.String()
} }
func FormatFastqBatch(batch obiiter.BioSequenceBatch, func FormatFastqBatch(batch obiiter.BioSequenceBatch,
formater FormatHeader, skipEmpty bool) []byte { formater FormatHeader, skipEmpty bool) []byte {
var bs bytes.Buffer var bs bytes.Buffer
for _, seq := range batch.Slice() { for i, seq := range batch.Slice() {
if seq.Len() > 0 { if seq.Len() > 0 {
fs := FormatFastq(seq, formater) _formatFastq(&bs, seq, formater)
bs.WriteString(fs)
bs.WriteString("\n") if i == 0 {
bs.Grow(len(bs.Bytes()) * len(batch.Slice()) * 5 / 4)
}
} else { } else {
if skipEmpty { if skipEmpty {
log.Warnf("Sequence %s is empty and skiped in output", seq.Id()) log.Warnf("Sequence %s is empty and skiped in output", seq.Id())

View File

@ -7,7 +7,7 @@ import (
// TODO: The version number is extracted from git. This induces that the version // TODO: The version number is extracted from git. This induces that the version
// corresponds to the last commit, and not the one when the file will be // corresponds to the last commit, and not the one when the file will be
// committed // committed
var _Commit = "bcaa264" var _Commit = "fbdb2af"
var _Version = "Release 4.2.0" var _Version = "Release 4.2.0"
// Version returns the version of the obitools package. // Version returns the version of the obitools package.

View File

@ -15,6 +15,7 @@ import (
"slices" "slices"
"sync" "sync"
"sync/atomic" "sync/atomic"
"unsafe"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
@ -279,7 +280,8 @@ func (s *BioSequence) QualitiesString() string {
quality_shift := obioptions.OutputQualityShift() quality_shift := obioptions.OutputQualityShift()
qual := s.Qualities() qual := s.Qualities()
qual_ascii := GetSlice(len(qual))[0:len(qual)] qual_ascii := make([]byte, len(qual))
for i := 0; i < len(qual); i++ { for i := 0; i < len(qual); i++ {
quality := qual[i] quality := qual[i]
if quality > 93 { if quality > 93 {
@ -287,8 +289,8 @@ func (s *BioSequence) QualitiesString() string {
} }
qual_ascii[i] = quality + quality_shift qual_ascii[i] = quality + quality_shift
} }
qual_sting := string(qual_ascii)
RecycleSlice(&qual_ascii) qual_sting := unsafe.String(unsafe.SliceData(qual_ascii), len(qual))
return qual_sting return qual_sting
} }

View File

@ -363,6 +363,16 @@ func AtomicCounter(initial ...int) func() int {
return nextCounter return nextCounter
} }
// JsonMarshalByteBuffer appends the JSON encoding of i to buffer.
//
// HTML escaping is disabled on the encoder, so characters such as '<', '>'
// and '&' are written verbatim instead of as \u00XX escapes. The line feed
// that json.Encoder.Encode writes after the value is removed, so the buffer
// ends right after the JSON text.
//
// Content already present in buffer is never modified: on success only the
// terminator added by this call is dropped, and on failure the buffer is
// restored to the length it had on entry before the error is returned.
func JsonMarshalByteBuffer(buffer *bytes.Buffer, i interface{}) error {
	// Remember where the caller's own content ends.
	start := buffer.Len()

	encoder := json.NewEncoder(buffer)
	encoder.SetEscapeHTML(false)

	if err := encoder.Encode(i); err != nil {
		// Nothing usable was produced; make sure the caller's bytes
		// (including any trailing newlines) are left intact.
		buffer.Truncate(start)
		return err
	}

	// Drop only the single '\n' appended by Encode, and only if this call
	// actually wrote it.
	if end := buffer.Len(); end > start && buffer.Bytes()[end-1] == '\n' {
		buffer.Truncate(end - 1)
	}

	return nil
}
// JsonMarshal marshals an interface into JSON format. // JsonMarshal marshals an interface into JSON format.
// //
// JsonMarshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8 // JsonMarshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8
@ -375,10 +385,8 @@ func AtomicCounter(initial ...int) func() int {
// It takes an interface as a parameter and returns a byte slice and an error. // It takes an interface as a parameter and returns a byte slice and an error.
func JsonMarshal(i interface{}) ([]byte, error) { func JsonMarshal(i interface{}) ([]byte, error) {
buffer := &bytes.Buffer{} buffer := &bytes.Buffer{}
encoder := json.NewEncoder(buffer) err := JsonMarshalByteBuffer(buffer, i)
encoder.SetEscapeHTML(false) return buffer.Bytes(), err
err := encoder.Encode(i)
return bytes.TrimRight(buffer.Bytes(), "\n"), err
} }
// IsAMap checks if the given value is a map. // IsAMap checks if the given value is a map.