Reducing memory allocation events

Former-commit-id: c94e79ba116464504580fc397270ead154063971
This commit is contained in:
Eric Coissac
2024-06-22 22:32:31 +02:00
parent e6b87ecd02
commit 93f9dcb95f
8 changed files with 98 additions and 46 deletions

View File

@ -1,15 +1,14 @@
package obialign
import "slices"
func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int {
needed := (lseqA + lseqB) * 2
if needed > cap(*path) {
*path = make([]int, 0, needed)
}
*path = (*path)[:cap(*path)]
(*path) = (*path)[:0]
(*path) = slices.Grow((*path), needed)
p := cap(*path)
*path = (*path)[:p]
i := lseqA - 1
j := lseqB - 1

View File

@ -1,8 +1,10 @@
package obiformats
import (
"bytes"
"math"
"strings"
"unsafe"
log "github.com/sirupsen/logrus"
@ -85,17 +87,26 @@ func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {
}
}
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
func WriteFastSeqJsonHeader(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
annotations := sequence.Annotations()
if len(annotations) > 0 {
text, err := obiutils.JsonMarshal(sequence.Annotations())
err := obiutils.JsonMarshalByteBuffer(buffer, sequence.Annotations())
if err != nil {
log.Fatal(err)
}
}
}
return string(text)
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
annotations := sequence.Annotations()
buffer := bytes.Buffer{}
if len(annotations) > 0 {
obiutils.JsonMarshalByteBuffer(&buffer, sequence.Annotations())
return unsafe.String(unsafe.SliceData(buffer.Bytes()), len(buffer.Bytes()))
}
return ""

View File

@ -7,6 +7,7 @@ import (
"regexp"
"strconv"
"strings"
"unsafe"
log "github.com/sirupsen/logrus"
@ -298,17 +299,18 @@ func ParseFastSeqOBIHeader(sequence *obiseq.BioSequence) {
}
}
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
annotations := sequence.Annotations()
if annotations != nil {
var text strings.Builder
if len(annotations) > 0 {
for key, value := range annotations {
if key != "definition" {
switch t := value.(type) {
case string:
text.WriteString(fmt.Sprintf("%s=%s; ", key, t))
buffer.WriteString(fmt.Sprintf("%s=%s; ", key, t))
case map[string]int,
map[string]string,
map[string]interface{},
@ -318,16 +320,30 @@ func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
log.Fatalf("Cannot convert %v value", value)
}
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
text.WriteString(fmt.Sprintf("%s=", key))
text.Write(tv)
text.WriteString("; ")
buffer.WriteString(fmt.Sprintf("%s=", key))
buffer.Write(tv)
buffer.WriteString("; ")
default:
text.WriteString(fmt.Sprintf("%s=%v; ", key, value))
buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
}
}
}
return text.String() + " " + sequence.Definition()
if sequence.HasDefinition() {
buffer.WriteByte(' ')
buffer.WriteString(sequence.Definition())
}
}
}
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
annotations := sequence.Annotations()
if annotations != nil {
var text bytes.Buffer
WriteFastSeqOBIHeade(&text, sequence)
return unsafe.String(unsafe.SliceData(text.Bytes()), len(text.String()))
}
return ""

View File

@ -81,12 +81,16 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, ski
var bs bytes.Buffer
// Iterate over each sequence in the batch
for _, seq := range batch.Slice() {
for i, seq := range batch.Slice() {
// Check if the sequence is empty
if seq.Len() > 0 {
// Format the sequence using the provided formater function
formattedSeq := FormatFasta(seq, formater)
if i == 0 {
bs.Grow(len(formattedSeq) * len(batch.Slice()) * 5 / 4)
}
// Append the formatted sequence to the buffer
bs.WriteString(formattedSeq)
bs.WriteByte('\n')

View File

@ -2,7 +2,6 @@ package obiformats
import (
"bytes"
"fmt"
"io"
"os"
"sync"
@ -15,39 +14,52 @@ import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
// The function FormatFastq takes a BioSequence object and a header formatter
// function as input, and returns a formatted string in FASTQ format.
func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
q := seq.QualitiesString()
func _formatFastq(buff *bytes.Buffer, seq *obiseq.BioSequence, formater FormatHeader) {
info := ""
if formater != nil {
info = formater(seq)
}
f := fmt.Sprintf("@%s %s\n%s\n+\n%s",
seq.Id(), info,
seq.String(),
q,
)
buff.WriteByte('@')
buff.WriteString(seq.Id())
buff.WriteByte(' ')
if f[0] != '@' {
log.Panicln("FormatFastq: FASTQ format error")
}
buff.WriteString(info)
buff.WriteByte('\n')
return f
buff.Write(seq.Sequence())
buff.WriteString("\n+\n")
q := seq.QualitiesString()
buff.WriteString(q)
buff.WriteByte('\n')
}
// FormatFastq renders seq as one FASTQ record and returns it as a string.
//
// The record is written by _formatFastq into a scratch buffer owned by this
// call; seq and formater are handed to it unchanged. The returned string is
// the full content of that buffer.
func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
	record := new(bytes.Buffer)
	_formatFastq(record, seq, formater)

	return record.String()
}
func FormatFastqBatch(batch obiiter.BioSequenceBatch,
formater FormatHeader, skipEmpty bool) []byte {
var bs bytes.Buffer
for _, seq := range batch.Slice() {
for i, seq := range batch.Slice() {
if seq.Len() > 0 {
fs := FormatFastq(seq, formater)
bs.WriteString(fs)
bs.WriteString("\n")
_formatFastq(&bs, seq, formater)
if i == 0 {
bs.Grow(len(bs.Bytes()) * len(batch.Slice()) * 5 / 4)
}
} else {
if skipEmpty {
log.Warnf("Sequence %s is empty and skiped in output", seq.Id())

View File

@ -7,7 +7,7 @@ import (
// TODO: The version number is extracted from git. As a consequence, the version
// corresponds to the last commit, and not to the commit in which this file
// will be committed.
var _Commit = "bcaa264"
var _Commit = "fbdb2af"
var _Version = "Release 4.2.0"
// Version returns the version of the obitools package.

View File

@ -15,6 +15,7 @@ import (
"slices"
"sync"
"sync/atomic"
"unsafe"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
@ -279,7 +280,8 @@ func (s *BioSequence) QualitiesString() string {
quality_shift := obioptions.OutputQualityShift()
qual := s.Qualities()
qual_ascii := GetSlice(len(qual))[0:len(qual)]
qual_ascii := make([]byte, len(qual))
for i := 0; i < len(qual); i++ {
quality := qual[i]
if quality > 93 {
@ -287,8 +289,8 @@ func (s *BioSequence) QualitiesString() string {
}
qual_ascii[i] = quality + quality_shift
}
qual_sting := string(qual_ascii)
RecycleSlice(&qual_ascii)
qual_sting := unsafe.String(unsafe.SliceData(qual_ascii), len(qual))
return qual_sting
}

View File

@ -363,6 +363,16 @@ func AtomicCounter(initial ...int) func() int {
return nextCounter
}
// JsonMarshalByteBuffer appends the JSON encoding of i to buffer.
//
// HTML escaping is disabled, so characters such as <, > and & are written
// verbatim. json.Encoder terminates every encoded value with a newline; that
// single terminator is removed so the value can be embedded in a larger
// record.
//
// Bytes already present in buffer before the call are never modified. If the
// encoding fails, buffer is restored to the length it had on entry and the
// error is returned.
func JsonMarshalByteBuffer(buffer *bytes.Buffer, i interface{}) error {
	// Remember where this value starts so that only bytes written by this
	// call are ever trimmed or discarded.
	start := buffer.Len()

	encoder := json.NewEncoder(buffer)
	encoder.SetEscapeHTML(false)

	if err := encoder.Encode(i); err != nil {
		// Drop anything written for this value, leaving earlier content intact.
		buffer.Truncate(start)
		return err
	}

	// Remove only the newline appended by Encode, never trailing newlines
	// that were already in the buffer when the function was called.
	if n := buffer.Len(); n > start && buffer.Bytes()[n-1] == '\n' {
		buffer.Truncate(n - 1)
	}

	return nil
}
// JsonMarshal marshals an interface into JSON format.
//
// JsonMarshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8
@ -375,10 +385,8 @@ func AtomicCounter(initial ...int) func() int {
// It takes an interface as a parameter and returns a byte slice and an error.
func JsonMarshal(i interface{}) ([]byte, error) {
buffer := &bytes.Buffer{}
encoder := json.NewEncoder(buffer)
encoder.SetEscapeHTML(false)
err := encoder.Encode(i)
return bytes.TrimRight(buffer.Bytes(), "\n"), err
err := JsonMarshalByteBuffer(buffer, i)
return buffer.Bytes(), err
}
// IsAMap checks if the given value is a map.