mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Reducing memory allocation events
Former-commit-id: c94e79ba116464504580fc397270ead154063971
This commit is contained in:
@ -1,15 +1,14 @@
|
||||
package obialign
|
||||
|
||||
import "slices"
|
||||
|
||||
func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int {
|
||||
|
||||
needed := (lseqA + lseqB) * 2
|
||||
|
||||
if needed > cap(*path) {
|
||||
*path = make([]int, 0, needed)
|
||||
}
|
||||
|
||||
*path = (*path)[:cap(*path)]
|
||||
(*path) = (*path)[:0]
|
||||
(*path) = slices.Grow((*path), needed)
|
||||
p := cap(*path)
|
||||
*path = (*path)[:p]
|
||||
|
||||
i := lseqA - 1
|
||||
j := lseqB - 1
|
||||
|
@ -1,8 +1,10 @@
|
||||
package obiformats
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
@ -85,17 +87,26 @@ func ParseFastSeqJsonHeader(sequence *obiseq.BioSequence) {
|
||||
}
|
||||
}
|
||||
|
||||
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
|
||||
func WriteFastSeqJsonHeader(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
|
||||
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
if len(annotations) > 0 {
|
||||
text, err := obiutils.JsonMarshal(sequence.Annotations())
|
||||
err := obiutils.JsonMarshalByteBuffer(buffer, sequence.Annotations())
|
||||
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return string(text)
|
||||
func FormatFastSeqJsonHeader(sequence *obiseq.BioSequence) string {
|
||||
annotations := sequence.Annotations()
|
||||
buffer := bytes.Buffer{}
|
||||
|
||||
if len(annotations) > 0 {
|
||||
obiutils.JsonMarshalByteBuffer(&buffer, sequence.Annotations())
|
||||
return unsafe.String(unsafe.SliceData(buffer.Bytes()), len(buffer.Bytes()))
|
||||
}
|
||||
|
||||
return ""
|
||||
|
@ -7,6 +7,7 @@ import (
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
@ -298,17 +299,18 @@ func ParseFastSeqOBIHeader(sequence *obiseq.BioSequence) {
|
||||
}
|
||||
}
|
||||
|
||||
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
|
||||
func WriteFastSeqOBIHeade(buffer *bytes.Buffer, sequence *obiseq.BioSequence) {
|
||||
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
if annotations != nil {
|
||||
var text strings.Builder
|
||||
if len(annotations) > 0 {
|
||||
|
||||
for key, value := range annotations {
|
||||
if key != "definition" {
|
||||
|
||||
switch t := value.(type) {
|
||||
case string:
|
||||
text.WriteString(fmt.Sprintf("%s=%s; ", key, t))
|
||||
buffer.WriteString(fmt.Sprintf("%s=%s; ", key, t))
|
||||
case map[string]int,
|
||||
map[string]string,
|
||||
map[string]interface{},
|
||||
@ -318,16 +320,30 @@ func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
|
||||
log.Fatalf("Cannot convert %v value", value)
|
||||
}
|
||||
tv = bytes.ReplaceAll(tv, []byte(`"`), []byte("'"))
|
||||
text.WriteString(fmt.Sprintf("%s=", key))
|
||||
text.Write(tv)
|
||||
text.WriteString("; ")
|
||||
buffer.WriteString(fmt.Sprintf("%s=", key))
|
||||
buffer.Write(tv)
|
||||
buffer.WriteString("; ")
|
||||
default:
|
||||
text.WriteString(fmt.Sprintf("%s=%v; ", key, value))
|
||||
buffer.WriteString(fmt.Sprintf("%s=%v; ", key, value))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return text.String() + " " + sequence.Definition()
|
||||
if sequence.HasDefinition() {
|
||||
buffer.WriteByte(' ')
|
||||
buffer.WriteString(sequence.Definition())
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func FormatFastSeqOBIHeader(sequence *obiseq.BioSequence) string {
|
||||
annotations := sequence.Annotations()
|
||||
|
||||
if annotations != nil {
|
||||
var text bytes.Buffer
|
||||
WriteFastSeqOBIHeade(&text, sequence)
|
||||
return unsafe.String(unsafe.SliceData(text.Bytes()), len(text.String()))
|
||||
}
|
||||
|
||||
return ""
|
||||
|
@ -81,12 +81,16 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, ski
|
||||
var bs bytes.Buffer
|
||||
|
||||
// Iterate over each sequence in the batch
|
||||
for _, seq := range batch.Slice() {
|
||||
for i, seq := range batch.Slice() {
|
||||
// Check if the sequence is empty
|
||||
if seq.Len() > 0 {
|
||||
// Format the sequence using the provided formater function
|
||||
formattedSeq := FormatFasta(seq, formater)
|
||||
|
||||
if i == 0 {
|
||||
bs.Grow(len(formattedSeq) * len(batch.Slice()) * 5 / 4)
|
||||
}
|
||||
|
||||
// Append the formatted sequence to the buffer
|
||||
bs.WriteString(formattedSeq)
|
||||
bs.WriteByte('\n')
|
||||
|
@ -2,7 +2,6 @@ package obiformats
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sync"
|
||||
@ -15,39 +14,52 @@ import (
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
// The function FormatFastq takes a BioSequence object, a quality shift value, and a header formatter
|
||||
// function as input, and returns a formatted string in FASTQ format.
|
||||
func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
|
||||
|
||||
q := seq.QualitiesString()
|
||||
func _formatFastq(buff *bytes.Buffer, seq *obiseq.BioSequence, formater FormatHeader) {
|
||||
|
||||
info := ""
|
||||
if formater != nil {
|
||||
info = formater(seq)
|
||||
}
|
||||
|
||||
f := fmt.Sprintf("@%s %s\n%s\n+\n%s",
|
||||
seq.Id(), info,
|
||||
seq.String(),
|
||||
q,
|
||||
)
|
||||
buff.WriteByte('@')
|
||||
buff.WriteString(seq.Id())
|
||||
buff.WriteByte(' ')
|
||||
|
||||
if f[0] != '@' {
|
||||
log.Panicln("FormatFastq: FASTQ format error")
|
||||
}
|
||||
buff.WriteString(info)
|
||||
buff.WriteByte('\n')
|
||||
|
||||
return f
|
||||
buff.Write(seq.Sequence())
|
||||
buff.WriteString("\n+\n")
|
||||
|
||||
q := seq.QualitiesString()
|
||||
buff.WriteString(q)
|
||||
buff.WriteByte('\n')
|
||||
|
||||
}
|
||||
|
||||
// The function FormatFastq takes a BioSequence object, a quality shift value, and a header formatter
|
||||
// function as input, and returns a formatted string in FASTQ format.
|
||||
func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
|
||||
|
||||
var buff bytes.Buffer
|
||||
|
||||
_formatFastq(&buff, seq, formater)
|
||||
|
||||
return buff.String()
|
||||
}
|
||||
|
||||
func FormatFastqBatch(batch obiiter.BioSequenceBatch,
|
||||
formater FormatHeader, skipEmpty bool) []byte {
|
||||
var bs bytes.Buffer
|
||||
|
||||
for _, seq := range batch.Slice() {
|
||||
for i, seq := range batch.Slice() {
|
||||
if seq.Len() > 0 {
|
||||
fs := FormatFastq(seq, formater)
|
||||
bs.WriteString(fs)
|
||||
bs.WriteString("\n")
|
||||
_formatFastq(&bs, seq, formater)
|
||||
|
||||
if i == 0 {
|
||||
|
||||
bs.Grow(len(bs.Bytes()) * len(batch.Slice()) * 5 / 4)
|
||||
}
|
||||
} else {
|
||||
if skipEmpty {
|
||||
log.Warnf("Sequence %s is empty and skiped in output", seq.Id())
|
||||
|
@ -7,7 +7,7 @@ import (
|
||||
// TODO: The version number is extracted from git. This means that the version
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// committed
|
||||
var _Commit = "bcaa264"
|
||||
var _Commit = "fbdb2af"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
@ -15,6 +15,7 @@ import (
|
||||
"slices"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"unsafe"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
@ -279,7 +280,8 @@ func (s *BioSequence) QualitiesString() string {
|
||||
quality_shift := obioptions.OutputQualityShift()
|
||||
|
||||
qual := s.Qualities()
|
||||
qual_ascii := GetSlice(len(qual))[0:len(qual)]
|
||||
qual_ascii := make([]byte, len(qual))
|
||||
|
||||
for i := 0; i < len(qual); i++ {
|
||||
quality := qual[i]
|
||||
if quality > 93 {
|
||||
@ -287,8 +289,8 @@ func (s *BioSequence) QualitiesString() string {
|
||||
}
|
||||
qual_ascii[i] = quality + quality_shift
|
||||
}
|
||||
qual_sting := string(qual_ascii)
|
||||
RecycleSlice(&qual_ascii)
|
||||
|
||||
qual_sting := unsafe.String(unsafe.SliceData(qual_ascii), len(qual))
|
||||
return qual_sting
|
||||
}
|
||||
|
||||
|
@ -363,6 +363,16 @@ func AtomicCounter(initial ...int) func() int {
|
||||
return nextCounter
|
||||
}
|
||||
|
||||
// JsonMarshalByteBuffer appends the JSON encoding of i to buffer.
//
// The value is encoded with HTML escaping disabled, so characters such as
// '<', '>' and '&' are written verbatim. json.Encoder.Encode terminates every
// value with a newline; that single trailing newline is removed so the
// caller can keep writing on the same line.
//
// Bytes already present in buffer before the call are never modified: on an
// encoding error the buffer is restored to its previous length and the error
// is returned.
func JsonMarshalByteBuffer(buffer *bytes.Buffer, i interface{}) error {
	// Remember where our output starts so that only bytes written by this
	// call are ever trimmed or rolled back.
	start := buffer.Len()

	encoder := json.NewEncoder(buffer)
	encoder.SetEscapeHTML(false)

	if err := encoder.Encode(i); err != nil {
		buffer.Truncate(start)
		return err
	}

	// Drop the newline appended by Encode, and nothing else.
	if end := buffer.Len(); end > start && buffer.Bytes()[end-1] == '\n' {
		buffer.Truncate(end - 1)
	}

	return nil
}
|
||||
|
||||
// JsonMarshal marshals an interface into JSON format.
|
||||
//
|
||||
// JsonMarshal is a UTF-8 friendly marshaler. Go's json.Marshal is not UTF-8
|
||||
@ -375,10 +385,8 @@ func AtomicCounter(initial ...int) func() int {
|
||||
// It takes an interface as a parameter and returns a byte slice and an error.
|
||||
func JsonMarshal(i interface{}) ([]byte, error) {
|
||||
buffer := &bytes.Buffer{}
|
||||
encoder := json.NewEncoder(buffer)
|
||||
encoder.SetEscapeHTML(false)
|
||||
err := encoder.Encode(i)
|
||||
return bytes.TrimRight(buffer.Bytes(), "\n"), err
|
||||
err := JsonMarshalByteBuffer(buffer, i)
|
||||
return buffer.Bytes(), err
|
||||
}
|
||||
|
||||
// IsAMap checks if the given value is a map.
|
||||
|
Reference in New Issue
Block a user