From bc82422bc5362407a3560bf7f392e29e4d2bf9bb Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 28 Mar 2023 11:43:04 +0700 Subject: [PATCH] Reduce redundant calls to bytes.ToLower and substitute the last call with a home-made version doing the conversion in place Former-commit-id: d9ea22f649d97be352f8dbb37acc1495df830118 --- pkg/obiformats/ecopcr_read.go | 3 +-- pkg/obiformats/embl_read.go | 4 ++-- pkg/obiformats/fastseq_read.go | 3 +-- pkg/obiformats/genbank_read.go | 2 +- pkg/obiseq/biosequence.go | 5 ++--- pkg/obiutils/bytes.go | 11 +++++++++++ 6 files changed, 18 insertions(+), 10 deletions(-) create mode 100644 pkg/obiutils/bytes.go diff --git a/pkg/obiformats/ecopcr_read.go b/pkg/obiformats/ecopcr_read.go index cf5be7d..96b68b7 100644 --- a/pkg/obiformats/ecopcr_read.go +++ b/pkg/obiformats/ecopcr_read.go @@ -1,7 +1,6 @@ package obiformats import ( - "bytes" "compress/gzip" "encoding/csv" "fmt" @@ -70,7 +69,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error) comment = strings.TrimSpace(record[19]) } - bseq := obiseq.NewBioSequence(name, bytes.ToLower(sequence), comment) + bseq := obiseq.NewBioSequence(name, sequence, comment) annotation := bseq.Annotations() annotation["ac"] = name diff --git a/pkg/obiformats/embl_read.go b/pkg/obiformats/embl_read.go index a0f81ec..0ff9a6a 100644 --- a/pkg/obiformats/embl_read.go +++ b/pkg/obiformats/embl_read.go @@ -141,10 +141,10 @@ func _ParseEmblFile(source string, input <-chan _FileChunk, out obiiter.IBioSequ } case line == "//": sequence := obiseq.NewBioSequence(id, - bytes.ToLower(seqBytes.Bytes()), + seqBytes.Bytes(), defBytes.String()) sequence.SetSource(source) - + sequence.SetFeatures(featBytes.Bytes()) annot := sequence.Annotations() diff --git a/pkg/obiformats/fastseq_read.go b/pkg/obiformats/fastseq_read.go index e73db30..e18a3b5 100644 --- a/pkg/obiformats/fastseq_read.go +++ b/pkg/obiformats/fastseq_read.go @@ -7,7 +7,6 @@ package obiformats import "C" import ( - "bytes" 
"fmt" "os" "path" @@ -42,7 +41,7 @@ func _FastseqReader(source string, comment = "" } - rep := obiseq.NewBioSequence(name, bytes.ToLower(sequence), comment) + rep := obiseq.NewBioSequence(name, sequence, comment) rep.SetSource(source) if s.qual.l > C.ulong(0) { cquality := unsafe.Slice(s.qual.s, C.int(s.qual.l)) diff --git a/pkg/obiformats/genbank_read.go b/pkg/obiformats/genbank_read.go index 1ca1f59..195c974 100644 --- a/pkg/obiformats/genbank_read.go +++ b/pkg/obiformats/genbank_read.go @@ -69,7 +69,7 @@ func _ParseGenbankFile(source string, case line == "//": log.Debugln("Total lines := ", nl) sequence := obiseq.NewBioSequence(id, - bytes.ToLower(seqBytes.Bytes()), + seqBytes.Bytes(), defBytes.String()) sequence.SetSource(source) state = inHeader diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index 99b1e4c..e38b685 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -11,10 +11,10 @@ package obiseq import ( - "bytes" "crypto/md5" "sync/atomic" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils" log "github.com/sirupsen/logrus" ) @@ -210,7 +210,6 @@ func (s *BioSequence) Source() string { return s.source } - // Returning the MD5 hash of the sequence. func (s *BioSequence) MD5() [16]byte { return md5.Sum(s.sequence) @@ -244,7 +243,7 @@ func (s *BioSequence) SetSequence(sequence []byte) { if s.sequence != nil { RecycleSlice(&s.sequence) } - s.sequence = bytes.ToLower(sequence) + s.sequence = obiutils.InPlaceToLower(sequence) } // Setting the qualities of the BioSequence. diff --git a/pkg/obiutils/bytes.go b/pkg/obiutils/bytes.go new file mode 100644 index 0000000..b17d226 --- /dev/null +++ b/pkg/obiutils/bytes.go @@ -0,0 +1,11 @@ +package obiutils + +func InPlaceToLower(data []byte) []byte { + for i,l := range data { + if l >= 'A' && l <='Z' { + data[i]|=32 + } + } + + return data +} \ No newline at end of file