Use NewBioSequenceOwning to avoid unnecessary sequence copying

Replace NewBioSequence with NewBioSequenceOwning in genbank_read.go to take ownership of sequence slices without copying, improving performance. Update biosequence.go to add the new TakeSequence method and NewBioSequenceOwning constructor.
This commit is contained in:
Eric Coissac
2026-03-10 15:51:28 +01:00
parent b246025907
commit 1342c83db6
2 changed files with 21 additions and 2 deletions

View File

@@ -287,7 +287,7 @@ func GenbankChunkParserRope(source string, rope *PieceOfChunk,
if id == "" { if id == "" {
log.Warn("Empty id when parsing genbank file") log.Warn("Empty id when parsing genbank file")
} }
sequence := obiseq.NewBioSequence(id, seqDest, defBytes.String()) sequence := obiseq.NewBioSequenceOwning(id, seqDest, defBytes.String())
sequence.SetSource(source) sequence.SetSource(source)
if withFeatureTable { if withFeatureTable {
sequence.SetFeatures(featBytes.Bytes()) sequence.SetFeatures(featBytes.Bytes())
@@ -320,7 +320,7 @@ func GenbankChunkParserRope(source string, rope *PieceOfChunk,
if id == "" { if id == "" {
log.Warn("Empty id when parsing genbank file") log.Warn("Empty id when parsing genbank file")
} }
sequence := obiseq.NewBioSequence(id, seqDest, defBytes.String()) sequence := obiseq.NewBioSequenceOwning(id, seqDest, defBytes.String())
sequence.SetSource(source) sequence.SetSource(source)
if withFeatureTable { if withFeatureTable {
sequence.SetFeatures(featBytes.Bytes()) sequence.SetFeatures(featBytes.Bytes())

View File

@@ -120,6 +120,19 @@ func NewBioSequence(id string,
return bs return bs
} }
// NewBioSequenceOwning builds a BioSequence that adopts the given sequence
// slice as its own storage instead of duplicating it.
//
// The slice is handed to TakeSequence, so ownership moves to the returned
// BioSequence: the caller must not use the slice once this function has been
// called. It is intended for slices that were allocated specifically for this
// sequence.
//
// Parameters:
//   - id: the identifier assigned to the new sequence.
//   - sequence: the sequence bytes; ownership is transferred, no copy is made.
//   - definition: the definition assigned to the new sequence.
//
// Returns:
//   - the newly created *BioSequence.
func NewBioSequenceOwning(id string, sequence []byte, definition string) *BioSequence {
	owned := NewEmptyBioSequence(0)

	owned.SetId(id)
	owned.TakeSequence(sequence) // no copy: the slice itself becomes the stored sequence
	owned.SetDefinition(definition)

	return owned
}
// NewBioSequenceWithQualities creates a new BioSequence object with the given id, sequence, definition, and qualities. // NewBioSequenceWithQualities creates a new BioSequence object with the given id, sequence, definition, and qualities.
// //
// Parameters: // Parameters:
@@ -444,6 +457,12 @@ func (s *BioSequence) SetSequence(sequence []byte) {
s.sequence = obiutils.InPlaceToLower(CopySlice(sequence)) s.sequence = obiutils.InPlaceToLower(CopySlice(sequence))
} }
// TakeSequence adopts the given slice as the sequence of s without copying it,
// in contrast with SetSequence, which stores a copy of its argument.
//
// The slice is lowercased in place before being stored, so the bytes seen by
// the caller are modified. After this call the slice belongs to s and the
// caller must not use it anymore.
func (s *BioSequence) TakeSequence(sequence []byte) {
	// Lowercasing operates directly on the caller's slice; no new buffer is made.
	lowered := obiutils.InPlaceToLower(sequence)
	s.sequence = lowered
}
func (s *BioSequence) HasValidSequence() bool { func (s *BioSequence) HasValidSequence() bool {
for _, c := range s.sequence { for _, c := range s.sequence {
if !((c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '[' || c == ']') { if !((c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '[' || c == ']') {