Files
obitools4/pkg/obiseq/biosequence.go

335 lines
7.6 KiB
Go
Raw Normal View History

2022-01-13 23:27:39 +01:00
package obiseq
import (
"crypto/md5"
"fmt"
"sync/atomic"
2022-01-13 23:27:39 +01:00
2022-02-24 12:14:52 +01:00
log "github.com/sirupsen/logrus"
2022-01-13 23:43:01 +01:00
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
2022-01-13 23:27:39 +01:00
)
// Package-level bookkeeping for BioSequence instances.
var (
	// _NewSeq, _RecycleSeq and _InMemSeq are adjusted with sync/atomic by
	// MakeEmptyBioSequence and Recycle. _MaxInMemSeq is declared alongside
	// them but is not updated anywhere in this file.
	_NewSeq, _RecycleSeq, _InMemSeq, _MaxInMemSeq int32

	// _BioLogRate is only referenced from commented-out logging code in
	// Recycle. NOTE(review): presumably a "log every N recycles" interval;
	// confirm before relying on it.
	_BioLogRate = 100000
)
func LogBioSeqStatus() {
2022-02-24 12:14:52 +01:00
log.Debugf("Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq)
}
2022-01-13 23:27:39 +01:00
// Quality is a slice of quality scores stored as uint8 values.
type Quality []uint8

// __default_qualities__ is a shared buffer that only ever contains the
// default score 40. It grows on demand and is never shrunk.
var __default_qualities__ = make(Quality, 0, 500)

// __make_default_qualities__ returns a Quality of exactly `length` entries,
// all equal to 40, backed by the shared __default_qualities__ buffer.
//
// The shared buffer is extended to exactly `length` entries when it is too
// short. The result is returned with its capacity capped to `length`, so an
// append by the caller allocates a new array instead of overwriting the
// shared defaults. The elements themselves are still shared: callers must
// treat the returned slice as read-only.
//
// NOTE(review): growing the shared buffer is not synchronized; confirm that
// callers do not invoke this concurrently.
func __make_default_qualities__(length int) Quality {
	for len(__default_qualities__) < length {
		__default_qualities__ = append(__default_qualities__, 40)
	}

	return __default_qualities__[:length:length]
}
// Annotation maps string keys to values of arbitrary type. It is used to
// store the key/value annotations attached to a BioSequence.
type Annotation map[string]interface{}

// BioSequence represents a biological sequence: an identifier, a definition,
// the sequence bytes, optional quality scores, a feature buffer and a set of
// annotations. All fields are private and reached through methods.
type BioSequence struct {
	// id is the identifier of the sequence (read through the Id method).
	id string
	// definition is the documentation of the sequence (read through the
	// Definition method).
	definition string
	// sequence holds the sequence itself (read through the Sequence method),
	// qualities holds its quality scores, and feature holds the feature
	// data (read as a string through the Features method).
	sequence, qualities, feature []byte
	// annotations holds the key/value annotations; it may be nil.
	annotations Annotation
}
// MakeEmptyBioSequence() creates a new BioSequence object with no data
2022-01-16 00:21:42 +01:00
func MakeEmptyBioSequence() BioSequence {
atomic.AddInt32(&_NewSeq, 1)
atomic.AddInt32(&_InMemSeq, 1)
return BioSequence{
2022-01-16 00:21:42 +01:00
id: "",
definition: "",
sequence: nil,
qualities: nil,
feature: nil,
annotations: nil,
}
}
// `NewEmptyBioSequence()` returns a pointer to a new empty BioSequence
func NewEmptyBioSequence() *BioSequence {
s := MakeEmptyBioSequence()
return &s
2022-01-16 00:21:42 +01:00
}
// `MakeBioSequence` creates a new `BioSequence` with the given `id`, `sequence`, and `definition`
2022-01-16 00:21:42 +01:00
func MakeBioSequence(id string,
sequence []byte,
definition string) BioSequence {
bs := MakeEmptyBioSequence()
bs.SetId(id)
bs.SetSequence(sequence)
bs.SetDefinition(definition)
return bs
}
// NewBioSequence returns a pointer to a new BioSequence built by
// MakeBioSequence from the given id, sequence and definition.
func NewBioSequence(id string,
	sequence []byte,
	definition string) *BioSequence {
	bs := new(BioSequence)
	*bs = MakeBioSequence(id, sequence, definition)
	return bs
}
2022-01-16 00:21:42 +01:00
// A method that is called when the sequence is no longer needed.
func (sequence *BioSequence) Recycle() {
2022-01-16 00:21:42 +01:00
atomic.AddInt32(&_RecycleSeq, 1)
atomic.AddInt32(&_InMemSeq, -1)
2022-01-16 00:21:42 +01:00
// if int(_RecycleSeq)%int(_BioLogRate) == 0 {
// LogBioSeqStatus()
// }
2022-01-13 23:27:39 +01:00
if sequence != nil {
RecycleSlice(&sequence.sequence)
sequence.sequence = nil
RecycleSlice(&sequence.feature)
sequence.feature = nil
RecycleSlice(&sequence.qualities)
sequence.qualities = nil
2022-01-13 23:27:39 +01:00
RecycleAnnotation(&sequence.annotations)
sequence.annotations = nil
}
2022-01-13 23:27:39 +01:00
}
// Copying the BioSequence.
func (s *BioSequence) Copy() *BioSequence {
2022-01-16 00:21:42 +01:00
newSeq := MakeEmptyBioSequence()
2022-01-13 23:27:39 +01:00
newSeq.id = s.id
newSeq.definition = s.definition
2022-01-13 23:27:39 +01:00
newSeq.sequence = GetSlice(s.sequence...)
newSeq.qualities = GetSlice(s.qualities...)
newSeq.feature = GetSlice(s.feature...)
2022-01-13 23:27:39 +01:00
if len(s.annotations) > 0 {
newSeq.annotations = GetAnnotation(s.annotations)
2022-01-13 23:27:39 +01:00
}
return &newSeq
2022-01-13 23:27:39 +01:00
}
// A method that returns the id of the sequence.
func (s *BioSequence) Id() string {
return s.id
2022-01-13 23:27:39 +01:00
}
// A method that returns the definition of the sequence.
func (s *BioSequence) Definition() string {
return s.definition
2022-01-13 23:27:39 +01:00
}
// A method that returns the sequence as a byte slice.
func (s *BioSequence) Sequence() []byte {
return s.sequence
2022-01-13 23:27:39 +01:00
}
// A method that returns the sequence as a string.
func (s *BioSequence) String() string {
return string(s.sequence)
2022-01-13 23:27:39 +01:00
}
// Returning the length of the sequence.
func (s *BioSequence) Length() int {
return len(s.sequence)
2022-01-13 23:27:39 +01:00
}
// Checking if the BioSequence has quality scores.
func (s *BioSequence) HasQualities() bool {
return len(s.qualities) > 0
2022-01-13 23:27:39 +01:00
}
// Returning the qualities of the sequence.
func (s *BioSequence) Qualities() Quality {
2022-01-13 23:27:39 +01:00
if s.HasQualities() {
return s.qualities
2022-01-13 23:27:39 +01:00
} else {
return __make_default_qualities__(len(s.sequence))
2022-01-13 23:27:39 +01:00
}
}
func (s *BioSequence) Features() string {
return string(s.feature)
2022-01-13 23:27:39 +01:00
}
// HasAnnotation reports whether the sequence holds at least one annotation.
func (s *BioSequence) HasAnnotation() bool {
	return len(s.annotations) != 0
}
// Returning the annotations of the BioSequence.
func (s *BioSequence) Annotations() Annotation {
2022-02-01 17:31:28 +01:00
if s.annotations == nil {
s.annotations = GetAnnotation()
2022-01-16 00:21:42 +01:00
}
2022-02-01 17:31:28 +01:00
return s.annotations
2022-01-13 23:27:39 +01:00
}
// Get returns the annotation stored under key and whether it was present.
//
// It never creates the annotation map: on a sequence without annotations it
// returns (nil, false).
func (s *BioSequence) Get(key string) (interface{}, bool) {
	// Reading from a nil map is safe in Go and yields the zero value with
	// ok == false, so no explicit nil check is required.
	val, ok := s.annotations[key]
	return val, ok
}
// GetInt returns the annotation stored under key converted to an int with
// goutils.InterfaceToInt.
//
// The boolean result is false when the key is absent or when the conversion
// returns an error.
func (s *BioSequence) GetInt(key string) (int, bool) {
	raw, found := s.Get(key)
	if !found {
		return 0, false
	}

	val, err := goutils.InterfaceToInt(raw)
	return val, err == nil
}
// GetString returns the annotation stored under key formatted with
// fmt.Sprint.
//
// The boolean result is false, and the string empty, when the key is absent.
func (s *BioSequence) GetString(key string) (string, bool) {
	raw, found := s.Get(key)
	if !found {
		return "", false
	}

	return fmt.Sprint(raw), true
}
// GetBool returns the annotation stored under key converted to a bool with
// goutils.InterfaceToBool.
//
// The second result is false when the key is absent or when the conversion
// returns an error.
func (s *BioSequence) GetBool(key string) (bool, bool) {
	raw, found := s.Get(key)
	if !found {
		return false, false
	}

	val, err := goutils.InterfaceToBool(raw)
	return val, err == nil
}
// Returning the MD5 hash of the sequence.
func (s *BioSequence) MD5() [16]byte {
return md5.Sum(s.sequence)
2022-01-13 23:27:39 +01:00
}
// Returning the number of times the sequence has been observed.
func (s *BioSequence) Count() int {
count, ok := s.GetInt("count")
2022-01-13 23:27:39 +01:00
if !ok {
count = 1
2022-01-13 23:27:39 +01:00
}
return count
2022-01-13 23:27:39 +01:00
}
// Returning the taxid of the sequence.
func (s *BioSequence) Taxid() int {
taxid, ok := s.GetInt("count")
2022-01-13 23:27:39 +01:00
if !ok {
taxid = 1
2022-01-13 23:27:39 +01:00
}
return taxid
2022-01-13 23:27:39 +01:00
}
// Setting the id of the BioSequence.
func (s *BioSequence) SetId(id string) {
s.id = id
2022-01-13 23:27:39 +01:00
}
// Setting the definition of the sequence.
func (s *BioSequence) SetDefinition(definition string) {
s.definition = definition
2022-01-13 23:27:39 +01:00
}
// Setting the features of the BioSequence.
func (s *BioSequence) SetFeatures(feature []byte) {
if cap(s.feature) >= 300 {
RecycleSlice(&s.feature)
2022-01-16 00:21:42 +01:00
}
s.feature = feature
2022-01-13 23:27:39 +01:00
}
// Setting the sequence of the BioSequence.
func (s *BioSequence) SetSequence(sequence []byte) {
if s.sequence != nil {
RecycleSlice(&s.sequence)
2022-01-16 00:21:42 +01:00
}
s.sequence = sequence
2022-01-13 23:27:39 +01:00
}
// Setting the qualities of the BioSequence.
func (s *BioSequence) SetQualities(qualities Quality) {
if s.qualities != nil {
RecycleSlice(&s.qualities)
2022-01-16 00:21:42 +01:00
}
s.qualities = qualities
2022-01-13 23:27:39 +01:00
}
// A method that appends a byte slice to the qualities of the BioSequence.
func (s *BioSequence) WriteQualities(data []byte) (int, error) {
s.qualities = append(s.qualities, data...)
2022-01-16 00:21:42 +01:00
return len(data), nil
}
// Appending a byte to the qualities of the BioSequence.
func (s *BioSequence) WriteByteQualities(data byte) error {
s.qualities = append(s.qualities, data)
2022-01-16 00:21:42 +01:00
return nil
}
// A method that appends a byte slice to the sequence.
func (s *BioSequence) Write(data []byte) (int, error) {
s.sequence = append(s.sequence, data...)
2022-01-16 00:21:42 +01:00
return len(data), nil
2022-01-13 23:27:39 +01:00
}
// A method that appends a string to the sequence.
func (s *BioSequence) WriteString(data string) (int, error) {
2022-01-16 00:21:42 +01:00
bdata := []byte(data)
return s.Write(bdata)
2022-01-13 23:27:39 +01:00
}
// A method that appends a byte to the sequence.
func (s *BioSequence) WriteByte(data byte) error {
s.sequence = append(s.sequence, data)
2022-01-16 00:21:42 +01:00
return nil
2022-01-13 23:27:39 +01:00
}