From 31c45bf46fb2ee75388873870e8e40ffb1d7e7b1 Mon Sep 17 00:00:00 2001
From: Eric Coissac
Date: Sun, 21 Aug 2022 17:53:19 +0200
Subject: [PATCH] Adds documentation and few methods to the BioSequences

---
 pkg/goutils/goutils.go    |  53 ++++++++++++++-
 pkg/obiseq/biosequence.go | 132 +++++++++++++++++++++++++++++---------
 2 files changed, 155 insertions(+), 30 deletions(-)

diff --git a/pkg/goutils/goutils.go b/pkg/goutils/goutils.go
index c6133ec..4ca032e 100644
--- a/pkg/goutils/goutils.go
+++ b/pkg/goutils/goutils.go
@@ -52,7 +52,58 @@ func InterfaceToInt(i interface{}) (val int, err error) {
 	case uint64:
 		val = int(t) // standardizes across systems
 	default:
-		err = &NotAnInteger{"count attribute is not an integer"}
+		err = &NotAnInteger{"value attribute cannot be casted to an integer"}
+	}
+	return
+}
+
+// NotABoolean defines a new type of Error : "NotABoolean"
+type NotABoolean struct {
+	message string
+}
+
+// Error retrieves the error message associated with the "NotABoolean"
+// error. Adding this method makes "NotABoolean" comply with the
+// error interface.
+func (m *NotABoolean) Error() string {
+	return m.message
+}
+
+
+// InterfaceToBool converts an interface{} holding a bool, int, int8-64, uint8-64 or float32/64
+// to a bool (non-zero numbers are true) and returns a NotABoolean error for any other type.
+func InterfaceToBool(i interface{}) (val bool, err error) {
+
+	err = nil
+	val = false
+
+	switch t := i.(type) {
+	case bool:
+		val = t
+	case int:
+		val = t != 0
+	case int8:
+		val = t != 0
+	case int16:
+		val = t != 0
+	case int32:
+		val = t != 0
+	case int64:
+		val = t != 0
+	case float32:
+		val = t != 0
+	case float64:
+		val = t != 0
+	case uint8:
+		val = t != 0
+	case uint16:
+		val = t != 0
+	case uint32:
+		val = t != 0
+	case uint64:
+		val = t != 0
+	default:
+		err = &NotABoolean{"value attribute cannot be casted to a boolean"}
 	}
 	return
 }
diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go
index 56d27ec..01257fe 100644
--- a/pkg/obiseq/biosequence.go
+++ b/pkg/obiseq/biosequence.go
@@ -2,6 +2,7 @@ package obiseq
 
 import (
 	"crypto/md5"
+	"fmt"
 	"sync/atomic"
 
 	log "github.com/sirupsen/logrus"
@@ -33,27 +34,26 @@ func __make_default_qualities__(length int) Quality {
 	return __default_qualities__[0:length]
 }
 
+// Annotation is a map of strings to interfaces.
+// It is used to store the annotations attached to a BioSequence.
 type Annotation map[string]interface{}
 
+// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
+// features and annotations. It aims to represent a biological sequence
 type BioSequence struct {
-	id          string
-	definition  string
-	sequence    []byte
-	qualities   []byte
+	id          string // The identifier of the sequence (private, accessible through the method Id)
+	definition  string // The documentation of the sequence (private, accessible through the method Definition)
+	sequence    []byte // The sequence itself, accessible through the method Sequence
+	qualities   []byte // The quality scores of the sequence.
 	feature     []byte
 	annotations Annotation
 }
 
+// MakeEmptyBioSequence() creates a new BioSequence object with no data
 func MakeEmptyBioSequence() BioSequence {
 	atomic.AddInt32(&_NewSeq, 1)
 	atomic.AddInt32(&_InMemSeq, 1)
 
-	//if atomic.CompareAndSwapInt32()()
-
-	// if int(_NewSeq)%int(_BioLogRate) == 0 {
-	// 	LogBioSeqStatus()
-	// }
-
 	return BioSequence{
 		id:          "",
 		definition:  "",
@@ -64,11 +64,13 @@ func MakeEmptyBioSequence() BioSequence {
 	}
 }
 
+// `NewEmptyBioSequence()` returns a pointer to a new empty BioSequence
 func NewEmptyBioSequence() *BioSequence {
 	s := MakeEmptyBioSequence()
 	return &s
 }
 
+// `MakeBioSequence` creates a new `BioSequence` with the given `id`, `sequence`, and `definition`
 func MakeBioSequence(id string,
 	sequence []byte,
 	definition string) BioSequence {
@@ -79,6 +81,7 @@ func MakeBioSequence(id string,
 	return bs
 }
 
+// `NewBioSequence` creates a new `BioSequence` struct and returns a pointer to it
 func NewBioSequence(id string,
 	sequence []byte,
 	definition string) *BioSequence {
@@ -86,6 +89,7 @@ func NewBioSequence(id string,
 	return &s
 }
 
+// A method that is called when the sequence is no longer needed.
 func (sequence *BioSequence) Recycle() {
 
 	atomic.AddInt32(&_RecycleSeq, 1)
@@ -108,6 +112,7 @@ func (sequence *BioSequence) Recycle() {
 	}
 }
 
+// Copying the BioSequence.
 func (s *BioSequence) Copy() *BioSequence {
 	newSeq := MakeEmptyBioSequence()
 
@@ -125,28 +130,37 @@ func (s *BioSequence) Copy() *BioSequence {
 	return &newSeq
 }
 
+// A method that returns the id of the sequence.
 func (s *BioSequence) Id() string {
 	return s.id
 }
+
+// A method that returns the definition of the sequence.
 func (s *BioSequence) Definition() string {
 	return s.definition
 }
 
+// A method that returns the sequence as a byte slice.
 func (s *BioSequence) Sequence() []byte {
 	return s.sequence
 }
 
+// A method that returns the sequence as a string.
 func (s *BioSequence) String() string {
 	return string(s.sequence)
 }
+
+// Returning the length of the sequence.
 func (s *BioSequence) Length() int {
 	return len(s.sequence)
 }
 
+// Checking if the BioSequence has quality scores.
 func (s *BioSequence) HasQualities() bool {
 	return len(s.qualities) > 0
 }
 
+// Returning the qualities of the sequence.
 func (s *BioSequence) Qualities() Quality {
 	if s.HasQualities() {
 		return s.qualities
@@ -159,10 +173,12 @@ func (s *BioSequence) Features() string {
 	return string(s.feature)
 }
 
+// Checking if the BioSequence has annotations.
 func (s *BioSequence) HasAnnotation() bool {
 	return len(s.annotations) > 0
 }
 
+// Returning the annotations of the BioSequence.
 func (s *BioSequence) Annotations() Annotation {
 
 	if s.annotations == nil {
@@ -172,46 +188,98 @@ func (s *BioSequence) Annotations() Annotation {
 	return s.annotations
 }
 
+// Get returns the annotation stored under key; the boolean is false when the key is absent.
+func (s *BioSequence) Get(key string) (interface{}, bool) {
+	var val interface{}
+	ok := s.annotations != nil
+
+	if ok {
+		val, ok = s.annotations[key]
+	}
+
+	return val, ok
+}
+
+// GetInt returns the annotation stored under key as an int; the boolean is false if absent or not convertible.
+func (s *BioSequence) GetInt(key string) (int, bool) {
+	var val int
+	var err error
+
+	v, ok := s.Get(key)
+
+	if ok {
+		val, err = goutils.InterfaceToInt(v)
+		ok = err == nil
+	}
+
+	return val, ok
+}
+
+// GetString returns the annotation stored under key formatted with fmt.Sprint; the boolean is false if absent.
+func (s *BioSequence) GetString(key string) (string, bool) {
+	var val string
+	v, ok := s.Get(key)
+
+	if ok {
+		val = fmt.Sprint(v)
+	}
+
+	return val, ok
+}
+
+// GetBool returns the annotation stored under key as a bool; the boolean is false if absent or not convertible.
+func (s *BioSequence) GetBool(key string) (bool, bool) {
+	var val bool
+	var err error
+
+	v, ok := s.Get(key)
+
+	if ok {
+		val, err = goutils.InterfaceToBool(v)
+		ok = err == nil
+	}
+
+	return val, ok
+}
+
+// Returning the MD5 hash of the sequence.
 func (s *BioSequence) MD5() [16]byte {
 	return md5.Sum(s.sequence)
 }
 
+// Count returns the number of times the sequence has been observed ("count" annotation, 1 by default).
 func (s *BioSequence) Count() int {
-	if s.annotations == nil {
-		return 1
+	count, ok := s.GetInt("count")
+
+	if !ok {
+		count = 1
 	}
 
-	if val, ok := (s.annotations)["count"]; ok {
-		val, err := goutils.InterfaceToInt(val)
-		if err == nil {
-			return val
-		}
-	}
-	return 1
+	return count
 }
 
+// Taxid returns the "taxid" annotation of the sequence, defaulting to 1 when absent or not an integer.
 func (s *BioSequence) Taxid() int {
-	if s.annotations == nil {
-		return 1
+	taxid, ok := s.GetInt("taxid")
+
+	if !ok {
+		taxid = 1
 	}
 
-	if val, ok := (s.annotations)["taxid"]; ok {
-		val, err := goutils.InterfaceToInt(val)
-		if err == nil {
-			return val
-		}
-	}
-	return 1
+	return taxid
 }
 
+// Setting the id of the BioSequence.
 func (s *BioSequence) SetId(id string) {
 	s.id = id
 }
 
+// Setting the definition of the sequence.
 func (s *BioSequence) SetDefinition(definition string) {
 	s.definition = definition
 }
 
+// Setting the features of the BioSequence.
 func (s *BioSequence) SetFeatures(feature []byte) {
 	if cap(s.feature) >= 300 {
 		RecycleSlice(&s.feature)
@@ -219,6 +287,7 @@ func (s *BioSequence) SetFeatures(feature []byte) {
 	s.feature = feature
 }
 
+// Setting the sequence of the BioSequence.
 func (s *BioSequence) SetSequence(sequence []byte) {
 	if s.sequence != nil {
 		RecycleSlice(&s.sequence)
@@ -226,6 +295,7 @@ func (s *BioSequence) SetSequence(sequence []byte) {
 	s.sequence = sequence
 }
 
+// Setting the qualities of the BioSequence.
 func (s *BioSequence) SetQualities(qualities Quality) {
 	if s.qualities != nil {
 		RecycleSlice(&s.qualities)
@@ -233,28 +303,32 @@ func (s *BioSequence) SetQualities(qualities Quality) {
 	s.qualities = qualities
 }
 
+// A method that appends a byte slice to the qualities of the BioSequence.
 func (s *BioSequence) WriteQualities(data []byte) (int, error) {
 	s.qualities = append(s.qualities, data...)
 	return len(data), nil
 }
 
+// Appending a byte to the qualities of the BioSequence.
 func (s *BioSequence) WriteByteQualities(data byte) error {
 	s.qualities = append(s.qualities, data)
 	return nil
 }
 
+// A method that appends a byte slice to the sequence.
 func (s *BioSequence) Write(data []byte) (int, error) {
 	s.sequence = append(s.sequence, data...)
 	return len(data), nil
 }
 
+// A method that appends a string to the sequence.
 func (s *BioSequence) WriteString(data string) (int, error) {
 	bdata := []byte(data)
 	return s.Write(bdata)
 }
 
+// A method that appends a byte to the sequence.
 func (s *BioSequence) WriteByte(data byte) error {
 	s.sequence = append(s.sequence, data)
 	return nil
 }
-