Adds documentation and a few methods to BioSequence

This commit is contained in:
2022-08-21 17:53:19 +02:00
parent eca1af9957
commit 31c45bf46f
2 changed files with 153 additions and 30 deletions

View File

@ -52,7 +52,56 @@ func InterfaceToInt(i interface{}) (val int, err error) {
case uint64: case uint64:
val = int(t) // standardizes across systems val = int(t) // standardizes across systems
default: default:
err = &NotAnInteger{"count attribute is not an integer"} err = &NotABoolean{"value attribute cannot be casted to an integer"}
}
return
}
// NotABoolean is the error type reported when a value cannot be
// converted to a boolean.
type NotABoolean struct {
	message string
}

// Error returns the message carried by the NotABoolean error.
// Providing this method makes *NotABoolean satisfy the error interface.
func (e *NotABoolean) Error() string {
	return e.message
}
// It converts an interface{} to a bool, and returns an error if the interface{} cannot be converted
// to a bool
func InterfaceToBool(i interface{}) (val bool, err error) {
err = nil
val = false
switch t := i.(type) {
case int:
val = t!=0
case int8:
val = t!=0 // standardizes across systems
case int16:
val = t!=0 // standardizes across systems
case int32:
val = t!=0 // standardizes across systems
case int64:
val = t!=0 // standardizes across systems
case float32:
val = t!=0 // standardizes across systems
case float64:
val = t!=0 // standardizes across systems
case uint8:
val = t!=0 // standardizes across systems
case uint16:
val = t!=0 // standardizes across systems
case uint32:
val = t!=0 // standardizes across systems
case uint64:
val = t!=0 // standardizes across systems
default:
err = &NotABoolean{"value attribute cannot be casted to a boolean"}
} }
return return
} }

View File

@ -2,6 +2,7 @@ package obiseq
import ( import (
"crypto/md5" "crypto/md5"
"fmt"
"sync/atomic" "sync/atomic"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
@ -33,27 +34,26 @@ func __make_default_qualities__(length int) Quality {
return __default_qualities__[0:length] return __default_qualities__[0:length]
} }
// Annotation is a map from string keys to values of arbitrary type.
// A BioSequence stores its annotations in a value of this type.
type Annotation map[string]interface{} type Annotation map[string]interface{}
// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
// features and annotations. It aims to represent a biological sequence.
type BioSequence struct { type BioSequence struct {
id string id string // The identifier of the sequence (private, accessible through the method Id)
definition string definition string // The documentation of the sequence (private, accessible through the method Definition)
sequence []byte sequence []byte // The sequence itself, accessible through the method Sequence
qualities []byte qualities []byte // The quality scores of the sequence.
feature []byte feature []byte
annotations Annotation annotations Annotation
} }
// MakeEmptyBioSequence() creates a new BioSequence object with no data
func MakeEmptyBioSequence() BioSequence { func MakeEmptyBioSequence() BioSequence {
atomic.AddInt32(&_NewSeq, 1) atomic.AddInt32(&_NewSeq, 1)
atomic.AddInt32(&_InMemSeq, 1) atomic.AddInt32(&_InMemSeq, 1)
//if atomic.CompareAndSwapInt32()()
// if int(_NewSeq)%int(_BioLogRate) == 0 {
// LogBioSeqStatus()
// }
return BioSequence{ return BioSequence{
id: "", id: "",
definition: "", definition: "",
@ -64,11 +64,13 @@ func MakeEmptyBioSequence() BioSequence {
} }
} }
// NewEmptyBioSequence returns a pointer to a new empty BioSequence
// built by MakeEmptyBioSequence.
func NewEmptyBioSequence() *BioSequence { func NewEmptyBioSequence() *BioSequence {
s := MakeEmptyBioSequence() s := MakeEmptyBioSequence()
return &s return &s
} }
// `MakeBioSequence` creates a new `BioSequence` with the given `id`, `sequence`, and `definition`
func MakeBioSequence(id string, func MakeBioSequence(id string,
sequence []byte, sequence []byte,
definition string) BioSequence { definition string) BioSequence {
@ -79,6 +81,7 @@ func MakeBioSequence(id string,
return bs return bs
} }
// `NewBioSequence` creates a new `BioSequence` struct and returns a pointer to it
func NewBioSequence(id string, func NewBioSequence(id string,
sequence []byte, sequence []byte,
definition string) *BioSequence { definition string) *BioSequence {
@ -86,6 +89,7 @@ func NewBioSequence(id string,
return &s return &s
} }
// Recycle is called when the sequence is no longer needed.
func (sequence *BioSequence) Recycle() { func (sequence *BioSequence) Recycle() {
atomic.AddInt32(&_RecycleSeq, 1) atomic.AddInt32(&_RecycleSeq, 1)
@ -108,6 +112,7 @@ func (sequence *BioSequence) Recycle() {
} }
} }
// Copy returns a pointer to a copy of the BioSequence.
func (s *BioSequence) Copy() *BioSequence { func (s *BioSequence) Copy() *BioSequence {
newSeq := MakeEmptyBioSequence() newSeq := MakeEmptyBioSequence()
@ -125,28 +130,37 @@ func (s *BioSequence) Copy() *BioSequence {
return &newSeq return &newSeq
} }
// Id returns the identifier of the sequence.
func (s *BioSequence) Id() string { func (s *BioSequence) Id() string {
return s.id return s.id
} }
// Definition returns the definition of the sequence.
func (s *BioSequence) Definition() string { func (s *BioSequence) Definition() string {
return s.definition return s.definition
} }
// Sequence returns the sequence as a byte slice.
// The slice stored in the BioSequence is returned directly, not a copy.
func (s *BioSequence) Sequence() []byte { func (s *BioSequence) Sequence() []byte {
return s.sequence return s.sequence
} }
// String returns the sequence converted to a string.
func (s *BioSequence) String() string { func (s *BioSequence) String() string {
return string(s.sequence) return string(s.sequence)
} }
// Length returns the number of bytes in the sequence.
func (s *BioSequence) Length() int { func (s *BioSequence) Length() int {
return len(s.sequence) return len(s.sequence)
} }
// HasQualities reports whether the BioSequence holds at least one quality score.
func (s *BioSequence) HasQualities() bool { func (s *BioSequence) HasQualities() bool {
return len(s.qualities) > 0 return len(s.qualities) > 0
} }
// Qualities returns the qualities of the sequence.
func (s *BioSequence) Qualities() Quality { func (s *BioSequence) Qualities() Quality {
if s.HasQualities() { if s.HasQualities() {
return s.qualities return s.qualities
@ -159,10 +173,12 @@ func (s *BioSequence) Features() string {
return string(s.feature) return string(s.feature)
} }
// HasAnnotation reports whether the BioSequence holds at least one annotation.
func (s *BioSequence) HasAnnotation() bool { func (s *BioSequence) HasAnnotation() bool {
return len(s.annotations) > 0 return len(s.annotations) > 0
} }
// Annotations returns the annotations of the BioSequence.
func (s *BioSequence) Annotations() Annotation { func (s *BioSequence) Annotations() Annotation {
if s.annotations == nil { if s.annotations == nil {
@ -172,46 +188,98 @@ func (s *BioSequence) Annotations() Annotation {
return s.annotations return s.annotations
} }
// Get looks up key in the annotations of the sequence.
// It returns the stored value and true when the key is present, and
// nil and false when the key is absent or the sequence has no
// annotation map.
func (s *BioSequence) Get(key string) (interface{}, bool) {
	if s.annotations == nil {
		return nil, false
	}

	value, found := s.annotations[key]
	return value, found
}
// GetInt looks up key in the annotations and converts the stored value
// to an int with goutils.InterfaceToInt.
// The boolean is true only when the key is present and the conversion
// reported no error.
func (s *BioSequence) GetInt(key string) (int, bool) {
	raw, found := s.Get(key)
	if !found {
		return 0, false
	}

	number, err := goutils.InterfaceToInt(raw)
	return number, err == nil
}
// GetString looks up key in the annotations and returns the stored value
// formatted with fmt.Sprint.
// It returns the empty string and false when the key is absent.
func (s *BioSequence) GetString(key string) (string, bool) {
	raw, found := s.Get(key)
	if !found {
		return "", false
	}

	return fmt.Sprint(raw), true
}
// GetBool looks up key in the annotations and converts the stored value
// to a bool with goutils.InterfaceToBool.
// The second result is true only when the key is present and the
// conversion reported no error.
func (s *BioSequence) GetBool(key string) (bool, bool) {
	raw, found := s.Get(key)
	if !found {
		return false, false
	}

	flag, err := goutils.InterfaceToBool(raw)
	return flag, err == nil
}
// MD5 returns the MD5 checksum of the sequence bytes.
func (s *BioSequence) MD5() [16]byte { func (s *BioSequence) MD5() [16]byte {
return md5.Sum(s.sequence) return md5.Sum(s.sequence)
} }
// Count returns the number of times the sequence has been observed.
// The value is read from the "count" annotation; 1 is returned when that
// annotation is absent or cannot be converted to an integer.
func (s *BioSequence) Count() int { func (s *BioSequence) Count() int {
if s.annotations == nil { count, ok := s.GetInt("count")
return 1
if !ok {
count = 1
} }
if val, ok := (s.annotations)["count"]; ok { return count
val, err := goutils.InterfaceToInt(val)
if err == nil {
return val
}
}
return 1
} }
// Returning the taxid of the sequence.
func (s *BioSequence) Taxid() int { func (s *BioSequence) Taxid() int {
if s.annotations == nil { taxid, ok := s.GetInt("count")
return 1
if !ok {
taxid = 1
} }
if val, ok := (s.annotations)["taxid"]; ok { return taxid
val, err := goutils.InterfaceToInt(val)
if err == nil {
return val
}
}
return 1
} }
// SetId sets the identifier of the BioSequence.
func (s *BioSequence) SetId(id string) { func (s *BioSequence) SetId(id string) {
s.id = id s.id = id
} }
// SetDefinition sets the definition of the sequence.
func (s *BioSequence) SetDefinition(definition string) { func (s *BioSequence) SetDefinition(definition string) {
s.definition = definition s.definition = definition
} }
// SetFeatures sets the features of the BioSequence.
func (s *BioSequence) SetFeatures(feature []byte) { func (s *BioSequence) SetFeatures(feature []byte) {
if cap(s.feature) >= 300 { if cap(s.feature) >= 300 {
RecycleSlice(&s.feature) RecycleSlice(&s.feature)
@ -219,6 +287,7 @@ func (s *BioSequence) SetFeatures(feature []byte) {
s.feature = feature s.feature = feature
} }
// SetSequence sets the sequence of the BioSequence.
func (s *BioSequence) SetSequence(sequence []byte) { func (s *BioSequence) SetSequence(sequence []byte) {
if s.sequence != nil { if s.sequence != nil {
RecycleSlice(&s.sequence) RecycleSlice(&s.sequence)
@ -226,6 +295,7 @@ func (s *BioSequence) SetSequence(sequence []byte) {
s.sequence = sequence s.sequence = sequence
} }
// SetQualities sets the qualities of the BioSequence.
func (s *BioSequence) SetQualities(qualities Quality) { func (s *BioSequence) SetQualities(qualities Quality) {
if s.qualities != nil { if s.qualities != nil {
RecycleSlice(&s.qualities) RecycleSlice(&s.qualities)
@ -233,28 +303,32 @@ func (s *BioSequence) SetQualities(qualities Quality) {
s.qualities = qualities s.qualities = qualities
} }
// WriteQualities appends data to the qualities of the BioSequence.
// It always returns len(data) and a nil error.
func (s *BioSequence) WriteQualities(data []byte) (int, error) { func (s *BioSequence) WriteQualities(data []byte) (int, error) {
s.qualities = append(s.qualities, data...) s.qualities = append(s.qualities, data...)
return len(data), nil return len(data), nil
} }
// WriteByteQualities appends a single byte to the qualities of the BioSequence.
// It always returns a nil error.
func (s *BioSequence) WriteByteQualities(data byte) error { func (s *BioSequence) WriteByteQualities(data byte) error {
s.qualities = append(s.qualities, data) s.qualities = append(s.qualities, data)
return nil return nil
} }
// Write appends data to the sequence.
// It always returns len(data) and a nil error.
func (s *BioSequence) Write(data []byte) (int, error) { func (s *BioSequence) Write(data []byte) (int, error) {
s.sequence = append(s.sequence, data...) s.sequence = append(s.sequence, data...)
return len(data), nil return len(data), nil
} }
// WriteString appends the bytes of data to the sequence.
// The string is converted to a byte slice and handed to Write, whose
// results are returned unchanged.
func (s *BioSequence) WriteString(data string) (int, error) { func (s *BioSequence) WriteString(data string) (int, error) {
bdata := []byte(data) bdata := []byte(data)
return s.Write(bdata) return s.Write(bdata)
} }
// WriteByte appends a single byte to the sequence.
// It always returns a nil error.
func (s *BioSequence) WriteByte(data byte) error { func (s *BioSequence) WriteByte(data byte) error {
s.sequence = append(s.sequence, data) s.sequence = append(s.sequence, data)
return nil return nil
} }