A Go implementation of the FASTA reader

Former-commit-id: 603592c4761fb0722e9e0501d78de1bd3ba238fa
This commit is contained in:
2023-09-01 09:30:12 +02:00
parent 3f8c0d6a2f
commit 62b57f4ede
15 changed files with 1403 additions and 77 deletions

View File

@@ -203,77 +203,135 @@ func (s *BioSequence) Len() int {
return len(s.sequence)
}
// HasQualities reports whether the BioSequence carries quality scores.
//
// It returns true when the stored qualities slice holds at least one value;
// a nil or empty slice yields false.
func (s *BioSequence) HasQualities() bool {
	return len(s.qualities) != 0
}
// Qualities returns the quality scores of the BioSequence.
//
// When the sequence has stored qualities (see HasQualities) they are returned
// as is. Otherwise default qualities are built by __make_default_qualities__
// for the length of the sequence.
//
// Returns:
//   - Quality: the stored or default quality scores.
func (s *BioSequence) Qualities() Quality {
	if s.HasQualities() {
		return s.qualities
	}

	return __make_default_qualities__(len(s.sequence))
}
// Features returns the feature table of the BioSequence as a string.
//
// The feature string contains the unparsed EMBL/GenBank feature table as
// extracted from the flat file. The stored bytes are converted to a string
// on every call.
func (s *BioSequence) Features() string {
	features := string(s.feature)
	return features
}
// HasAnnotation reports whether the BioSequence holds at least one annotation.
//
// It returns false when the annotations are nil or empty.
func (s *BioSequence) HasAnnotation() bool {
	return len(s.annotations) != 0
}
// Annotations returns the Annotation object attached to the BioSequence.
//
// If no annotation object exists yet, one is obtained from GetAnnotation,
// stored in the BioSequence, and returned; later calls return that same
// object.
func (s *BioSequence) Annotations() Annotation {
	if s.annotations != nil {
		return s.annotations
	}

	s.annotations = GetAnnotation()
	return s.annotations
}
// AnnotationsLock acquires the annotation lock of the BioSequence.
//
// It calls Lock on the annot_lock field; pair every call with
// AnnotationsUnlock.
func (s *BioSequence) AnnotationsLock() {
	s.annot_lock.Lock()
}
// AnnotationsUnlock releases the annotation lock of the BioSequence.
//
// It calls Unlock on the annot_lock field and is the counterpart of
// AnnotationsLock.
func (s *BioSequence) AnnotationsUnlock() {
	s.annot_lock.Unlock()
}
// HasSource reports whether the BioSequence has a non-empty source.
//
// The source is the filename, without directory name and extension, from
// which the sequence was read.
func (s *BioSequence) HasSource() bool {
	return s.source != ""
}
// Source returns the source of the BioSequence.
//
// The source is the filename, without directory name and extension, from
// which the sequence was read. It is the empty string when no source is set.
func (s *BioSequence) Source() string {
	return s.source
}
// MD5 computes the MD5 checksum of the sequence bytes of the BioSequence.
//
// Only the sequence field is hashed. The checksum is recomputed on every
// call.
//
// Returns the 16-byte MD5 digest.
func (s *BioSequence) MD5() [16]byte {
	digest := md5.Sum(s.sequence)
	return digest
}
// SetId replaces the identifier of the BioSequence.
//
// Parameters:
//   - id: the new identifier, stored as given.
func (s *BioSequence) SetId(id string) {
	s.id = id
}
// SetDefinition replaces the definition of the BioSequence.
//
// Parameters:
//   - definition: the new definition, stored as given.
func (s *BioSequence) SetDefinition(definition string) {
	s.definition = definition
}
// SetSource replaces the source of the BioSequence.
//
// Parameters:
//   - source: the filename, without directory name and extension, from which
//     the sequence was read.
func (s *BioSequence) SetSource(source string) {
	s.source = source
}
// Setting the features of the BioSequence.
// SetFeatures sets the feature of the BioSequence.
//
// Parameters:
// - feature: a byte slice representing the feature to be set.
//
// No return value.
func (s *BioSequence) SetFeatures(feature []byte) {
if cap(s.feature) >= 300 {
RecycleSlice(&s.feature)

View File

@@ -328,6 +328,225 @@ func TestBioSequence_Len(t *testing.T) {
}
}
// TestHasQualities checks HasQualities on a BioSequence with and without
// quality scores.
//
//  1. An empty, non-nil qualities slice must report false.
//  2. A populated qualities slice must report true.
func TestHasQualities(t *testing.T) {
	// Test case 1: BioSequence with empty qualities slice
	withoutQualities := NewBioSequence("", []byte(""), "")
	withoutQualities.qualities = []byte{}
	if withoutQualities.HasQualities() {
		t.Errorf("Test case 1 failed: expected false, got true")
	}

	// Test case 2: BioSequence with non-empty qualities slice
	withQualities := NewBioSequence("", []byte(""), "")
	withQualities.qualities = []byte{20, 30, 40}
	if !withQualities.HasQualities() {
		t.Errorf("Test case 2 failed: expected true, got false")
	}
}
// TestQualities checks the Qualities method of BioSequence.
//
// Both cases use the sequence "ATCG":
//   - with qualities {10, 20, 30, 40} set, Qualities must return them;
//   - with qualities set to nil, Qualities must return the same value as
//     __make_default_qualities__ for the sequence length.
func TestQualities(t *testing.T) {
	sequence := []byte("ATCG")

	// Test case: BioSequence has qualities
	qualities := Quality{10, 20, 30, 40}
	withQualities := NewBioSequence("ABC123", sequence, "Test Sequence")
	withQualities.SetQualities(qualities)
	if got := withQualities.Qualities(); !reflect.DeepEqual(got, qualities) {
		t.Errorf("Test case failed: BioSequence has qualities")
	}

	// Test case: BioSequence does not have qualities
	defaultQualities := __make_default_qualities__(len(sequence))
	withoutQualities := NewBioSequence("ABC123", sequence, "Test Sequence")
	withoutQualities.SetQualities(nil)
	if got := withoutQualities.Qualities(); !reflect.DeepEqual(got, defaultQualities) {
		t.Errorf("Test case failed: BioSequence does not have qualities")
	}
}
// TestBioSequence_Features checks the Features method of BioSequence.
//
// A freshly created sequence must yield the empty string. After the feature
// field is set to "test sequence", Features must return exactly that text.
func TestBioSequence_Features(t *testing.T) {
	// Testing empty feature string
	empty := NewBioSequence("", []byte(""), "")
	if expected, got := "", empty.Features(); got != expected {
		t.Errorf("Expected %q, but got %q", expected, got)
	}

	// Testing non-empty feature string
	filled := NewBioSequence("", []byte(""), "")
	filled.feature = []byte("test sequence")
	if expected, got := "test sequence", filled.Features(); got != expected {
		t.Errorf("Expected %q, but got %q", expected, got)
	}
}
// TestHasAnnotation checks the HasAnnotation method of BioSequence.
//
// Three sequences are examined: one without annotations (expects false), one
// with a single annotation and one with two annotations (both expect true).
func TestHasAnnotation(t *testing.T) {
	// check reports a failure when HasAnnotation disagrees with expected.
	check := func(seq *BioSequence, expected bool) {
		t.Helper()
		if got := seq.HasAnnotation(); got != expected {
			t.Errorf("Expected %v, but got %v", expected, got)
		}
	}

	// Test case: BioSequence with no annotations
	check(&BioSequence{}, false)

	// Test case: BioSequence with one annotation
	check(&BioSequence{annotations: map[string]interface{}{"annotation1": "value1"}}, true)

	// Test case: BioSequence with multiple annotations
	check(&BioSequence{
		annotations: map[string]interface{}{
			"annotation1": "value1",
			"annotation2": "value2",
		},
	}, true)
}
// TestBioSequenceAnnotations checks the Annotations method of BioSequence.
//
// With a nil annotations field the result must be deeply equal to a value
// obtained from GetAnnotation. With a non-nil annotations field the stored
// value must be returned.
func TestBioSequenceAnnotations(t *testing.T) {
	seq := &BioSequence{}

	// Test case 1: Annotations is nil
	seq.annotations = nil
	if expected, actual := GetAnnotation(), seq.Annotations(); !reflect.DeepEqual(expected, actual) {
		t.Errorf("Test case 1 failed: Expected %v, but got %v", expected, actual)
	}

	// Test case 2: Annotations is not nil
	seq.annotations = Annotation{}
	stored := seq.annotations
	if actual := seq.Annotations(); !reflect.DeepEqual(stored, actual) {
		t.Errorf("Test case 2 failed: Expected %v, but got %v", stored, actual)
	}
}
// TestAnnotationsLock exercises AnnotationsLock together with
// AnnotationsUnlock.
//
// Each sequence is locked and then immediately unlocked, so the test never
// leaves a lock held and the Lock/Unlock pair is covered as a unit.
// NOTE(review): this presumes annot_lock behaves like a sync.Mutex, where
// unlocking a mutex that was not locked is a fatal error — confirm against
// the BioSequence declaration.
func TestAnnotationsLock(t *testing.T) {
	// Test case 1: Lock the annotation of an empty BioSequence
	seq := NewEmptyBioSequence(0)
	seq.AnnotationsLock()
	seq.AnnotationsUnlock()

	// Test case 2: Lock the annotation of a BioSequence with existing annotations
	seq2 := NewEmptyBioSequence(0)
	seq2.annotations = map[string]interface{}{
		"key1": "value1",
		"key2": "value2",
	}
	seq2.AnnotationsLock()
	seq2.AnnotationsUnlock()
}
// TestBioSequence_MD5 checks the MD5 method of BioSequence.
//
// The table holds one empty and one non-empty sequence ("ACGT"), each paired
// with its hard-coded expected digest and the failure message to report when
// the computed digest differs.
func TestBioSequence_MD5(t *testing.T) {
	cases := []struct {
		sequence string
		expected [16]byte
		failure  string
	}{
		{
			// Test case 1: Empty sequence
			sequence: "",
			expected: [16]byte{
				0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
				0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
			},
			failure: "Test case 1 failed. Expected: %v, got: %v",
		},
		{
			// Test case 2: Non-empty sequence
			sequence: "ACGT",
			expected: [16]byte{
				0xf1, 0xf8, 0xf4, 0xbf, 0x41, 0x3b, 0x16, 0xad,
				0x13, 0x57, 0x22, 0xaa, 0x45, 0x91, 0x04, 0x3e,
			},
			failure: "Test case 2 failed. Expected: %v, got: %v",
		},
	}

	for _, tc := range cases {
		s := &BioSequence{sequence: []byte(tc.sequence)}
		if result := s.MD5(); result != tc.expected {
			t.Errorf(tc.failure, tc.expected, result)
		}
	}
}
// TestBioSequence_Composition tests the Composition method of the BioSequence struct.
//
// It tests the method with three different test cases:

View File

@@ -1,8 +1,10 @@
package obiseq
import (
log "github.com/sirupsen/logrus"
"sync"
log "github.com/sirupsen/logrus"
"golang.org/x/exp/slices"
)
// BioSequenceSlice represents a collection or a set of BioSequence.
@@ -18,22 +20,39 @@ var _BioSequenceSlicePool = sync.Pool{
},
}
// > This function returns a pointer to a new `BioSequenceSlice` object
// NewBioSequenceSlice returns a BioSequenceSlice taken from the package pool.
//
// The size parameter is optional. When at least one value is given, the first
// one is used: the slice capacity is ensured through InsureCapacity and the
// slice length is set to that value. Any further values are ignored. Without
// a size, the pooled slice is returned unchanged.
//
// Returns a pointer to the BioSequenceSlice.
func NewBioSequenceSlice(size ...int) *BioSequenceSlice {
	slice := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
	if len(size) == 0 {
		return slice
	}

	n := size[0]
	slice = slice.InsureCapacity(n)
	*slice = (*slice)[:n]

	return slice
}
// MakeBioSequenceSlice creates a BioSequenceSlice and returns it by value.
//
// It forwards its arguments to NewBioSequenceSlice and dereferences the
// result.
//
// Parameters:
//   - size: optional size, interpreted as by NewBioSequenceSlice.
//
// Returns the BioSequenceSlice value (not a pointer).
func MakeBioSequenceSlice(size ...int) BioSequenceSlice {
	slice := NewBioSequenceSlice(size...)
	return *slice
}
// Recycle cleans up the BioSequenceSlice by recycling its elements and resetting its length.
//
// If including_seq is true, each element of the BioSequenceSlice is recycled using the Recycle method,
// and then set to nil. If including_seq is false, each element is simply set to nil.
//
// The function does not return anything.
func (s *BioSequenceSlice) Recycle(including_seq bool) {
if s == nil {
log.Panicln("Trying too recycle a nil pointer")
@@ -42,60 +61,113 @@ func (s *BioSequenceSlice) Recycle(including_seq bool) {
// Code added to potentially limit memory leaks
if including_seq {
for i := range *s {
(*s)[i] .Recycle()
(*s)[i].Recycle()
(*s)[i] = nil
}
} else {
for i := range *s {
(*s)[i] = nil
}
}
}
*s = (*s)[:0]
_BioSequenceSlicePool.Put(s)
}
// InsureCapacity ensures that the BioSequenceSlice has at least the requested
// capacity.
//
// The elements already stored and the length of the slice are preserved.
// Nothing is done when the current capacity is already sufficient, which
// includes zero or negative requests. A nil receiver is accepted: a new empty
// slice is created and returned.
//
// Parameters:
//   - capacity: the desired minimum capacity.
//
// Returns a pointer to the slice, which is the receiver unless it was nil.
func (s *BioSequenceSlice) InsureCapacity(capacity int) *BioSequenceSlice {
	if s == nil {
		s = &BioSequenceSlice{}
	}

	// slices.Grow counts room beyond len, not cap, and panics on a negative
	// count, so it is only called when growth is really needed. Here
	// len <= cap < capacity, so the count is strictly positive.
	if cap(*s) < capacity {
		*s = slices.Grow(*s, capacity-len(*s))
	}

	return s
}
// Push appends a BioSequence at the end of the BioSequenceSlice.
//
// The slice is updated in place through the pointer receiver.
//
// Parameters:
//   - sequence: the sequence to append.
func (s *BioSequenceSlice) Push(sequence *BioSequence) {
	*s = append(*s, sequence)
}
// Pop removes the last element of the BioSequenceSlice and returns it.
//
// It returns nil when the slice is empty, leaving the slice untouched.
func (s *BioSequenceSlice) Pop() *BioSequence {
	last := len(*s) - 1
	if last < 0 {
		return nil
	}

	sequence := (*s)[last]
	// Clear the slot so the backing array no longer references the popped
	// sequence once the slice has been shortened.
	(*s)[last] = nil
	*s = (*s)[:last]

	return sequence
}
// Pop0 removes the first element of the BioSequenceSlice and returns it.
//
// It returns nil when the slice is empty, leaving the slice untouched.
func (s *BioSequenceSlice) Pop0() *BioSequence {
	if len(*s) == 0 {
		return nil
	}

	sequence := (*s)[0]
	// Clear the slot so the backing array no longer references the popped
	// sequence once the slice start has moved past it.
	(*s)[0] = nil
	*s = (*s)[1:]

	return sequence
}
// NotEmpty reports whether the BioSequenceSlice contains at least one
// element.
func (s BioSequenceSlice) NotEmpty() bool {
	return len(s) != 0
}
// Len returns the number of elements stored in the BioSequenceSlice.
func (s BioSequenceSlice) Len() int {
	count := len(s)
	return count
}
// Size returns the cumulated length of all sequences in the BioSequenceSlice.
//
// It adds up the value returned by Len for every element of the slice; an
// empty slice yields 0.
func (s BioSequenceSlice) Size() int {
	total := 0
	for i := range s {
		total += s[i].Len()
	}

	return total
}