mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
A go implementation of the fasta reader
Former-commit-id: 603592c4761fb0722e9e0501d78de1bd3ba238fa
This commit is contained in:
@@ -203,77 +203,135 @@ func (s *BioSequence) Len() int {
|
||||
return len(s.sequence)
|
||||
}
|
||||
|
||||
// Checking if the BioSequence has quality scores.
|
||||
// HasQualities checks if the BioSequence has sequence qualitiy scores.
|
||||
//
|
||||
// This function does not have any parameters.
|
||||
// It returns a boolean value indicating whether the BioSequence has qualities.
|
||||
func (s *BioSequence) HasQualities() bool {
|
||||
return len(s.qualities) > 0
|
||||
}
|
||||
|
||||
// Returning the qualities of the sequence.
|
||||
// Qualities returns the sequence quality scores of the BioSequence.
|
||||
//
|
||||
// It checks if the BioSequence has qualities. If it does, it returns the qualities
|
||||
// stored in the BioSequence struct. Otherwise, it creates and returns default
|
||||
// qualities based on the length of the sequence.
|
||||
//
|
||||
// Returns:
|
||||
// - Quality: The quality of the BioSequence.
|
||||
func (s *BioSequence) Qualities() Quality {
|
||||
if s.HasQualities() {
|
||||
return s.qualities
|
||||
} else {
|
||||
return __make_default_qualities__(len(s.sequence))
|
||||
}
|
||||
return __make_default_qualities__(len(s.sequence))
|
||||
}
|
||||
|
||||
// Features returns the feature string of the BioSequence.
|
||||
//
|
||||
// The feature string contains the EMBL/GenBank not parsed feature table
|
||||
//
|
||||
// as extracted from the flat file.
|
||||
//
|
||||
// No parameters.
|
||||
// Returns a string.
|
||||
func (s *BioSequence) Features() string {
|
||||
return string(s.feature)
|
||||
}
|
||||
|
||||
// Checking if the BioSequence has annotations.
|
||||
// HasAnnotation checks if the BioSequence has any annotations.
|
||||
//
|
||||
// It does not take any parameters.
|
||||
// It returns a boolean value indicating whether the BioSequence has any annotations.
|
||||
func (s *BioSequence) HasAnnotation() bool {
|
||||
return len(s.annotations) > 0
|
||||
}
|
||||
|
||||
// Returning the annotations of the BioSequence.
|
||||
// Annotations returns the Annotation object associated with the BioSequence.
|
||||
//
|
||||
// This function does not take any parameters.
|
||||
// It returns an Annotation object.
|
||||
func (s *BioSequence) Annotations() Annotation {
|
||||
|
||||
if s.annotations == nil {
|
||||
s.annotations = GetAnnotation()
|
||||
}
|
||||
|
||||
return s.annotations
|
||||
}
|
||||
|
||||
// AnnotationsLock locks the annotation of the BioSequence.
|
||||
//
|
||||
// This function acquires a lock on the annotation of the BioSequence,
|
||||
// preventing concurrent access to it.
|
||||
func (s *BioSequence) AnnotationsLock() {
|
||||
s.annot_lock.Lock()
|
||||
}
|
||||
|
||||
// AnnotationsUnlock unlocks the annotations mutex in the BioSequence struct.
|
||||
//
|
||||
// No parameters.
|
||||
// No return types.
|
||||
func (s *BioSequence) AnnotationsUnlock() {
|
||||
s.annot_lock.Unlock()
|
||||
}
|
||||
|
||||
// Checking if the BioSequence has a source.
|
||||
// HasSource checks if the BioSequence has a source.
|
||||
//
|
||||
// The source is the filename without directory name and extension from where the sequence was read.
|
||||
//
|
||||
// No parameters.
|
||||
// Returns a boolean value indicating whether the BioSequence has a source or not.
|
||||
func (s *BioSequence) HasSource() bool {
|
||||
return len(s.source) > 0
|
||||
}
|
||||
|
||||
// Source returns the source of the BioSequence.
|
||||
//
|
||||
// The source is the filename without directory name and extension from where the sequence was read.
|
||||
//
|
||||
// This function does not take any parameters.
|
||||
// It returns a string.
|
||||
func (s *BioSequence) Source() string {
|
||||
return s.source
|
||||
}
|
||||
|
||||
// Returning the MD5 hash of the sequence.
|
||||
// MD5 calculates the MD5 hash of the BioSequence.
|
||||
//
|
||||
// No parameters.
|
||||
// Returns [16]byte, the MD5 hash of the BioSequence.
|
||||
func (s *BioSequence) MD5() [16]byte {
|
||||
return md5.Sum(s.sequence)
|
||||
}
|
||||
|
||||
// Setting the id of the BioSequence.
|
||||
// SetId sets the id of the BioSequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - id: the new id for the BioSequence.
|
||||
//
|
||||
// No return value.
|
||||
func (s *BioSequence) SetId(id string) {
|
||||
s.id = id
|
||||
}
|
||||
|
||||
// Setting the definition of the sequence.
|
||||
// SetDefinition sets the definition of the BioSequence.
|
||||
//
|
||||
// It takes a string parameter 'definition' and assigns it to the 'definition' field of the BioSequence struct.
|
||||
func (s *BioSequence) SetDefinition(definition string) {
|
||||
s.definition = definition
|
||||
}
|
||||
|
||||
// Setting the source of the sequence.
|
||||
// SetSource sets the source of the BioSequence.
|
||||
//
|
||||
// Parameter:
|
||||
// - source: a string representing the filename without directory name and extension from where the sequence was read.
|
||||
func (s *BioSequence) SetSource(source string) {
|
||||
s.source = source
|
||||
}
|
||||
|
||||
// Setting the features of the BioSequence.
|
||||
// SetFeatures sets the feature of the BioSequence.
|
||||
//
|
||||
// Parameters:
|
||||
// - feature: a byte slice representing the feature to be set.
|
||||
//
|
||||
// No return value.
|
||||
func (s *BioSequence) SetFeatures(feature []byte) {
|
||||
if cap(s.feature) >= 300 {
|
||||
RecycleSlice(&s.feature)
|
||||
|
||||
@@ -328,6 +328,225 @@ func TestBioSequence_Len(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestHasQualities tests the HasQualities method of the BioSequence struct.
|
||||
//
|
||||
// It includes two test cases:
|
||||
//
|
||||
// 1. Test case 1: BioSequence with empty qualities slice
|
||||
// - Creates a BioSequence instance with an empty qualities slice.
|
||||
// - Expects false as the result of calling the HasQualities method on the BioSequence instance.
|
||||
//
|
||||
// 2. Test case 2: BioSequence with non-empty qualities slice
|
||||
// - Creates a BioSequence instance with a non-empty qualities slice.
|
||||
// - Expects true as the result of calling the HasQualities method on the BioSequence instance.
|
||||
//
|
||||
// No parameters are required.
|
||||
// No return types are specified.
|
||||
func TestHasQualities(t *testing.T) {
|
||||
// Test case 1: BioSequence with empty qualities slice
|
||||
seq1 := NewBioSequence("", []byte(""), "")
|
||||
seq1.qualities = []byte{}
|
||||
if seq1.HasQualities() != false {
|
||||
t.Errorf("Test case 1 failed: expected false, got true")
|
||||
}
|
||||
|
||||
// Test case 2: BioSequence with non-empty qualities slice
|
||||
seq2 := NewBioSequence("", []byte(""), "")
|
||||
seq2.qualities = []byte{20, 30, 40}
|
||||
if seq2.HasQualities() != true {
|
||||
t.Errorf("Test case 2 failed: expected true, got false")
|
||||
}
|
||||
}
|
||||
|
||||
// TestQualities tests the Qualities method of the BioSequence struct.
|
||||
//
|
||||
// It creates a BioSequence with a given sequence and qualities and sets them.
|
||||
// Then it compares the returned qualities with the expected ones.
|
||||
// If the qualities are not equal, it fails the test case.
|
||||
//
|
||||
// Test case 1: BioSequence has qualities
|
||||
// - sequence: []byte("ATCG")
|
||||
// - qualities: Quality{10, 20, 30, 40}
|
||||
// - expected: Quality{10, 20, 30, 40}
|
||||
//
|
||||
// Test case 2: BioSequence does not have qualities
|
||||
// - sequence: []byte("ATCG")
|
||||
// - qualities: nil
|
||||
// - expected: defaultQualities
|
||||
//
|
||||
// Parameters:
|
||||
// - t: *testing.T - the testing struct for running test cases and reporting failures.
|
||||
//
|
||||
// Return type:
|
||||
// None
|
||||
func TestQualities(t *testing.T) {
|
||||
// Test case: BioSequence has qualities
|
||||
sequence := []byte("ATCG")
|
||||
qualities := Quality{10, 20, 30, 40}
|
||||
bioSeq := NewBioSequence("ABC123", sequence, "Test Sequence")
|
||||
bioSeq.SetQualities(qualities)
|
||||
|
||||
result := bioSeq.Qualities()
|
||||
expected := qualities
|
||||
|
||||
if !reflect.DeepEqual(result, expected) {
|
||||
t.Errorf("Test case failed: BioSequence has qualities")
|
||||
}
|
||||
|
||||
// Test case: BioSequence does not have qualities
|
||||
defaultQualities := __make_default_qualities__(len(sequence))
|
||||
bioSeq = NewBioSequence("ABC123", sequence, "Test Sequence")
|
||||
bioSeq.SetQualities(nil)
|
||||
|
||||
result = bioSeq.Qualities()
|
||||
expected = defaultQualities
|
||||
|
||||
if !reflect.DeepEqual(result, expected) {
|
||||
t.Errorf("Test case failed: BioSequence does not have qualities")
|
||||
}
|
||||
}
|
||||
|
||||
// TestBioSequence_Features tests the Features function of the BioSequence struct.
|
||||
//
|
||||
// It first tests the case when the feature string is empty. It creates a new BioSequence
|
||||
// with an empty feature string and an empty byte slice. It expects an empty string as
|
||||
// the result of calling the Features function on this BioSequence. If the result does
|
||||
// not match the expected value, it prints an error message.
|
||||
//
|
||||
// It then tests the case when the feature string is non-empty. It creates a new BioSequence
|
||||
// with an empty feature string and an empty byte slice. It sets the feature string to
|
||||
// "test sequence" and expects "test sequence" as the result of calling the Features function
|
||||
// on this BioSequence. If the result does not match the expected value, it prints an error message.
|
||||
func TestBioSequence_Features(t *testing.T) {
|
||||
// Testing empty feature string
|
||||
seq := NewBioSequence("", []byte(""), "")
|
||||
expected := ""
|
||||
if got := seq.Features(); got != expected {
|
||||
t.Errorf("Expected %q, but got %q", expected, got)
|
||||
}
|
||||
|
||||
// Testing non-empty feature string
|
||||
seq = NewBioSequence("", []byte(""), "")
|
||||
seq.feature = []byte("test sequence")
|
||||
expected = "test sequence"
|
||||
if got := seq.Features(); got != expected {
|
||||
t.Errorf("Expected %q, but got %q", expected, got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHasAnnotation is a unit test function that tests the HasAnnotation method of the BioSequence struct.
|
||||
//
|
||||
// This function tests the behavior of the HasAnnotation method in different scenarios:
|
||||
// - Test case: BioSequence with no annotations.
|
||||
// - Test case: BioSequence with one annotation.
|
||||
// - Test case: BioSequence with multiple annotations.
|
||||
//
|
||||
// The function verifies that the HasAnnotation method returns the expected boolean value for each test case.
|
||||
// It uses the *testing.T parameter to report any test failures.
|
||||
//
|
||||
// No parameters.
|
||||
// No return values.
|
||||
func TestHasAnnotation(t *testing.T) {
|
||||
// Test case: BioSequence with no annotations
|
||||
seq := BioSequence{}
|
||||
expected := false
|
||||
if got := seq.HasAnnotation(); got != expected {
|
||||
t.Errorf("Expected %v, but got %v", expected, got)
|
||||
}
|
||||
|
||||
// Test case: BioSequence with one annotation
|
||||
seq = BioSequence{annotations: map[string]interface{}{"annotation1": "value1"}}
|
||||
expected = true
|
||||
if got := seq.HasAnnotation(); got != expected {
|
||||
t.Errorf("Expected %v, but got %v", expected, got)
|
||||
}
|
||||
|
||||
// Test case: BioSequence with multiple annotations
|
||||
seq = BioSequence{
|
||||
annotations: map[string]interface{}{
|
||||
"annotation1": "value1",
|
||||
"annotation2": "value2",
|
||||
},
|
||||
}
|
||||
expected = true
|
||||
if got := seq.HasAnnotation(); got != expected {
|
||||
t.Errorf("Expected %v, but got %v", expected, got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBioSequenceAnnotations tests the Annotations method of the BioSequence struct.
|
||||
//
|
||||
// It verifies the behavior of the method when the `annotations` field of the BioSequence struct is nil and when it is not nil.
|
||||
// The method should return the expected annotation values and fail the test if the returned annotations do not match the expected ones.
|
||||
// The test cases cover both scenarios to ensure the correctness of the method.
|
||||
func TestBioSequenceAnnotations(t *testing.T) {
|
||||
s := &BioSequence{}
|
||||
|
||||
// Test case 1: Annotations is nil
|
||||
s.annotations = nil
|
||||
expected := GetAnnotation()
|
||||
actual := s.Annotations()
|
||||
if !reflect.DeepEqual(expected, actual) {
|
||||
t.Errorf("Test case 1 failed: Expected %v, but got %v", expected, actual)
|
||||
}
|
||||
|
||||
// Test case 2: Annotations is not nil
|
||||
s.annotations = Annotation{}
|
||||
expected = s.annotations
|
||||
actual = s.Annotations()
|
||||
if !reflect.DeepEqual(expected, actual) {
|
||||
t.Errorf("Test case 2 failed: Expected %v, but got %v", expected, actual)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAnnotationsLock(t *testing.T) {
|
||||
// Test case 1: Lock the annotation of an empty BioSequence
|
||||
seq := NewEmptyBioSequence(0)
|
||||
seq.AnnotationsLock()
|
||||
|
||||
// Test case 2: Lock the annotation of a BioSequence with existing annotations
|
||||
seq2 := NewEmptyBioSequence(0)
|
||||
seq2.annotations = map[string]interface{}{
|
||||
"key1": "value1",
|
||||
"key2": "value2",
|
||||
}
|
||||
seq2.AnnotationsLock()
|
||||
}
|
||||
|
||||
// TestBioSequence_MD5 tests the MD5 function of the BioSequence struct.
|
||||
//
|
||||
// It includes two test cases: one for an empty sequence and one for a non-empty sequence.
|
||||
// Each test case creates a BioSequence instance with a specific sequence and compares the MD5 result with the expected value.
|
||||
// If the result does not match the expected value, an error is reported using the t.Errorf function.
|
||||
// The expected MD5 values are hardcoded in the test cases.
|
||||
func TestBioSequence_MD5(t *testing.T) {
|
||||
// Test case 1: Empty sequence
|
||||
{
|
||||
s := &BioSequence{sequence: []byte("")}
|
||||
expected := [16]byte{
|
||||
0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
|
||||
0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
|
||||
}
|
||||
result := s.MD5()
|
||||
if result != expected {
|
||||
t.Errorf("Test case 1 failed. Expected: %v, got: %v", expected, result)
|
||||
}
|
||||
}
|
||||
|
||||
// Test case 2: Non-empty sequence
|
||||
{
|
||||
s := &BioSequence{sequence: []byte("ACGT")}
|
||||
expected := [16]byte{
|
||||
0xf1, 0xf8, 0xf4, 0xbf, 0x41, 0x3b, 0x16, 0xad,
|
||||
0x13, 0x57, 0x22, 0xaa, 0x45, 0x91, 0x04, 0x3e,
|
||||
}
|
||||
result := s.MD5()
|
||||
if result != expected {
|
||||
t.Errorf("Test case 2 failed. Expected: %v, got: %v", expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestBioSequence_Composition tests the Composition method of the BioSequence struct.
|
||||
//
|
||||
// It tests the method with three different test cases:
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
package obiseq
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
"sync"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/exp/slices"
|
||||
)
|
||||
|
||||
// BioSequenceSlice represents a collection or a set of BioSequence.
|
||||
@@ -18,22 +20,39 @@ var _BioSequenceSlicePool = sync.Pool{
|
||||
},
|
||||
}
|
||||
|
||||
// > This function returns a pointer to a new `BioSequenceSlice` object
|
||||
// NewBioSequenceSlice returns a new BioSequenceSlice with the specified size.
|
||||
//
|
||||
// The size parameter is optional. If provided, the returned slice will be
|
||||
// resized accordingly.
|
||||
//
|
||||
// Returns a pointer to the newly created BioSequenceSlice.
|
||||
func NewBioSequenceSlice(size ...int) *BioSequenceSlice {
|
||||
slice := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
||||
if len(size) > 0 {
|
||||
s := size[0]
|
||||
slice = slice.InsureCapacity(s)
|
||||
(*slice)=(*slice)[0:s]
|
||||
(*slice) = (*slice)[0:s]
|
||||
}
|
||||
return slice
|
||||
}
|
||||
|
||||
// `MakeBioSequenceSlice()` returns a pointer to a new `BioSequenceSlice` struct
|
||||
// MakeBioSequenceSlice creates a new BioSequenceSlice with the specified size(s).
|
||||
//
|
||||
// Parameters:
|
||||
// - size: The size(s) of the BioSequenceSlice to create (optional).
|
||||
//
|
||||
// Return:
|
||||
// A new BioSequenceSlice with the specified size(s).
|
||||
func MakeBioSequenceSlice(size ...int) BioSequenceSlice {
|
||||
return *NewBioSequenceSlice(size...)
|
||||
}
|
||||
|
||||
// Recycle cleans up the BioSequenceSlice by recycling its elements and resetting its length.
|
||||
//
|
||||
// If including_seq is true, each element of the BioSequenceSlice is recycled using the Recycle method,
|
||||
// and then set to nil. If including_seq is false, each element is simply set to nil.
|
||||
//
|
||||
// The function does not return anything.
|
||||
func (s *BioSequenceSlice) Recycle(including_seq bool) {
|
||||
if s == nil {
|
||||
log.Panicln("Trying too recycle a nil pointer")
|
||||
@@ -42,60 +61,113 @@ func (s *BioSequenceSlice) Recycle(including_seq bool) {
|
||||
// Code added to potentially limit memory leaks
|
||||
if including_seq {
|
||||
for i := range *s {
|
||||
(*s)[i] .Recycle()
|
||||
(*s)[i].Recycle()
|
||||
(*s)[i] = nil
|
||||
}
|
||||
|
||||
|
||||
} else {
|
||||
for i := range *s {
|
||||
(*s)[i] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*s = (*s)[:0]
|
||||
_BioSequenceSlicePool.Put(s)
|
||||
}
|
||||
|
||||
// Making sure that the slice has enough capacity to hold the number of elements that are being added
|
||||
// to it.
|
||||
// InsureCapacity ensures that the BioSequenceSlice has a minimum capacity
|
||||
//
|
||||
// It takes an integer `capacity` as a parameter, which represents the desired minimum capacity of the BioSequenceSlice.
|
||||
// It returns a pointer to the BioSequenceSlice.
|
||||
func (s *BioSequenceSlice) InsureCapacity(capacity int) *BioSequenceSlice {
|
||||
var c int
|
||||
if s != nil {
|
||||
c = cap(*s)
|
||||
c = cap(*s)
|
||||
} else {
|
||||
c = 0
|
||||
}
|
||||
|
||||
if c < capacity {
|
||||
sl := make(BioSequenceSlice, 0,capacity)
|
||||
s = &sl
|
||||
}
|
||||
*s = slices.Grow[BioSequenceSlice](*s, capacity-c)
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// Appending the sequence to the slice.
|
||||
// Push appends a BioSequence to the BioSequenceSlice.
|
||||
//
|
||||
// It takes a pointer to a BioSequenceSlice and a BioSequence as parameters.
|
||||
// It does not return anything.
|
||||
func (s *BioSequenceSlice) Push(sequence *BioSequence) {
|
||||
*s = append(*s, sequence)
|
||||
}
|
||||
|
||||
// Returning the last element of the slice and removing it from the slice.
|
||||
// Pop returns and removes the last element from the BioSequenceSlice.
|
||||
//
|
||||
// It does not take any parameters.
|
||||
// It returns *BioSequence, the last element of the slice.
|
||||
func (s *BioSequenceSlice) Pop() *BioSequence {
|
||||
_s := (*s)[len(*s)-1]
|
||||
(*s)[len(*s)-1] = nil
|
||||
*s = (*s)[:len(*s)-1]
|
||||
return _s
|
||||
// Get the length of the slice
|
||||
length := len(*s)
|
||||
|
||||
// If the slice is empty, return nil
|
||||
if length == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get the last element of the slice
|
||||
lastElement := (*s)[length-1]
|
||||
|
||||
// Set the last element to nil
|
||||
(*s)[length-1] = nil
|
||||
|
||||
// Remove the last element from the slice
|
||||
*s = (*s)[:length-1]
|
||||
|
||||
// Return the last element
|
||||
return lastElement
|
||||
}
|
||||
|
||||
// Returning the first element of the slice and removing it from the slice.
|
||||
// Pop0 returns and removes the first element of the BioSequenceSlice.
|
||||
//
|
||||
// It does not take any parameters.
|
||||
// It returns a pointer to a BioSequence object.
|
||||
func (s *BioSequenceSlice) Pop0() *BioSequence {
|
||||
_s := (*s)[0]
|
||||
if len(*s) == 0 {
|
||||
return nil
|
||||
}
|
||||
firstElement := (*s)[0]
|
||||
(*s)[0] = nil
|
||||
*s = (*s)[1:]
|
||||
return _s
|
||||
return firstElement
|
||||
}
|
||||
|
||||
// Test that a slice of sequences contains at least a sequence.
|
||||
// NotEmpty checks if the BioSequenceSlice is not empty.
|
||||
//
|
||||
// No parameters.
|
||||
// Returns a boolean value indicating if the BioSequenceSlice is not empty.
|
||||
func (s BioSequenceSlice) NotEmpty() bool {
|
||||
return len(s) > 0
|
||||
}
|
||||
|
||||
// Len returns the length of the BioSequenceSlice.
|
||||
//
|
||||
// It has no parameters.
|
||||
// It returns an integer.
|
||||
func (s BioSequenceSlice) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
// Size returns the total size of the BioSequenceSlice.
|
||||
//
|
||||
// It calculates the size by iterating over each BioSequence in the slice
|
||||
// and summing up their lengths.
|
||||
//
|
||||
// Returns an integer representing the total size of the BioSequenceSlice.
|
||||
func (s BioSequenceSlice) Size() int {
|
||||
size := 0
|
||||
|
||||
for _, s := range s {
|
||||
size += s.Len()
|
||||
}
|
||||
|
||||
return size
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user