Patches a bug in Subsequence and adds a few unit tests.

Former-commit-id: caddc9ad6523e4ef02899bfe83cc8681ef674383
This commit is contained in:
2023-11-08 10:16:34 +02:00
parent 8f96517f3c
commit dedf125f6e
8 changed files with 186 additions and 44 deletions

View File

@ -111,6 +111,28 @@ func NewBioSequence(id string,
return bs
}
// NewBioSequenceWithQualities builds a BioSequence from an id, a sequence, a
// definition and a set of qualities.
//
// The object is obtained from NewEmptyBioSequence(0) and then populated
// through its setters, in this order: SetId, SetSequence, SetDefinition,
// SetQualities.
//
// Parameters:
//   - id: the identifier handed to SetId.
//   - sequence: the sequence data handed to SetSequence.
//   - definition: the definition handed to SetDefinition.
//   - qualities: the quality data handed to SetQualities.
//
// Returns:
//   - *BioSequence: the populated BioSequence.
func NewBioSequenceWithQualities(id string,
	sequence []byte,
	definition string,
	qualities []byte) *BioSequence {
	seq := NewEmptyBioSequence(0)

	seq.SetId(id)
	seq.SetSequence(sequence)
	seq.SetDefinition(definition)
	seq.SetQualities(qualities)

	return seq
}
// Recycle recycles the BioSequence object.
//
// It decreases the count of in-memory sequences and increases the count of recycled sequences.

View File

@ -94,6 +94,42 @@ func TestNewBioSequence(t *testing.T) {
}
}
// TestNewBioSequenceWithQualities checks that a BioSequence built with
// NewBioSequenceWithQualities gives back, through its accessors, the id,
// sequence, definition and qualities it was constructed with.
func TestNewBioSequenceWithQualities(t *testing.T) {
	var (
		id         = "123"
		sequence   = []byte("ATGC")
		definition = "DNA sequence"
		qualities  = []byte("1234")
	)

	bs := NewBioSequenceWithQualities(id, sequence, definition, qualities)

	// Identifier round-trip.
	if got := bs.Id(); got != id {
		t.Errorf("Expected id to be %s, but got %s", id, got)
	}

	// Sequence round-trip, compared as strings.
	if got := string(bs.Sequence()); got != string(sequence) {
		t.Errorf("Expected sequence to be %s, but got %s", string(sequence), got)
	}

	// Definition round-trip.
	if got := bs.Definition(); got != definition {
		t.Errorf("Expected definition to be %s, but got %s", definition, got)
	}

	// Qualities round-trip, compared as strings.
	if got := string(bs.Qualities()); got != string(qualities) {
		t.Errorf("Expected qualities to be %s, but got %s", string(qualities), got)
	}
}
// TestBioSequence_Recycle tests the Recycle method of the BioSequence struct.
//
// Test case 1: Recycle a BioSequence object with non-nil slices and annotations.

View File

@ -3,18 +3,44 @@ package obiseq
// ".ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
var _revcmpDNA = []byte(".TVGHNNCDNNMNKNNNNYSAABWNRN]N[NNN")

// nucComplement returns the complement of a single nucleotide symbol.
//
// The function handles the following cases:
//   - '.' and '-' are returned unchanged.
//   - '[' yields ']' and ']' yields '['.
//   - An ASCII letter, upper or lower case, is complemented through the
//     _revcmpDNA lookup table and the result is always lower case.
//   - Any other byte yields 'n'.
func nucComplement(n byte) byte {
	switch {
	case n == '.' || n == '-':
		return n
	case n == '[':
		return ']'
	case n == ']':
		return '['
	// Only real letters go through the table: the punctuation that sits
	// between 'Z' and 'a' in ASCII (such as '`') must fall through to 'n'.
	case (n >= 'A' && n <= 'Z') || (n >= 'a' && n <= 'z'):
		// n&31 maps 'a' and 'A' to index 1, 'b' and 'B' to 2, and so on;
		// setting bit 0x20 forces the looked-up letter to lower case.
		return _revcmpDNA[n&31] | 0x20
	}

	return 'n'
}
// Reverse complements a DNA sequence.
// If the inplace parameter is true, that operation is done in place.
// ReverseComplement reverses and complements a BioSequence.
//
// If `inplace` is `false`, a new copy of the BioSequence is created before
// performing the reverse complement. If `inplace` is `true`, the reverse
// complement is performed directly on the original BioSequence.
//
// The function first reverses the sequence by swapping the characters from the
// beginning and end of the sequence. Then, it complements each character in the
// sequence by finding its complement using the `nucComplement` function.
//
// If the BioSequence has qualities, the function also reverses the qualities in
// the same way as the sequence.
//
// The function returns the reverse complemented BioSequence.
func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
if !inplace {
@ -28,7 +54,7 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
// ASCII code & 31 -> builds an index in which (a|A) is 1
// ASCII code | 0x20 -> forces lower case
s[j], s[i] = complement(s[i]), complement(s[j])
s[j], s[i] = nucComplement(s[i]), nucComplement(s[j])
j++
}
@ -49,7 +75,7 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
b := []byte(m)
// Exchange and reverse complement symbols
b[1], b[9] = complement(b[9]), complement(b[1])
b[1], b[9] = nucComplement(b[9]), nucComplement(b[1])
// Exchange sequencing scores
b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4]

View File

@ -0,0 +1,93 @@
package obiseq
import (
"reflect"
"testing"
)
// TestNucComplement checks nucComplement against a table of input bytes and
// the complement expected for each of them.
//
// The table covers the symbols returned unchanged ('.' and '-'), lower- and
// upper-case letters (whose complement is expected in lower case), and the
// bracket pair '[' / ']'.
func TestNucComplement(t *testing.T) {
	tests := []struct {
		input byte
		want  byte
	}{
		{'.', '.'},
		{'-', '-'},
		{'a', 't'},
		{'G', 'c'},
		{'T', 'a'},
		{'C', 'g'},
		{'n', 'n'},
		{'[', ']'},
		{']', '['},
	}

	for _, tc := range tests {
		if got := nucComplement(tc.input); got != tc.want {
			// Report the function under its current name so a failure
			// points at the right symbol.
			t.Errorf("nucComplement(%c) = %c, want %c", tc.input, got, tc.want)
		}
	}
}
// TestReverseComplement exercises BioSequence.ReverseComplement.
//
// Two families of inputs are used, each with inplace set to false and then to
// true:
//   - a sequence "ATCG" built with NewBioSequence, whose String() must match
//     that of a sequence built from "CGAT";
//   - the same sequence built with NewBioSequenceWithQualities and qualities
//     {30, 20, 10, 40}, for which the qualities of the result must also equal
//     {40, 10, 20, 30}.
func TestReverseComplement(t *testing.T) {
	modes := []bool{false, true}

	// Sequences without qualities: copy first, then in place.
	for _, inplace := range modes {
		seq := NewBioSequence("123", []byte("ATCG"), "")
		expected := NewBioSequence("123", []byte("CGAT"), "")

		result := seq.ReverseComplement(inplace)

		if result.String() != expected.String() {
			t.Errorf("Expected %v, but got %v", expected, result)
		}
	}

	// Sequences with qualities: copy first, then in place.
	for _, inplace := range modes {
		seq := NewBioSequenceWithQualities("123", []byte("ATCG"), "", []byte{30, 20, 10, 40})
		expected := NewBioSequenceWithQualities("123", []byte("CGAT"), "", []byte{40, 10, 20, 30})

		result := seq.ReverseComplement(inplace)

		if result.String() != expected.String() {
			t.Errorf("Expected %v, but got %v", expected, result)
		}

		if !reflect.DeepEqual(result.Qualities(), expected.Qualities()) {
			t.Errorf("Expected %v, but got %v", expected.Qualities(), result.Qualities())
		}
	}
}

View File

@ -28,7 +28,6 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
if sequence.HasQualities() {
newSeq.qualities = CopySlice(sequence.Qualities()[from:to])
newSeq.WriteQualities(sequence.Qualities()[from:to])
}
newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)