mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch a bug in Subsequence and add a few unit tests.
Former-commit-id: caddc9ad6523e4ef02899bfe83cc8681ef674383
This commit is contained in:
@ -111,6 +111,28 @@ func NewBioSequence(id string,
|
||||
return bs
|
||||
}
|
||||
|
||||
// NewBioSequenceWithQualities creates a new BioSequence object with the given id, sequence, definition, and qualities.
|
||||
//
|
||||
// Parameters:
|
||||
// - id: the id of the BioSequence.
|
||||
// - sequence: the sequence data of the BioSequence.
|
||||
// - definition: the definition of the BioSequence.
|
||||
// - qualities: the qualities data of the BioSequence.
|
||||
//
|
||||
// Returns:
|
||||
// - *BioSequence: a pointer to the newly created BioSequence object.
|
||||
func NewBioSequenceWithQualities(id string,
|
||||
sequence []byte,
|
||||
definition string,
|
||||
qualities []byte) *BioSequence {
|
||||
bs := NewEmptyBioSequence(0)
|
||||
bs.SetId(id)
|
||||
bs.SetSequence(sequence)
|
||||
bs.SetDefinition(definition)
|
||||
bs.SetQualities(qualities)
|
||||
return bs
|
||||
}
|
||||
|
||||
// Recycle recycles the BioSequence object.
|
||||
//
|
||||
// It decreases the count of in-memory sequences and increases the count of recycled sequences.
|
||||
|
@ -94,6 +94,42 @@ func TestNewBioSequence(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestNewBioSequenceWithQualities tests the NewBioSequenceWithQualities function.
|
||||
//
|
||||
// It tests that the BioSequence object is created with the correct id, sequence,
|
||||
// definition, and qualities.
|
||||
// Parameters:
|
||||
// - t: A pointer to a testing.T object.
|
||||
// Return type: None.
|
||||
func TestNewBioSequenceWithQualities(t *testing.T) {
|
||||
id := "123"
|
||||
sequence := []byte("ATGC")
|
||||
definition := "DNA sequence"
|
||||
qualities := []byte("1234")
|
||||
|
||||
bs := NewBioSequenceWithQualities(id, sequence, definition, qualities)
|
||||
|
||||
// Test that the BioSequence object is created with the correct id
|
||||
if bs.Id() != id {
|
||||
t.Errorf("Expected id to be %s, but got %s", id, bs.Id())
|
||||
}
|
||||
|
||||
// Test that the BioSequence object is created with the correct sequence
|
||||
if string(bs.Sequence()) != string(sequence) {
|
||||
t.Errorf("Expected sequence to be %s, but got %s", string(sequence), string(bs.Sequence()))
|
||||
}
|
||||
|
||||
// Test that the BioSequence object is created with the correct definition
|
||||
if bs.Definition() != definition {
|
||||
t.Errorf("Expected definition to be %s, but got %s", definition, bs.Definition())
|
||||
}
|
||||
|
||||
// Test that the BioSequence object is created with the correct qualities
|
||||
if string(bs.Qualities()) != string(qualities) {
|
||||
t.Errorf("Expected qualities to be %s, but got %s", string(qualities), string(bs.Qualities()))
|
||||
}
|
||||
}
|
||||
|
||||
// TestBioSequence_Recycle tests the Recycle method of the BioSequence struct.
|
||||
//
|
||||
// Test case 1: Recycle a BioSequence object with non-nil slices and annotations.
|
||||
|
@ -3,18 +3,44 @@ package obiseq
|
||||
// ".ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
|
||||
var _revcmpDNA = []byte(".TVGHNNCDNNMNKNNNNYSAABWNRN]N[NNN")

// nucComplement returns the complement of the nucleotide symbol n.
//
// The result is chosen as follows:
//   - '.' and '-' are returned unchanged.
//   - '[' and ']' are exchanged. They are handled explicitly because forcing
//     the lower-case bit on the table entry would turn them into '{' and '}'.
//   - Any other byte in the ASCII range 'A'..'z' is looked up in _revcmpDNA
//     using its five low bits, so that both cases of a letter share the same
//     index, and the result is forced to lower case. Note that this range
//     also contains the non-letter bytes '\\', '^', '_' and '`'.
//   - Every other byte yields 'n'.
func nucComplement(n byte) byte {
	switch n {
	case '.', '-':
		return n
	case '[':
		return ']'
	case ']':
		return '['
	}

	if n >= 'A' && n <= 'z' {
		// n&31 -> table index in which (a|A) is 1.
		// |0x20 -> sets the ASCII lower-case bit on the looked-up symbol.
		return _revcmpDNA[n&31] | 0x20
	}

	return 'n'
}
|
||||
|
||||
// Reverse complements a DNA sequence.
|
||||
// If the inplace parameter is true, that operation is done in place.
|
||||
// ReverseComplement reverses and complements a BioSequence.
|
||||
//
|
||||
// If `inplace` is `false`, a new copy of the BioSequence is created before
|
||||
// performing the reverse complement. If `inplace` is `true`, the reverse
|
||||
// complement is performed directly on the original BioSequence.
|
||||
//
|
||||
// The function first reverses the sequence by swapping the characters from the
|
||||
// beginning and end of the sequence. Then, it complements each character in the
|
||||
// sequence by finding its complement using the `nucComplement` function.
|
||||
//
|
||||
// If the BioSequence has qualities, the function also reverses the qualities in
|
||||
// the same way as the sequence.
|
||||
//
|
||||
// The function returns the reverse complemented BioSequence.
|
||||
func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
|
||||
|
||||
if !inplace {
|
||||
@ -28,7 +54,7 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
|
||||
// ASCII code & 31 -> builds an index in which (a|A) is 1
|
||||
// ASCII code & 0x20 -> Force lower case
|
||||
|
||||
s[j], s[i] = complement(s[i]), complement(s[j])
|
||||
s[j], s[i] = nucComplement(s[i]), nucComplement(s[j])
|
||||
j++
|
||||
}
|
||||
|
||||
@ -49,7 +75,7 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
|
||||
b := []byte(m)
|
||||
|
||||
// Exchange and reverse-complement the symbols
|
||||
b[1], b[9] = complement(b[9]), complement(b[1])
|
||||
b[1], b[9] = nucComplement(b[9]), nucComplement(b[1])
|
||||
|
||||
// Exchange sequencing scores
|
||||
b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4]
|
||||
|
93
pkg/obiseq/revcomp_test.go
Normal file
93
pkg/obiseq/revcomp_test.go
Normal file
@ -0,0 +1,93 @@
|
||||
package obiseq
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestComplement is a test function that tests the complement function.
|
||||
//
|
||||
// It tests the complement function by providing a set of input bytes and their
|
||||
// expected output bytes. It verifies that the complement function correctly
|
||||
// computes the complement of each input byte.
|
||||
//
|
||||
// Parameters:
|
||||
// - t: *testing.T - the testing object for running test cases and reporting
|
||||
// failures.
|
||||
//
|
||||
// Returns: None.
|
||||
func TestNucComplement(t *testing.T) {
|
||||
tests := []struct {
|
||||
input byte
|
||||
want byte
|
||||
}{
|
||||
{'.', '.'},
|
||||
{'-', '-'},
|
||||
{'a', 't'},
|
||||
{'G', 'c'},
|
||||
{'T', 'a'},
|
||||
{'C', 'g'},
|
||||
{'n', 'n'},
|
||||
{'[', ']'},
|
||||
{']', '['},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
got := nucComplement(tc.input)
|
||||
if got != tc.want {
|
||||
t.Errorf("complement(%c) = %c, want %c", tc.input, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestReverseComplement is a test function for the ReverseComplement method.
|
||||
//
|
||||
// It tests the behavior of the ReverseComplement method under different scenarios.
|
||||
// The function checks if the ReverseComplement method returns the expected result
|
||||
// when the 'inplace' parameter is set to false or true. It also verifies if the
|
||||
// ReverseComplement method correctly handles BioSequences with qualities.
|
||||
// The function uses the NewBioSequence and NewBioSequenceWithQualities functions
|
||||
// to create BioSequence objects with different sequences and qualities.
|
||||
// It compares the result of the ReverseComplement method with the expected result
|
||||
// and reports an error if they are not equal. Additionally, it compares the
|
||||
// qualities of the result BioSequence with the expected qualities and reports
|
||||
// an error if they are not equal.
|
||||
func TestReverseComplement(t *testing.T) {
|
||||
// Test when inplace is false
|
||||
seq := NewBioSequence("123", []byte("ATCG"), "")
|
||||
expected := NewBioSequence("123", []byte("CGAT"), "")
|
||||
result := seq.ReverseComplement(false)
|
||||
if result.String() != expected.String() {
|
||||
t.Errorf("Expected %v, but got %v", expected, result)
|
||||
}
|
||||
|
||||
// Test when inplace is true
|
||||
seq = NewBioSequence("123", []byte("ATCG"), "")
|
||||
expected = NewBioSequence("123", []byte("CGAT"), "")
|
||||
result = seq.ReverseComplement(true)
|
||||
if result.String() != expected.String() {
|
||||
t.Errorf("Expected %v, but got %v", expected, result)
|
||||
}
|
||||
|
||||
// Test when BioSequence has qualities
|
||||
seq = NewBioSequenceWithQualities("123", []byte("ATCG"), "", []byte{30, 20, 10, 40})
|
||||
expected = NewBioSequenceWithQualities("123", []byte("CGAT"), "", []byte{40, 10, 20, 30})
|
||||
result = seq.ReverseComplement(false)
|
||||
if result.String() != expected.String() {
|
||||
t.Errorf("Expected %v, but got %v", expected, result)
|
||||
}
|
||||
if !reflect.DeepEqual(result.Qualities(), expected.Qualities()) {
|
||||
t.Errorf("Expected %v, but got %v", expected.Qualities(), result.Qualities())
|
||||
}
|
||||
|
||||
// Test when BioSequence has qualities and inplace is true
|
||||
seq = NewBioSequenceWithQualities("123", []byte("ATCG"), "", []byte{30, 20, 10, 40})
|
||||
expected = NewBioSequenceWithQualities("123", []byte("CGAT"), "", []byte{40, 10, 20, 30})
|
||||
result = seq.ReverseComplement(true)
|
||||
if result.String() != expected.String() {
|
||||
t.Errorf("Expected %v, but got %v", expected, result)
|
||||
}
|
||||
if !reflect.DeepEqual(result.Qualities(), expected.Qualities()) {
|
||||
t.Errorf("Expected %v, but got %v", expected.Qualities(), result.Qualities())
|
||||
}
|
||||
}
|
@ -28,7 +28,6 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
|
||||
|
||||
if sequence.HasQualities() {
|
||||
newSeq.qualities = CopySlice(sequence.Qualities()[from:to])
|
||||
newSeq.WriteQualities(sequence.Qualities()[from:to])
|
||||
}
|
||||
|
||||
newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)
|
||||
|
Reference in New Issue
Block a user