Patches a bug in Subsequence and adds a few unit tests.

Former-commit-id: caddc9ad6523e4ef02899bfe83cc8681ef674383
This commit is contained in:
2023-11-08 10:16:34 +02:00
parent 8f96517f3c
commit dedf125f6e
8 changed files with 186 additions and 44 deletions

View File

@@ -3,18 +3,44 @@ package obiseq
// ".ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
var _revcmpDNA = []byte(".TVGHNNCDNNMNKNNNNYSAABWNRN]N[NNN")
func complement(n byte) byte {
// nucComplement returns the complement of a single nucleotide symbol.
//
// It takes one byte and returns one byte. The cases are evaluated in order:
//   - If the input is '.' or '-', it is returned unchanged.
//   - If the input is '[', it returns ']'.
//   - If the input is ']', it returns '['.
//   - If the input lies in the ASCII range 'A' to 'z', the result is read from
//     the _revcmpDNA lookup table, indexed by the low five bits of the input
//     (n&31), so an upper-case letter and its lower-case form share one entry.
//     This range also contains the non-letter bytes '\\', '^', '_' and '`',
//     which therefore go through the table as well.
//   - If none of the above cases match, it returns 'n'.
func nucComplement(n byte) byte {
switch {
case n == '.' || n == '-':
return n
case n == '[':
return ']'
case n == ']':
return '['
case (n >= 'A' && n <= 'z'):
// OR-ing with (n & 0x20) copies the input's own 0x20 bit onto the table value.
return _revcmpDNA[n&31] | (n & 0x20)
// NOTE(review): this second return can never execute, because the statement
// above always returns first. It differs only in OR-ing the constant 0x20
// instead of the input's 0x20 bit. Presumably one of the two is a leftover
// from an edit; confirm which form is intended and drop the other.
return _revcmpDNA[n&31] | 0x20
}
return 'n'
}
// Reverse complements a DNA sequence.
// If the inplace parameter is true, the operation is done in place.
// ReverseComplement reverses and complements a BioSequence.
//
// If `inplace` is `false`, a new copy of the BioSequence is created before
// performing the reverse complement. If `inplace` is `true`, the reverse
// complement is performed directly on the original BioSequence.
//
// The function first reverses the sequence by swapping the characters from the
// beginning and end of the sequence. Then, it complements each character in the
// sequence by finding its complement using the `nucComplement` function.
//
// If the BioSequence has qualities, the function also reverses the qualities in
// the same way as the sequence.
//
// The function returns the reverse complemented BioSequence.
func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
if !inplace {
@@ -28,7 +54,7 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
// ASCII code & 31 -> builds an index in which (a|A) is 1
// ASCII code & 0x20 -> Force lower case
s[j], s[i] = complement(s[i]), complement(s[j])
s[j], s[i] = nucComplement(s[i]), nucComplement(s[j])
j++
}
@@ -49,7 +75,7 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
b := []byte(m)
// Exchange and reverse complement symbols
b[1], b[9] = complement(b[9]), complement(b[1])
b[1], b[9] = nucComplement(b[9]), nucComplement(b[1])
// Exchange sequencing scores
b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4]