Patches a bug in Subsequence and adds a few unit tests.

Former-commit-id: caddc9ad6523e4ef02899bfe83cc8681ef674383
This commit is contained in:
2023-11-08 10:16:34 +02:00
parent 8f96517f3c
commit dedf125f6e
8 changed files with 186 additions and 44 deletions

View File

@ -33,7 +33,8 @@
+ `.` and `-` characters are returned without change
+ `[` is complemented to `]` and oppositely
+ all other characters are complemented as `n`
- Correction of a bug in the `Subsequence` method of the `BioSequence` class that duplicated the quality values.
This caused `obimultiplex` to produce fastq files containing sequences with duplicated quality values.
### Be careful

10
go.mod
View File

@ -13,7 +13,6 @@ require (
github.com/rrethy/ahocorasick v1.0.0
github.com/schollz/progressbar/v3 v3.13.1
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.7.0
github.com/tevino/abool/v2 v2.1.0
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
gonum.org/v1/gonum v0.14.0
@ -21,28 +20,21 @@ require (
)
require (
github.com/deckarep/golang-set/v2 v2.3.1 // indirect
github.com/kr/pretty v0.2.1 // indirect
github.com/kr/text v0.1.0 // indirect
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dsnet/compress v0.0.1
github.com/gabriel-vasile/mimetype v1.4.3
github.com/klauspost/compress v1.17.2 // indirect
github.com/klauspost/compress v1.17.2
github.com/mattn/go-runewidth v0.0.15 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/ulikunitz/xz v0.5.11
github.com/yuin/goldmark v1.4.13 // indirect
golang.org/x/mod v0.13.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/term v0.13.0 // indirect
golang.org/x/tools v0.14.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
)

27
go.sum
View File

@ -1,5 +1,3 @@
github.com/DavidGamba/go-getoptions v0.27.0 h1:hldKJSwO9SwvR+z9pe6ojhEcYECrRiO/bar9B7MnBKA=
github.com/DavidGamba/go-getoptions v0.27.0/go.mod h1:qLaLSYeQ8sUVOfKuu5JT5qKKS3OCwyhkYSJnoG+ggmo=
github.com/DavidGamba/go-getoptions v0.28.0 h1:18wgEvfZdrlfIhVDGEBO3Dl0fkOyXqXLa0tLMCKxM1c=
github.com/DavidGamba/go-getoptions v0.28.0/go.mod h1:zE97E3PR9P3BI/HKyNYgdMlYxodcuiC6W68KIgeYT84=
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
@ -15,21 +13,15 @@ github.com/daichi-m/go18ds v1.12.1/go.mod h1:wc2dURUr8aMxxC4Mn5ObJGVM7uIKU8JagY4
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set/v2 v2.3.1 h1:vjmkvJt/IV27WXPyYQpAh4bRyWJc5Y435D17XQ9QU5A=
github.com/deckarep/golang-set/v2 v2.3.1/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
@ -68,39 +60,20 @@ github.com/tevino/abool/v2 v2.1.0/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEa
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8=
github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY=
golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 h1:Vve/L0v7CXXuxUmaMGIEK/dEeq7uiqb5qBgQrZzIE7E=
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=
golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg=
gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0=
gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=

View File

@ -111,6 +111,28 @@ func NewBioSequence(id string,
return bs
}
// NewBioSequenceWithQualities builds a BioSequence that carries quality
// scores in addition to its identifier, sequence and definition.
//
// The object is obtained from NewEmptyBioSequence(0) and then populated, in
// this order, through SetId, SetSequence, SetDefinition and SetQualities.
//
// Parameters:
//   - id: identifier given to the new sequence.
//   - sequence: sequence data handed to SetSequence.
//   - definition: definition text handed to SetDefinition.
//   - qualities: quality data handed to SetQualities.
//
// Returns:
//   - *BioSequence: the newly populated sequence.
func NewBioSequenceWithQualities(id string, sequence []byte, definition string, qualities []byte) *BioSequence {
	seq := NewEmptyBioSequence(0)

	seq.SetId(id)
	seq.SetSequence(sequence)
	seq.SetDefinition(definition)
	seq.SetQualities(qualities)

	return seq
}
// Recycle recycles the BioSequence object.
//
// It decreases the count of in-memory sequences and increases the count of recycled sequences.

View File

@ -94,6 +94,42 @@ func TestNewBioSequence(t *testing.T) {
}
}
// TestNewBioSequenceWithQualities verifies that a BioSequence built with
// NewBioSequenceWithQualities reports the id, sequence, definition and
// qualities it was constructed from.
func TestNewBioSequenceWithQualities(t *testing.T) {
	const (
		id         = "123"
		definition = "DNA sequence"
	)
	sequence := []byte("ATGC")
	qualities := []byte("1234")

	bs := NewBioSequenceWithQualities(id, sequence, definition, qualities)

	// Identifier must round-trip unchanged.
	if got := bs.Id(); got != id {
		t.Errorf("Expected id to be %s, but got %s", id, got)
	}

	// Sequence bytes are compared through their string form.
	if got, want := string(bs.Sequence()), string(sequence); got != want {
		t.Errorf("Expected sequence to be %s, but got %s", want, got)
	}

	// Definition must round-trip unchanged.
	if got := bs.Definition(); got != definition {
		t.Errorf("Expected definition to be %s, but got %s", definition, got)
	}

	// Quality bytes are compared through their string form.
	if got, want := string(bs.Qualities()), string(qualities); got != want {
		t.Errorf("Expected qualities to be %s, but got %s", want, got)
	}
}
// TestBioSequence_Recycle tests the Recycle method of the BioSequence struct.
//
// Test case 1: Recycle a BioSequence object with non-nil slices and annotations.

View File

@ -3,18 +3,44 @@ package obiseq
// ".ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
var _revcmpDNA = []byte(".TVGHNNCDNNMNKNNNNYSAABWNRN]N[NNN")

// nucComplement returns the complement of a single nucleotide symbol.
//
// Rules, checked in this order:
//   - '.' and '-' are returned unchanged.
//   - '[' becomes ']' and ']' becomes '['.
//   - ASCII letters ('A'-'Z' and 'a'-'z') are complemented through the
//     _revcmpDNA lookup table; the result is always lower case.
//   - any other byte yields 'n'.
//
// Only letters reach the table lookup. The punctuation bytes that sit
// between 'Z' and 'a' in ASCII (such as the backtick, which would otherwise
// index slot 0 of the table and come back as '.') fall through to 'n'.
func nucComplement(n byte) byte {
	switch {
	case n == '.' || n == '-':
		return n
	case n == '[':
		return ']'
	case n == ']':
		return '['
	case (n >= 'A' && n <= 'Z') || (n >= 'a' && n <= 'z'):
		// n&31 maps both 'a' and 'A' to index 1, 'b'/'B' to 2, and so on;
		// OR-ing with 0x20 forces the complemented letter to lower case.
		return _revcmpDNA[n&31] | 0x20
	}

	return 'n'
}
// Reverse complements a DNA sequence.
// If the inplace parametter is true, that operation is done in place.
// ReverseComplement reverses and complements a BioSequence.
//
// If `inplace` is `false`, a new copy of the BioSequence is created before
// performing the reverse complement. If `inplace` is `true`, the reverse
// complement is performed directly on the original BioSequence.
//
// The function first reverses the sequence by swapping the characters from the
// beginning and end of the sequence. Then, it complements each character in the
// sequence by finding its complement using the `nucComplement` function.
//
// If the BioSequence has qualities, the function also reverses the qualities in
// the same way as the sequence.
//
// The function returns the reverse complemented BioSequence.
func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
if !inplace {
@ -28,7 +54,7 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
// ASCII code & 31 -> builds an index in which (a|A) is 1
// ASCII code & 0x20 -> Force lower case
s[j], s[i] = complement(s[i]), complement(s[j])
s[j], s[i] = nucComplement(s[i]), nucComplement(s[j])
j++
}
@ -49,7 +75,7 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
b := []byte(m)
// Exchange and reverse complement symbols
b[1], b[9] = complement(b[9]), complement(b[1])
b[1], b[9] = nucComplement(b[9]), nucComplement(b[1])
// Exchange sequencing scores
b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4]

View File

@ -0,0 +1,93 @@
package obiseq
import (
"reflect"
"testing"
)
// TestNucComplement checks nucComplement against a table of input bytes and
// the complement expected for each of them.
//
// The table covers the pass-through symbols '.' and '-', lower- and
// upper-case nucleotides (the expected complement is lower case in both
// cases), the ambiguity code 'n', and the bracket pair '[' / ']'.
//
// Parameters:
//   - t: *testing.T - the testing object used to report failures.
func TestNucComplement(t *testing.T) {
	tests := []struct {
		input byte
		want  byte
	}{
		{'.', '.'},
		{'-', '-'},
		{'a', 't'},
		{'G', 'c'},
		{'T', 'a'},
		{'C', 'g'},
		{'n', 'n'},
		{'[', ']'},
		{']', '['},
	}

	for _, tc := range tests {
		got := nucComplement(tc.input)
		if got != tc.want {
			// Report under the function's current name so a failure can be
			// traced straight back to nucComplement.
			t.Errorf("nucComplement(%c) = %c, want %c", tc.input, got, tc.want)
		}
	}
}
// TestReverseComplement is a test function for the ReverseComplement method.
//
// It tests the behavior of the ReverseComplement method under different scenarios.
// The function checks if the ReverseComplement method returns the expected result
// when the 'inplace' parameter is set to false or true. It also verifies if the
// ReverseComplement method correctly handles BioSequences with qualities.
// The function uses the NewBioSequence and NewBioSequenceWithQualities functions
// to create BioSequence objects with different sequences and qualities.
// It compares the result of the ReverseComplement method with the expected result
// and reports an error if they are not equal. Additionally, it compares the
// qualities of the result BioSequence with the expected qualities and reports
// an error if they are not equal.
func TestReverseComplement(t *testing.T) {
// Test when inplace is false
seq := NewBioSequence("123", []byte("ATCG"), "")
expected := NewBioSequence("123", []byte("CGAT"), "")
result := seq.ReverseComplement(false)
if result.String() != expected.String() {
t.Errorf("Expected %v, but got %v", expected, result)
}
// Test when inplace is true
seq = NewBioSequence("123", []byte("ATCG"), "")
expected = NewBioSequence("123", []byte("CGAT"), "")
result = seq.ReverseComplement(true)
if result.String() != expected.String() {
t.Errorf("Expected %v, but got %v", expected, result)
}
// Test when BioSequence has qualities
seq = NewBioSequenceWithQualities("123", []byte("ATCG"), "", []byte{30, 20, 10, 40})
expected = NewBioSequenceWithQualities("123", []byte("CGAT"), "", []byte{40, 10, 20, 30})
result = seq.ReverseComplement(false)
if result.String() != expected.String() {
t.Errorf("Expected %v, but got %v", expected, result)
}
if !reflect.DeepEqual(result.Qualities(), expected.Qualities()) {
t.Errorf("Expected %v, but got %v", expected.Qualities(), result.Qualities())
}
// Test when BioSequence has qualities and inplace is true
seq = NewBioSequenceWithQualities("123", []byte("ATCG"), "", []byte{30, 20, 10, 40})
expected = NewBioSequenceWithQualities("123", []byte("CGAT"), "", []byte{40, 10, 20, 30})
result = seq.ReverseComplement(true)
if result.String() != expected.String() {
t.Errorf("Expected %v, but got %v", expected, result)
}
if !reflect.DeepEqual(result.Qualities(), expected.Qualities()) {
t.Errorf("Expected %v, but got %v", expected.Qualities(), result.Qualities())
}
}

View File

@ -28,7 +28,6 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
if sequence.HasQualities() {
newSeq.qualities = CopySlice(sequence.Qualities()[from:to])
newSeq.WriteQualities(sequence.Qualities()[from:to])
}
newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)