From dedf125f6e1a5c373cdc1ab858a50a090ee6e602 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 8 Nov 2023 10:16:34 +0200 Subject: [PATCH] Patch a bug in Subsequence and adds few unit tests. Former-commit-id: caddc9ad6523e4ef02899bfe83cc8681ef674383 --- Release-notes.md | 3 +- go.mod | 10 +--- go.sum | 27 ---------- pkg/obiseq/biosequence.go | 22 ++++++++ pkg/obiseq/biosequence_test.go | 36 +++++++++++++ pkg/obiseq/revcomp.go | 38 +++++++++++--- pkg/obiseq/revcomp_test.go | 93 ++++++++++++++++++++++++++++++++++ pkg/obiseq/subseq.go | 1 - 8 files changed, 186 insertions(+), 44 deletions(-) create mode 100644 pkg/obiseq/revcomp_test.go diff --git a/Release-notes.md b/Release-notes.md index e6495dd..e1b15e8 100644 --- a/Release-notes.md +++ b/Release-notes.md @@ -33,7 +33,8 @@ + `.` and `-` characters are returned without change + `[` is complemented to `]` and oppositely + all other characters are complemented as `n` - +- Correction of a bug in the `Subsequence` method of the `BioSequence` class, duplicating the quality values. + This made `obimultiplex` produce fastq files with sequences having quality values duplicated. 
### Becareful diff --git a/go.mod b/go.mod index 4e1a4ed..7e66a37 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,6 @@ require ( github.com/rrethy/ahocorasick v1.0.0 github.com/schollz/progressbar/v3 v3.13.1 github.com/sirupsen/logrus v1.9.3 - github.com/stretchr/testify v1.7.0 github.com/tevino/abool/v2 v2.1.0 golang.org/x/exp v0.0.0-20231006140011-7918f672742d gonum.org/v1/gonum v0.14.0 @@ -21,28 +20,21 @@ require ( ) require ( - github.com/deckarep/golang-set/v2 v2.3.1 // indirect github.com/kr/pretty v0.2.1 // indirect github.com/kr/text v0.1.0 // indirect ) require ( - github.com/davecgh/go-spew v1.1.1 // indirect github.com/dsnet/compress v0.0.1 github.com/gabriel-vasile/mimetype v1.4.3 - github.com/klauspost/compress v1.17.2 // indirect + github.com/klauspost/compress v1.17.2 github.com/mattn/go-runewidth v0.0.15 // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.4.4 // indirect github.com/shopspring/decimal v1.3.1 // indirect github.com/ulikunitz/xz v0.5.11 - github.com/yuin/goldmark v1.4.13 // indirect - golang.org/x/mod v0.13.0 // indirect golang.org/x/net v0.17.0 // indirect golang.org/x/sys v0.13.0 // indirect golang.org/x/term v0.13.0 // indirect - golang.org/x/tools v0.14.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c - gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect ) diff --git a/go.sum b/go.sum index c7d4b51..cbd68ff 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,3 @@ -github.com/DavidGamba/go-getoptions v0.27.0 h1:hldKJSwO9SwvR+z9pe6ojhEcYECrRiO/bar9B7MnBKA= -github.com/DavidGamba/go-getoptions v0.27.0/go.mod h1:qLaLSYeQ8sUVOfKuu5JT5qKKS3OCwyhkYSJnoG+ggmo= github.com/DavidGamba/go-getoptions v0.28.0 h1:18wgEvfZdrlfIhVDGEBO3Dl0fkOyXqXLa0tLMCKxM1c= github.com/DavidGamba/go-getoptions v0.28.0/go.mod h1:zE97E3PR9P3BI/HKyNYgdMlYxodcuiC6W68KIgeYT84= github.com/PaesslerAG/gval v1.2.2 
h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E= @@ -15,21 +13,15 @@ github.com/daichi-m/go18ds v1.12.1/go.mod h1:wc2dURUr8aMxxC4Mn5ObJGVM7uIKU8JagY4 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/deckarep/golang-set/v2 v2.3.1 h1:vjmkvJt/IV27WXPyYQpAh4bRyWJc5Y435D17XQ9QU5A= -github.com/deckarep/golang-set/v2 v2.3.1/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q= github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo= github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY= -github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= -github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= 
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= @@ -68,39 +60,20 @@ github.com/tevino/abool/v2 v2.1.0/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEa github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8= github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8= github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= -github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ= -golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8= golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= -golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc= -golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= -golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14= -golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.11.0 
h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= -golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0= -golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU= golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= -golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 h1:Vve/L0v7CXXuxUmaMGIEK/dEeq7uiqb5qBgQrZzIE7E= -golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM= -golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= -golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0= gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index c89d8d9..871036e 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -111,6 +111,28 @@ func NewBioSequence(id string, return bs } +// NewBioSequenceWithQualities creates a new BioSequence object with the given id, sequence, definition, and qualities. 
+// +// Parameters: +// - id: the id of the BioSequence. +// - sequence: the sequence data of the BioSequence. +// - definition: the definition of the BioSequence. +// - qualities: the qualities data of the BioSequence. +// +// Returns: +// - *BioSequence: a pointer to the newly created BioSequence object. +func NewBioSequenceWithQualities(id string, + sequence []byte, + definition string, + qualities []byte) *BioSequence { + bs := NewEmptyBioSequence(0) + bs.SetId(id) + bs.SetSequence(sequence) + bs.SetDefinition(definition) + bs.SetQualities(qualities) + return bs +} + // Recycle recycles the BioSequence object. // // It decreases the count of in-memory sequences and increases the count of recycled sequences. diff --git a/pkg/obiseq/biosequence_test.go b/pkg/obiseq/biosequence_test.go index c9ce5f1..400a92a 100644 --- a/pkg/obiseq/biosequence_test.go +++ b/pkg/obiseq/biosequence_test.go @@ -94,6 +94,42 @@ func TestNewBioSequence(t *testing.T) { } } +// TestNewBioSequenceWithQualities tests the NewBioSequenceWithQualities function. +// +// It tests that the BioSequence object is created with the correct id, sequence, +// definition, and qualities. +// Parameters: +// - t: A pointer to a testing.T object. +// Return type: None. 
+func TestNewBioSequenceWithQualities(t *testing.T) { + id := "123" + sequence := []byte("ATGC") + definition := "DNA sequence" + qualities := []byte("1234") + + bs := NewBioSequenceWithQualities(id, sequence, definition, qualities) + + // Test that the BioSequence object is created with the correct id + if bs.Id() != id { + t.Errorf("Expected id to be %s, but got %s", id, bs.Id()) + } + + // Test that the BioSequence object is created with the correct sequence + if string(bs.Sequence()) != string(sequence) { + t.Errorf("Expected sequence to be %s, but got %s", string(sequence), string(bs.Sequence())) + } + + // Test that the BioSequence object is created with the correct definition + if bs.Definition() != definition { + t.Errorf("Expected definition to be %s, but got %s", definition, bs.Definition()) + } + + // Test that the BioSequence object is created with the correct qualities + if string(bs.Qualities()) != string(qualities) { + t.Errorf("Expected qualities to be %s, but got %s", string(qualities), string(bs.Qualities())) + } +} + // TestBioSequence_Recycle tests the Recycle method of the BioSequence struct. // // Test case 1: Recycle a BioSequence object with non-nil slices and annotations. diff --git a/pkg/obiseq/revcomp.go b/pkg/obiseq/revcomp.go index 9ff0ca0..35b9e79 100644 --- a/pkg/obiseq/revcomp.go +++ b/pkg/obiseq/revcomp.go @@ -3,18 +3,44 @@ package obiseq // ".ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]" var _revcmpDNA = []byte(".TVGHNNCDNNMNKNNNNYSAABWNRN]N[NNN") -func complement(n byte) byte { +// nucComplement returns the complement of a nucleotide. +// +// It takes a byte as input and returns the complement of the nucleotide. +// The function handles various cases: +// - If the input is '.' or '-', it returns the same character. +// - If the input is '[', it returns ']'. +// - If the input is ']', it returns '['. +// - If the input is a letter from 'A' to 'z', it returns the complement of the nucleotide. 
+// The complement is looked up in the _revcmpDNA table and is always returned in lower case. +// - If none of the above cases match, it returns 'n'. +func nucComplement(n byte) byte { switch { case n == '.' || n == '-': return n + case n == '[': + return ']' + case n == ']': + return '[' case (n >= 'A' && n <= 'z'): - return _revcmpDNA[n&31] | (n & 0x20) + return _revcmpDNA[n&31] | 0x20 } return 'n' } -// Reverse complements a DNA sequence. -// If the inplace parametter is true, that operation is done in place. +// ReverseComplement reverses and complements a BioSequence. +// +// If `inplace` is `false`, a new copy of the BioSequence is created before +// performing the reverse complement. If `inplace` is `true`, the reverse +// complement is performed directly on the original BioSequence. +// +// The function first reverses the sequence by swapping the characters from the +// beginning and end of the sequence. Then, it complements each character in the +// sequence by finding its complement using the `nucComplement` function. +// +// If the BioSequence has qualities, the function also reverses the qualities in +// the same way as the sequence. +// +// The function returns the reverse complemented BioSequence. 
func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence { if !inplace { @@ -28,7 +54,7 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence { // ASCII code & 31 -> builds an index in witch (a|A) is 1 // ASCII code & 0x20 -> Foce lower case - s[j], s[i] = complement(s[i]), complement(s[j]) + s[j], s[i] = nucComplement(s[i]), nucComplement(s[j]) j++ } @@ -49,7 +75,7 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence { b := []byte(m) // Echange and reverse complement symboles - b[1], b[9] = complement(b[9]), complement(b[1]) + b[1], b[9] = nucComplement(b[9]), nucComplement(b[1]) // Exchange sequencing scores b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4] diff --git a/pkg/obiseq/revcomp_test.go b/pkg/obiseq/revcomp_test.go new file mode 100644 index 0000000..174a7d6 --- /dev/null +++ b/pkg/obiseq/revcomp_test.go @@ -0,0 +1,93 @@ +package obiseq + +import ( + "reflect" + "testing" +) + +// TestNucComplement is a test function that tests the nucComplement function. +// +// It tests the nucComplement function by providing a set of input bytes and their +// expected output bytes. It verifies that the nucComplement function correctly +// computes the complement of each input byte. +// +// Parameters: +// - t: *testing.T - the testing object for running test cases and reporting +// failures. +// +// Returns: None. +func TestNucComplement(t *testing.T) { + tests := []struct { + input byte + want byte + }{ + {'.', '.'}, + {'-', '-'}, + {'a', 't'}, + {'G', 'c'}, + {'T', 'a'}, + {'C', 'g'}, + {'n', 'n'}, + {'[', ']'}, + {']', '['}, + } + + for _, tc := range tests { + got := nucComplement(tc.input) + if got != tc.want { + t.Errorf("complement(%c) = %c, want %c", tc.input, got, tc.want) + } + } +} + +// TestReverseComplement is a test function for the ReverseComplement method. +// +// It tests the behavior of the ReverseComplement method under different scenarios. 
+// The function checks if the ReverseComplement method returns the expected result +// when the 'inplace' parameter is set to false or true. It also verifies if the +// ReverseComplement method correctly handles BioSequences with qualities. +// The function uses the NewBioSequence and NewBioSequenceWithQualities functions +// to create BioSequence objects with different sequences and qualities. +// It compares the result of the ReverseComplement method with the expected result +// and reports an error if they are not equal. Additionally, it compares the +// qualities of the result BioSequence with the expected qualities and reports +// an error if they are not equal. +func TestReverseComplement(t *testing.T) { + // Test when inplace is false + seq := NewBioSequence("123", []byte("ATCG"), "") + expected := NewBioSequence("123", []byte("CGAT"), "") + result := seq.ReverseComplement(false) + if result.String() != expected.String() { + t.Errorf("Expected %v, but got %v", expected, result) + } + + // Test when inplace is true + seq = NewBioSequence("123", []byte("ATCG"), "") + expected = NewBioSequence("123", []byte("CGAT"), "") + result = seq.ReverseComplement(true) + if result.String() != expected.String() { + t.Errorf("Expected %v, but got %v", expected, result) + } + + // Test when BioSequence has qualities + seq = NewBioSequenceWithQualities("123", []byte("ATCG"), "", []byte{30, 20, 10, 40}) + expected = NewBioSequenceWithQualities("123", []byte("CGAT"), "", []byte{40, 10, 20, 30}) + result = seq.ReverseComplement(false) + if result.String() != expected.String() { + t.Errorf("Expected %v, but got %v", expected, result) + } + if !reflect.DeepEqual(result.Qualities(), expected.Qualities()) { + t.Errorf("Expected %v, but got %v", expected.Qualities(), result.Qualities()) + } + + // Test when BioSequence has qualities and inplace is true + seq = NewBioSequenceWithQualities("123", []byte("ATCG"), "", []byte{30, 20, 10, 40}) + expected = 
NewBioSequenceWithQualities("123", []byte("CGAT"), "", []byte{40, 10, 20, 30}) + result = seq.ReverseComplement(true) + if result.String() != expected.String() { + t.Errorf("Expected %v, but got %v", expected, result) + } + if !reflect.DeepEqual(result.Qualities(), expected.Qualities()) { + t.Errorf("Expected %v, but got %v", expected.Qualities(), result.Qualities()) + } +} diff --git a/pkg/obiseq/subseq.go b/pkg/obiseq/subseq.go index e78047c..59c290d 100644 --- a/pkg/obiseq/subseq.go +++ b/pkg/obiseq/subseq.go @@ -28,7 +28,6 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque if sequence.HasQualities() { newSeq.qualities = CopySlice(sequence.Qualities()[from:to]) - newSeq.WriteQualities(sequence.Qualities()[from:to]) } newSeq.id = fmt.Sprintf("%s_sub[%d..%d]", sequence.Id(), from+1, to)