A Go implementation of the FASTA reader
Former-commit-id: 603592c4761fb0722e9e0501d78de1bd3ba238fa
go.mod (13 lines changed)
@@ -20,15 +20,28 @@ require (
 	scientificgo.org/special v0.0.0
 )
 
+require (
+	github.com/kr/pretty v0.2.1 // indirect
+	github.com/kr/text v0.1.0 // indirect
+)
+
 require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/dsnet/compress v0.0.1
+	github.com/gabriel-vasile/mimetype v1.4.2 // indirect
 	github.com/klauspost/compress v1.16.7 // indirect
 	github.com/mattn/go-runewidth v0.0.15 // indirect
 	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/rivo/uniseg v0.4.4 // indirect
 	github.com/shopspring/decimal v1.3.1 // indirect
+	github.com/ulikunitz/xz v0.5.11
+	github.com/yuin/goldmark v1.4.13 // indirect
+	golang.org/x/mod v0.12.0 // indirect
+	golang.org/x/net v0.14.0 // indirect
 	golang.org/x/sys v0.11.0 // indirect
 	golang.org/x/term v0.11.0 // indirect
+	golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 // indirect
+	gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
 	gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
 )
go.sum (25 lines changed)
@@ -13,13 +13,25 @@ github.com/daichi-m/go18ds v1.12.1/go.mod h1:wc2dURUr8aMxxC4Mn5ObJGVM7uIKU8JagY4
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
+github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
+github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
+github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
+github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
 github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
 github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
 github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
+github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
 github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
 github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
+github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
+github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U=
@@ -45,8 +57,17 @@ github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5Cc
 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/tevino/abool/v2 v2.1.0 h1:7w+Vf9f/5gmKT4m4qkayb33/92M+Um45F2BkHOR+L/c=
 github.com/tevino/abool/v2 v2.1.0/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEamtseuXY=
+github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
+github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8=
+github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
+github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
 golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
+golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
+golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
+golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
 golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -55,10 +76,14 @@ golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
 golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
 golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
+golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 h1:Vve/L0v7CXXuxUmaMGIEK/dEeq7uiqb5qBgQrZzIE7E=
+golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
 gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0=
 gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 scientificgo.org/special v0.0.0 h1:P6WJkECo6tgtvZAEfNXl+KEB9ReAatjKAeX8U07mjSc=
@@ -3,13 +3,14 @@ package obiformats
 import (
 	"encoding/csv"
 	"fmt"
-	gzip "github.com/klauspost/pgzip"
 	"io"
 	"os"
 	"path"
 	"strconv"
 	"strings"
 
+	gzip "github.com/klauspost/pgzip"
+
 	log "github.com/sirupsen/logrus"
 
 	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
@@ -209,7 +210,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
 	}()
 
 	if opt.pointer.full_file_batch {
-		newIter = newIter.FullFileIterator()
+		newIter = newIter.CompleteFileIterator()
 	}
 
 	return newIter
@@ -266,7 +266,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
 	go _ReadFlatFileChunk(reader, entry_channel)
 
 	if opt.pointer.full_file_batch {
-		newIter = newIter.FullFileIterator()
+		newIter = newIter.CompleteFileIterator()
 	}
 
 	return newIter
pkg/obiformats/fastaseq_read.go (new file, 322 lines)
@@ -0,0 +1,322 @@
package obiformats

import (
    "bytes"
    "fmt"
    "io"
    "os"
    "path"

    "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
    "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
    "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
    "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
    "golang.org/x/exp/slices"

    log "github.com/sirupsen/logrus"
)

// lastSequenceCut extracts everything up to the last sequence cut from a given buffer.
//
// It takes a parameter:
// - buffer []byte: the buffer to extract the sequence cut from.
//
// It returns two values:
// - []byte: the extracted sequences.
// - []byte: the remaining buffer after the sequence cut (the last sequence).
func lastSequenceCut(buffer []byte) ([]byte, []byte) {
    imax := len(buffer)
    last := 0
    state := 0
    for i := imax - 1; i >= 0 && state < 2; i-- {
        if state == 0 && buffer[i] == '>' {
            state = 1
            last = i
        } else if state == 1 && (buffer[i] == '\r' || buffer[i] == '\n') {
            state = 2
        } else {
            state = 0
        }
    }

    if state == 2 {
        return buffer[:last], bytes.Clone(buffer[last:])
    }
    return []byte{}, buffer
}

// firstSequenceCut cuts the input buffer at the first occurrence of a ">" character
// following a sequence of "\r" or "\n" characters.
//
// It takes a byte slice as input, representing the buffer to be cut.
// It returns two byte slices: the first slice contains the part of the buffer before the cut,
// and the second slice contains the part of the buffer after the cut.
func firstSequenceCut(buffer []byte) ([]byte, []byte) {
    imax := len(buffer)
    last := 0
    state := 0
    for i := 0; i < imax && state < 2; i++ {
        if (state == 0 || state == 1) && (buffer[i] == '\r' || buffer[i] == '\n') {
            state = 1
        } else if (state == 1 || i == 0) && buffer[i] == '>' {
            state = 2
            last = i
        } else {
            state = 0
        }
    }

    if state == 2 {
        return bytes.Clone(buffer[:last]), buffer[last:]
    }
    return buffer, []byte{}

}

func fullSequenceCut(buffer []byte) ([]byte, []byte, []byte) {
    before, buffer := firstSequenceCut(buffer)

    if len(buffer) == 0 {
        return before, []byte{}, []byte{}
    }

    buffer, after := lastSequenceCut(buffer)
    return before, buffer, after
}

func Concatenate[S ~[]E, E any](s1, s2 S) S {
    if len(s1) > 0 {
        if len(s2) > 0 {
            return append(s1[:len(s1):len(s1)], s2...)
        }
        return s1
    }
    return s2
}

type FastxChunk struct {
    Bytes []byte
    index int
}

func FastaChunkReader(r io.Reader, size int, cutHead bool) (chan FastxChunk, error) {
    out := make(chan FastxChunk)
    buff := make([]byte, size)

    n, err := r.Read(buff)
    if n > 0 && err == nil {
        if n < size {
            buff = buff[:n]
        }

        begin, buff := firstSequenceCut(buff)

        if len(begin) > 0 && !cutHead {
            return out, fmt.Errorf("begin is not empty : %s", string(begin))
        }

        go func(buff []byte) {
            idx := 0
            end := []byte{}

            for err == nil && n > 0 {
                // fmt.Println("============end=========================")
                // fmt.Println(string(end))
                // fmt.Println("------------buff------------------------")
                // fmt.Println(string(buff))
                buff = Concatenate(end, buff)
                // fmt.Println("------------buff--pasted----------------")
                // fmt.Println(string(buff))
                buff, end = lastSequenceCut(buff)
                // fmt.Println("----------------buff--cutted------------")
                // fmt.Println(string(buff))
                // fmt.Println("------------------end-------------------")
                // fmt.Println(string(end))
                // fmt.Println("========================================")
                if len(buff) > 0 {
                    out <- FastxChunk{
                        Bytes: bytes.Clone(buff),
                        index: idx,
                    }
                    idx++
                }

                buff = slices.Grow(buff[:0], size)[0:size]
                n, err = r.Read(buff)
                if n < size {
                    buff = buff[:n]
                }
                // fmt.Printf("n = %d, err = %v\n", n, err)
            }

            if len(end) > 0 {
                out <- FastxChunk{
                    Bytes: bytes.Clone(end),
                    index: idx,
                }
            }

            close(out)
        }(buff)
    }

    return out, nil
}

func ParseFastaChunk(source string, ch FastxChunk) *obiiter.BioSequenceBatch {
    slice := make(obiseq.BioSequenceSlice, 0, obioptions.CLIBatchSize())

    state := 0
    start := 0
    current := 0
    var identifier string
    var definition string

    for i := 0; i < len(ch.Bytes); i++ {
        C := ch.Bytes[i]
        is_end_of_line := C == '\r' || C == '\n'
        is_space := C == ' ' || C == '\t'
        is_sep := is_space || is_end_of_line

        switch state {
        case 0:
            if C == '>' {
                // Beginning of sequence
                state = 1
            }
        case 1:
            if is_sep {
                // No identifier -> ERROR
                return nil
            } else {
                // Beginning of identifier
                state = 2
                start = i
            }
        case 2:
            if is_sep {
                // End of identifier
                identifier = string(ch.Bytes[start:i])
                state = 3
            }
        case 3:
            if is_end_of_line {
                // Definition empty
                definition = ""
                state = 5
            } else if !is_space {
                // Beginning of definition
                start = i
                state = 4
            }
        case 4:
            if is_end_of_line {
                definition = string(ch.Bytes[start:i])
                state = 5
            }
        case 5:
            if !is_end_of_line {
                // Beginning of sequence
                start = i
                current = i + 1
                state = 6
            }
        case 6:
            if C == '>' {
                // End of sequence
                s := obiseq.NewBioSequence(identifier, bytes.Clone(ch.Bytes[start:current]), definition)
                s.SetSource(source)
                slice = append(slice, s)
                state = 1
            } else if !is_sep {
                if C >= 'A' && C <= 'Z' {
                    C = C + 'a' - 'A'
                }
                // Removing white space from the sequence
                if (C >= 'a' && C <= 'z') || C == '-' || C == '.' {
                    ch.Bytes[current] = C
                    current++
                }
            }
        }
    }

    slice = append(slice, obiseq.NewBioSequence(identifier, bytes.Clone(ch.Bytes[start:current]), definition))
    batch := obiiter.MakeBioSequenceBatch(ch.index, slice)
    return &batch
}

func ReadFasta(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
    opt := MakeOptions(options)
    out := obiiter.MakeIBioSequence()

    source := opt.Source()

    nworker := obioptions.CLIReadParallelWorkers()
    out.Add(nworker)

    chkchan, err := FastaChunkReader(reader, 1024*500, false)

    if err != nil {
        return obiiter.NilIBioSequence, err
    }

    go func() {
        out.WaitAndClose()
    }()

    parser := func() {
        defer out.Done()
        for chk := range chkchan {
            seqs := ParseFastaChunk(source, chk)
            if seqs != nil {
                out.Push(*seqs)
            }
        }
    }

    for i := 0; i < nworker; i++ {
        go parser()
    }

    newIter := out.SortBatches().Rebatch(opt.BatchSize())

    log.Debugln("Full file batch mode : ", opt.FullFileBatch())
    if opt.FullFileBatch() {
        newIter = newIter.CompleteFileIterator()
    }

    annotParser := opt.ParseFastSeqHeader()

    if annotParser != nil {
        return IParseFastSeqHeaderBatch(newIter, options...), nil
    }

    return newIter, nil
}

func ReadFastaFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
    options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))

    file, err := Ropen(filename)

    if err != nil {
        return obiiter.NilIBioSequence, err
    }

    return ReadFasta(file, options...)
}

func ReadFastaFromStdin(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, error) {
    options = append(options, OptionsSource(obiutils.RemoveAllExt("stdin")))
    input, err := Buf(os.Stdin)

    if err != nil {
        log.Fatalf("open file error: %v", err)
        return obiiter.NilIBioSequence, err
    }

    return ReadFasta(input, options...)
}
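The chunk reader above splits the stream on sequence boundaries and the parser turns each chunk into a batch. A minimal sketch of how the two exported entry points compose (not part of the commit; the batch and sequence accessors Slice, Id and Len are assumptions about the obiiter and obiseq APIs):

package main

import (
    "fmt"
    "strings"

    "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
)

func main() {
    data := ">seq1 first sequence\nACGT\nACGT\n>seq2\nGGTTAA\n"

    // Cut the stream into chunks that always start on a '>' header line.
    chunks, err := obiformats.FastaChunkReader(strings.NewReader(data), 1024, false)
    if err != nil {
        panic(err)
    }

    // Parse every chunk into a batch of BioSequence objects.
    for chunk := range chunks {
        batch := obiformats.ParseFastaChunk("example", chunk)
        if batch == nil {
            continue // a chunk without a valid header is rejected
        }
        for _, seq := range batch.Slice() {
            fmt.Println(seq.Id(), seq.Len())
        }
    }
}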
@@ -124,7 +124,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
 
 	log.Debugln("Full file batch mode : ", opt.FullFileBatch())
 	if opt.FullFileBatch() {
-		newIter = newIter.FullFileIterator()
+		newIter = newIter.CompleteFileIterator()
 	}
 
 	parser := opt.ParseFastSeqHeader()
@@ -155,7 +155,7 @@ func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
 
 	log.Debugln("Full file batch mode : ", opt.FullFileBatch())
 	if opt.FullFileBatch() {
-		newIter = newIter.FullFileIterator()
+		newIter = newIter.CompleteFileIterator()
 	}
 
 	parser := opt.ParseFastSeqHeader()
@@ -165,7 +165,7 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
 	go _ReadFlatFileChunk(reader, entry_channel)
 
 	if opt.pointer.full_file_batch {
-		newIter = newIter.FullFileIterator()
+		newIter = newIter.CompleteFileIterator()
 	}
 
 	return newIter
@@ -1,6 +1,7 @@
 package obiformats
 
 import (
+	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
 	"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
 )
 
@@ -43,8 +44,8 @@ func MakeOptions(setters []WithOption) Options {
 		with_progress_bar: false,
 		buffer_size:       2,
 		quality_shift:     33,
-		parallel_workers:  4,
-		batch_size:        5000,
+		parallel_workers:  obioptions.CLIReadParallelWorkers(),
+		batch_size:        obioptions.CLIBatchSize(),
 		full_file_batch:   false,
 		closefile:         false,
 		appendfile:        false,
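With this change the option defaults are no longer hard coded; they follow the command-line settings. A small sketch of the effect (not part of the commit; written as if inside the obiformats package, defaultsFollowCLI is a hypothetical helper, and the BatchSize and FullFileBatch accessors are those used elsewhere in this commit):

package obiformats

import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"

// defaultsFollowCLI illustrates that an Options value built without explicit
// setters now tracks the CLI configuration instead of the former constants.
func defaultsFollowCLI() bool {
    opt := MakeOptions(nil) // no explicit setters
    // batch_size defaults to the CLI value instead of the former constant 5000
    return opt.BatchSize() == obioptions.CLIBatchSize() && !opt.FullFileBatch()
}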
@@ -4,12 +4,10 @@ import (
 	"bufio"
 	"bytes"
 	"io"
-	"os"
 	"path"
 	"regexp"
 
 	"github.com/gabriel-vasile/mimetype"
-	gzip "github.com/klauspost/pgzip"
 
 	log "github.com/sirupsen/logrus"
 
@@ -91,6 +89,36 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
 	return mimeType, newReader, nil
 }
 
+// func ReadSequences(reader io.Reader,
+// 	options ...WithOption) (obiiter.IBioSequence, error) {
+
+// 	mime, reader, err := OBIMimeTypeGuesser(reader)
+
+// 	if err != nil {
+// 		return obiiter.NilIBioSequence, err
+// 	}
+
+// 	reader = bufio.NewReader(reader)
+
+// 	switch mime.String() {
+// 	case "text/fasta", "text/fastq":
+// 		file.Close()
+// 		is, err := ReadFastSeqFromFile(filename, options...)
+// 		return is, err
+// 	case "text/ecopcr2":
+// 		return ReadEcoPCR(reader, options...), nil
+// 	case "text/embl":
+// 		return ReadEMBL(reader, options...), nil
+// 	case "text/genbank":
+// 		return ReadGenbank(reader, options...), nil
+// 	default:
+// 		log.Fatalf("File %s has guessed format %s which is not yet implemented",
+// 			filename, mime.String())
+// 	}
+
+// 	return obiiter.NilIBioSequence, nil
+// }
+
 // ReadSequencesFromFile reads sequences from a file and returns an iterator of bio sequences and an error.
 //
 // Parameters:
@@ -102,32 +130,20 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
 // - error: An error if any occurred during the reading process.
 func ReadSequencesFromFile(filename string,
 	options ...WithOption) (obiiter.IBioSequence, error) {
-	var file *os.File
+	var file *Reader
 	var reader io.Reader
-	var greader io.Reader
 	var err error
 
 	options = append(options, OptionsSource(obiutils.RemoveAllExt((path.Base(filename)))))
 
-	file, err = os.Open(filename)
+	file, err = Ropen(filename)
 
 	if err != nil {
 		log.Fatalf("open file error: %v", err)
 		return obiiter.NilIBioSequence, err
 	}
 
-	reader = file
-
-	// Test if the flux is compressed by gzip
-	greader, err = gzip.NewReader(reader)
-	if err != nil {
-		file.Seek(0, 0)
-	} else {
-		log.Debugf("File %s is gz compressed ", filename)
-		reader = greader
-	}
-
-	mime, reader, err := OBIMimeTypeGuesser(reader)
+	mime, reader, err := OBIMimeTypeGuesser(file)
 
 	if err != nil {
 		return obiiter.NilIBioSequence, err
@@ -136,10 +152,12 @@ func ReadSequencesFromFile(filename string,
 	reader = bufio.NewReader(reader)
 
 	switch mime.String() {
-	case "text/fasta", "text/fastq":
+	case "text/fastq":
 		file.Close()
 		is, err := ReadFastSeqFromFile(filename, options...)
 		return is, err
+	case "text/fasta":
+		return ReadFasta(reader, options...)
 	case "text/ecopcr2":
 		return ReadEcoPCR(reader, options...), nil
 	case "text/embl":
@@ -153,3 +171,9 @@ func ReadSequencesFromFile(filename string,
 
 	return obiiter.NilIBioSequence, nil
 }
+
+// func ReadSequencesFromStdin(options ...WithOption) obiiter.IBioSequence {
+
+// 	options = append(options, OptionsSource("stdin"))
+
+// }
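After these changes ReadSequencesFromFile no longer opens the file itself or probes for gzip; Ropen handles compression and OBIMimeTypeGuesser picks the parser. A usage sketch (not part of the commit; the iterator accessors Next, Get, Order and Slice are assumptions about the obiiter API, and the file name is a placeholder):

package main

import (
    "fmt"
    "log"

    "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
)

func main() {
    // The file may be plain or compressed (.gz, .xz, .zst, .bz2); the format is guessed.
    iter, err := obiformats.ReadSequencesFromFile("sample.fasta.gz")
    if err != nil {
        log.Fatalf("read error: %v", err)
    }
    for iter.Next() {
        batch := iter.Get()
        fmt.Println("batch", batch.Order(), "holds", len(batch.Slice()), "sequences")
    }
}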
pkg/obiformats/xopen.go (new file, 437 lines)
@@ -0,0 +1,437 @@
// This is an integration of the xopen package originally written by Brent Pedersen
// (https://github.com/brentp/xopen).
//
// Here it can be considered as a fork of the version by [Wei Shen](http://shenwei.me):
//
// https://github.com/shenwei356/xopen
//
// Package xopen makes it easy to get buffered readers and writers.
// Ropen opens a (possibly gzipped) file/process/http site for buffered reading.
// Wopen opens a (possibly gzipped) file for buffered writing.
// Both will use gzip when appropriate and will use buffered IO.
package obiformats

import (
    "bufio"
    "errors"
    "fmt"
    "io"
    "net/http"
    "os"
    "os/exec"
    "os/user"
    "path/filepath"
    "strings"

    "github.com/dsnet/compress/bzip2"
    "github.com/klauspost/compress/zstd"
    gzip "github.com/klauspost/pgzip"
    "github.com/ulikunitz/xz"
)

// Level is the default compression level of gzip.
// This value will be automatically adjusted to the default value of zstd or bzip2.
var Level = gzip.DefaultCompression

// ErrNoContent means nothing in the stream/file.
var ErrNoContent = errors.New("xopen: no content")

// ErrDirNotSupported means the path is a directory.
var ErrDirNotSupported = errors.New("xopen: input is a directory")

// IsGzip returns true if the buffered Reader has the gzip magic.
func IsGzip(b *bufio.Reader) (bool, error) {
    return CheckBytes(b, []byte{0x1f, 0x8b})
}

// IsXz returns true if the buffered Reader has the xz magic.
func IsXz(b *bufio.Reader) (bool, error) {
    return CheckBytes(b, []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00})
}

// IsZst returns true if the buffered Reader has the zstd magic.
func IsZst(b *bufio.Reader) (bool, error) {
    return CheckBytes(b, []byte{0x28, 0xB5, 0x2f, 0xfd})
}

// IsBzip2 returns true if the buffered Reader has the bzip2 magic.
func IsBzip2(b *bufio.Reader) (bool, error) {
    return CheckBytes(b, []byte{0x42, 0x5a, 0x68})
}

// IsStdin checks if we are getting data from stdin.
func IsStdin() bool {
    // http://stackoverflow.com/a/26567513
    stat, err := os.Stdin.Stat()
    if err != nil {
        return false
    }
    return (stat.Mode() & os.ModeCharDevice) == 0
}

// ExpandUser expands ~/path and ~otheruser/path appropriately.
func ExpandUser(path string) (string, error) {
    if len(path) == 0 || path[0] != '~' {
        return path, nil
    }
    var u *user.User
    var err error
    if len(path) == 1 || path[1] == '/' {
        u, err = user.Current()
    } else {
        name := strings.Split(path[1:], "/")[0]
        u, err = user.Lookup(name)
    }
    if err != nil {
        return "", err
    }
    home := u.HomeDir
    path = home + "/" + path[1:]
    return path, nil
}

// Exists checks if a local file exists.
func Exists(path string) bool {
    path, perr := ExpandUser(path)
    if perr != nil {
        return false
    }
    _, err := os.Stat(path)
    return err == nil
}

// CheckBytes peeks at a buffered stream and checks if the first read bytes match.
func CheckBytes(b *bufio.Reader, buf []byte) (bool, error) {

    m, err := b.Peek(len(buf))
    if err != nil {
        // return false, ErrNoContent
        return false, err // EOF
    }
    for i := range buf {
        if m[i] != buf[i] {
            return false, nil
        }
    }
    return true, nil
}

// Reader is returned by Ropen.
type Reader struct {
    *bufio.Reader
    rdr io.Reader
    gz  io.ReadCloser
}

// Close the associated files.
func (r *Reader) Close() error {
    var err error
    if r.gz != nil {
        err = r.gz.Close()
        if err != nil {
            return err
        }
    }
    if c, ok := r.rdr.(io.ReadCloser); ok {
        err = c.Close()
        if err != nil {
            return err
        }
    }
    return nil
}

// Writer is returned by Wopen.
type Writer struct {
    *bufio.Writer
    wtr *os.File
    gz  *gzip.Writer
    xw  *xz.Writer
    zw  *zstd.Encoder
    bz2 *bzip2.Writer
}

// Close the associated files.
func (w *Writer) Close() error {
    var err error
    err = w.Flush()
    if err != nil {
        return err
    }

    if w.gz != nil {
        err = w.gz.Close()
        if err != nil {
            return err
        }
    }
    if w.xw != nil {
        err = w.xw.Close()
        if err != nil {
            return err
        }
    }
    if w.zw != nil {
        err = w.zw.Close()
        if err != nil {
            return err
        }
    }
    if w.bz2 != nil {
        err = w.bz2.Close()
        if err != nil {
            return err
        }
    }
    return w.wtr.Close()
}

// Flush the writer.
func (w *Writer) Flush() error {
    var err error
    err = w.Writer.Flush()
    if err != nil {
        return err
    }

    if w.gz != nil {
        err = w.gz.Flush()
        if err != nil {
            return err
        }
    }
    if w.zw != nil {
        err = w.zw.Flush()
        if err != nil {
            return err
        }
    }
    return nil
}

var bufSize = 65536

// Buf returns a buffered reader from an io.Reader.
// If f == "-", then it will attempt to read from os.Stdin.
// If the file is gzipped, it will be read as such.
func Buf(r io.Reader) (*Reader, error) {
    b := bufio.NewReaderSize(r, bufSize)
    var rd io.Reader
    var rdr io.ReadCloser

    if is, err := IsGzip(b); err != nil {
        // check BOM
        t, _, err := b.ReadRune() // no content
        if err != nil {
            return nil, ErrNoContent
        }
        if t != '\uFEFF' {
            b.UnreadRune()
        }
        return &Reader{b, r, rdr}, nil // non-gzip file with content less than 2 bytes
    } else if is {
        rdr, err = gzip.NewReader(b)
        if err != nil {
            return nil, err
        }
        b = bufio.NewReaderSize(rdr, bufSize)
    } else if is, err := IsZst(b); err != nil {
        // check BOM
        t, _, err := b.ReadRune() // no content
        if err != nil {
            return nil, ErrNoContent
        }
        if t != '\uFEFF' {
            b.UnreadRune()
        }
        return &Reader{b, r, rdr}, nil // non-gzip/zst file with content less than 4 bytes
    } else if is {
        rd, err = zstd.NewReader(b)
        if err != nil {
            return nil, err
        }
        b = bufio.NewReaderSize(rd, bufSize)
    } else if is, err := IsXz(b); err != nil {
        // check BOM
        t, _, err := b.ReadRune() // no content
        if err != nil {
            return nil, ErrNoContent
        }
        if t != '\uFEFF' {
            b.UnreadRune()
        }
        return &Reader{b, r, rdr}, nil // non-gzip/zst/xz file with content less than 6 bytes
    } else if is {
        rd, err = xz.NewReader(b)
        if err != nil {
            return nil, err
        }
        b = bufio.NewReaderSize(rd, bufSize)
    } else if is, err := IsBzip2(b); err != nil {
        // check BOM
        t, _, err := b.ReadRune() // no content
        if err != nil {
            return nil, ErrNoContent
        }
        if t != '\uFEFF' {
            b.UnreadRune()
        }
        return &Reader{b, r, rdr}, nil // non-gzip/zst/xz file with content less than 6 bytes
    } else if is {
        rd, err = bzip2.NewReader(b, &bzip2.ReaderConfig{})
        if err != nil {
            return nil, err
        }
        b = bufio.NewReaderSize(rd, bufSize)
    }

    // other files with content >= 6 bytes

    // check BOM
    t, _, err := b.ReadRune()
    if err != nil {
        return nil, ErrNoContent
    }
    if t != '\uFEFF' {
        b.UnreadRune()
    }
    return &Reader{b, r, rdr}, nil
}

// XReader returns a reader from a url string or a file.
func XReader(f string) (io.Reader, error) {
    if strings.HasPrefix(f, "http://") || strings.HasPrefix(f, "https://") {
        var rsp *http.Response
        rsp, err := http.Get(f)
        if err != nil {
            return nil, err
        }
        if rsp.StatusCode != 200 {
            return nil, fmt.Errorf("http error downloading %s. status: %s", f, rsp.Status)
        }
        rdr := rsp.Body
        return rdr, nil
    }
    f, err := ExpandUser(f)
    if err != nil {
        return nil, err
    }

    fi, err := os.Stat(f)
    if err != nil {
        return nil, err
    }
    if fi.IsDir() {
        return nil, ErrDirNotSupported
    }

    return os.Open(f)
}

// Ropen opens a buffered reader.
func Ropen(f string) (*Reader, error) {
    var err error
    var rdr io.Reader
    if f == "-" {
        if !IsStdin() {
            return nil, errors.New("stdin not detected")
        }
        b, err := Buf(os.Stdin)
        return b, err
    } else if f[0] == '|' {
        // TODO: use csv to handle quoted file names.
        cmdStrs := strings.Split(f[1:], " ")
        var cmd *exec.Cmd
        if len(cmdStrs) == 2 {
            cmd = exec.Command(cmdStrs[0], cmdStrs[1:]...)
        } else {
            cmd = exec.Command(cmdStrs[0])
        }
        rdr, err = cmd.StdoutPipe()
        if err != nil {
            return nil, err
        }
        err = cmd.Start()
        if err != nil {
            return nil, err
        }
    } else {
        rdr, err = XReader(f)
    }
    if err != nil {
        return nil, err
    }
    b, err := Buf(rdr)
    return b, err
}

// Wopen opens a buffered writer.
// If f == "-", then stdout will be used.
// If f ends with ".gz", then the output will be gzipped.
// If f ends with ".xz", then the output will be xz-compressed.
// If f ends with ".zst", then the output will be zstd-compressed.
// If f ends with ".bz2", then the output will be bzip2-compressed.
func Wopen(f string) (*Writer, error) {
    return WopenFile(f, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
}

// WopenFile opens a buffered writer.
// If f == "-", then stdout will be used.
// If f ends with ".gz", then the output will be gzipped.
// If f ends with ".xz", then the output will be xz-compressed.
// If f ends with ".bz2", then the output will be bzip2-compressed.
func WopenFile(f string, flag int, perm os.FileMode) (*Writer, error) {
    var wtr *os.File
    if f == "-" {
        wtr = os.Stdout
    } else {
        dir := filepath.Dir(f)
        fi, err := os.Stat(dir)
        if err == nil && !fi.IsDir() {
            return nil, fmt.Errorf("can not write file into a non-directory path: %s", dir)
        }
        if os.IsNotExist(err) {
            os.MkdirAll(dir, 0755)
        }
        wtr, err = os.OpenFile(f, flag, perm)
        if err != nil {
            return nil, err
        }
    }

    f2 := strings.ToLower(f)
    if strings.HasSuffix(f2, ".gz") {
        gz, err := gzip.NewWriterLevel(wtr, Level)
        if err != nil {
            err = errors.New(fmt.Sprintf("xopen: %s", err))
        }
        return &Writer{bufio.NewWriterSize(gz, bufSize), wtr, gz, nil, nil, nil}, err
    }
    if strings.HasSuffix(f2, ".xz") {
        xw, err := xz.NewWriter(wtr)
        return &Writer{bufio.NewWriterSize(xw, bufSize), wtr, nil, xw, nil, nil}, err
    }
    if strings.HasSuffix(f2, ".zst") {
        level := Level
        if level == gzip.DefaultCompression {
            level = 2
        }
        zw, err := zstd.NewWriter(wtr, zstd.WithEncoderLevel(zstd.EncoderLevel(level)))
        if err != nil {
            err = errors.New(fmt.Sprintf("xopen: zstd: %s", err))
        }
        return &Writer{bufio.NewWriterSize(zw, bufSize), wtr, nil, nil, zw, nil}, err
    }
    if strings.HasSuffix(f2, ".bz2") {
        level := Level
        if level == gzip.DefaultCompression {
            level = 6
        }
        bz2, err := bzip2.NewWriter(wtr, &bzip2.WriterConfig{Level: level})
        if err != nil {
            err = errors.New(fmt.Sprintf("xopen: %s", err))
        }
        return &Writer{bufio.NewWriterSize(bz2, bufSize), wtr, nil, nil, nil, bz2}, err
    }
    return &Writer{bufio.NewWriterSize(wtr, bufSize), wtr, nil, nil, nil, nil}, nil
}
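The Reader and Writer types above wrap compression transparently: the input codec is detected from magic bytes, the output codec from the file suffix. A sketch of a round trip using only the helpers defined in this file (not part of the commit; the file names are placeholders):

package main

import (
    "io"
    "log"

    "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
)

func main() {
    in, err := obiformats.Ropen("input.fasta.gz") // gzip, xz, zstd or bzip2 detected from magic bytes
    if err != nil {
        log.Fatalf("open input: %v", err)
    }
    defer in.Close()

    out, err := obiformats.Wopen("copy.fasta.zst") // ".zst" suffix selects zstd output
    if err != nil {
        log.Fatalf("open output: %v", err)
    }
    defer out.Close() // Close flushes the buffered and compressed streams

    if _, err := io.Copy(out, in); err != nil {
        log.Fatalf("copy: %v", err)
    }
}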
pkg/obiformats/xopen_test.go (new file, 148 lines)
@@ -0,0 +1,148 @@
package obiformats

import (
    "bufio"
    "bytes"
    "compress/gzip"
    "fmt"
    "io"
    "os"
    "strings"
    "testing"

    . "gopkg.in/check.v1"
)

func Test(t *testing.T) { TestingT(t) }

type XopenTest struct{}

var _ = Suite(&XopenTest{})

func gzFromString(s string) string {
    var c bytes.Buffer
    gz := gzip.NewWriter(&c)
    gz.Write([]byte(s))
    return c.String()
}

var gzTests = []struct {
    isGz bool
    data string
}{
    {false, "asdf"},
    {true, gzFromString("asdf")},
}

func (s *XopenTest) TestIsGzip(c *C) {
    for _, t := range gzTests {
        isGz, err := IsGzip(bufio.NewReader(strings.NewReader(t.data)))
        c.Assert(err, IsNil)
        c.Assert(t.isGz, Equals, isGz)
    }
}

func (s *XopenTest) TestIsStdin(c *C) {
    r := IsStdin()
    c.Assert(r, Equals, false)
}

func (s *XopenTest) TestRopen(c *C) {
    rdr, err := Ropen("-")
    c.Assert(err, ErrorMatches, "stdin not detected")
    c.Assert(rdr, IsNil)
}

func (s *XopenTest) TestWopen(c *C) {
    for _, f := range []string{"t.gz", "t"} {
        testString := "ASDF1234"
        wtr, err := Wopen(f)
        c.Assert(err, IsNil)
        _, err = os.Stat(f)
        c.Assert(err, IsNil)
        c.Assert(wtr.wtr, NotNil)
        fmt.Fprintf(wtr, testString)
        wtr.Close()

        rdr, err := Ropen(f)
        c.Assert(err, IsNil)

        str, err := rdr.ReadString(99)
        c.Assert(str, Equals, testString)
        c.Assert(err, Equals, io.EOF)
        str, err = rdr.ReadString(99)
        c.Assert(str, Equals, "")

        rdr.Close()
        os.Remove(f)
    }
}

var httpTests = []struct {
    url         string
    expectError bool
}{
    {"https://raw.githubusercontent.com/brentp/xopen/master/README.md", false},
    {"http://raw.githubusercontent.com/brentp/xopen/master/README.md", false},
    {"http://raw.githubusercontent.com/brentp/xopen/master/BAD.md", true},
}

func (s *XopenTest) TestReadHttp(c *C) {
    for _, t := range httpTests {
        rdr, err := Ropen(t.url)
        if !t.expectError {
            c.Assert(err, IsNil)
            v, err := rdr.ReadString(byte('\n'))
            c.Assert(err, IsNil)
            c.Assert(len(v), Not(Equals), 0)
        } else {
            c.Assert(err, ErrorMatches, ".* 404 Not Found")
        }
    }
}

func (s *XopenTest) TestReadProcess(c *C) {
    for _, cmd := range []string{"|ls -lh", "|ls", "|ls -lh xopen_test.go"} {
        rdr, err := Ropen(cmd)
        c.Assert(err, IsNil)
        b := make([]byte, 1000)
        _, err = rdr.Read(b)
        if err != io.EOF {
            c.Assert(err, IsNil)
        }
        lines := strings.Split(string(b), "\n")
        has := false
        for _, line := range lines {
            if strings.Contains(line, "xopen_test.go") {
                has = true
            }
        }
        c.Assert(has, Equals, true)
    }
}

func (s *XopenTest) TestOpenStdout(c *C) {
    w, err := Wopen("-")
    c.Assert(err, IsNil)
    c.Assert(w.wtr, Equals, os.Stdout)
}

func (s *XopenTest) TestOpenBadFile(c *C) {
    r, err := Ropen("XXXXXXXXXXXXXXXXXXXXXXX")
    c.Assert(r, IsNil)
    c.Assert(err, ErrorMatches, ".*no such file.*")
}

func (s *XopenTest) TestExists(c *C) {
    c.Assert(Exists("xopen.go"), Equals, true)
    c.Assert(Exists("____xx"), Equals, false)
}

func (s *XopenTest) TestUser(c *C) {
    c.Assert(Exists("~"), Equals, true)
}

func (s *XopenTest) TestExpand(c *C) {
    _, err := ExpandUser("~baduser66")
    c.Assert(err, Not(IsNil))
}
@@ -687,7 +687,14 @@ func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
 	return chunck
 }
 
-func (iterator IBioSequence) FullFileIterator() IBioSequence {
+// CompleteFileIterator generates a new iterator for reading a complete file.
+//
+// This iterator reads all the remaining sequences in the file, and returns them as a
+// single obiseq.BioSequenceSlice.
+//
+// The function takes no parameters.
+// It returns an IBioSequence object.
+func (iterator IBioSequence) CompleteFileIterator() IBioSequence {
 
 	newIter := MakeIBioSequence()
 	log.Debug("Stream is read in full file mode")
@@ -716,7 +723,6 @@ func (iterator IBioSequence) FullFileIterator() IBioSequence {
 
 // It takes a slice of BioSequence objects, and returns an iterator that will return batches of
 // BioSequence objects
-
 func IBatchOver(data obiseq.BioSequenceSlice,
 	size int, sizes ...int) IBioSequence {
 
@ -203,77 +203,135 @@ func (s *BioSequence) Len() int {
|
|||||||
return len(s.sequence)
|
return len(s.sequence)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checking if the BioSequence has quality scores.
|
// HasQualities checks if the BioSequence has sequence qualitiy scores.
|
||||||
|
//
|
||||||
|
// This function does not have any parameters.
|
||||||
|
// It returns a boolean value indicating whether the BioSequence has qualities.
|
||||||
func (s *BioSequence) HasQualities() bool {
|
func (s *BioSequence) HasQualities() bool {
|
||||||
return len(s.qualities) > 0
|
return len(s.qualities) > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returning the qualities of the sequence.
|
// Qualities returns the sequence quality scores of the BioSequence.
|
||||||
|
//
|
||||||
|
// It checks if the BioSequence has qualities. If it does, it returns the qualities
|
||||||
|
// stored in the BioSequence struct. Otherwise, it creates and returns default
|
||||||
|
// qualities based on the length of the sequence.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - Quality: The quality of the BioSequence.
|
||||||
func (s *BioSequence) Qualities() Quality {
|
func (s *BioSequence) Qualities() Quality {
|
||||||
if s.HasQualities() {
|
if s.HasQualities() {
|
||||||
return s.qualities
|
return s.qualities
|
||||||
} else {
|
}
|
||||||
return __make_default_qualities__(len(s.sequence))
|
return __make_default_qualities__(len(s.sequence))
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
// Features returns the feature string of the BioSequence.
|
||||||
|
//
|
||||||
|
// The feature string contains the EMBL/GenBank not parsed feature table
|
||||||
|
//
|
||||||
|
// as extracted from the flat file.
|
||||||
|
//
|
||||||
|
// No parameters.
|
||||||
|
// Returns a string.
|
||||||
func (s *BioSequence) Features() string {
|
func (s *BioSequence) Features() string {
|
||||||
return string(s.feature)
|
return string(s.feature)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checking if the BioSequence has annotations.
|
// HasAnnotation checks if the BioSequence has any annotations.
|
||||||
|
//
|
||||||
|
// It does not take any parameters.
|
||||||
|
// It returns a boolean value indicating whether the BioSequence has any annotations.
|
||||||
func (s *BioSequence) HasAnnotation() bool {
|
func (s *BioSequence) HasAnnotation() bool {
|
||||||
return len(s.annotations) > 0
|
return len(s.annotations) > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returning the annotations of the BioSequence.
|
// Annotations returns the Annotation object associated with the BioSequence.
|
||||||
|
//
|
||||||
|
// This function does not take any parameters.
|
||||||
|
// It returns an Annotation object.
|
||||||
func (s *BioSequence) Annotations() Annotation {
|
func (s *BioSequence) Annotations() Annotation {
	if s.annotations == nil {
		s.annotations = GetAnnotation()
	}

	return s.annotations
}

// AnnotationsLock locks the annotation of the BioSequence.
//
// This function acquires a lock on the annotation of the BioSequence,
// preventing concurrent access to it.
func (s *BioSequence) AnnotationsLock() {
	s.annot_lock.Lock()
}

// AnnotationsUnlock unlocks the annotations mutex in the BioSequence struct.
//
// No parameters.
// No return types.
func (s *BioSequence) AnnotationsUnlock() {
	s.annot_lock.Unlock()
}

// HasSource checks if the BioSequence has a source.
//
// The source is the filename without directory name and extension from where the sequence was read.
//
// No parameters.
// Returns a boolean value indicating whether the BioSequence has a source or not.
func (s *BioSequence) HasSource() bool {
	return len(s.source) > 0
}

// Source returns the source of the BioSequence.
//
// The source is the filename without directory name and extension from where the sequence was read.
//
// This function does not take any parameters.
// It returns a string.
func (s *BioSequence) Source() string {
	return s.source
}

// MD5 calculates the MD5 hash of the BioSequence.
//
// No parameters.
// Returns [16]byte, the MD5 hash of the BioSequence.
func (s *BioSequence) MD5() [16]byte {
	return md5.Sum(s.sequence)
}

// SetId sets the id of the BioSequence.
//
// Parameters:
// - id: the new id for the BioSequence.
//
// No return value.
func (s *BioSequence) SetId(id string) {
	s.id = id
}

// SetDefinition sets the definition of the BioSequence.
//
// It takes a string parameter 'definition' and assigns it to the 'definition' field of the BioSequence struct.
func (s *BioSequence) SetDefinition(definition string) {
	s.definition = definition
}

// SetSource sets the source of the BioSequence.
//
// Parameter:
// - source: a string representing the filename without directory name and extension from where the sequence was read.
func (s *BioSequence) SetSource(source string) {
	s.source = source
}

// SetFeatures sets the feature of the BioSequence.
//
// Parameters:
// - feature: a byte slice representing the feature to be set.
//
// No return value.
func (s *BioSequence) SetFeatures(feature []byte) {
	if cap(s.feature) >= 300 {
		RecycleSlice(&s.feature)
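
// Illustrative sketch, not part of this commit: how the annotation accessors
// above are typically combined when several goroutines annotate the same
// sequence. Annotations(), AnnotationsLock() and AnnotationsUnlock() come from
// the methods shown above; the helper name setAnnotationSafely is made up, and
// the sketch assumes it is compiled inside the obiseq package.
func setAnnotationSafely(s *BioSequence, key string, value interface{}) {
	s.AnnotationsLock()
	defer s.AnnotationsUnlock()

	// Annotations() lazily allocates the annotation map on first use,
	// so the caller never has to check for nil before writing.
	s.Annotations()[key] = value
}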
@ -328,6 +328,225 @@ func TestBioSequence_Len(t *testing.T) {
	}
}

// TestHasQualities tests the HasQualities method of the BioSequence struct.
//
// It includes two test cases:
//
// 1. Test case 1: BioSequence with empty qualities slice
// - Creates a BioSequence instance with an empty qualities slice.
// - Expects false as the result of calling the HasQualities method on the BioSequence instance.
//
// 2. Test case 2: BioSequence with non-empty qualities slice
// - Creates a BioSequence instance with a non-empty qualities slice.
// - Expects true as the result of calling the HasQualities method on the BioSequence instance.
//
// No parameters are required.
// No return types are specified.
func TestHasQualities(t *testing.T) {
	// Test case 1: BioSequence with empty qualities slice
	seq1 := NewBioSequence("", []byte(""), "")
	seq1.qualities = []byte{}
	if seq1.HasQualities() != false {
		t.Errorf("Test case 1 failed: expected false, got true")
	}

	// Test case 2: BioSequence with non-empty qualities slice
	seq2 := NewBioSequence("", []byte(""), "")
	seq2.qualities = []byte{20, 30, 40}
	if seq2.HasQualities() != true {
		t.Errorf("Test case 2 failed: expected true, got false")
	}
}

// TestQualities tests the Qualities method of the BioSequence struct.
//
// It creates a BioSequence with a given sequence and qualities and sets them.
// Then it compares the returned qualities with the expected ones.
// If the qualities are not equal, it fails the test case.
//
// Test case 1: BioSequence has qualities
// - sequence: []byte("ATCG")
// - qualities: Quality{10, 20, 30, 40}
// - expected: Quality{10, 20, 30, 40}
//
// Test case 2: BioSequence does not have qualities
// - sequence: []byte("ATCG")
// - qualities: nil
// - expected: defaultQualities
//
// Parameters:
// - t: *testing.T - the testing struct for running test cases and reporting failures.
//
// Return type:
// None
func TestQualities(t *testing.T) {
	// Test case: BioSequence has qualities
	sequence := []byte("ATCG")
	qualities := Quality{10, 20, 30, 40}
	bioSeq := NewBioSequence("ABC123", sequence, "Test Sequence")
	bioSeq.SetQualities(qualities)

	result := bioSeq.Qualities()
	expected := qualities

	if !reflect.DeepEqual(result, expected) {
		t.Errorf("Test case failed: BioSequence has qualities")
	}

	// Test case: BioSequence does not have qualities
	defaultQualities := __make_default_qualities__(len(sequence))
	bioSeq = NewBioSequence("ABC123", sequence, "Test Sequence")
	bioSeq.SetQualities(nil)

	result = bioSeq.Qualities()
	expected = defaultQualities

	if !reflect.DeepEqual(result, expected) {
		t.Errorf("Test case failed: BioSequence does not have qualities")
	}
}

// TestBioSequence_Features tests the Features function of the BioSequence struct.
//
// It first tests the case when the feature string is empty. It creates a new BioSequence
// with an empty feature string and an empty byte slice. It expects an empty string as
// the result of calling the Features function on this BioSequence. If the result does
// not match the expected value, it prints an error message.
//
// It then tests the case when the feature string is non-empty. It creates a new BioSequence
// with an empty feature string and an empty byte slice. It sets the feature string to
// "test sequence" and expects "test sequence" as the result of calling the Features function
// on this BioSequence. If the result does not match the expected value, it prints an error message.
func TestBioSequence_Features(t *testing.T) {
	// Testing empty feature string
	seq := NewBioSequence("", []byte(""), "")
	expected := ""
	if got := seq.Features(); got != expected {
		t.Errorf("Expected %q, but got %q", expected, got)
	}

	// Testing non-empty feature string
	seq = NewBioSequence("", []byte(""), "")
	seq.feature = []byte("test sequence")
	expected = "test sequence"
	if got := seq.Features(); got != expected {
		t.Errorf("Expected %q, but got %q", expected, got)
	}
}

// TestHasAnnotation is a unit test function that tests the HasAnnotation method of the BioSequence struct.
//
// This function tests the behavior of the HasAnnotation method in different scenarios:
// - Test case: BioSequence with no annotations.
// - Test case: BioSequence with one annotation.
// - Test case: BioSequence with multiple annotations.
//
// The function verifies that the HasAnnotation method returns the expected boolean value for each test case.
// It uses the *testing.T parameter to report any test failures.
//
// No parameters.
// No return values.
func TestHasAnnotation(t *testing.T) {
	// Test case: BioSequence with no annotations
	seq := BioSequence{}
	expected := false
	if got := seq.HasAnnotation(); got != expected {
		t.Errorf("Expected %v, but got %v", expected, got)
	}

	// Test case: BioSequence with one annotation
	seq = BioSequence{annotations: map[string]interface{}{"annotation1": "value1"}}
	expected = true
	if got := seq.HasAnnotation(); got != expected {
		t.Errorf("Expected %v, but got %v", expected, got)
	}

	// Test case: BioSequence with multiple annotations
	seq = BioSequence{
		annotations: map[string]interface{}{
			"annotation1": "value1",
			"annotation2": "value2",
		},
	}
	expected = true
	if got := seq.HasAnnotation(); got != expected {
		t.Errorf("Expected %v, but got %v", expected, got)
	}
}

// TestBioSequenceAnnotations tests the Annotations method of the BioSequence struct.
//
// It verifies the behavior of the method when the `annotations` field of the BioSequence struct is nil and when it is not nil.
// The method should return the expected annotation values and fail the test if the returned annotations do not match the expected ones.
// The test cases cover both scenarios to ensure the correctness of the method.
func TestBioSequenceAnnotations(t *testing.T) {
	s := &BioSequence{}

	// Test case 1: Annotations is nil
	s.annotations = nil
	expected := GetAnnotation()
	actual := s.Annotations()
	if !reflect.DeepEqual(expected, actual) {
		t.Errorf("Test case 1 failed: Expected %v, but got %v", expected, actual)
	}

	// Test case 2: Annotations is not nil
	s.annotations = Annotation{}
	expected = s.annotations
	actual = s.Annotations()
	if !reflect.DeepEqual(expected, actual) {
		t.Errorf("Test case 2 failed: Expected %v, but got %v", expected, actual)
	}
}

func TestAnnotationsLock(t *testing.T) {
	// Test case 1: Lock the annotation of an empty BioSequence
	seq := NewEmptyBioSequence(0)
	seq.AnnotationsLock()

	// Test case 2: Lock the annotation of a BioSequence with existing annotations
	seq2 := NewEmptyBioSequence(0)
	seq2.annotations = map[string]interface{}{
		"key1": "value1",
		"key2": "value2",
	}
	seq2.AnnotationsLock()
}

// TestBioSequence_MD5 tests the MD5 function of the BioSequence struct.
//
// It includes two test cases: one for an empty sequence and one for a non-empty sequence.
// Each test case creates a BioSequence instance with a specific sequence and compares the MD5 result with the expected value.
// If the result does not match the expected value, an error is reported using the t.Errorf function.
// The expected MD5 values are hardcoded in the test cases.
func TestBioSequence_MD5(t *testing.T) {
	// Test case 1: Empty sequence
	{
		s := &BioSequence{sequence: []byte("")}
		expected := [16]byte{
			0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04,
			0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
		}
		result := s.MD5()
		if result != expected {
			t.Errorf("Test case 1 failed. Expected: %v, got: %v", expected, result)
		}
	}

	// Test case 2: Non-empty sequence
	{
		s := &BioSequence{sequence: []byte("ACGT")}
		expected := [16]byte{
			0xf1, 0xf8, 0xf4, 0xbf, 0x41, 0x3b, 0x16, 0xad,
			0x13, 0x57, 0x22, 0xaa, 0x45, 0x91, 0x04, 0x3e,
		}
		result := s.MD5()
		if result != expected {
			t.Errorf("Test case 2 failed. Expected: %v, got: %v", expected, result)
		}
	}
}
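
// Illustrative sketch, not part of this commit: the two [16]byte literals in
// TestBioSequence_MD5 above are simply md5.Sum of the raw sequence bytes, so
// they can be regenerated rather than typed by hand. The helper name
// md5Literal is made up; the sketch assumes it is compiled next to the tests
// with "crypto/md5" and "fmt" imported.
func md5Literal(data []byte) string {
	sum := md5.Sum(data) // same call the MD5 method wraps
	out := ""
	for _, b := range sum {
		out += fmt.Sprintf("0x%02x, ", b)
	}
	// e.g. md5Literal([]byte("")) starts with "0xd4, 0x1d, 0x8c, ...",
	// matching the literal used in test case 1 above.
	return out
}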

// TestBioSequence_Composition tests the Composition method of the BioSequence struct.
//
// It tests the method with three different test cases:
@ -1,8 +1,10 @@
package obiseq

import (
	"sync"

	log "github.com/sirupsen/logrus"

	"golang.org/x/exp/slices"
)

// BioSequenceSlice represents a collection or a set of BioSequence.
@ -18,7 +20,12 @@ var _BioSequenceSlicePool = sync.Pool{
	},
}

// NewBioSequenceSlice returns a new BioSequenceSlice with the specified size.
//
// The size parameter is optional. If provided, the returned slice will be
// resized accordingly.
//
// Returns a pointer to the newly created BioSequenceSlice.
func NewBioSequenceSlice(size ...int) *BioSequenceSlice {
	slice := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
	if len(size) > 0 {
@ -29,11 +36,23 @@ func NewBioSequenceSlice(size ...int) *BioSequenceSlice {
	return slice
}

// MakeBioSequenceSlice creates a new BioSequenceSlice with the specified size(s).
//
// Parameters:
// - size: The size(s) of the BioSequenceSlice to create (optional).
//
// Return:
// A new BioSequenceSlice with the specified size(s).
func MakeBioSequenceSlice(size ...int) BioSequenceSlice {
	return *NewBioSequenceSlice(size...)
}
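
// Illustrative sketch, not part of this commit: the intended lifecycle of a
// pooled slice. NewBioSequenceSlice borrows a slice from _BioSequenceSlicePool
// and the Recycle method defined below hands it back. The function name
// collectSequences is made up; the sketch assumes it is compiled inside the
// obiseq package.
func collectSequences(seqs ...*BioSequence) {
	batch := NewBioSequenceSlice() // borrow an empty slice from the pool
	defer batch.Recycle(false)     // return it, keeping the sequences themselves alive

	for _, seq := range seqs {
		batch.Push(seq)
	}

	// ... work with *batch here ...
	_ = batch.Size()
}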

// Recycle cleans up the BioSequenceSlice by recycling its elements and resetting its length.
//
// If including_seq is true, each element of the BioSequenceSlice is recycled using the Recycle method,
// and then set to nil. If including_seq is false, each element is simply set to nil.
//
// The function does not return anything.
func (s *BioSequenceSlice) Recycle(including_seq bool) {
	if s == nil {
		log.Panicln("Trying too recycle a nil pointer")
@ -56,8 +75,10 @@ func (s *BioSequenceSlice) Recycle(including_seq bool) {
	_BioSequenceSlicePool.Put(s)
}

// InsureCapacity ensures that the BioSequenceSlice has a minimum capacity
//
// It takes an integer `capacity` as a parameter, which represents the desired minimum capacity of the BioSequenceSlice.
// It returns a pointer to the BioSequenceSlice.
func (s *BioSequenceSlice) InsureCapacity(capacity int) *BioSequenceSlice {
	var c int
	if s != nil {
@ -66,36 +87,87 @@ func (s *BioSequenceSlice) InsureCapacity(capacity int) *BioSequenceSlice {
		c = 0
	}

	if c < capacity {
		*s = slices.Grow[BioSequenceSlice](*s, capacity-c)
	}

	return s
}
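
// Illustrative sketch, not part of this commit: InsureCapacity (above) is a
// pre-allocation hook, so a caller that knows how many sequences are coming
// can grow the backing array once up front instead of relying on repeated
// append growth. The function name preallocateBatch and its count parameter
// are made up; the sketch assumes it is compiled inside the obiseq package.
func preallocateBatch(count int) *BioSequenceSlice {
	batch := NewBioSequenceSlice()
	batch.InsureCapacity(count) // grows the backing array via slices.Grow when needed

	for i := 0; i < count; i++ {
		batch.Push(NewEmptyBioSequence(0))
	}

	return batch
}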

// Push appends a BioSequence to the BioSequenceSlice.
//
// It takes a pointer to a BioSequenceSlice and a BioSequence as parameters.
// It does not return anything.
func (s *BioSequenceSlice) Push(sequence *BioSequence) {
	*s = append(*s, sequence)
}

// Pop returns and removes the last element from the BioSequenceSlice.
//
// It does not take any parameters.
// It returns *BioSequence, the last element of the slice.
func (s *BioSequenceSlice) Pop() *BioSequence {
	// Get the length of the slice
	length := len(*s)

	// If the slice is empty, return nil
	if length == 0 {
		return nil
	}

	// Get the last element of the slice
	lastElement := (*s)[length-1]

	// Set the last element to nil
	(*s)[length-1] = nil

	// Remove the last element from the slice
	*s = (*s)[:length-1]

	// Return the last element
	return lastElement
}

// Pop0 returns and removes the first element of the BioSequenceSlice.
//
// It does not take any parameters.
// It returns a pointer to a BioSequence object.
func (s *BioSequenceSlice) Pop0() *BioSequence {
	if len(*s) == 0 {
		return nil
	}
	firstElement := (*s)[0]
	(*s)[0] = nil
	*s = (*s)[1:]
	return firstElement
}
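
// Illustrative sketch, not part of this commit: Push/Pop treat the slice as a
// LIFO stack, while Push/Pop0 treat it as a FIFO queue; both pops return nil
// on an empty slice rather than panicking. The function name drainInOrder is
// made up; the sketch assumes it is compiled inside the obiseq package and
// uses NotEmpty and Len from the methods defined below.
func drainInOrder(stack bool, seqs ...*BioSequence) []*BioSequence {
	s := NewBioSequenceSlice()
	for _, seq := range seqs {
		s.Push(seq)
	}

	out := make([]*BioSequence, 0, s.Len())
	for s.NotEmpty() {
		if stack {
			out = append(out, s.Pop()) // last in, first out
		} else {
			out = append(out, s.Pop0()) // first in, first out
		}
	}
	return out
}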

// NotEmpty checks if the BioSequenceSlice is not empty.
//
// No parameters.
// Returns a boolean value indicating if the BioSequenceSlice is not empty.
func (s BioSequenceSlice) NotEmpty() bool {
	return len(s) > 0
}

// Len returns the length of the BioSequenceSlice.
//
// It has no parameters.
// It returns an integer.
func (s BioSequenceSlice) Len() int {
	return len(s)
}

// Size returns the total size of the BioSequenceSlice.
//
// It calculates the size by iterating over each BioSequence in the slice
// and summing up their lengths.
//
// Returns an integer representing the total size of the BioSequenceSlice.
func (s BioSequenceSlice) Size() int {
	size := 0

	for _, s := range s {
		size += s.Len()
	}

	return size
}
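
// Illustrative sketch, not part of this commit: Len and Size answer two
// different questions about a slice: Len counts sequences, Size sums their
// lengths in nucleotides. The function name describeBatch is made up; the
// sketch assumes it is compiled inside the obiseq package with "fmt" imported.
func describeBatch(s BioSequenceSlice) string {
	return fmt.Sprintf("%d sequences, %d nucleotides in total", s.Len(), s.Size())
}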