mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch rev complement and first implementation of --auto in obicsv
Former-commit-id: f3020e81283b1073c4d1c2d2ff0887e3998e6764
This commit is contained in:
@ -2,6 +2,10 @@
|
|||||||
|
|
||||||
## Latest changes
|
## Latest changes
|
||||||
|
|
||||||
|
### New feature
|
||||||
|
|
||||||
|
- In the obitools language, a new `gc` function computes the GC fraction of a sequence.
|
||||||
|
|
||||||
### Enhancement
|
### Enhancement
|
||||||
|
|
||||||
- A new completely rewritten GO version of the fastq and fasta parser is now used instead of the original C version.
|
- A new completely rewritten GO version of the fastq and fasta parser is now used instead of the original C version.
|
||||||
@ -13,6 +17,17 @@
|
|||||||
+ If -D is set to 0, the output sequence is the barcode with the priming sites.
|
+ If -D is set to 0, the output sequence is the barcode with the priming sites.
|
||||||
+ When -D is set to ### (where ### is an integer), the output sequence is the barcode with the priming sites
|
+ When -D is set to ### (where ### is an integer), the output sequence is the barcode with the priming sites
|
||||||
and ### base pairs of flanking sequences.
|
and ### base pairs of flanking sequences.
|
||||||
|
|
||||||
|
### Bugs
|
||||||
|
|
||||||
|
- In the obitools language, the `composition` function now returns a map indexed by the lowercase strings "a", "c", "g", "t", and "o" (for other), instead of being indexed by the ASCII codes of the corresponding letters.
|
||||||
|
- Correction of the reverse-complement operation. Every reverse complement of a DNA sequence now follows these rules:
|
||||||
|
+ Nucleotide codes are complemented to their lowercase complementary base
|
||||||
|
+ `.` and `-` characters are returned without change
|
||||||
|
+ `[` is complemented to `]` and oppositely
|
||||||
|
+ all other characters are complemented as `n`
|
||||||
|
|
||||||
|
|
||||||
### Be careful
|
### Be careful
|
||||||
|
|
||||||
GO 1.21.0 is out, and it includes new functionalities which are used in the OBITools4 code.
|
GO 1.21.0 is out, and it includes new functionalities which are used in the OBITools4 code.
|
||||||
|
19
go.mod
19
go.mod
@ -3,7 +3,7 @@ module git.metabarcoding.org/lecasofts/go/obitools
|
|||||||
go 1.20
|
go 1.20
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/DavidGamba/go-getoptions v0.27.0
|
github.com/DavidGamba/go-getoptions v0.28.0
|
||||||
github.com/PaesslerAG/gval v1.2.2
|
github.com/PaesslerAG/gval v1.2.2
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df
|
||||||
github.com/chen3feng/stl4go v0.1.1
|
github.com/chen3feng/stl4go v0.1.1
|
||||||
@ -15,12 +15,13 @@ require (
|
|||||||
github.com/sirupsen/logrus v1.9.3
|
github.com/sirupsen/logrus v1.9.3
|
||||||
github.com/stretchr/testify v1.7.0
|
github.com/stretchr/testify v1.7.0
|
||||||
github.com/tevino/abool/v2 v2.1.0
|
github.com/tevino/abool/v2 v2.1.0
|
||||||
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63
|
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
|
||||||
gonum.org/v1/gonum v0.14.0
|
gonum.org/v1/gonum v0.14.0
|
||||||
scientificgo.org/special v0.0.0
|
scientificgo.org/special v0.0.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/deckarep/golang-set/v2 v2.3.1 // indirect
|
||||||
github.com/kr/pretty v0.2.1 // indirect
|
github.com/kr/pretty v0.2.1 // indirect
|
||||||
github.com/kr/text v0.1.0 // indirect
|
github.com/kr/text v0.1.0 // indirect
|
||||||
)
|
)
|
||||||
@ -28,8 +29,8 @@ require (
|
|||||||
require (
|
require (
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
github.com/dsnet/compress v0.0.1
|
github.com/dsnet/compress v0.0.1
|
||||||
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
|
github.com/gabriel-vasile/mimetype v1.4.3
|
||||||
github.com/klauspost/compress v1.16.7 // indirect
|
github.com/klauspost/compress v1.17.2 // indirect
|
||||||
github.com/mattn/go-runewidth v0.0.15 // indirect
|
github.com/mattn/go-runewidth v0.0.15 // indirect
|
||||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||||
@ -37,11 +38,11 @@ require (
|
|||||||
github.com/shopspring/decimal v1.3.1 // indirect
|
github.com/shopspring/decimal v1.3.1 // indirect
|
||||||
github.com/ulikunitz/xz v0.5.11
|
github.com/ulikunitz/xz v0.5.11
|
||||||
github.com/yuin/goldmark v1.4.13 // indirect
|
github.com/yuin/goldmark v1.4.13 // indirect
|
||||||
golang.org/x/mod v0.12.0 // indirect
|
golang.org/x/mod v0.13.0 // indirect
|
||||||
golang.org/x/net v0.14.0 // indirect
|
golang.org/x/net v0.17.0 // indirect
|
||||||
golang.org/x/sys v0.11.0 // indirect
|
golang.org/x/sys v0.13.0 // indirect
|
||||||
golang.org/x/term v0.11.0 // indirect
|
golang.org/x/term v0.13.0 // indirect
|
||||||
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 // indirect
|
golang.org/x/tools v0.14.0 // indirect
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
|
||||||
)
|
)
|
||||||
|
20
go.sum
20
go.sum
@ -1,5 +1,7 @@
|
|||||||
github.com/DavidGamba/go-getoptions v0.27.0 h1:hldKJSwO9SwvR+z9pe6ojhEcYECrRiO/bar9B7MnBKA=
|
github.com/DavidGamba/go-getoptions v0.27.0 h1:hldKJSwO9SwvR+z9pe6ojhEcYECrRiO/bar9B7MnBKA=
|
||||||
github.com/DavidGamba/go-getoptions v0.27.0/go.mod h1:qLaLSYeQ8sUVOfKuu5JT5qKKS3OCwyhkYSJnoG+ggmo=
|
github.com/DavidGamba/go-getoptions v0.27.0/go.mod h1:qLaLSYeQ8sUVOfKuu5JT5qKKS3OCwyhkYSJnoG+ggmo=
|
||||||
|
github.com/DavidGamba/go-getoptions v0.28.0 h1:18wgEvfZdrlfIhVDGEBO3Dl0fkOyXqXLa0tLMCKxM1c=
|
||||||
|
github.com/DavidGamba/go-getoptions v0.28.0/go.mod h1:zE97E3PR9P3BI/HKyNYgdMlYxodcuiC6W68KIgeYT84=
|
||||||
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
|
github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E=
|
||||||
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
||||||
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
||||||
@ -13,17 +15,23 @@ github.com/daichi-m/go18ds v1.12.1/go.mod h1:wc2dURUr8aMxxC4Mn5ObJGVM7uIKU8JagY4
|
|||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/deckarep/golang-set/v2 v2.3.1 h1:vjmkvJt/IV27WXPyYQpAh4bRyWJc5Y435D17XQ9QU5A=
|
||||||
|
github.com/deckarep/golang-set/v2 v2.3.1/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
|
||||||
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
|
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
|
||||||
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
|
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
|
||||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
|
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
|
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
|
||||||
|
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||||
|
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
||||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||||
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
|
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
|
||||||
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
|
||||||
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
|
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
|
||||||
github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
|
github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
|
||||||
|
github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
|
||||||
|
github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
|
||||||
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
|
||||||
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
|
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
|
||||||
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
|
||||||
@ -64,20 +72,32 @@ github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
|
|||||||
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
|
||||||
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
|
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
|
||||||
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
|
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
|
||||||
|
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
||||||
|
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
|
||||||
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
|
golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
|
||||||
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||||
|
golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY=
|
||||||
|
golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||||
golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
|
golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
|
||||||
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
|
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
|
||||||
|
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||||
|
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
|
||||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
|
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
|
||||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
|
||||||
|
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
|
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
|
||||||
golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
|
golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0=
|
||||||
golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
|
golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU=
|
||||||
|
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
|
||||||
|
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
|
||||||
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 h1:Vve/L0v7CXXuxUmaMGIEK/dEeq7uiqb5qBgQrZzIE7E=
|
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846 h1:Vve/L0v7CXXuxUmaMGIEK/dEeq7uiqb5qBgQrZzIE7E=
|
||||||
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
|
golang.org/x/tools v0.12.1-0.20230815132531-74c255bcf846/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
|
||||||
|
golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=
|
||||||
|
golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg=
|
||||||
gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0=
|
gonum.org/v1/gonum v0.14.0 h1:2NiG67LD1tEH0D7kM+ps2V+fXmsAnpUeec7n8tcr4S0=
|
||||||
gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU=
|
gonum.org/v1/gonum v0.14.0/go.mod h1:AoWeoz0becf9QMWtE8iWXNXc27fK4fNeHNf/oMejGfU=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
|
@ -10,6 +10,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
@ -65,7 +66,7 @@ func CSVRecord(sequence *obiseq.BioSequence, opt Options) []string {
|
|||||||
l := sequence.Len()
|
l := sequence.Len()
|
||||||
q := sequence.Qualities()
|
q := sequence.Qualities()
|
||||||
ascii := make([]byte, l)
|
ascii := make([]byte, l)
|
||||||
quality_shift := opt.QualityShift()
|
quality_shift := obioptions.OutputQualityShift()
|
||||||
for j := 0; j < l; j++ {
|
for j := 0; j < l; j++ {
|
||||||
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
||||||
}
|
}
|
||||||
@ -130,6 +131,8 @@ func FormatCVSBatch(batch obiiter.BioSequenceBatch, opt Options) []byte {
|
|||||||
func WriteCSV(iterator obiiter.IBioSequence,
|
func WriteCSV(iterator obiiter.IBioSequence,
|
||||||
file io.WriteCloser,
|
file io.WriteCloser,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
|
var auto_slot obiutils.Set[string]
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
|
||||||
@ -167,12 +170,6 @@ func WriteCSV(iterator obiiter.IBioSequence,
|
|||||||
newIter.Done()
|
newIter.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debugln("Start of the CSV file writing")
|
|
||||||
go ff(iterator)
|
|
||||||
for i := 0; i < nwriters-1; i++ {
|
|
||||||
go ff(iterator.Split())
|
|
||||||
}
|
|
||||||
|
|
||||||
next_to_send := 0
|
next_to_send := 0
|
||||||
received := make(map[int]FileChunck, 100)
|
received := make(map[int]FileChunck, 100)
|
||||||
|
|
||||||
@ -203,6 +200,25 @@ func WriteCSV(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if opt.pointer.csv_auto {
|
||||||
|
if iterator.Next() {
|
||||||
|
batch := iterator.Get()
|
||||||
|
auto_slot = batch.Slice().AttributeKeys(true)
|
||||||
|
CSVKeys(auto_slot.Members())(opt)
|
||||||
|
chunkchan <- FileChunck{
|
||||||
|
FormatCVSBatch(batch, opt),
|
||||||
|
batch.Order(),
|
||||||
|
}
|
||||||
|
newIter.Push(batch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugln("Start of the CSV file writing")
|
||||||
|
go ff(iterator)
|
||||||
|
for i := 0; i < nwriters-1; i++ {
|
||||||
|
go ff(iterator.Split())
|
||||||
|
}
|
||||||
|
|
||||||
return newIter, nil
|
return newIter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -234,7 +234,7 @@ func ParseFastaChunk(source string, ch FastxChunk) *obiiter.BioSequenceBatch {
|
|||||||
C = C + 'a' - 'A'
|
C = C + 'a' - 'A'
|
||||||
}
|
}
|
||||||
// Removing white space from the sequence
|
// Removing white space from the sequence
|
||||||
if (C >= 'a' && C <= 'z') || C == '-' || C == '.' {
|
if (C >= 'a' && C <= 'z') || C == '-' || C == '.' || C == '[' || C == ']' {
|
||||||
ch.Bytes[current] = C
|
ch.Bytes[current] = C
|
||||||
current++
|
current++
|
||||||
}
|
}
|
||||||
|
@ -329,7 +329,7 @@ func ReadFastq(reader io.Reader, options ...WithOption) (obiiter.IBioSequence, e
|
|||||||
parser := func() {
|
parser := func() {
|
||||||
defer out.Done()
|
defer out.Done()
|
||||||
for chk := range chkchan {
|
for chk := range chkchan {
|
||||||
seqs := ParseFastqChunk(source, chk, byte(opt.QualityShift()))
|
seqs := ParseFastqChunk(source, chk, byte(obioptions.InputQualityShift()))
|
||||||
if seqs != nil {
|
if seqs != nil {
|
||||||
out.Push(*seqs)
|
out.Push(*seqs)
|
||||||
} else {
|
} else {
|
||||||
|
@ -15,6 +15,7 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
)
|
)
|
||||||
@ -91,7 +92,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
|||||||
name := C.CString(filename)
|
name := C.CString(filename)
|
||||||
defer C.free(unsafe.Pointer(name))
|
defer C.free(unsafe.Pointer(name))
|
||||||
|
|
||||||
pointer := C.open_fast_sek_file(name, C.int32_t(opt.QualityShift()))
|
pointer := C.open_fast_sek_file(name, C.int32_t(obioptions.InputQualityShift()))
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
err = nil
|
err = nil
|
||||||
@ -150,7 +151,7 @@ func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
|
|||||||
}(newIter)
|
}(newIter)
|
||||||
|
|
||||||
go _FastseqReader(opt.Source(),
|
go _FastseqReader(opt.Source(),
|
||||||
C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())),
|
C.open_fast_sek_stdin(C.int32_t(obioptions.InputQualityShift())),
|
||||||
newIter, opt.BatchSize())
|
newIter, opt.BatchSize())
|
||||||
|
|
||||||
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
log.Debugln("Full file batch mode : ", opt.FullFileBatch())
|
||||||
|
@ -11,21 +11,16 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
// The function FormatFastq takes a BioSequence object, a quality shift value, and a header formatter
|
// The function FormatFastq takes a BioSequence object and a header formatter
|
||||||
// function as input, and returns a formatted string in FASTQ format.
|
// function as input, and returns a formatted string in FASTQ format.
|
||||||
func FormatFastq(seq *obiseq.BioSequence, quality_shift int, formater FormatHeader) string {
|
func FormatFastq(seq *obiseq.BioSequence, formater FormatHeader) string {
|
||||||
|
|
||||||
l := seq.Len()
|
q := seq.QualitiesString()
|
||||||
q := seq.Qualities()
|
|
||||||
ascii := make([]byte, seq.Len())
|
|
||||||
|
|
||||||
for j := 0; j < l; j++ {
|
|
||||||
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
|
||||||
}
|
|
||||||
|
|
||||||
info := ""
|
info := ""
|
||||||
if formater != nil {
|
if formater != nil {
|
||||||
@ -34,8 +29,8 @@ func FormatFastq(seq *obiseq.BioSequence, quality_shift int, formater FormatHead
|
|||||||
|
|
||||||
return fmt.Sprintf("@%s %s\n%s\n+\n%s",
|
return fmt.Sprintf("@%s %s\n%s\n+\n%s",
|
||||||
seq.Id(), info,
|
seq.Id(), info,
|
||||||
string(seq.Sequence()),
|
seq.String(),
|
||||||
string(ascii),
|
q,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -44,7 +39,7 @@ func FormatFastqBatch(batch obiiter.BioSequenceBatch, quality_shift int,
|
|||||||
var bs bytes.Buffer
|
var bs bytes.Buffer
|
||||||
for _, seq := range batch.Slice() {
|
for _, seq := range batch.Slice() {
|
||||||
if seq.Len() > 0 {
|
if seq.Len() > 0 {
|
||||||
bs.WriteString(FormatFastq(seq, quality_shift, formater))
|
bs.WriteString(FormatFastq(seq, formater))
|
||||||
bs.WriteString("\n")
|
bs.WriteString("\n")
|
||||||
} else {
|
} else {
|
||||||
if skipEmpty {
|
if skipEmpty {
|
||||||
@ -81,7 +76,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
chunkchan := make(chan FileChunck)
|
chunkchan := make(chan FileChunck)
|
||||||
|
|
||||||
header_format := opt.FormatFastSeqHeader()
|
header_format := opt.FormatFastSeqHeader()
|
||||||
quality := opt.QualityShift()
|
quality := obioptions.OutputQualityShift()
|
||||||
|
|
||||||
newIter.Add(nwriters)
|
newIter.Add(nwriters)
|
||||||
|
|
||||||
|
@ -12,7 +12,6 @@ type __options__ struct {
|
|||||||
buffer_size int
|
buffer_size int
|
||||||
batch_size int
|
batch_size int
|
||||||
full_file_batch bool
|
full_file_batch bool
|
||||||
quality_shift int
|
|
||||||
parallel_workers int
|
parallel_workers int
|
||||||
closefile bool
|
closefile bool
|
||||||
appendfile bool
|
appendfile bool
|
||||||
@ -27,6 +26,7 @@ type __options__ struct {
|
|||||||
csv_keys []string
|
csv_keys []string
|
||||||
csv_separator string
|
csv_separator string
|
||||||
csv_navalue string
|
csv_navalue string
|
||||||
|
csv_auto bool
|
||||||
paired_filename string
|
paired_filename string
|
||||||
source string
|
source string
|
||||||
}
|
}
|
||||||
@ -43,7 +43,6 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
fastseq_header_writer: FormatFastSeqJsonHeader,
|
fastseq_header_writer: FormatFastSeqJsonHeader,
|
||||||
with_progress_bar: false,
|
with_progress_bar: false,
|
||||||
buffer_size: 2,
|
buffer_size: 2,
|
||||||
quality_shift: 33,
|
|
||||||
parallel_workers: obioptions.CLIReadParallelWorkers(),
|
parallel_workers: obioptions.CLIReadParallelWorkers(),
|
||||||
batch_size: obioptions.CLIBatchSize(),
|
batch_size: obioptions.CLIBatchSize(),
|
||||||
full_file_batch: false,
|
full_file_batch: false,
|
||||||
@ -60,6 +59,7 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
csv_separator: ",",
|
csv_separator: ",",
|
||||||
csv_navalue: "NA",
|
csv_navalue: "NA",
|
||||||
csv_keys: make([]string, 0),
|
csv_keys: make([]string, 0),
|
||||||
|
csv_auto: false,
|
||||||
paired_filename: "",
|
paired_filename: "",
|
||||||
source: "",
|
source: "",
|
||||||
}
|
}
|
||||||
@ -73,10 +73,6 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
return opt
|
return opt
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opt Options) QualityShift() int {
|
|
||||||
return opt.pointer.quality_shift
|
|
||||||
}
|
|
||||||
|
|
||||||
func (opt Options) BatchSize() int {
|
func (opt Options) BatchSize() int {
|
||||||
return opt.pointer.batch_size
|
return opt.pointer.batch_size
|
||||||
}
|
}
|
||||||
@ -153,6 +149,10 @@ func (opt Options) CSVNAValue() string {
|
|||||||
return opt.pointer.csv_navalue
|
return opt.pointer.csv_navalue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (opt Options) CSVAutoColumn() bool {
|
||||||
|
return opt.pointer.csv_auto
|
||||||
|
}
|
||||||
|
|
||||||
func (opt Options) HaveToSavePaired() bool {
|
func (opt Options) HaveToSavePaired() bool {
|
||||||
return opt.pointer.paired_filename != ""
|
return opt.pointer.paired_filename != ""
|
||||||
}
|
}
|
||||||
@ -217,31 +217,6 @@ func OptionsNewFile() WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allows to specify the ascii code corresponding to
|
|
||||||
// a quality of 0 in fastq encoded quality scores.
|
|
||||||
func OptionsQualityShift(shift int) WithOption {
|
|
||||||
f := WithOption(func(opt Options) {
|
|
||||||
opt.pointer.quality_shift = shift
|
|
||||||
})
|
|
||||||
|
|
||||||
return f
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allows to specify a quality shift of 33, corresponding
|
|
||||||
// to a FastQ file qualities encoded following Sanger
|
|
||||||
// convention. This corresponds to Illumina produced FastQ
|
|
||||||
// files.
|
|
||||||
func OptionsQualitySanger() WithOption {
|
|
||||||
return OptionsQualityShift(33)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allows to specify a quality shift of 64, corresponding
|
|
||||||
// to a FastQ file qualities encoded following the Solexa
|
|
||||||
// convention.
|
|
||||||
func OptionsQualitySolexa() WithOption {
|
|
||||||
return OptionsQualityShift(64)
|
|
||||||
}
|
|
||||||
|
|
||||||
func OptionsFastSeqHeaderParser(parser obiseq.SeqAnnotator) WithOption {
|
func OptionsFastSeqHeaderParser(parser obiseq.SeqAnnotator) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.fastseq_header_parser = parser
|
opt.pointer.fastseq_header_parser = parser
|
||||||
@ -403,3 +378,11 @@ func CSVNAValue(navalue string) WithOption {
|
|||||||
|
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CSVAutoColumn(auto bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.csv_auto = auto
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
@ -19,6 +19,8 @@ var _ReadWorkerPerCore = 1.0
|
|||||||
var _MaxAllowedCPU = runtime.NumCPU()
|
var _MaxAllowedCPU = runtime.NumCPU()
|
||||||
var _BatchSize = 5000
|
var _BatchSize = 5000
|
||||||
var _Pprof = false
|
var _Pprof = false
|
||||||
|
var _Quality_Shift_Input = 33
|
||||||
|
var _Quality_Shift_Output = 33
|
||||||
|
|
||||||
type ArgumentParser func([]string) (*getoptions.GetOpt, []string)
|
type ArgumentParser func([]string) (*getoptions.GetOpt, []string)
|
||||||
|
|
||||||
@ -43,6 +45,10 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
options.GetEnv("OBIBATCHSIZE"),
|
options.GetEnv("OBIBATCHSIZE"),
|
||||||
options.Description("Number of sequence per batch for paralelle processing"))
|
options.Description("Number of sequence per batch for paralelle processing"))
|
||||||
|
|
||||||
|
options.Bool("solexa", false,
|
||||||
|
options.GetEnv("OBISOLEXA"),
|
||||||
|
options.Description("Decodes quality string according to the Solexa specification."))
|
||||||
|
|
||||||
for _, o := range optionset {
|
for _, o := range optionset {
|
||||||
o(options)
|
o(options)
|
||||||
}
|
}
|
||||||
@ -85,6 +91,15 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("Number of workers set %d", CLIParallelWorkers())
|
log.Printf("Number of workers set %d", CLIParallelWorkers())
|
||||||
|
|
||||||
|
if options.Called("workers") {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.Called("solexa") {
|
||||||
|
SetInputQualityShift(64)
|
||||||
|
}
|
||||||
|
|
||||||
return options, remaining
|
return options, remaining
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -144,3 +159,19 @@ func ReadWorkerPerCore() float64 {
|
|||||||
func SetBatchSize(n int) {
|
func SetBatchSize(n int) {
|
||||||
_BatchSize = n
|
_BatchSize = n
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func InputQualityShift() int {
|
||||||
|
return _Quality_Shift_Input
|
||||||
|
}
|
||||||
|
|
||||||
|
func OutputQualityShift() int {
|
||||||
|
return _Quality_Shift_Output
|
||||||
|
}
|
||||||
|
|
||||||
|
func SetInputQualityShift(n int) {
|
||||||
|
_Quality_Shift_Input = n
|
||||||
|
}
|
||||||
|
|
||||||
|
func SetOutputQualityShift(n int) {
|
||||||
|
_Quality_Shift_Output = n
|
||||||
|
}
|
||||||
|
@ -8,6 +8,48 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// AttributeKeys returns the keys of the attributes in the BioSequence.
|
||||||
|
//
|
||||||
|
// If skip_map is true, attributes whose value is a map are left out.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
//
|
||||||
|
// obiutils.Set[string]: The set of attribute keys of the BioSequence.
|
||||||
|
func (s *BioSequence) AttributeKeys(skip_map bool) obiutils.Set[string] {
|
||||||
|
keys := obiutils.MakeSet[string]()
|
||||||
|
|
||||||
|
for k, v := range s.Annotations() {
|
||||||
|
if !skip_map || !obiutils.IsAMap(v) {
|
||||||
|
keys.Add(k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keys returns the keys of the BioSequence.
|
||||||
|
//
|
||||||
|
// It returns a set of strings containing the keys of the BioSequence.
|
||||||
|
// The keys include "id", "sequence" and "qualities" (the latter two only when present), and the attribute keys
|
||||||
|
// of the BioSequence.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
//
|
||||||
|
// []string: The keys of the BioSequence.
|
||||||
|
func (s *BioSequence) Keys(skip_map bool) obiutils.Set[string] {
|
||||||
|
keys := s.AttributeKeys(skip_map)
|
||||||
|
keys.Add("id")
|
||||||
|
|
||||||
|
if s.HasSequence() {
|
||||||
|
keys.Add("sequence")
|
||||||
|
}
|
||||||
|
if s.HasQualities() {
|
||||||
|
keys.Add("qualities")
|
||||||
|
}
|
||||||
|
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
// HasAttribute checks if the BioSequence has the specified attribute.
|
// HasAttribute checks if the BioSequence has the specified attribute.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
@ -16,6 +58,17 @@ import (
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - a boolean indicating whether the BioSequence has the attribute.
|
// - a boolean indicating whether the BioSequence has the attribute.
|
||||||
func (s *BioSequence) HasAttribute(key string) bool {
|
func (s *BioSequence) HasAttribute(key string) bool {
|
||||||
|
if key == "id" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
if key == "sequence" && s.sequence != nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
if key == "qualities" && s.qualities != nil {
|
||||||
|
return true
|
||||||
|
}
|
||||||
ok := s.annotations != nil
|
ok := s.annotations != nil
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
@ -36,6 +89,25 @@ func (s *BioSequence) HasAttribute(key string) bool {
|
|||||||
// - val: The value associated with the given key.
|
// - val: The value associated with the given key.
|
||||||
// - ok: A boolean indicating whether the key exists in the annotations map.
|
// - ok: A boolean indicating whether the key exists in the annotations map.
|
||||||
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
||||||
|
|
||||||
|
if key == "id" {
|
||||||
|
return s.id, true
|
||||||
|
}
|
||||||
|
|
||||||
|
if key == "sequence" {
|
||||||
|
if s.HasSequence() {
|
||||||
|
return s.String(), true
|
||||||
|
}
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
|
if key == "qualities" {
|
||||||
|
if s.HasQualities() {
|
||||||
|
return s.QualitiesString(), true
|
||||||
|
}
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
|
||||||
var val interface{}
|
var val interface{}
|
||||||
ok := s.annotations != nil
|
ok := s.annotations != nil
|
||||||
|
|
||||||
@ -54,6 +126,17 @@ func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
|||||||
// - key: the key to set the value for.
|
// - key: the key to set the value for.
|
||||||
// - value: the value to set for the given key.
|
// - value: the value to set for the given key.
|
||||||
func (s *BioSequence) SetAttribute(key string, value interface{}) {
|
func (s *BioSequence) SetAttribute(key string, value interface{}) {
|
||||||
|
|
||||||
|
if key == "id" {
|
||||||
|
s.SetId(value.(string))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if key == "sequence" {
|
||||||
|
s.SetSequence(value.([]byte))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
annot := s.Annotations()
|
annot := s.Annotations()
|
||||||
|
|
||||||
defer s.AnnotationsUnlock()
|
defer s.AnnotationsUnlock()
|
||||||
|
@ -15,6 +15,7 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
@ -55,8 +56,7 @@ type Annotation map[string]interface{}
|
|||||||
// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
|
// A BioSequence is a sequence of bytes with an identifier, a definition, a sequence, qualities,
|
||||||
// features and annotations. It aims to represent a biological sequence
|
// features and annotations. It aims to represent a biological sequence
|
||||||
type BioSequence struct {
|
type BioSequence struct {
|
||||||
id string // The identidier of the sequence (private accessible through the method Id)
|
id string // The identidier of the sequence (private accessible through the method Id)
|
||||||
//definition string // The documentation of the sequence (private accessible through the method Definition)
|
|
||||||
source string // The filename without directory name and extension from where the sequence was read.
|
source string // The filename without directory name and extension from where the sequence was read.
|
||||||
sequence []byte // The sequence itself, it is accessible by the methode Sequence
|
sequence []byte // The sequence itself, it is accessible by the methode Sequence
|
||||||
qualities []byte // The quality scores of the sequence.
|
qualities []byte // The quality scores of the sequence.
|
||||||
@ -188,6 +188,14 @@ func (s *BioSequence) Definition() string {
|
|||||||
return definition
|
return definition
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HasSequence checks if the BioSequence has a sequence.
|
||||||
|
//
|
||||||
|
// No parameters.
|
||||||
|
// Returns a boolean.
|
||||||
|
func (s *BioSequence) HasSequence() bool {
|
||||||
|
return s.sequence != nil && len(s.sequence) > 0
|
||||||
|
}
|
||||||
|
|
||||||
// Sequence returns the sequence of the BioSequence.
|
// Sequence returns the sequence of the BioSequence.
|
||||||
//
|
//
|
||||||
// Returns:
|
// Returns:
|
||||||
@ -217,7 +225,7 @@ func (s *BioSequence) Len() int {
|
|||||||
// This function does not have any parameters.
|
// This function does not have any parameters.
|
||||||
// It returns a boolean value indicating whether the BioSequence has qualities.
|
// It returns a boolean value indicating whether the BioSequence has qualities.
|
||||||
func (s *BioSequence) HasQualities() bool {
|
func (s *BioSequence) HasQualities() bool {
|
||||||
return len(s.qualities) > 0
|
return s.qualities != nil && len(s.qualities) > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Qualities returns the sequence quality scores of the BioSequence.
|
// Qualities returns the sequence quality scores of the BioSequence.
|
||||||
@ -235,6 +243,19 @@ func (s *BioSequence) Qualities() Quality {
|
|||||||
return __make_default_qualities__(len(s.sequence))
|
return __make_default_qualities__(len(s.sequence))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// QualitiesString returns the string representation of the qualities of the BioSequence.
|
||||||
|
//
|
||||||
|
// Returns a string representing the qualities of the BioSequence after applying the shift.
|
||||||
|
func (s *BioSequence) QualitiesString() string {
|
||||||
|
quality_shift := obioptions.OutputQualityShift()
|
||||||
|
qual := s.Qualities()
|
||||||
|
qual_ascii := make([]byte, len(qual))
|
||||||
|
for i := 0; i < len(qual); i++ {
|
||||||
|
qual_ascii[i] = byte(qual[i] + byte(quality_shift))
|
||||||
|
}
|
||||||
|
return string(qual_ascii)
|
||||||
|
}
|
||||||
|
|
||||||
// Features returns the feature string of the BioSequence.
|
// Features returns the feature string of the BioSequence.
|
||||||
//
|
//
|
||||||
// The feature string contains the EMBL/GenBank not parsed feature table
|
// The feature string contains the EMBL/GenBank not parsed feature table
|
||||||
|
@ -3,6 +3,7 @@ package obiseq
|
|||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
)
|
)
|
||||||
@ -171,3 +172,13 @@ func (s BioSequenceSlice) Size() int {
|
|||||||
|
|
||||||
return size
|
return size
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s BioSequenceSlice) AttributeKeys(skip_map bool) obiutils.Set[string] {
|
||||||
|
keys := obiutils.MakeSet[string]()
|
||||||
|
|
||||||
|
for _, k := range s {
|
||||||
|
keys = keys.Union(k.AttributeKeys(skip_map))
|
||||||
|
}
|
||||||
|
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
@ -198,6 +198,15 @@ var OBILang = gval.NewLanguage(
|
|||||||
composition := (args[0].(*BioSequence)).Composition()
|
composition := (args[0].(*BioSequence)).Composition()
|
||||||
return float64(composition['g']-composition['c']) / float64(composition['g']+composition['c']), nil
|
return float64(composition['g']-composition['c']) / float64(composition['g']+composition['c']), nil
|
||||||
}),
|
}),
|
||||||
|
gval.Function("gc", func(args ...interface{}) (interface{}, error) {
|
||||||
|
composition := (args[0].(*BioSequence)).Composition()
|
||||||
|
return float64(composition['g']+composition['c']) / float64(args[0].(*BioSequence).Len()), nil
|
||||||
|
}),
|
||||||
gval.Function("composition", func(args ...interface{}) (interface{}, error) {
|
gval.Function("composition", func(args ...interface{}) (interface{}, error) {
|
||||||
return (args[0].(*BioSequence)).Composition(), nil
|
comp := (args[0].(*BioSequence)).Composition()
|
||||||
|
scomp := make(map[string]float64)
|
||||||
|
for k, v := range comp {
|
||||||
|
scomp[string(k)] = float64(v)
|
||||||
|
}
|
||||||
|
return scomp, nil
|
||||||
}))
|
}))
|
||||||
|
@ -1,7 +1,17 @@
|
|||||||
package obiseq
|
package obiseq
|
||||||
|
|
||||||
// _revcmpDNA is the complement lookup table, indexed by (ASCII code & 31),
// which maps both 'a' and 'A' to 1, 'b' and 'B' to 2, and so on.
// Index layout:        ".ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
var _revcmpDNA = []byte(".TVGHNNCDNNMNKNNNNYSAABWNRN]N[NNN")

// complement returns the complement of a single sequence symbol.
//
// Rules:
//   - '.' and '-' are returned unchanged;
//   - '[' and ']' are swapped;
//   - ASCII letters are looked up in _revcmpDNA and keep the case of the
//     input (the 0x20 bit of n is copied onto the result); letters that are
//     not nucleotide codes therefore yield 'N' or 'n';
//   - every other byte yields 'n'.
func complement(n byte) byte {
	switch {
	case n == '.' || n == '-':
		return n
	case n == '[':
		return ']'
	case n == ']':
		return '['
	case (n >= 'A' && n <= 'Z') || (n >= 'a' && n <= 'z'):
		// Restricting the lookup to letters keeps '\\', '^', '_' and '`'
		// (which sit between 'Z' and 'a') out of the table: '`' would
		// otherwise hit index 0 and come back as '.'.
		return _revcmpDNA[n&31] | (n & 0x20)
	}

	return 'n'
}
|
||||||
|
|
||||||
// Reverse complements a DNA sequence.
|
// Reverse complements a DNA sequence.
|
||||||
// If the inplace parameter is true, that operation is done in place.
|
// If the inplace parameter is true, that operation is done in place.
|
||||||
@ -18,8 +28,7 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
|
|||||||
// ASCII code & 31 -> builds an index in witch (a|A) is 1
|
// ASCII code & 31 -> builds an index in witch (a|A) is 1
|
||||||
// ASCII code & 0x20 -> Foce lower case
|
// ASCII code & 0x20 -> Foce lower case
|
||||||
|
|
||||||
s[j], s[i] = _revcmpDNA[s[i]&31]|(s[i]&0x20),
|
s[j], s[i] = complement(s[i]), complement(s[j])
|
||||||
_revcmpDNA[s[j]&31]|(s[j]&0x20)
|
|
||||||
j++
|
j++
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,8 +49,7 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
|
|||||||
b := []byte(m)
|
b := []byte(m)
|
||||||
|
|
||||||
// Echange and reverse complement symboles
|
// Echange and reverse complement symboles
|
||||||
b[1], b[9] = _revcmpDNA[b[9]&31]|(b[9]&0x20),
|
b[1], b[9] = complement(b[9]), complement(b[1])
|
||||||
_revcmpDNA[b[1]&31]|(b[1]&0x20)
|
|
||||||
|
|
||||||
// Exchange sequencing scores
|
// Exchange sequencing scores
|
||||||
b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4]
|
b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4]
|
||||||
@ -65,7 +73,6 @@ func (sequence *BioSequence) _revcmpMutation() *BioSequence {
|
|||||||
return sequence
|
return sequence
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func ReverseComplementWorker(inplace bool) SeqWorker {
|
func ReverseComplementWorker(inplace bool) SeqWorker {
|
||||||
f := func(input *BioSequence) *BioSequence {
|
f := func(input *BioSequence) *BioSequence {
|
||||||
return input.ReverseComplement(inplace)
|
return input.ReverseComplement(inplace)
|
||||||
|
@ -16,13 +16,10 @@ var __input_ecopcr_format__ = false
|
|||||||
var __input_embl_format__ = false
|
var __input_embl_format__ = false
|
||||||
var __input_genbank_format__ = false
|
var __input_genbank_format__ = false
|
||||||
|
|
||||||
var __input_solexa_quality__ = false
|
|
||||||
|
|
||||||
var __output_in_fasta__ = false
|
var __output_in_fasta__ = false
|
||||||
var __output_in_fastq__ = false
|
var __output_in_fastq__ = false
|
||||||
var __output_fastjson_format__ = false
|
var __output_fastjson_format__ = false
|
||||||
var __output_fastobi_format__ = false
|
var __output_fastobi_format__ = false
|
||||||
var __output_solexa_quality__ = false
|
|
||||||
|
|
||||||
var __no_progress_bar__ = false
|
var __no_progress_bar__ = false
|
||||||
var __compressed__ = false
|
var __compressed__ = false
|
||||||
@ -54,9 +51,6 @@ func InputOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.BoolVar(&__input_genbank_format__, "genbank", __input_genbank_format__,
|
options.BoolVar(&__input_genbank_format__, "genbank", __input_genbank_format__,
|
||||||
options.Description("Read data following the Genbank flatfile format."))
|
options.Description("Read data following the Genbank flatfile format."))
|
||||||
|
|
||||||
options.BoolVar(&__input_solexa_quality__, "solexa", __input_solexa_quality__,
|
|
||||||
options.Description("Decodes quality string according to the Solexa specification."))
|
|
||||||
|
|
||||||
options.BoolVar(&__no_ordered_input__, "no-order", __no_ordered_input__,
|
options.BoolVar(&__no_ordered_input__, "no-order", __no_ordered_input__,
|
||||||
options.Description("When several input files are provided, "+
|
options.Description("When several input files are provided, "+
|
||||||
"indicates that there is no order among them."))
|
"indicates that there is no order among them."))
|
||||||
@ -71,7 +65,7 @@ func OutputModeOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.Alias("Z"),
|
options.Alias("Z"),
|
||||||
options.Description("Output is compressed"))
|
options.Description("Output is compressed"))
|
||||||
|
|
||||||
options.BoolVar(&__skip_empty__, "skip-empty", __skip_empty__,
|
options.BoolVar(&__skip_empty__, "skip-empty", __skip_empty__,
|
||||||
options.Description("Sequences of length equal to zero are suppressed from the output"))
|
options.Description("Sequences of length equal to zero are suppressed from the output"))
|
||||||
|
|
||||||
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
||||||
@ -146,7 +140,7 @@ func CLICompressed() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func CLISkipEmpty() bool {
|
func CLISkipEmpty() bool {
|
||||||
return __skip_empty__
|
return __skip_empty__
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLIInputFastHeaderFormat() string {
|
func CLIInputFastHeaderFormat() string {
|
||||||
@ -181,22 +175,6 @@ func CLIAnalyzeOnly() int {
|
|||||||
return __read_only_entries__
|
return __read_only_entries__
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLIInputQualityShift() int {
|
|
||||||
if __input_solexa_quality__ {
|
|
||||||
return 64
|
|
||||||
} else {
|
|
||||||
return 33
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func CLIOutputQualityShift() int {
|
|
||||||
if __output_solexa_quality__ {
|
|
||||||
return 64
|
|
||||||
} else {
|
|
||||||
return 33
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func CLIProgressBar() bool {
|
func CLIProgressBar() bool {
|
||||||
return !__no_progress_bar__
|
return !__no_progress_bar__
|
||||||
}
|
}
|
||||||
@ -217,4 +195,4 @@ func SetFullFileBatch() {
|
|||||||
}
|
}
|
||||||
func FullFileBatch() bool {
|
func FullFileBatch() bool {
|
||||||
return __full_file_batch__
|
return __full_file_batch__
|
||||||
}
|
}
|
||||||
|
@ -98,10 +98,8 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
|||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(CLIInputQualityShift()))
|
|
||||||
opts = append(opts, obiformats.OptionsFullFileBatch(FullFileBatch()))
|
opts = append(opts, obiformats.OptionsFullFileBatch(FullFileBatch()))
|
||||||
|
|
||||||
|
|
||||||
if len(filenames) == 0 {
|
if len(filenames) == 0 {
|
||||||
log.Printf("Reading sequences from stdin in %s\n", CLIInputFormat())
|
log.Printf("Reading sequences from stdin in %s\n", CLIInputFormat())
|
||||||
opts = append(opts, obiformats.OptionsSource("stdin"))
|
opts = append(opts, obiformats.OptionsSource("stdin"))
|
||||||
|
@ -59,8 +59,6 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
|||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))
|
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsCompressed(CLICompressed()))
|
opts = append(opts, obiformats.OptionsCompressed(CLICompressed()))
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
|
@ -27,8 +27,6 @@ func CLIWriteCSV(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()))
|
|
||||||
opts = append(opts, obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
opts = append(opts, obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.CSVId(CLIPrintId()),
|
opts = append(opts, obiformats.CSVId(CLIPrintId()),
|
||||||
@ -37,6 +35,7 @@ func CLIWriteCSV(iterator obiiter.IBioSequence,
|
|||||||
obiformats.CSVDefinition(CLIPrintDefinition()),
|
obiformats.CSVDefinition(CLIPrintDefinition()),
|
||||||
obiformats.CSVKeys(CLIToBeKeptAttributes()),
|
obiformats.CSVKeys(CLIToBeKeptAttributes()),
|
||||||
obiformats.CSVSequence(CLIPrintSequence()),
|
obiformats.CSVSequence(CLIPrintSequence()),
|
||||||
|
obiformats.CSVAutoColumn(CLIAutoColumns()),
|
||||||
)
|
)
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
|
@ -32,7 +32,6 @@ func DistributeSequence(sequences obiiter.IBioSequence) {
|
|||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
||||||
obiformats.OptionsBatchSize(obioptions.CLIBatchSize()),
|
obiformats.OptionsBatchSize(obioptions.CLIBatchSize()),
|
||||||
obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()),
|
|
||||||
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
||||||
obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
||||||
|
|
||||||
|
@ -93,8 +93,6 @@ func CLISaveRefetenceDB(db obiseq.BioSequenceSlice) {
|
|||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()))
|
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
opts = append(opts, obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
|
Reference in New Issue
Block a user