mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-26 05:50:52 +00:00
Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d70b0a5b42 | ||
|
|
4774438644 | ||
|
|
6a8061cc4f | ||
|
|
e2563cd8df | ||
|
|
f2e81adf95 | ||
|
|
f27e9bc91e | ||
|
|
773e54965d | ||
|
|
ceca33998b | ||
|
|
b9bee5f426 | ||
|
|
c10df073a7 | ||
|
|
d3dac1b21f | ||
|
|
0df082da06 | ||
|
|
2452aef7a9 | ||
|
|
337954592d | ||
|
|
8a28c9ae7c | ||
|
|
b6b18c0fa1 | ||
|
|
67e2758d63 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -132,3 +132,4 @@ xxx.gz
|
|||||||
*.sav
|
*.sav
|
||||||
*.old
|
*.old
|
||||||
ncbitaxo.tgz
|
ncbitaxo.tgz
|
||||||
|
*.csv
|
||||||
|
|||||||
42
cmd/obitools/obimicroasm/main.go
Normal file
42
cmd/obitools/obimicroasm/main.go
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimicroasm"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
|
||||||
|
// go tool pprof -http=":8000" ./obimicroasm ./cpu.pprof
|
||||||
|
// f, err := os.Create("cpu.pprof")
|
||||||
|
// if err != nil {
|
||||||
|
// log.Fatal(err)
|
||||||
|
// }
|
||||||
|
// pprof.StartCPUProfile(f)
|
||||||
|
// defer pprof.StopCPUProfile()
|
||||||
|
|
||||||
|
// go tool trace cpu.trace
|
||||||
|
// ftrace, err := os.Create("cpu.trace")
|
||||||
|
// if err != nil {
|
||||||
|
// log.Fatal(err)
|
||||||
|
// }
|
||||||
|
// trace.Start(ftrace)
|
||||||
|
// defer trace.Stop()
|
||||||
|
|
||||||
|
optionParser := obioptions.GenerateOptionParser(obimicroasm.OptionSet)
|
||||||
|
|
||||||
|
optionParser(os.Args)
|
||||||
|
|
||||||
|
obidefault.SetStrictReadWorker(2)
|
||||||
|
obidefault.SetStrictWriteWorker(2)
|
||||||
|
|
||||||
|
seq := obimicroasm.CLIAssemblePCR()
|
||||||
|
|
||||||
|
println(obiformats.FormatFasta(seq, obiformats.FormatFastSeqJsonHeader))
|
||||||
|
obiutils.WaitForLastPipe()
|
||||||
|
}
|
||||||
@@ -47,12 +47,27 @@ func main() {
|
|||||||
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
obiconvert.OpenSequenceDataErrorMessage(args, err)
|
||||||
|
|
||||||
taxo := obitax.DefaultTaxonomy()
|
taxo := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
|
references := obitag.CLIRefDB()
|
||||||
|
|
||||||
|
if references == nil {
|
||||||
|
log.Panicln("No loaded reference database")
|
||||||
|
}
|
||||||
|
|
||||||
|
if taxo == nil {
|
||||||
|
taxo, err = references.ExtractTaxonomy(nil)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("No taxonomy specified or extractable from reference database: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxo.SetAsDefault()
|
||||||
|
}
|
||||||
|
|
||||||
if taxo == nil {
|
if taxo == nil {
|
||||||
log.Panicln("No loaded taxonomy")
|
log.Panicln("No loaded taxonomy")
|
||||||
}
|
}
|
||||||
|
|
||||||
references := obitag.CLIRefDB()
|
|
||||||
|
|
||||||
var identified obiiter.IBioSequence
|
var identified obiiter.IBioSequence
|
||||||
|
|
||||||
if obitag.CLIGeometricMode() {
|
if obitag.CLIGeometricMode() {
|
||||||
|
|||||||
@@ -1,13 +1,16 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obitaxonomy"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@@ -18,17 +21,49 @@ func main() {
|
|||||||
var iterator *obitax.ITaxon
|
var iterator *obitax.ITaxon
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
|
case obitaxonomy.CLIDownloadNCBI():
|
||||||
|
err := obitaxonomy.CLIDownloadNCBITaxdump()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot download NCBI taxonomy: %s", err.Error())
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
os.Exit(0)
|
||||||
|
|
||||||
|
case obitaxonomy.CLIExtractTaxonomy():
|
||||||
|
iter, err := obiconvert.CLIReadBioSequences(args...)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy, err := iter.ExtractTaxonomy()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot extract taxonomy: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy.SetAsDefault()
|
||||||
|
|
||||||
|
log.Infof("Number of extracted taxa: %d", taxonomy.Len())
|
||||||
|
iterator = taxonomy.AsTaxonSet().Sort().Iterator()
|
||||||
|
|
||||||
case obitaxonomy.CLIDumpSubtaxonomy():
|
case obitaxonomy.CLIDumpSubtaxonomy():
|
||||||
iterator = obitaxonomy.CLISubTaxonomyIterator()
|
iterator = obitaxonomy.CLISubTaxonomyIterator()
|
||||||
|
|
||||||
case obitaxonomy.CLIRequestsPathForTaxid() != "NA":
|
case obitaxonomy.CLIRequestsPathForTaxid() != "NA":
|
||||||
|
|
||||||
taxon := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
taxon, isAlias, err := obitax.DefaultTaxonomy().Taxon(obitaxonomy.CLIRequestsPathForTaxid())
|
||||||
|
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
log.Fatalf("Cannot identify the requested taxon: %s",
|
log.Fatalf("Cannot identify the requested taxon: %s (%v)",
|
||||||
obitaxonomy.CLIRequestsPathForTaxid())
|
obitaxonomy.CLIRequestsPathForTaxid(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if isAlias {
|
||||||
|
if obidefault.FailOnTaxonomy() {
|
||||||
|
log.Fatalf("Taxon %s is an alias for %s", taxon.String(), taxon.Parent().String())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s := taxon.Path()
|
s := taxon.Path()
|
||||||
|
|||||||
8
go.mod
8
go.mod
@@ -14,7 +14,7 @@ require (
|
|||||||
github.com/rrethy/ahocorasick v1.0.0
|
github.com/rrethy/ahocorasick v1.0.0
|
||||||
github.com/schollz/progressbar/v3 v3.13.1
|
github.com/schollz/progressbar/v3 v3.13.1
|
||||||
github.com/sirupsen/logrus v1.9.3
|
github.com/sirupsen/logrus v1.9.3
|
||||||
github.com/stretchr/testify v1.8.4
|
github.com/stretchr/testify v1.10.0
|
||||||
github.com/tevino/abool/v2 v2.1.0
|
github.com/tevino/abool/v2 v2.1.0
|
||||||
github.com/yuin/gopher-lua v1.1.1
|
github.com/yuin/gopher-lua v1.1.1
|
||||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
|
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
|
||||||
@@ -25,13 +25,17 @@ require (
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/Clever/csvlint v0.3.0 // indirect
|
github.com/Clever/csvlint v0.3.0 // indirect
|
||||||
|
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9 // indirect
|
||||||
github.com/buger/jsonparser v1.1.1 // indirect
|
github.com/buger/jsonparser v1.1.1 // indirect
|
||||||
|
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
|
github.com/ef-ds/deque/v2 v2.0.2 // indirect
|
||||||
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
|
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
|
||||||
github.com/kr/pretty v0.3.0 // indirect
|
github.com/kr/pretty v0.3.0 // indirect
|
||||||
github.com/kr/text v0.2.0 // indirect
|
github.com/kr/text v0.2.0 // indirect
|
||||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||||
github.com/rogpeppe/go-internal v1.6.1 // indirect
|
github.com/rogpeppe/go-internal v1.6.1 // indirect
|
||||||
|
go.etcd.io/bbolt v1.4.0 // indirect
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
@@ -45,7 +49,7 @@ require (
|
|||||||
github.com/shopspring/decimal v1.3.1 // indirect
|
github.com/shopspring/decimal v1.3.1 // indirect
|
||||||
github.com/ulikunitz/xz v0.5.11
|
github.com/ulikunitz/xz v0.5.11
|
||||||
golang.org/x/net v0.17.0 // indirect
|
golang.org/x/net v0.17.0 // indirect
|
||||||
golang.org/x/sys v0.17.0 // indirect
|
golang.org/x/sys v0.29.0 // indirect
|
||||||
golang.org/x/term v0.13.0 // indirect
|
golang.org/x/term v0.13.0 // indirect
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
|
||||||
)
|
)
|
||||||
|
|||||||
12
go.sum
12
go.sum
@@ -6,12 +6,16 @@ github.com/PaesslerAG/gval v1.2.2 h1:Y7iBzhgE09IGTt5QgGQ2IdaYYYOU134YGHBThD+wm9E
|
|||||||
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
github.com/PaesslerAG/gval v1.2.2/go.mod h1:XRFLwvmkTEdYziLdaCeCa5ImcGVrfQbeNUbVR+C6xac=
|
||||||
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi0jPI=
|
||||||
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
|
||||||
|
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9 h1:Zc1/GNsUpgZR9qm1EmRSKrnOHA7CCd0bIzGdq0cREN0=
|
||||||
|
github.com/TuftsBCB/io v0.0.0-20140121014543-22b94e9b23f9/go.mod h1:PZyV4WA3NpqtezSY0h6E6NARAmdDm0qwrydveOyR5Gc=
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
|
||||||
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
|
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
|
||||||
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
|
||||||
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
|
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
|
||||||
github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
|
github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
|
||||||
github.com/chen3feng/stl4go v0.1.1/go.mod h1:5ml3psLgETJjRJnMbPE+JiHLrCpt+Ajc2weeTECXzWU=
|
github.com/chen3feng/stl4go v0.1.1/go.mod h1:5ml3psLgETJjRJnMbPE+JiHLrCpt+Ajc2weeTECXzWU=
|
||||||
|
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8=
|
||||||
|
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
@@ -21,6 +25,8 @@ github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cn
|
|||||||
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
|
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
|
||||||
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
|
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
|
||||||
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
|
||||||
|
github.com/ef-ds/deque/v2 v2.0.2 h1:GQtDK1boBMu/qsNbSLQsqzwNptaioxZI39X3UxT5ALA=
|
||||||
|
github.com/ef-ds/deque/v2 v2.0.2/go.mod h1:hoZy4VooWLhRT4uS+sSCilfgBQUNptJU2FGqr08a5sc=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||||
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
||||||
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
|
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
|
||||||
@@ -73,6 +79,8 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
|
|||||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||||
|
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||||
|
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||||
github.com/tevino/abool/v2 v2.1.0 h1:7w+Vf9f/5gmKT4m4qkayb33/92M+Um45F2BkHOR+L/c=
|
github.com/tevino/abool/v2 v2.1.0 h1:7w+Vf9f/5gmKT4m4qkayb33/92M+Um45F2BkHOR+L/c=
|
||||||
github.com/tevino/abool/v2 v2.1.0/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEamtseuXY=
|
github.com/tevino/abool/v2 v2.1.0/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEamtseuXY=
|
||||||
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
|
||||||
@@ -80,6 +88,8 @@ github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8=
|
|||||||
github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
|
||||||
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
|
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
|
||||||
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
||||||
|
go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk=
|
||||||
|
go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk=
|
||||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
||||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
|
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
|
||||||
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
|
||||||
@@ -89,6 +99,8 @@ golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBc
|
|||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
|
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
|
||||||
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
|
||||||
|
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
|
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
|
||||||
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
|
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
|
||||||
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
|
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
|
||||||
|
|||||||
@@ -2,12 +2,9 @@ git.sr.ht/~sbinet/gg v0.3.1 h1:LNhjNn8DerC8f9DHLz6lS0YYul/b602DUxDgGkd/Aik=
|
|||||||
git.sr.ht/~sbinet/gg v0.3.1/go.mod h1:KGYtlADtqsqANL9ueOFkWymvzUvLMQllU5Ixo+8v3pc=
|
git.sr.ht/~sbinet/gg v0.3.1/go.mod h1:KGYtlADtqsqANL9ueOFkWymvzUvLMQllU5Ixo+8v3pc=
|
||||||
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b h1:slYM766cy2nI3BwyRiyQj/Ud48djTMtMebDqepE95rw=
|
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b h1:slYM766cy2nI3BwyRiyQj/Ud48djTMtMebDqepE95rw=
|
||||||
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM=
|
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM=
|
||||||
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
|
|
||||||
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
|
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
|
||||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||||
github.com/chzyer/logex v1.2.0 h1:+eqR0HfOetur4tgnC8ftU5imRnhi4te+BadWS95c5AM=
|
github.com/chzyer/logex v1.2.0 h1:+eqR0HfOetur4tgnC8ftU5imRnhi4te+BadWS95c5AM=
|
||||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8=
|
|
||||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
|
||||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8=
|
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8=
|
||||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
||||||
github.com/chzyer/test v0.0.0-20210722231415-061457976a23 h1:dZ0/VyGgQdVGAss6Ju0dt5P0QltE0SFY5Woh6hbIfiQ=
|
github.com/chzyer/test v0.0.0-20210722231415-061457976a23 h1:dZ0/VyGgQdVGAss6Ju0dt5P0QltE0SFY5Woh6hbIfiQ=
|
||||||
@@ -29,16 +26,21 @@ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
|
|||||||
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
|
||||||
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
|
||||||
github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2 h1:rcanfLhLDA8nozr/K289V1zcntHr3V+SHlXwzz1ZI2g=
|
github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2 h1:rcanfLhLDA8nozr/K289V1zcntHr3V+SHlXwzz1ZI2g=
|
||||||
|
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
|
||||||
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg=
|
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg=
|
||||||
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
|
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
|
||||||
github.com/kr/pty v1.1.1 h1:VkoXIwSboBpnk99O/KFauAEILuNHv5DVFKZMBN/gUgw=
|
github.com/kr/pty v1.1.1 h1:VkoXIwSboBpnk99O/KFauAEILuNHv5DVFKZMBN/gUgw=
|
||||||
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
|
||||||
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
|
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
|
||||||
github.com/smallnest/goroutine v1.1.1/go.mod h1:Fp8f6ZReubfdj0m4+NcUnW4IsAqKa+Pnrv9opEiD43E=
|
github.com/smallnest/goroutine v1.1.1/go.mod h1:Fp8f6ZReubfdj0m4+NcUnW4IsAqKa+Pnrv9opEiD43E=
|
||||||
|
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
|
||||||
|
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||||
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
|
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
|
||||||
github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
|
github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
|
||||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||||
|
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||||
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
|
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
|
||||||
|
go.etcd.io/gofail v0.2.0/go.mod h1:nL3ILMGfkXTekKI3clMBNazKnjUZjYLKmBHzsVAnC1o=
|
||||||
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
|
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
|
||||||
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
|
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
|
||||||
golang.org/x/image v0.6.0 h1:bR8b5okrPI3g/gyZakLZHeWxAR8Dn5CyxXv1hLH5g/4=
|
golang.org/x/image v0.6.0 h1:bR8b5okrPI3g/gyZakLZHeWxAR8Dn5CyxXv1hLH5g/4=
|
||||||
@@ -46,6 +48,7 @@ golang.org/x/image v0.6.0/go.mod h1:MXLdDR43H7cDJq5GEGXEVeeNhPgi+YYEQ2pC1byI1x0=
|
|||||||
golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY=
|
golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY=
|
||||||
golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||||
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw=
|
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw=
|
||||||
|
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
|
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
|
||||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||||
golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=
|
golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ package obidefault
|
|||||||
|
|
||||||
var __taxonomy__ = ""
|
var __taxonomy__ = ""
|
||||||
var __alternative_name__ = false
|
var __alternative_name__ = false
|
||||||
|
var __fail_on_taxonomy__ = false
|
||||||
|
var __update_taxid__ = false
|
||||||
|
|
||||||
func SelectedTaxonomy() string {
|
func SelectedTaxonomy() string {
|
||||||
return __taxonomy__
|
return __taxonomy__
|
||||||
@@ -30,3 +32,27 @@ func SetSelectedTaxonomy(taxonomy string) {
|
|||||||
func SetAlternativeNamesSelected(alt bool) {
|
func SetAlternativeNamesSelected(alt bool) {
|
||||||
__alternative_name__ = alt
|
__alternative_name__ = alt
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func SetFailOnTaxonomy(fail bool) {
|
||||||
|
__fail_on_taxonomy__ = fail
|
||||||
|
}
|
||||||
|
|
||||||
|
func SetUpdateTaxid(update bool) {
|
||||||
|
__update_taxid__ = update
|
||||||
|
}
|
||||||
|
|
||||||
|
func FailOnTaxonomyPtr() *bool {
|
||||||
|
return &__fail_on_taxonomy__
|
||||||
|
}
|
||||||
|
|
||||||
|
func UpdateTaxidPtr() *bool {
|
||||||
|
return &__update_taxid__
|
||||||
|
}
|
||||||
|
|
||||||
|
func FailOnTaxonomy() bool {
|
||||||
|
return __fail_on_taxonomy__
|
||||||
|
}
|
||||||
|
|
||||||
|
func UpdateTaxid() bool {
|
||||||
|
return __update_taxid__
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
"github.com/buger/jsonparser"
|
"github.com/buger/jsonparser"
|
||||||
)
|
)
|
||||||
@@ -201,8 +200,6 @@ func _parse_json_array_interface(str []byte, sequence *obiseq.BioSequence) ([]in
|
|||||||
}
|
}
|
||||||
|
|
||||||
func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
||||||
taxonomy := obitax.DefaultTaxonomy()
|
|
||||||
|
|
||||||
annotations := sequence.Annotations()
|
annotations := sequence.Annotations()
|
||||||
start := -1
|
start := -1
|
||||||
stop := -1
|
stop := -1
|
||||||
@@ -291,13 +288,8 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|||||||
|
|
||||||
case skey == "taxid":
|
case skey == "taxid":
|
||||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
||||||
taxid := obiutils.UnsafeString(value)
|
taxid := string(value)
|
||||||
taxon := taxonomy.Taxon(taxid)
|
sequence.SetTaxid(taxid)
|
||||||
if taxon != nil {
|
|
||||||
sequence.SetTaxon(taxon)
|
|
||||||
} else {
|
|
||||||
sequence.SetTaxid(string(value))
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
||||||
}
|
}
|
||||||
@@ -306,15 +298,7 @@ func _parse_json_header_(header string, sequence *obiseq.BioSequence) string {
|
|||||||
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
if dataType == jsonparser.Number || dataType == jsonparser.String {
|
||||||
rank, _ := obiutils.SplitInTwo(skey, '_')
|
rank, _ := obiutils.SplitInTwo(skey, '_')
|
||||||
|
|
||||||
taxid := obiutils.UnsafeString(value)
|
taxid := string(value)
|
||||||
taxon := taxonomy.Taxon(taxid)
|
|
||||||
|
|
||||||
if taxon != nil {
|
|
||||||
taxid = taxon.String()
|
|
||||||
} else {
|
|
||||||
taxid = string(value)
|
|
||||||
}
|
|
||||||
|
|
||||||
sequence.SetTaxid(taxid, rank)
|
sequence.SetTaxid(taxid, rank)
|
||||||
} else {
|
} else {
|
||||||
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
log.Fatalf("%s: Cannot parse taxid %s", sequence.Id(), string(value))
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ func OBIMimeTypeGuesser(stream io.Reader) (*mimetype.MIME, io.Reader, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fastqDetector := func(raw []byte, limit uint32) bool {
|
fastqDetector := func(raw []byte, limit uint32) bool {
|
||||||
ok, err := regexp.Match("^@[^ ].*\n[^ ]+\n\\+", raw)
|
ok, err := regexp.Match("^@[^ ].*\n[A-Za-z.-]+", raw)
|
||||||
return ok && err == nil
|
return ok && err == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
18
pkg/obiiter/extract_taxonomy.go
Normal file
18
pkg/obiiter/extract_taxonomy.go
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
package obiiter
|
||||||
|
|
||||||
|
import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
|
||||||
|
func (iterator *IBioSequence) ExtractTaxonomy() (taxonomy *obitax.Taxonomy, err error) {
|
||||||
|
|
||||||
|
for iterator.Next() {
|
||||||
|
slice := iterator.Get().Slice()
|
||||||
|
|
||||||
|
taxonomy, err = slice.ExtractTaxonomy(taxonomy)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
@@ -8,9 +8,12 @@ import (
|
|||||||
"math/bits"
|
"math/bits"
|
||||||
"os"
|
"os"
|
||||||
"slices"
|
"slices"
|
||||||
|
"sort"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
"github.com/ef-ds/deque/v2"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -89,12 +92,18 @@ type DeBruijnGraph struct {
|
|||||||
//
|
//
|
||||||
// *DeBruijnGraph - a pointer to the created De Bruijn's Graph
|
// *DeBruijnGraph - a pointer to the created De Bruijn's Graph
|
||||||
func MakeDeBruijnGraph(kmersize int) *DeBruijnGraph {
|
func MakeDeBruijnGraph(kmersize int) *DeBruijnGraph {
|
||||||
|
if kmersize > 31 {
|
||||||
|
log.Panicf("k-mer size %d is too large", kmersize)
|
||||||
|
}
|
||||||
|
|
||||||
|
kmermask := (^uint64(0) << (uint64(kmersize) * 2))
|
||||||
|
|
||||||
g := DeBruijnGraph{
|
g := DeBruijnGraph{
|
||||||
kmersize: kmersize,
|
kmersize: kmersize,
|
||||||
kmermask: ^(^uint64(0) << (uint64(kmersize) * 2)), // k-mer mask used to set to 0 the bits that are not in the k-mer
|
kmermask: kmermask, // k-mer mask used to set to 1 the bits that are not in the k-mer
|
||||||
prevc: uint64(1) << (uint64(kmersize-1) * 2),
|
prevc: (uint64(1) << (uint64(kmersize-1) * 2)) | kmermask,
|
||||||
prevg: uint64(2) << (uint64(kmersize-1) * 2),
|
prevg: (uint64(2) << (uint64(kmersize-1) * 2)) | kmermask,
|
||||||
prevt: uint64(3) << (uint64(kmersize-1) * 2),
|
prevt: (uint64(3) << (uint64(kmersize-1) * 2)) | kmermask,
|
||||||
graph: make(map[uint64]uint),
|
graph: make(map[uint64]uint),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -161,19 +170,34 @@ func (g *DeBruijnGraph) FilterMinWeight(min int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FilterMinWeight filters the DeBruijnGraph by removing nodes with weight less than the specified minimum.
|
||||||
|
//
|
||||||
|
// min: an integer representing the minimum count threshold.
|
||||||
|
func (g *DeBruijnGraph) FilterMaxWeight(min int) {
|
||||||
|
umin := uint(min)
|
||||||
|
for idx, count := range g.graph {
|
||||||
|
if count > umin {
|
||||||
|
delete(g.graph, idx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (g *DeBruijnGraph) Previouses(index uint64) []uint64 {
|
func (g *DeBruijnGraph) Previouses(index uint64) []uint64 {
|
||||||
if _, ok := g.graph[index]; !ok {
|
if _, ok := g.graph[index]; !ok {
|
||||||
log.Panicf("k-mer %s (index %d) is not in graph", g.DecodeNode(index), index)
|
log.Panicf("k-mer %s (index %d) is not in graph", g.DecodeNode(index), index)
|
||||||
}
|
}
|
||||||
|
|
||||||
rep := make([]uint64, 0, 4)
|
rep := make([]uint64, 0, 4)
|
||||||
|
|
||||||
|
index &= ^g.kmermask
|
||||||
index >>= 2
|
index >>= 2
|
||||||
|
|
||||||
if _, ok := g.graph[index]; ok {
|
key := index | g.kmermask
|
||||||
rep = append(rep, index)
|
if _, ok := g.graph[key]; ok {
|
||||||
|
rep = append(rep, key)
|
||||||
}
|
}
|
||||||
|
|
||||||
key := index | g.prevc
|
key = index | g.prevc
|
||||||
if _, ok := g.graph[key]; ok {
|
if _, ok := g.graph[key]; ok {
|
||||||
rep = append(rep, key)
|
rep = append(rep, key)
|
||||||
}
|
}
|
||||||
@@ -197,7 +221,7 @@ func (g *DeBruijnGraph) Nexts(index uint64) []uint64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
rep := make([]uint64, 0, 4)
|
rep := make([]uint64, 0, 4)
|
||||||
index = (index << 2) & g.kmermask
|
index = (index << 2) | g.kmermask
|
||||||
|
|
||||||
if _, ok := g.graph[index]; ok {
|
if _, ok := g.graph[index]; ok {
|
||||||
rep = append(rep, index)
|
rep = append(rep, index)
|
||||||
@@ -268,6 +292,33 @@ func (g *DeBruijnGraph) MaxHead() (uint64, int, bool) {
|
|||||||
return rep, int(max), found
|
return rep, int(max), found
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (g *DeBruijnGraph) Terminals() []uint64 {
|
||||||
|
rep := make([]uint64, 0, 10)
|
||||||
|
|
||||||
|
for k := range g.graph {
|
||||||
|
if len(g.Nexts(k)) == 0 {
|
||||||
|
rep = append(rep, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rep
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *DeBruijnGraph) MaxTerminal() (uint64, int, bool) {
|
||||||
|
rep := uint64(0)
|
||||||
|
max := uint(0)
|
||||||
|
found := false
|
||||||
|
for k, w := range g.graph {
|
||||||
|
if len(g.Nexts(k)) == 0 && w > max {
|
||||||
|
rep = k
|
||||||
|
max = w
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rep, int(max), found
|
||||||
|
}
|
||||||
|
|
||||||
func (g *DeBruijnGraph) MaxPath() []uint64 {
|
func (g *DeBruijnGraph) MaxPath() []uint64 {
|
||||||
path := make([]uint64, 0, 1000)
|
path := make([]uint64, 0, 1000)
|
||||||
ok := false
|
ok := false
|
||||||
@@ -318,7 +369,11 @@ func (g *DeBruijnGraph) LongestConsensus(id string, min_cov float64) (*obiseq.Bi
|
|||||||
return nil, fmt.Errorf("graph is empty")
|
return nil, fmt.Errorf("graph is empty")
|
||||||
}
|
}
|
||||||
//path := g.LongestPath(max_length)
|
//path := g.LongestPath(max_length)
|
||||||
path := g.HaviestPath()
|
path, err := g.HaviestPath(nil, nil, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
spath := path
|
spath := path
|
||||||
|
|
||||||
@@ -481,7 +536,7 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
current <<= 2
|
current <<= 2
|
||||||
current &= graph.kmermask
|
current |= graph.kmermask
|
||||||
b := iupac[sequence[0]]
|
b := iupac[sequence[0]]
|
||||||
current |= b[0]
|
current |= b[0]
|
||||||
graph.graph[current] = uint(graph.Weight(current) + weight)
|
graph.graph[current] = uint(graph.Weight(current) + weight)
|
||||||
@@ -495,6 +550,36 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// func (graph *DeBruijnGraph) search(current uint64, extension []byte, path []uint64, error,errormax int) ([]uint64,error) {
|
||||||
|
|
||||||
|
// path = append(path, current)
|
||||||
|
|
||||||
|
// if len(extension) == 0 {
|
||||||
|
// return path,nil
|
||||||
|
// }
|
||||||
|
|
||||||
|
// current <<= 2
|
||||||
|
// current &= graph.kmermask
|
||||||
|
// b := iupac[extension[0]]
|
||||||
|
|
||||||
|
// newPath := path
|
||||||
|
// if len(b) > 1 {
|
||||||
|
// newPath = slices.Clone(path)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// current |= b[0]
|
||||||
|
|
||||||
|
// _, ok := graph.graph[current]
|
||||||
|
// if ok {
|
||||||
|
// newPath = append(newPath, current)
|
||||||
|
// }
|
||||||
|
// rep, err := graph.search(current, extension[1:], newPath, error,errormax)
|
||||||
|
// if err != nil {
|
||||||
|
// return path,err
|
||||||
|
// }
|
||||||
|
|
||||||
|
// }
|
||||||
|
|
||||||
// Push appends a BioSequence to the DeBruijnGraph.
|
// Push appends a BioSequence to the DeBruijnGraph.
|
||||||
//
|
//
|
||||||
// Parameters:
|
// Parameters:
|
||||||
@@ -523,6 +608,7 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
|
|||||||
initFirstKmer(start+1, key)
|
initFirstKmer(start+1, key)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
key |= graph.kmermask
|
||||||
graph.graph[key] = uint(graph.Weight(key) + w)
|
graph.graph[key] = uint(graph.Weight(key) + w)
|
||||||
graph.append(s[graph.kmersize:], key, w)
|
graph.append(s[graph.kmersize:], key, w)
|
||||||
}
|
}
|
||||||
@@ -533,6 +619,110 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (graph *DeBruijnGraph) search(sequence []byte, mismatch, errormax int) []uint64 {
|
||||||
|
var initFirstKmer func(start int, key uint64) []uint64
|
||||||
|
|
||||||
|
initFirstKmer = func(start int, key uint64) []uint64 {
|
||||||
|
if start == graph.kmersize {
|
||||||
|
key |= graph.kmermask
|
||||||
|
if _, ok := graph.graph[key]; ok {
|
||||||
|
return []uint64{key}
|
||||||
|
} else {
|
||||||
|
return []uint64{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
keys := make([]uint64, 0, 1000)
|
||||||
|
|
||||||
|
if start == 0 {
|
||||||
|
key = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
key <<= 2
|
||||||
|
b := iupac[sequence[start]]
|
||||||
|
|
||||||
|
for _, code := range b {
|
||||||
|
key &= ^uint64(3)
|
||||||
|
key |= code
|
||||||
|
keys = append(keys, initFirstKmer(start+1, key)...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// w := []string{}
|
||||||
|
// for _, k := range keys {
|
||||||
|
// w = append(w, graph.DecodeNode(k))
|
||||||
|
// }
|
||||||
|
// // log.Warnf("For %s found %d matches : %v", sequence, len(keys), w)
|
||||||
|
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
|
rep := initFirstKmer(0, 0)
|
||||||
|
|
||||||
|
return rep
|
||||||
|
}
|
||||||
|
|
||||||
|
func (graph *DeBruijnGraph) Search(sequence *obiseq.BioSequence, errormax int) []uint64 {
|
||||||
|
|
||||||
|
s := sequence.Sequence() // Get the sequence as a byte slice
|
||||||
|
|
||||||
|
if len(s) < graph.KmerSize() {
|
||||||
|
s = slices.Clone(s)
|
||||||
|
for len(s) < graph.KmerSize() {
|
||||||
|
s = append(s, 'n')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Warnf("searching for %s", s)
|
||||||
|
keys := graph.search(s, 0, errormax)
|
||||||
|
|
||||||
|
for mismatch := 1; mismatch <= errormax; mismatch++ {
|
||||||
|
log.Warnf("searching with %d error for %s", mismatch, s)
|
||||||
|
for probe := range IterateOneError(s[0:graph.kmersize]) {
|
||||||
|
keys = append(keys,
|
||||||
|
graph.search(probe, mismatch, errormax)...,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
keys = obiutils.Unique(keys)
|
||||||
|
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
|
func (graph *DeBruijnGraph) BackSearch(sequence *obiseq.BioSequence, errormax int) []uint64 {
|
||||||
|
lkmer := graph.KmerSize()
|
||||||
|
|
||||||
|
s := sequence.Sequence() // Get the sequence as a byte slice
|
||||||
|
|
||||||
|
if len(s) < lkmer {
|
||||||
|
sn := []byte{}
|
||||||
|
ls := len(s)
|
||||||
|
for ls < lkmer {
|
||||||
|
sn = append(sn, 'n')
|
||||||
|
ls++
|
||||||
|
}
|
||||||
|
s = append(sn, s...)
|
||||||
|
} else {
|
||||||
|
s = s[(len(s) - lkmer):]
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Warnf("back-searching for %s", s)
|
||||||
|
|
||||||
|
keys := graph.search(s, 0, errormax)
|
||||||
|
|
||||||
|
for mismatch := 1; mismatch <= errormax; mismatch++ {
|
||||||
|
log.Warnf("searching with %d error for %s", mismatch, s)
|
||||||
|
for probe := range IterateOneError(s[0:graph.kmersize]) {
|
||||||
|
// log.Warnf("searching with %d error for %s", mismatch, probe)
|
||||||
|
keys = append(keys,
|
||||||
|
graph.search(probe, mismatch, errormax)...,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
keys = obiutils.Unique(keys)
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
func (graph *DeBruijnGraph) Gml() string {
|
func (graph *DeBruijnGraph) Gml() string {
|
||||||
buffer := bytes.NewBuffer(make([]byte, 0, 1000))
|
buffer := bytes.NewBuffer(make([]byte, 0, 1000))
|
||||||
|
|
||||||
@@ -614,7 +804,7 @@ func (graph *DeBruijnGraph) WriteGml(filename string) error {
|
|||||||
func (g *DeBruijnGraph) HammingDistance(kmer1, kmer2 uint64) int {
|
func (g *DeBruijnGraph) HammingDistance(kmer1, kmer2 uint64) int {
|
||||||
ident := ^((kmer1 & kmer2) | (^kmer1 & ^kmer2))
|
ident := ^((kmer1 & kmer2) | (^kmer1 & ^kmer2))
|
||||||
ident |= (ident >> 1)
|
ident |= (ident >> 1)
|
||||||
ident &= 0x5555555555555555 & g.kmermask
|
ident &= 0x5555555555555555 & ^g.kmermask
|
||||||
return bits.OnesCount64(ident)
|
return bits.OnesCount64(ident)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -638,11 +828,23 @@ func (h *UInt64Heap) Pop() any {
|
|||||||
return x
|
return x
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *DeBruijnGraph) HaviestPath() []uint64 {
|
func (g *DeBruijnGraph) HaviestPath(starts, stops []uint64, backPath bool) ([]uint64, error) {
|
||||||
|
|
||||||
if g.HasCycle() {
|
// if g.HasCycle() {
|
||||||
return nil
|
// return nil, fmt.Errorf("graph has a cycle")
|
||||||
|
// }
|
||||||
|
|
||||||
|
following := g.Nexts
|
||||||
|
|
||||||
|
if backPath {
|
||||||
|
following = g.Previouses
|
||||||
}
|
}
|
||||||
|
|
||||||
|
stopNodes := make(map[uint64]bool, len(stops))
|
||||||
|
for _, n := range stops {
|
||||||
|
stopNodes[n] = true
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize the distance array and visited set
|
// Initialize the distance array and visited set
|
||||||
distances := make(map[uint64]int)
|
distances := make(map[uint64]int)
|
||||||
visited := make(map[uint64]bool)
|
visited := make(map[uint64]bool)
|
||||||
@@ -654,7 +856,11 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
|
|||||||
heap.Init(queue)
|
heap.Init(queue)
|
||||||
|
|
||||||
startNodes := make(map[uint64]struct{})
|
startNodes := make(map[uint64]struct{})
|
||||||
for _, n := range g.Heads() {
|
if starts == nil {
|
||||||
|
starts = g.Heads()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, n := range starts {
|
||||||
startNodes[n] = struct{}{}
|
startNodes[n] = struct{}{}
|
||||||
heap.Push(queue, n)
|
heap.Push(queue, n)
|
||||||
distances[n] = g.Weight(n)
|
distances[n] = g.Weight(n)
|
||||||
@@ -686,7 +892,11 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
|
|||||||
log.Warn("current node is 0")
|
log.Warn("current node is 0")
|
||||||
}
|
}
|
||||||
// Update the distance of the neighbors
|
// Update the distance of the neighbors
|
||||||
nextNodes := g.Nexts(currentNode)
|
|
||||||
|
nextNodes := following(currentNode)
|
||||||
|
if _, ok := stopNodes[currentNode]; ok {
|
||||||
|
nextNodes = []uint64{}
|
||||||
|
}
|
||||||
for _, nextNode := range nextNodes {
|
for _, nextNode := range nextNodes {
|
||||||
if nextNode == 0 {
|
if nextNode == 0 {
|
||||||
log.Warn("next node is 0")
|
log.Warn("next node is 0")
|
||||||
@@ -718,16 +928,178 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if slices.Contains(heaviestPath, currentNode) {
|
if slices.Contains(heaviestPath, currentNode) {
|
||||||
log.Panicf("Cycle detected %v -> %v (%v) len(%v), graph: %v", heaviestPath, currentNode, startNodes, len(heaviestPath), g.Len())
|
return nil, fmt.Errorf("cycle detected in heaviest path")
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
heaviestPath = append(heaviestPath, currentNode)
|
heaviestPath = append(heaviestPath, currentNode)
|
||||||
|
|
||||||
// Reverse the path
|
// Reverse the path
|
||||||
slices.Reverse(heaviestPath)
|
if !backPath {
|
||||||
|
slices.Reverse(heaviestPath)
|
||||||
|
}
|
||||||
|
|
||||||
return heaviestPath
|
return heaviestPath, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *DeBruijnGraph) HaviestPathDSU(starts, stops []uint64, backPath bool) ([]uint64, error) {
|
||||||
|
// Collect and sort edges
|
||||||
|
type Edge struct {
|
||||||
|
weight float64
|
||||||
|
u, v uint64
|
||||||
|
}
|
||||||
|
edges := make([]Edge, 0)
|
||||||
|
|
||||||
|
// Function to get next nodes (either Nexts or Previouses based on backPath)
|
||||||
|
following := g.Nexts
|
||||||
|
previouses := g.Previouses
|
||||||
|
if backPath {
|
||||||
|
following = g.Previouses
|
||||||
|
previouses = g.Nexts
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect all edges
|
||||||
|
for u := range g.graph {
|
||||||
|
for _, v := range following(u) {
|
||||||
|
edges = append(edges, Edge{
|
||||||
|
weight: float64(min(g.Weight(u), g.Weight(v))),
|
||||||
|
u: u,
|
||||||
|
v: v,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort edges by weight in descending order
|
||||||
|
sort.Slice(edges, func(i, j int) bool {
|
||||||
|
return edges[i].weight > edges[j].weight
|
||||||
|
})
|
||||||
|
|
||||||
|
// Initialize disjoint set data structure
|
||||||
|
parent := make(map[uint64]uint64)
|
||||||
|
for u := range g.graph {
|
||||||
|
parent[u] = u
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find with path compression
|
||||||
|
var find func(uint64) uint64
|
||||||
|
find = func(node uint64) uint64 {
|
||||||
|
if parent[node] != node {
|
||||||
|
parent[node] = find(parent[node])
|
||||||
|
}
|
||||||
|
return parent[node]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Union function that returns true if cycle is detected
|
||||||
|
union := func(u, v uint64) bool {
|
||||||
|
rootU := find(u)
|
||||||
|
rootV := find(v)
|
||||||
|
if rootU == rootV {
|
||||||
|
return true // Cycle detected
|
||||||
|
}
|
||||||
|
parent[rootV] = rootU
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no specific starts provided, use graph heads
|
||||||
|
if starts == nil {
|
||||||
|
if !backPath {
|
||||||
|
starts = g.Heads()
|
||||||
|
} else {
|
||||||
|
starts = g.Terminals()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no specific stops provided, use graph terminals
|
||||||
|
if stops == nil {
|
||||||
|
if !backPath {
|
||||||
|
stops = g.Terminals()
|
||||||
|
} else {
|
||||||
|
stops = g.Heads()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert stops to a map for O(1) lookup
|
||||||
|
stopNodes := make(map[uint64]bool)
|
||||||
|
for _, stop := range stops {
|
||||||
|
stopNodes[stop] = false
|
||||||
|
}
|
||||||
|
|
||||||
|
var path []uint64
|
||||||
|
maxCapacity := math.Inf(-1)
|
||||||
|
stopEdge := []Edge{}
|
||||||
|
|
||||||
|
// Process edges in descending order of weight
|
||||||
|
for _, edge := range edges {
|
||||||
|
if stopNodes[edge.u] {
|
||||||
|
continue // Skip edges from stop nodes
|
||||||
|
}
|
||||||
|
|
||||||
|
if in, ok := stopNodes[edge.v]; ok {
|
||||||
|
if !in {
|
||||||
|
stopEdge = append(stopEdge, edge)
|
||||||
|
stopNodes[edge.v] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if union(edge.u, edge.v) {
|
||||||
|
continue // Skip if creates cycle
|
||||||
|
}
|
||||||
|
|
||||||
|
pathFound := false
|
||||||
|
for _, sedge := range stopEdge {
|
||||||
|
// Check if any start-stop pair is connected
|
||||||
|
fv := find(sedge.v)
|
||||||
|
for _, s := range starts {
|
||||||
|
fs := find(s)
|
||||||
|
// log.Warnf("Start: %d, Stop: %d", fs, fv)
|
||||||
|
if fs == fv {
|
||||||
|
pathFound = true
|
||||||
|
maxCapacity = edge.weight
|
||||||
|
|
||||||
|
// Reconstruct path
|
||||||
|
current := sedge.v
|
||||||
|
path = []uint64{current}
|
||||||
|
for current != s {
|
||||||
|
oldcurrent := current
|
||||||
|
// log.Warnf("Start: %d, Current: %d, Previous: %v", s, current, previouses(current))
|
||||||
|
for _, prev := range previouses(current) {
|
||||||
|
if find(prev) == fs {
|
||||||
|
path = append(path, prev)
|
||||||
|
current = prev
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if current == oldcurrent {
|
||||||
|
log.Fatalf("We are stuck")
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
// log.Warnf("Built path: %v", path)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if pathFound {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if pathFound {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// log.Warnf("Stop edge: %v", stopEdge)
|
||||||
|
|
||||||
|
// Process edges in descending order of weight
|
||||||
|
|
||||||
|
if path == nil {
|
||||||
|
return nil, fmt.Errorf("no valid path found")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !backPath {
|
||||||
|
slices.Reverse(path)
|
||||||
|
}
|
||||||
|
log.Warnf("Max capacity: %5.0f: %v", maxCapacity, g.DecodePath(path))
|
||||||
|
|
||||||
|
return path, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *DeBruijnGraph) HasCycle() bool {
|
func (g *DeBruijnGraph) HasCycle() bool {
|
||||||
@@ -765,3 +1137,59 @@ func (g *DeBruijnGraph) HasCycle() bool {
|
|||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HasCycleInDegree détecte la présence d'un cycle dans le graphe en utilisant la méthode des degrés entrants.
|
||||||
|
// Cette méthode est basée sur le tri topologique : si on ne peut pas trier tous les nœuds,
|
||||||
|
// alors il y a un cycle.
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - bool: true si le graphe contient un cycle, false sinon
|
||||||
|
func (g *DeBruijnGraph) HasCycleInDegree() bool {
|
||||||
|
// Créer une map pour stocker les degrés entrants de chaque nœud
|
||||||
|
inDegree := make(map[uint64]int)
|
||||||
|
|
||||||
|
// Initialiser les degrés entrants à 0 pour tous les nœuds
|
||||||
|
for node := range g.graph {
|
||||||
|
inDegree[node] = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculer les degrés entrants
|
||||||
|
for node := range g.graph {
|
||||||
|
for _, next := range g.Nexts(node) {
|
||||||
|
inDegree[next]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Créer une deque pour stocker les nœuds avec un degré entrant de 0
|
||||||
|
queue := deque.Deque[uint64]{}
|
||||||
|
|
||||||
|
// Ajouter tous les nœuds avec un degré entrant de 0 à la deque
|
||||||
|
for node := range g.graph {
|
||||||
|
if inDegree[node] == 0 {
|
||||||
|
queue.PushBack(node)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
visited := 0 // Compteur de nœuds visités
|
||||||
|
|
||||||
|
// Parcours BFS
|
||||||
|
for queue.Len() > 0 {
|
||||||
|
// Retirer le premier nœud de la deque
|
||||||
|
node, _ := queue.PopFront()
|
||||||
|
visited++
|
||||||
|
|
||||||
|
// Pour chaque nœud adjacent
|
||||||
|
for _, next := range g.Nexts(node) {
|
||||||
|
// Réduire son degré entrant
|
||||||
|
inDegree[next]--
|
||||||
|
|
||||||
|
// Si le degré entrant devient 0, l'ajouter à la deque
|
||||||
|
if inDegree[next] == 0 {
|
||||||
|
queue.PushBack(next)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// S'il y a un cycle, on n'aura pas pu visiter tous les nœuds
|
||||||
|
return visited != len(g.graph)
|
||||||
|
}
|
||||||
|
|||||||
45
pkg/obikmer/oneerror.go
Normal file
45
pkg/obikmer/oneerror.go
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
package obikmer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"iter"
|
||||||
|
"slices"
|
||||||
|
)
|
||||||
|
|
||||||
|
var baseError = map[byte]byte{
|
||||||
|
'a': 'b',
|
||||||
|
'c': 'd',
|
||||||
|
'g': 'h',
|
||||||
|
't': 'v',
|
||||||
|
'r': 'y',
|
||||||
|
'y': 'r',
|
||||||
|
's': 'w',
|
||||||
|
'w': 's',
|
||||||
|
'k': 'm',
|
||||||
|
'm': 'k',
|
||||||
|
'd': 'c',
|
||||||
|
'v': 't',
|
||||||
|
'h': 'g',
|
||||||
|
'b': 'a',
|
||||||
|
}
|
||||||
|
|
||||||
|
type BytesItem []byte
|
||||||
|
|
||||||
|
func IterateOneError(kmer []byte) iter.Seq[BytesItem] {
|
||||||
|
lkmer := len(kmer)
|
||||||
|
return func(yield func(BytesItem) bool) {
|
||||||
|
for p := 0; p < lkmer; p++ {
|
||||||
|
for p < lkmer && kmer[p] == 'n' {
|
||||||
|
p++
|
||||||
|
}
|
||||||
|
|
||||||
|
if p < lkmer {
|
||||||
|
nkmer := slices.Clone(kmer)
|
||||||
|
nkmer[p] = baseError[kmer[p]]
|
||||||
|
if !yield(nkmer) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -4,4 +4,5 @@ import lua "github.com/yuin/gopher-lua"
|
|||||||
|
|
||||||
func RegisterObilib(luaState *lua.LState) {
|
func RegisterObilib(luaState *lua.LState) {
|
||||||
RegisterObiSeq(luaState)
|
RegisterObiSeq(luaState)
|
||||||
|
RegisterObiTaxonomy(luaState)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
package obilua
|
package obilua
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
lua "github.com/yuin/gopher-lua"
|
lua "github.com/yuin/gopher-lua"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -16,6 +18,7 @@ func registerBioSequenceType(luaState *lua.LState) {
|
|||||||
bioSequenceType := luaState.NewTypeMetatable(luaBioSequenceTypeName)
|
bioSequenceType := luaState.NewTypeMetatable(luaBioSequenceTypeName)
|
||||||
luaState.SetGlobal(luaBioSequenceTypeName, bioSequenceType)
|
luaState.SetGlobal(luaBioSequenceTypeName, bioSequenceType)
|
||||||
luaState.SetField(bioSequenceType, "new", luaState.NewFunction(newObiSeq))
|
luaState.SetField(bioSequenceType, "new", luaState.NewFunction(newObiSeq))
|
||||||
|
luaState.SetField(bioSequenceType, "nil", obiseq2Lua(luaState, nil))
|
||||||
|
|
||||||
luaState.SetField(bioSequenceType, "__index",
|
luaState.SetField(bioSequenceType, "__index",
|
||||||
luaState.SetFuncs(luaState.NewTable(),
|
luaState.SetFuncs(luaState.NewTable(),
|
||||||
@@ -53,6 +56,7 @@ var bioSequenceMethods = map[string]lua.LGFunction{
|
|||||||
"definition": bioSequenceGetSetDefinition,
|
"definition": bioSequenceGetSetDefinition,
|
||||||
"count": bioSequenceGetSetCount,
|
"count": bioSequenceGetSetCount,
|
||||||
"taxid": bioSequenceGetSetTaxid,
|
"taxid": bioSequenceGetSetTaxid,
|
||||||
|
"taxon": bioSequenceGetSetTaxon,
|
||||||
"attribute": bioSequenceGetSetAttribute,
|
"attribute": bioSequenceGetSetAttribute,
|
||||||
"len": bioSequenceGetLength,
|
"len": bioSequenceGetLength,
|
||||||
"has_sequence": bioSequenceHasSequence,
|
"has_sequence": bioSequenceHasSequence,
|
||||||
@@ -62,6 +66,9 @@ var bioSequenceMethods = map[string]lua.LGFunction{
|
|||||||
"md5_string": bioSequenceGetMD5String,
|
"md5_string": bioSequenceGetMD5String,
|
||||||
"subsequence": bioSequenceGetSubsequence,
|
"subsequence": bioSequenceGetSubsequence,
|
||||||
"reverse_complement": bioSequenceGetRevcomp,
|
"reverse_complement": bioSequenceGetRevcomp,
|
||||||
|
"fasta": bioSequenceGetFasta,
|
||||||
|
"fastq": bioSequenceGetFastq,
|
||||||
|
"string": bioSequenceAsString,
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkBioSequence checks if the first argument in the Lua stack is a *obiseq.BioSequence.
|
// checkBioSequence checks if the first argument in the Lua stack is a *obiseq.BioSequence.
|
||||||
@@ -254,3 +261,88 @@ func bioSequenceGetRevcomp(luaState *lua.LState) int {
|
|||||||
luaState.Push(obiseq2Lua(luaState, revcomp))
|
luaState.Push(obiseq2Lua(luaState, revcomp))
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func bioSequenceGetSetTaxon(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
taxon := checkTaxon(luaState, 2)
|
||||||
|
|
||||||
|
s.SetTaxon(taxon)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
taxon := s.Taxon(obitax.DefaultTaxonomy())
|
||||||
|
luaState.Push(taxon2Lua(luaState, taxon))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceGetFasta(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := obiformats.FormatFasta(s, formater)
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceGetFastq(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := obiformats.FormatFastq(s, formater)
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceAsString(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequence(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
format := obiformats.FormatFasta
|
||||||
|
|
||||||
|
if s.HasQualities() {
|
||||||
|
format = obiformats.FormatFastq
|
||||||
|
}
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := format(s, formater)
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
package obilua
|
package obilua
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
lua "github.com/yuin/gopher-lua"
|
lua "github.com/yuin/gopher-lua"
|
||||||
)
|
)
|
||||||
@@ -11,6 +14,7 @@ func registerBioSequenceSliceType(luaState *lua.LState) {
|
|||||||
bioSequenceSliceType := luaState.NewTypeMetatable(luaBioSequenceSliceTypeName)
|
bioSequenceSliceType := luaState.NewTypeMetatable(luaBioSequenceSliceTypeName)
|
||||||
luaState.SetGlobal(luaBioSequenceSliceTypeName, bioSequenceSliceType)
|
luaState.SetGlobal(luaBioSequenceSliceTypeName, bioSequenceSliceType)
|
||||||
luaState.SetField(bioSequenceSliceType, "new", luaState.NewFunction(newObiSeqSlice))
|
luaState.SetField(bioSequenceSliceType, "new", luaState.NewFunction(newObiSeqSlice))
|
||||||
|
luaState.SetField(bioSequenceSliceType, "nil", obiseqslice2Lua(luaState, nil))
|
||||||
|
|
||||||
luaState.SetField(bioSequenceSliceType, "__index",
|
luaState.SetField(bioSequenceSliceType, "__index",
|
||||||
luaState.SetFuncs(luaState.NewTable(),
|
luaState.SetFuncs(luaState.NewTable(),
|
||||||
@@ -37,6 +41,9 @@ var bioSequenceSliceMethods = map[string]lua.LGFunction{
|
|||||||
"pop": bioSequenceSlicePop,
|
"pop": bioSequenceSlicePop,
|
||||||
"sequence": bioSequenceSliceGetSetSequence,
|
"sequence": bioSequenceSliceGetSetSequence,
|
||||||
"len": bioSequenceSliceGetLength,
|
"len": bioSequenceSliceGetLength,
|
||||||
|
"fasta": bioSequenceSliceGetFasta,
|
||||||
|
"fastq": bioSequenceSliceGetFastq,
|
||||||
|
"string": bioSequenceSliceAsString,
|
||||||
}
|
}
|
||||||
|
|
||||||
func checkBioSequenceSlice(L *lua.LState) *obiseq.BioSequenceSlice {
|
func checkBioSequenceSlice(L *lua.LState) *obiseq.BioSequenceSlice {
|
||||||
@@ -105,3 +112,96 @@ func bioSequenceSlicePop(luaState *lua.LState) int {
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func bioSequenceSliceGetFasta(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequenceSlice(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txts := make([]string, len(*s))
|
||||||
|
|
||||||
|
for i, seq := range *s {
|
||||||
|
txts[i] = obiformats.FormatFasta(seq, formater)
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := strings.Join(txts, "\n")
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceSliceGetFastq(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequenceSlice(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txts := make([]string, len(*s))
|
||||||
|
|
||||||
|
for i, seq := range *s {
|
||||||
|
txts[i] = obiformats.FormatFastq(seq, formater)
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := strings.Join(txts, "\n")
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func bioSequenceSliceAsString(luaState *lua.LState) int {
|
||||||
|
s := checkBioSequenceSlice(luaState)
|
||||||
|
|
||||||
|
formater := obiformats.FormatFastSeqJsonHeader
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
format := luaState.CheckString(2)
|
||||||
|
switch format {
|
||||||
|
case "json":
|
||||||
|
formater = obiformats.FormatFastSeqJsonHeader
|
||||||
|
case "obi":
|
||||||
|
formater = obiformats.FormatFastSeqOBIHeader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
txts := make([]string, len(*s))
|
||||||
|
|
||||||
|
format := obiformats.FormatFasta
|
||||||
|
|
||||||
|
allQual := true
|
||||||
|
|
||||||
|
for _, s := range *s {
|
||||||
|
allQual = allQual && s.HasQualities()
|
||||||
|
}
|
||||||
|
|
||||||
|
if allQual {
|
||||||
|
format = obiformats.FormatFastq
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, seq := range *s {
|
||||||
|
txts[i] = format(seq, formater)
|
||||||
|
}
|
||||||
|
|
||||||
|
txt := strings.Join(txts, "\n")
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(txt))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|||||||
139
pkg/obilua/obitaxon.go
Normal file
139
pkg/obilua/obitaxon.go
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
package obilua
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
lua "github.com/yuin/gopher-lua"
|
||||||
|
)
|
||||||
|
|
||||||
|
const luaTaxonTypeName = "Taxon"
|
||||||
|
|
||||||
|
func registerTaxonType(luaState *lua.LState) {
|
||||||
|
taxonType := luaState.NewTypeMetatable(luaTaxonTypeName)
|
||||||
|
luaState.SetGlobal(luaTaxonTypeName, taxonType)
|
||||||
|
luaState.SetField(taxonType, "new", luaState.NewFunction(newTaxon))
|
||||||
|
luaState.SetField(taxonType, "nil", taxonomy2Lua(luaState, nil))
|
||||||
|
|
||||||
|
luaState.SetField(taxonType, "__index",
|
||||||
|
luaState.SetFuncs(luaState.NewTable(),
|
||||||
|
taxonMethods))
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxon2Lua(interpreter *lua.LState,
|
||||||
|
taxon *obitax.Taxon) lua.LValue {
|
||||||
|
ud := interpreter.NewUserData()
|
||||||
|
ud.Value = taxon
|
||||||
|
interpreter.SetMetatable(ud, interpreter.GetTypeMetatable(luaTaxonTypeName))
|
||||||
|
|
||||||
|
return ud
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTaxon(luaState *lua.LState) int {
|
||||||
|
taxonomy := checkTaxonomy(luaState)
|
||||||
|
taxid := luaState.CheckString(2)
|
||||||
|
parent := luaState.CheckString(3)
|
||||||
|
sname := luaState.CheckString(4)
|
||||||
|
rank := luaState.CheckString(5)
|
||||||
|
|
||||||
|
isroot := false
|
||||||
|
|
||||||
|
if luaState.GetTop() > 5 {
|
||||||
|
isroot = luaState.CheckBool(6)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxon, err := taxonomy.AddTaxon(taxid, parent, rank, isroot, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
luaState.RaiseError("(%v,%v,%v) : Error on taxon creation: %v", taxid, parent, sname, err)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
taxon.SetName(sname, "scientific name")
|
||||||
|
|
||||||
|
luaState.Push(taxon2Lua(luaState, taxon))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// taxonMethods maps each method name callable on a Lua Taxon value to the Go
// function implementing it. registerTaxonType installs this table as the
// `__index` of the Taxon metatable.
var taxonMethods = map[string]lua.LGFunction{
	"string": taxonAsString,
	"scientific_name": taxonGetSetScientificName,
	"parent": taxonGetParent,
	"taxon_at_rank": taxGetTaxonAtRank,
	"species": taxonGetSpecies,
	"genus": taxonGetGenus,
	"family": taxonGetFamily,
}
|
||||||
|
|
||||||
|
func checkTaxon(L *lua.LState, i int) *obitax.Taxon {
|
||||||
|
ud := L.CheckUserData(i)
|
||||||
|
if v, ok := ud.Value.(*obitax.Taxon); ok {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
L.ArgError(i, "obitax.Taxon expected")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonAsString(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
luaState.Push(lua.LString(taxon.String()))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetSetScientificName(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
if luaState.GetTop() > 1 {
|
||||||
|
sname := luaState.CheckString(2)
|
||||||
|
taxon.SetName(sname, "scientific name")
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
luaState.Push(lua.LString(taxon.ScientificName()))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetParent(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
parent := taxon.Parent()
|
||||||
|
luaState.Push(taxon2Lua(luaState, parent))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetSpecies(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
species := taxon.Species()
|
||||||
|
luaState.Push(taxon2Lua(luaState, species))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetGenus(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
genus := taxon.Genus()
|
||||||
|
luaState.Push(taxon2Lua(luaState, genus))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonGetFamily(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
|
||||||
|
family := taxon.Family()
|
||||||
|
luaState.Push(taxon2Lua(luaState, family))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxGetTaxonAtRank(luaState *lua.LState) int {
|
||||||
|
taxon := checkTaxon(luaState, 1)
|
||||||
|
rank := luaState.CheckString(2)
|
||||||
|
|
||||||
|
taxonAt := taxon.TaxonAtRank(rank)
|
||||||
|
|
||||||
|
luaState.Push(taxon2Lua(luaState, taxonAt))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
}
|
||||||
116
pkg/obilua/obitaxonomy.go
Normal file
116
pkg/obilua/obitaxonomy.go
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
package obilua
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
lua "github.com/yuin/gopher-lua"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RegisterObiTaxonomy declares the taxonomy related Lua types in the given
// interpreter: the Taxonomy type first, then the Taxon type.
func RegisterObiTaxonomy(luaState *lua.LState) {
	registerTaxonomyType(luaState)
	registerTaxonType(luaState)
}
|
||||||
|
|
||||||
|
const luaTaxonomyTypeName = "Taxonomy"
|
||||||
|
|
||||||
|
func registerTaxonomyType(luaState *lua.LState) {
|
||||||
|
taxonomyType := luaState.NewTypeMetatable(luaTaxonomyTypeName)
|
||||||
|
luaState.SetGlobal(luaTaxonomyTypeName, taxonomyType)
|
||||||
|
luaState.SetField(taxonomyType, "new", luaState.NewFunction(newTaxonomy))
|
||||||
|
luaState.SetField(taxonomyType, "default", luaState.NewFunction(defaultTaxonomy))
|
||||||
|
luaState.SetField(taxonomyType, "has_default", luaState.NewFunction(hasDefaultTaxonomy))
|
||||||
|
luaState.SetField(taxonomyType, "nil", taxon2Lua(luaState, nil))
|
||||||
|
luaState.SetField(taxonomyType, "__index",
|
||||||
|
luaState.SetFuncs(luaState.NewTable(),
|
||||||
|
taxonomyMethods))
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonomy2Lua(interpreter *lua.LState,
|
||||||
|
taxonomy *obitax.Taxonomy) lua.LValue {
|
||||||
|
ud := interpreter.NewUserData()
|
||||||
|
ud.Value = taxonomy
|
||||||
|
interpreter.SetMetatable(ud, interpreter.GetTypeMetatable(luaTaxonomyTypeName))
|
||||||
|
|
||||||
|
return ud
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTaxonomy(luaState *lua.LState) int {
|
||||||
|
name := luaState.CheckString(1)
|
||||||
|
code := luaState.CheckString(2)
|
||||||
|
|
||||||
|
charset := obiutils.AsciiAlphaNumSet
|
||||||
|
if luaState.GetTop() > 2 {
|
||||||
|
charset = obiutils.AsciiSetFromString(luaState.CheckString(3))
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy := obitax.NewTaxonomy(name, code, charset)
|
||||||
|
|
||||||
|
luaState.Push(taxonomy2Lua(luaState, taxonomy))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func defaultTaxonomy(luaState *lua.LState) int {
|
||||||
|
taxonomy := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
|
if taxonomy == nil {
|
||||||
|
luaState.RaiseError("No default taxonomy")
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
luaState.Push(taxonomy2Lua(luaState, taxonomy))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func hasDefaultTaxonomy(luaState *lua.LState) int {
|
||||||
|
taxonomy := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
|
luaState.Push(lua.LBool(taxonomy != nil))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
// taxonomyMethods maps each method name callable on a Lua Taxonomy value to
// the Go function implementing it. registerTaxonomyType installs this table
// as the `__index` of the Taxonomy metatable.
var taxonomyMethods = map[string]lua.LGFunction{
	"name": taxonomyGetName,
	"code": taxonomyGetCode,
	"taxon": taxonomyGetTaxon,
}
|
||||||
|
|
||||||
|
func checkTaxonomy(L *lua.LState) *obitax.Taxonomy {
|
||||||
|
ud := L.CheckUserData(1)
|
||||||
|
if v, ok := ud.Value.(*obitax.Taxonomy); ok {
|
||||||
|
return v
|
||||||
|
}
|
||||||
|
L.ArgError(1, "obitax.Taxonomy expected")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonomyGetName(luaState *lua.LState) int {
|
||||||
|
taxo := checkTaxonomy(luaState)
|
||||||
|
luaState.Push(lua.LString(taxo.Name()))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonomyGetCode(luaState *lua.LState) int {
|
||||||
|
taxo := checkTaxonomy(luaState)
|
||||||
|
luaState.Push(lua.LString(taxo.Code()))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func taxonomyGetTaxon(luaState *lua.LState) int {
|
||||||
|
taxo := checkTaxonomy(luaState)
|
||||||
|
taxid := luaState.CheckString(2)
|
||||||
|
taxon, isAlias, err := taxo.Taxon(taxid)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
luaState.RaiseError("%s : Error on taxon taxon: %v", taxid, err)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if isAlias && obidefault.FailOnTaxonomy() {
|
||||||
|
luaState.RaiseError("%s : Taxon is an alias of %s", taxid, taxon.String())
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
luaState.Push(taxon2Lua(luaState, taxon))
|
||||||
|
return 1
|
||||||
|
}
|
||||||
@@ -66,10 +66,6 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
options.GetEnv("OBISOLEXA"),
|
options.GetEnv("OBISOLEXA"),
|
||||||
options.Description("Decodes quality string according to the Solexa specification."))
|
options.Description("Decodes quality string according to the Solexa specification."))
|
||||||
|
|
||||||
options.BoolVar(obidefault.CompressedPtr(), "compressed", obidefault.CompressOutput(),
|
|
||||||
options.Alias("Z"),
|
|
||||||
options.Description("Compress all the result using gzip"))
|
|
||||||
|
|
||||||
for _, o := range optionset {
|
for _, o := range optionset {
|
||||||
o(options)
|
o(options)
|
||||||
}
|
}
|
||||||
@@ -181,6 +177,15 @@ func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bo
|
|||||||
options.Alias("a"),
|
options.Alias("a"),
|
||||||
options.Description("Enable the search on all alternative names and not only scientific names."))
|
options.Description("Enable the search on all alternative names and not only scientific names."))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
options.BoolVar(obidefault.FailOnTaxonomyPtr(), "fail-on-taxonomy",
|
||||||
|
obidefault.FailOnTaxonomy(),
|
||||||
|
options.Description("Make obitools failing on error if a used taxid is not a currently valid one"),
|
||||||
|
)
|
||||||
|
|
||||||
|
options.BoolVar(obidefault.UpdateTaxidPtr(), "update-taxid", obidefault.UpdateTaxid(),
|
||||||
|
options.Description("Make obitools automatically updating the taxid that are declared merged to a newest one."),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CLIIsDebugMode returns whether the CLI is in debug mode.
|
// CLIIsDebugMode returns whether the CLI is in debug mode.
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import (
|
|||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// commited
|
// commited
|
||||||
|
|
||||||
var _Commit = "c50a0f4"
|
var _Commit = "4774438"
|
||||||
var _Version = "Release 4.2.0"
|
var _Version = "Release 4.2.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package obiseq
|
package obiseq
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"golang.org/x/exp/slices"
|
"golang.org/x/exp/slices"
|
||||||
@@ -179,3 +180,18 @@ func (s *BioSequenceSlice) SortOnLength(reverse bool) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *BioSequenceSlice) ExtractTaxonomy(taxonomy *obitax.Taxonomy) (*obitax.Taxonomy, error) {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
for _, s := range *s {
|
||||||
|
taxonomy, err = taxonomy.InsertPathString(s.Path())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxonomy, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ func (s *BioSequence) UnPair() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *BioSequenceSlice) IsPaired() bool {
|
func (s *BioSequenceSlice) IsPaired() bool {
|
||||||
return (*s)[0].paired != nil
|
return s != nil && s.Len() > 0 && (*s)[0].paired != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BioSequenceSlice) PairedWith() *BioSequenceSlice {
|
func (s *BioSequenceSlice) PairedWith() *BioSequenceSlice {
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"math"
|
"math"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
@@ -15,13 +16,20 @@ func (sequence *BioSequence) TaxonomicDistribution(taxonomy *obitax.Taxonomy) ma
|
|||||||
taxonomy = taxonomy.OrDefault(true)
|
taxonomy = taxonomy.OrDefault(true)
|
||||||
|
|
||||||
for taxid, v := range taxids {
|
for taxid, v := range taxids {
|
||||||
t := taxonomy.Taxon(taxid)
|
t, isAlias, err := taxonomy.Taxon(taxid)
|
||||||
if t == nil {
|
if err != nil {
|
||||||
log.Fatalf(
|
log.Fatalf(
|
||||||
"On sequence %s taxid %s is not defined in taxonomy: %s",
|
"On sequence %s taxid %s is not defined in taxonomy: %s (%v)",
|
||||||
sequence.Id(),
|
sequence.Id(),
|
||||||
taxid,
|
taxid,
|
||||||
taxonomy.Name())
|
taxonomy.Name(),
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
if isAlias && obidefault.FailOnTaxonomy() {
|
||||||
|
log.Fatalf("On sequence %s taxid %s is an alias on %s",
|
||||||
|
sequence.Id(), taxid, t.String())
|
||||||
}
|
}
|
||||||
taxons[t.Node] = v
|
taxons[t.Node] = v
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
||||||
@@ -14,7 +16,10 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
|||||||
if taxid == "NA" {
|
if taxid == "NA" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return taxonomy.Taxon(taxid)
|
|
||||||
|
taxon, _, _ := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
|
return taxon
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetTaxid sets the taxid for the BioSequence.
|
// SetTaxid sets the taxid for the BioSequence.
|
||||||
@@ -23,6 +28,9 @@ func (s *BioSequence) Taxon(taxonomy *obitax.Taxonomy) *obitax.Taxon {
|
|||||||
//
|
//
|
||||||
// taxid - the taxid to set.
|
// taxid - the taxid to set.
|
||||||
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
||||||
|
var err error
|
||||||
|
var isAlias bool
|
||||||
|
|
||||||
if taxid == "" {
|
if taxid == "" {
|
||||||
taxid = "NA"
|
taxid = "NA"
|
||||||
} else {
|
} else {
|
||||||
@@ -30,11 +38,38 @@ func (s *BioSequence) SetTaxid(taxid string, rank ...string) {
|
|||||||
taxon := (*obitax.Taxon)(nil)
|
taxon := (*obitax.Taxon)(nil)
|
||||||
|
|
||||||
if taxonomy != nil {
|
if taxonomy != nil {
|
||||||
taxon = taxonomy.Taxon(taxid)
|
taxon, isAlias, err = taxonomy.Taxon(taxid)
|
||||||
}
|
|
||||||
|
if err != nil {
|
||||||
|
logger := log.Warnf
|
||||||
|
if obidefault.FailOnTaxonomy() {
|
||||||
|
logger = log.Fatalf
|
||||||
|
}
|
||||||
|
logger("%s: Taxid: %v is unknown from taxonomy (%v)",
|
||||||
|
s.Id(), taxid, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if isAlias {
|
||||||
|
if obidefault.FailOnTaxonomy() {
|
||||||
|
log.Fatalf("%s: Taxid: %v is an alias from taxonomy (%v) to %s",
|
||||||
|
s.Id(), taxid, taxonomy.Name(), taxon.String())
|
||||||
|
} else {
|
||||||
|
if obidefault.UpdateTaxid() {
|
||||||
|
log.Warnf("%s: Taxid: %v is updated to %s",
|
||||||
|
s.Id(), taxid, taxon.String())
|
||||||
|
taxid = taxon.String()
|
||||||
|
} else {
|
||||||
|
log.Warnf("%s: Taxid %v has to be updated to %s",
|
||||||
|
s.Id(), taxid, taxon.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if taxon != nil {
|
||||||
|
taxid = taxon.String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if taxon != nil {
|
|
||||||
taxid = taxon.String()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -135,14 +170,35 @@ func (sequence *BioSequence) SetFamily(taxonomy *obitax.Taxonomy) *obitax.Taxon
|
|||||||
return sequence.SetTaxonAtRank(taxonomy, "family")
|
return sequence.SetTaxonAtRank(taxonomy, "family")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) string {
|
func (sequence *BioSequence) SetPath(taxonomy *obitax.Taxonomy) []string {
|
||||||
taxon := sequence.Taxon(taxonomy)
|
taxon := sequence.Taxon(taxonomy)
|
||||||
path := taxon.Path()
|
path := taxon.Path()
|
||||||
|
spath := make([]string, path.Len())
|
||||||
|
lpath := path.Len() - 1
|
||||||
|
|
||||||
tpath := path.String()
|
for i := lpath; i >= 0; i-- {
|
||||||
sequence.SetAttribute("taxonomic_path", tpath)
|
spath[lpath-i] = path.Get(i).String(taxonomy.Code())
|
||||||
|
}
|
||||||
|
|
||||||
return tpath
|
sequence.SetAttribute("taxonomic_path", spath)
|
||||||
|
|
||||||
|
return spath
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sequence *BioSequence) Path() []string {
|
||||||
|
path, ok := sequence.GetAttribute("taxonomic_path")
|
||||||
|
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
slice, err := obiutils.InterfaceToStringSlice(path)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("%s: taxonomic_path has the wrong type (%v)", sequence.Id(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return slice
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sequence *BioSequence) SetScientificName(taxonomy *obitax.Taxonomy) string {
|
func (sequence *BioSequence) SetScientificName(taxonomy *obitax.Taxonomy) string {
|
||||||
|
|||||||
@@ -63,7 +63,12 @@ func IsSubCladeOfSlot(taxonomy *obitax.Taxonomy, key string) SequencePredicate {
|
|||||||
val, ok := sequence.GetStringAttribute(key)
|
val, ok := sequence.GetStringAttribute(key)
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
parent := taxonomy.Taxon(val)
|
parent, _, err := taxonomy.Taxon(val)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("%s: %s is unkown from the taxonomy (%v)", sequence.Id(), val, err)
|
||||||
|
}
|
||||||
|
|
||||||
taxon := sequence.Taxon(taxonomy)
|
taxon := sequence.Taxon(taxonomy)
|
||||||
return parent != nil && taxon != nil && taxon.IsSubCladeOf(parent)
|
return parent != nil && taxon != nil && taxon.IsSubCladeOf(parent)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1 +1,38 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/TuftsBCB/io/newick"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (taxonomy *Taxonomy) Newick() string {
|
||||||
|
if taxonomy == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator := taxonomy.AsTaxonSet().Sort().Iterator()
|
||||||
|
|
||||||
|
nodes := make(map[*string]*newick.Tree, taxonomy.Len())
|
||||||
|
trees := make([]*newick.Tree, 0)
|
||||||
|
|
||||||
|
for iterator.Next() {
|
||||||
|
taxon := iterator.Get()
|
||||||
|
tree := &newick.Tree{Label: taxon.String()}
|
||||||
|
nodes[taxon.Node.id] = tree
|
||||||
|
if parent, ok := nodes[taxon.Parent().Node.id]; ok {
|
||||||
|
parent.Children = append(parent.Children, *tree)
|
||||||
|
} else {
|
||||||
|
trees = append(trees, tree)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rep := strings.Builder{}
|
||||||
|
|
||||||
|
for _, tree := range trees {
|
||||||
|
rep.WriteString(tree.String())
|
||||||
|
rep.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
return rep.String()
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,11 +1,14 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
var __defaut_taxonomy__ *Taxonomy
|
var __defaut_taxonomy__ *Taxonomy
|
||||||
|
var __defaut_taxonomy_mutex__ sync.Mutex
|
||||||
|
|
||||||
func (taxonomy *Taxonomy) SetAsDefault() {
|
func (taxonomy *Taxonomy) SetAsDefault() {
|
||||||
log.Infof("Set as default taxonomy %s", taxonomy.Name())
|
log.Infof("Set as default taxonomy %s", taxonomy.Name())
|
||||||
@@ -32,14 +35,18 @@ func DefaultTaxonomy() *Taxonomy {
|
|||||||
var err error
|
var err error
|
||||||
if __defaut_taxonomy__ == nil {
|
if __defaut_taxonomy__ == nil {
|
||||||
if obidefault.HasSelectedTaxonomy() {
|
if obidefault.HasSelectedTaxonomy() {
|
||||||
__defaut_taxonomy__, err = LoadTaxonomy(
|
__defaut_taxonomy_mutex__.Lock()
|
||||||
obidefault.SelectedTaxonomy(),
|
defer __defaut_taxonomy_mutex__.Unlock()
|
||||||
!obidefault.AreAlternativeNamesSelected(),
|
if __defaut_taxonomy__ == nil {
|
||||||
)
|
__defaut_taxonomy__, err = LoadTaxonomy(
|
||||||
|
obidefault.SelectedTaxonomy(),
|
||||||
|
!obidefault.AreAlternativeNamesSelected(),
|
||||||
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Cannot load default taxonomy: %v", err)
|
log.Fatalf("Cannot load default taxonomy: %v", err)
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package obitax
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// ITaxon represents an iterator for traversing Taxon instances.
|
// ITaxon represents an iterator for traversing Taxon instances.
|
||||||
@@ -195,7 +194,6 @@ func (taxon *Taxon) ISubTaxonomy() *ITaxon {
|
|||||||
|
|
||||||
pushed := true
|
pushed := true
|
||||||
|
|
||||||
log.Warn(parents)
|
|
||||||
for pushed {
|
for pushed {
|
||||||
itaxo := taxo.Iterator()
|
itaxo := taxo.Iterator()
|
||||||
pushed = false
|
pushed = false
|
||||||
@@ -218,9 +216,9 @@ func (taxon *Taxon) ISubTaxonomy() *ITaxon {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (taxonomy *Taxonomy) ISubTaxonomy(taxid string) *ITaxon {
|
func (taxonomy *Taxonomy) ISubTaxonomy(taxid string) *ITaxon {
|
||||||
taxon := taxonomy.Taxon(taxid)
|
taxon, _, err := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -91,7 +91,13 @@ func loadNameTable(reader io.Reader, taxonomy *Taxonomy, onlysn bool) int {
|
|||||||
|
|
||||||
if !onlysn || classname == "scientific name" {
|
if !onlysn || classname == "scientific name" {
|
||||||
n++
|
n++
|
||||||
taxonomy.Taxon(taxid).SetName(name, classname)
|
taxon, _, err := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("%s: is unknown from the taxonomy", taxid)
|
||||||
|
}
|
||||||
|
|
||||||
|
taxon.SetName(name, classname)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -125,7 +131,7 @@ func loadMergedTable(reader io.Reader, taxonomy *Taxonomy) int {
|
|||||||
oldtaxid := strings.TrimSpace(record[0])
|
oldtaxid := strings.TrimSpace(record[0])
|
||||||
newtaxid := strings.TrimSpace(record[1])
|
newtaxid := strings.TrimSpace(record[1])
|
||||||
|
|
||||||
taxonomy.AddAlias(newtaxid, oldtaxid, false)
|
taxonomy.AddAlias(oldtaxid, newtaxid, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
return n
|
return n
|
||||||
@@ -196,7 +202,11 @@ func LoadNCBITaxDump(directory string, onlysn bool) (*Taxonomy, error) {
|
|||||||
n = loadMergedTable(buffered, taxonomy)
|
n = loadMergedTable(buffered, taxonomy)
|
||||||
log.Printf("%d merged taxa read\n", n)
|
log.Printf("%d merged taxa read\n", n)
|
||||||
|
|
||||||
root := taxonomy.Taxon("1")
|
root, _, err := taxonomy.Taxon("1")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("cannot find the root taxon (1) in the NCBI tax dump")
|
||||||
|
}
|
||||||
taxonomy.SetRoot(root)
|
taxonomy.SetRoot(root)
|
||||||
|
|
||||||
return taxonomy, nil
|
return taxonomy, nil
|
||||||
|
|||||||
@@ -134,7 +134,12 @@ func LoadNCBITarTaxDump(path string, onlysn bool) (*Taxonomy, error) {
|
|||||||
n = loadMergedTable(buffered, taxonomy)
|
n = loadMergedTable(buffered, taxonomy)
|
||||||
log.Printf("%d merged taxa read\n", n)
|
log.Printf("%d merged taxa read\n", n)
|
||||||
|
|
||||||
root := taxonomy.Taxon("1")
|
root, _, err := taxonomy.Taxon("1")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal("cannot find the root taxon (1) in the NCBI tax dump")
|
||||||
|
}
|
||||||
|
|
||||||
taxonomy.SetRoot(root)
|
taxonomy.SetRoot(root)
|
||||||
|
|
||||||
return taxonomy, nil
|
return taxonomy, nil
|
||||||
|
|||||||
1
pkg/obitax/newick_write.go
Normal file
1
pkg/obitax/newick_write.go
Normal file
@@ -0,0 +1 @@
|
|||||||
|
package obitax
|
||||||
64
pkg/obitax/string_parser.go
Normal file
64
pkg/obitax/string_parser.go
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
package obitax
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ParseTaxonString splits a taxon description written as
// "code:taxid [scientific name]@rank" into its four components. White space
// surrounding the whole string or any component is ignored.
//
// Parameters:
//   - taxonStr: the description to parse
//
// Returns:
//   - code: the taxonomy code (text before ':')
//   - taxid: the taxon identifier (text between ':' and '[')
//   - scientificName: the text enclosed by the first '[' and the last ']'
//   - rank: the text following '@', or "no rank" when there is no '@'
//   - err: non-nil when the string holds several '@', lacks well ordered
//     square brackets, does not hold exactly one ':' before the '[', or
//     when code, taxid or scientific name is empty; the four strings are
//     then empty
func ParseTaxonString(taxonStr string) (code, taxid, scientificName, rank string, err error) {
	fail := func(message string) (string, string, string, string, error) {
		return "", "", "", "", errors.New(message)
	}

	trimmed := strings.TrimSpace(taxonStr)

	// At most one '@' is allowed: it introduces the rank.
	if strings.Count(trimmed, "@") > 1 {
		return fail("invalid format: multiple '@' characters found")
	}

	body, rankPart, hasRank := strings.Cut(trimmed, "@")
	body = strings.TrimSpace(body)

	rank = "no rank"
	if hasRank {
		rank = strings.TrimSpace(rankPart)
	}

	// The scientific name runs from the first '[' to the last ']'.
	opening := strings.Index(body, "[")
	closing := strings.LastIndex(body, "]")
	if opening < 0 || closing < 0 || opening > closing {
		return fail("invalid format: scientific name must be enclosed in square brackets")
	}

	scientificName = strings.TrimSpace(body[opening+1 : closing])

	// What precedes the '[' must read "code:taxid", with a single ':'.
	identifier := strings.TrimSpace(body[:opening])
	if strings.Count(identifier, ":") != 1 {
		return fail("invalid format: missing taxonomy code separator ':'")
	}

	rawCode, rawTaxid, _ := strings.Cut(identifier, ":")
	code = strings.TrimSpace(rawCode)
	taxid = strings.TrimSpace(rawTaxid)

	if code == "" || taxid == "" || scientificName == "" {
		return fail("invalid format: code, taxid and scientific name cannot be empty")
	}

	return code, taxid, scientificName, rank, nil
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package obitax
|
package obitax
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"iter"
|
"iter"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
|
||||||
@@ -379,3 +380,29 @@ func (taxon *Taxon) SameAs(other *Taxon) bool {
|
|||||||
|
|
||||||
return taxon.Taxonomy == other.Taxonomy && taxon.Node.id == other.Node.id
|
return taxon.Taxonomy == other.Taxonomy && taxon.Node.id == other.Node.id
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (taxon *Taxon) AddChild(child string, replace bool) (*Taxon, error) {
|
||||||
|
if taxon == nil {
|
||||||
|
return nil, errors.New("nil taxon")
|
||||||
|
}
|
||||||
|
|
||||||
|
code, taxid, scientific_name, rank, err := ParseTaxonString(child)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if taxon.Taxonomy.code != code {
|
||||||
|
return nil, errors.New("taxonomy code mismatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
newTaxon, err := taxon.Taxonomy.AddTaxon(taxid, *taxon.Node.id, rank, false, replace)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
newTaxon.SetName(scientific_name, "scientific name")
|
||||||
|
|
||||||
|
return newTaxon, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Taxonomy represents a hierarchical classification of taxa.
|
// Taxonomy represents a hierarchical classification of taxa.
|
||||||
@@ -130,27 +129,30 @@ func (taxonomy *Taxonomy) TaxidString(id string) (string, error) {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - A pointer to the Taxon instance associated with the provided taxid.
|
// - A pointer to the Taxon instance associated with the provided taxid.
|
||||||
// - If the taxid is unknown, the method will log a fatal error.
|
// - If the taxid is unknown, the method will log a fatal error.
|
||||||
func (taxonomy *Taxonomy) Taxon(taxid string) *Taxon {
|
func (taxonomy *Taxonomy) Taxon(taxid string) (*Taxon, bool, error) {
|
||||||
taxonomy = taxonomy.OrDefault(false)
|
taxonomy = taxonomy.OrDefault(false)
|
||||||
if taxonomy == nil {
|
if taxonomy == nil {
|
||||||
return nil
|
return nil, false, errors.New("cannot extract taxon from nil taxonomy")
|
||||||
}
|
}
|
||||||
|
|
||||||
id, err := taxonomy.Id(taxid)
|
id, err := taxonomy.Id(taxid)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Taxid %s: %v", taxid, err)
|
return nil, false, fmt.Errorf("Taxid %s: %v", taxid, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
taxon := taxonomy.nodes.Get(id)
|
taxon := taxonomy.nodes.Get(id)
|
||||||
|
isAlias := taxon.Node.id != id
|
||||||
|
|
||||||
if taxon == nil {
|
if taxon == nil {
|
||||||
log.Fatalf("Taxid %s is not part of the taxonomy %s",
|
return nil,
|
||||||
taxid,
|
false,
|
||||||
taxonomy.name)
|
fmt.Errorf("Taxid %s is not part of the taxonomy %s",
|
||||||
|
taxid,
|
||||||
|
taxonomy.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
return taxon
|
return taxon, isAlias, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// AsTaxonSet returns the set of taxon nodes contained within the Taxonomy.
|
// AsTaxonSet returns the set of taxon nodes contained within the Taxonomy.
|
||||||
@@ -353,3 +355,63 @@ func (taxonomy *Taxonomy) HasRoot() bool {
|
|||||||
taxonomy = taxonomy.OrDefault(false)
|
taxonomy = taxonomy.OrDefault(false)
|
||||||
return taxonomy != nil && taxonomy.root != nil
|
return taxonomy != nil && taxonomy.root != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (taxonomy *Taxonomy) InsertPathString(path []string) (*Taxonomy, error) {
|
||||||
|
if len(path) == 0 {
|
||||||
|
return nil, errors.New("path is empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
code, taxid, scientific_name, rank, err := ParseTaxonString(path[0])
|
||||||
|
|
||||||
|
if taxonomy == nil {
|
||||||
|
taxonomy = NewTaxonomy(code, code, obiutils.AsciiAlphaNumSet)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if taxonomy.Len() == 0 {
|
||||||
|
|
||||||
|
if code != taxonomy.code {
|
||||||
|
return nil, fmt.Errorf("cannot insert taxon %s into taxonomy %s with code %s",
|
||||||
|
path[0], taxonomy.name, taxonomy.code)
|
||||||
|
}
|
||||||
|
|
||||||
|
root, err := taxonomy.AddTaxon(taxid, taxid, rank, true, true)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
root.SetName(scientific_name, "scientificName")
|
||||||
|
}
|
||||||
|
|
||||||
|
var current *Taxon
|
||||||
|
current, _, err = taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !current.IsRoot() {
|
||||||
|
return nil, errors.New("path does not start with a root node")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, id := range path[1:] {
|
||||||
|
taxon, _, err := taxonomy.Taxon(id)
|
||||||
|
if err == nil {
|
||||||
|
if !current.SameAs(taxon.Parent()) {
|
||||||
|
return nil, errors.New("path is not consistent with the taxonomy, parent mismatch")
|
||||||
|
}
|
||||||
|
current = taxon
|
||||||
|
} else {
|
||||||
|
current, err = current.AddChild(id, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxonomy, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -151,7 +151,8 @@ func (set *TaxonSet) Alias(id *string, taxon *Taxon) {
|
|||||||
if original == nil {
|
if original == nil {
|
||||||
log.Fatalf("Original taxon %v is not part of taxon set", id)
|
log.Fatalf("Original taxon %v is not part of taxon set", id)
|
||||||
}
|
}
|
||||||
set.set[id] = taxon.Node
|
|
||||||
|
set.set[id] = original.Node
|
||||||
set.nalias++
|
set.nalias++
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -196,3 +197,30 @@ func (set *TaxonSet) Contains(id *string) bool {
|
|||||||
node := set.Get(id)
|
node := set.Get(id)
|
||||||
return node != nil
|
return node != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (set *TaxonSet) Sort() *TaxonSlice {
|
||||||
|
if set == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
taxonomy := set.Taxonomy()
|
||||||
|
taxa := taxonomy.NewTaxonSlice(0, set.Len())
|
||||||
|
parent := make(map[*TaxNode]bool, set.Len())
|
||||||
|
|
||||||
|
pushed := true
|
||||||
|
|
||||||
|
for pushed {
|
||||||
|
pushed = false
|
||||||
|
for _, node := range set.set {
|
||||||
|
if !parent[node] && (parent[set.Get(node.parent).Node] ||
|
||||||
|
!set.Contains(node.parent) ||
|
||||||
|
node == taxonomy.Root().Node) {
|
||||||
|
pushed = true
|
||||||
|
taxa.slice = append(taxa.slice, node)
|
||||||
|
parent[node] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return taxa
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package obiconvert
|
|||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
@@ -71,16 +72,16 @@ func InputOptionSet(options *getoptions.GetOpt) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func OutputModeOptionSet(options *getoptions.GetOpt) {
|
func OutputModeOptionSet(options *getoptions.GetOpt, compressed bool) {
|
||||||
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
||||||
options.Description("Disable the progress bar printing"))
|
options.Description("Disable the progress bar printing"))
|
||||||
|
|
||||||
options.BoolVar(&__compressed__, "compress", false,
|
if compressed {
|
||||||
options.Alias("Z"),
|
options.BoolVar(obidefault.CompressedPtr(), "compressed", obidefault.CompressOutput(),
|
||||||
options.Description("Output is compressed"))
|
options.Alias("Z"),
|
||||||
|
options.Description("Compress all the result using gzip"))
|
||||||
|
|
||||||
options.BoolVar(&__skip_empty__, "skip-empty", __skip_empty__,
|
}
|
||||||
options.Description("Sequences of length equal to zero are suppressed from the output"))
|
|
||||||
|
|
||||||
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
||||||
options.Alias("o"),
|
options.Alias("o"),
|
||||||
@@ -90,6 +91,9 @@ func OutputModeOptionSet(options *getoptions.GetOpt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func OutputOptionSet(options *getoptions.GetOpt) {
|
func OutputOptionSet(options *getoptions.GetOpt) {
|
||||||
|
options.BoolVar(&__skip_empty__, "skip-empty", __skip_empty__,
|
||||||
|
options.Description("Sequences of length equal to zero are suppressed from the output"))
|
||||||
|
|
||||||
options.BoolVar(&__output_in_fasta__, "fasta-output", false,
|
options.BoolVar(&__output_in_fasta__, "fasta-output", false,
|
||||||
options.Description("Write sequence in fasta format (default if no quality data available)."))
|
options.Description("Write sequence in fasta format (default if no quality data available)."))
|
||||||
|
|
||||||
@@ -105,7 +109,7 @@ func OutputOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.Alias("O"),
|
options.Alias("O"),
|
||||||
options.Description("output FASTA/FASTQ title line annotations follow OBI format."))
|
options.Description("output FASTA/FASTQ title line annotations follow OBI format."))
|
||||||
|
|
||||||
OutputModeOptionSet(options)
|
OutputModeOptionSet(options, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
func PairedFilesOptionSet(options *getoptions.GetOpt) {
|
func PairedFilesOptionSet(options *getoptions.GetOpt) {
|
||||||
@@ -159,10 +163,6 @@ func CLIOutputFormat() string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLICompressed() bool {
|
|
||||||
return __compressed__
|
|
||||||
}
|
|
||||||
|
|
||||||
func CLISkipEmpty() bool {
|
func CLISkipEmpty() bool {
|
||||||
return __skip_empty__
|
return __skip_empty__
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -55,6 +55,8 @@ func ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
|||||||
strings.HasSuffix(path, "fasta.gz") ||
|
strings.HasSuffix(path, "fasta.gz") ||
|
||||||
strings.HasSuffix(path, "fastq") ||
|
strings.HasSuffix(path, "fastq") ||
|
||||||
strings.HasSuffix(path, "fastq.gz") ||
|
strings.HasSuffix(path, "fastq.gz") ||
|
||||||
|
strings.HasSuffix(path, "fq") ||
|
||||||
|
strings.HasSuffix(path, "fq.gz") ||
|
||||||
strings.HasSuffix(path, "seq") ||
|
strings.HasSuffix(path, "seq") ||
|
||||||
strings.HasSuffix(path, "seq.gz") ||
|
strings.HasSuffix(path, "seq.gz") ||
|
||||||
strings.HasSuffix(path, "gb") ||
|
strings.HasSuffix(path, "gb") ||
|
||||||
@@ -140,7 +142,7 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
switch CLIInputFormat() {
|
switch CLIInputFormat() {
|
||||||
case "fastq":
|
case "fastq", "fq":
|
||||||
reader = obiformats.ReadFastqFromFile
|
reader = obiformats.ReadFastqFromFile
|
||||||
case "fasta":
|
case "fasta":
|
||||||
reader = obiformats.ReadFastaFromFile
|
reader = obiformats.ReadFastaFromFile
|
||||||
@@ -168,22 +170,25 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
|||||||
opts...,
|
opts...,
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
iterator, err = reader(list_of_files[0], opts...)
|
if len(list_of_files) > 0 {
|
||||||
|
iterator, err = reader(list_of_files[0], opts...)
|
||||||
if err != nil {
|
|
||||||
return obiiter.NilIBioSequence, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if CLIPairedFileName() != "" {
|
|
||||||
ip, err := reader(CLIPairedFileName(), opts...)
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiiter.NilIBioSequence, err
|
return obiiter.NilIBioSequence, err
|
||||||
}
|
}
|
||||||
|
|
||||||
iterator = iterator.PairTo(ip)
|
if CLIPairedFileName() != "" {
|
||||||
}
|
ip, err := reader(CLIPairedFileName(), opts...)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator = iterator.PairTo(ip)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
iterator = obiiter.NilIBioSequence
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
|||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obidefault.BatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obidefault.BatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsCompressed(CLICompressed()))
|
opts = append(opts, obiformats.OptionsCompressed(obidefault.CompressOutput()))
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ func CLIWriteSequenceCSV(iterator obiiter.IBioSequence,
|
|||||||
CSVDefinition(CLIPrintDefinition()),
|
CSVDefinition(CLIPrintDefinition()),
|
||||||
CSVKeys(CLIToBeKeptAttributes()),
|
CSVKeys(CLIToBeKeptAttributes()),
|
||||||
CSVSequence(CLIPrintSequence()),
|
CSVSequence(CLIPrintSequence()),
|
||||||
|
CSVQuality(CLIPrintQuality()),
|
||||||
CSVAutoColumn(CLIAutoColumns()),
|
CSVAutoColumn(CLIAutoColumns()),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ func CSVOptionSet(options *getoptions.GetOpt) {
|
|||||||
|
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.InputOptionSet(options)
|
obiconvert.InputOptionSet(options)
|
||||||
obiconvert.OutputModeOptionSet(options)
|
obiconvert.OutputModeOptionSet(options, true)
|
||||||
obioptions.LoadTaxonomyOptionSet(options, false, false)
|
obioptions.LoadTaxonomyOptionSet(options, false, false)
|
||||||
CSVOptionSet(options)
|
CSVOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ func CSVSequenceHeader(opt Options) obiitercsv.CSVHeader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if opt.CSVQuality() {
|
if opt.CSVQuality() {
|
||||||
record.AppendField("quality")
|
record.AppendField("qualities")
|
||||||
}
|
}
|
||||||
|
|
||||||
return record
|
return record
|
||||||
@@ -100,9 +100,9 @@ func CSVBatchFromSequences(batch obiiter.BioSequenceBatch, opt Options) obiiterc
|
|||||||
for j := 0; j < l; j++ {
|
for j := 0; j < l; j++ {
|
||||||
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
ascii[j] = uint8(q[j]) + uint8(quality_shift)
|
||||||
}
|
}
|
||||||
record["quality"] = string(ascii)
|
record["qualities"] = string(ascii)
|
||||||
} else {
|
} else {
|
||||||
record["quality"] = opt.CSVNAValue()
|
record["qualities"] = opt.CSVNAValue()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ func CLIDistributeSequence(sequences obiiter.IBioSequence) {
|
|||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
||||||
obiformats.OptionsBatchSize(obidefault.BatchSize()),
|
obiformats.OptionsBatchSize(obidefault.BatchSize()),
|
||||||
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
||||||
obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
obiformats.OptionsCompressed(obidefault.CompressOutput()))
|
||||||
|
|
||||||
var formater obiformats.SequenceBatchWriterToFile
|
var formater obiformats.SequenceBatchWriterToFile
|
||||||
|
|
||||||
|
|||||||
@@ -248,15 +248,15 @@ func CLIRestrictTaxonomyPredicate() obiseq.SequencePredicate {
|
|||||||
if len(_BelongTaxa) > 0 {
|
if len(_BelongTaxa) > 0 {
|
||||||
taxonomy := obitax.DefaultTaxonomy()
|
taxonomy := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
taxon := taxonomy.Taxon(_BelongTaxa[0])
|
taxon, _, err := taxonomy.Taxon(_BelongTaxa[0])
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
p = obiseq.IsSubCladeOfSlot(taxonomy, _BelongTaxa[0])
|
p = obiseq.IsSubCladeOfSlot(taxonomy, _BelongTaxa[0])
|
||||||
} else {
|
} else {
|
||||||
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
}
|
}
|
||||||
for _, staxid := range _BelongTaxa[1:] {
|
for _, staxid := range _BelongTaxa[1:] {
|
||||||
taxon := taxonomy.Taxon(staxid)
|
taxon, _, err := taxonomy.Taxon(staxid)
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
p2 = obiseq.IsSubCladeOfSlot(taxonomy, staxid)
|
p2 = obiseq.IsSubCladeOfSlot(taxonomy, staxid)
|
||||||
} else {
|
} else {
|
||||||
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
@@ -278,16 +278,16 @@ func CLIAvoidTaxonomyPredicate() obiseq.SequencePredicate {
|
|||||||
if len(_NotBelongTaxa) > 0 {
|
if len(_NotBelongTaxa) > 0 {
|
||||||
taxonomy := obitax.DefaultTaxonomy()
|
taxonomy := obitax.DefaultTaxonomy()
|
||||||
|
|
||||||
taxon := taxonomy.Taxon(_NotBelongTaxa[0])
|
taxon, _, err := taxonomy.Taxon(_NotBelongTaxa[0])
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
p = obiseq.IsSubCladeOfSlot(taxonomy, _NotBelongTaxa[0])
|
p = obiseq.IsSubCladeOfSlot(taxonomy, _NotBelongTaxa[0])
|
||||||
} else {
|
} else {
|
||||||
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
p = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, taxid := range _NotBelongTaxa[1:] {
|
for _, taxid := range _NotBelongTaxa[1:] {
|
||||||
taxon := taxonomy.Taxon(taxid)
|
taxon, _, err := taxonomy.Taxon(taxid)
|
||||||
if taxon == nil {
|
if err != nil {
|
||||||
p2 = obiseq.IsSubCladeOfSlot(taxonomy, taxid)
|
p2 = obiseq.IsSubCladeOfSlot(taxonomy, taxid)
|
||||||
} else {
|
} else {
|
||||||
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
p2 = obiseq.IsSubCladeOf(taxonomy, taxon)
|
||||||
|
|||||||
520
pkg/obitools/obimicroasm/microasm.go
Normal file
520
pkg/obitools/obimicroasm/microasm.go
Normal file
@@ -0,0 +1,520 @@
|
|||||||
|
package obimicroasm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"slices"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obisuffix"
|
||||||
|
)
|
||||||
|
|
||||||
|
func BuildFilterOnPatternReadPairWorker(
|
||||||
|
forward, reverse string,
|
||||||
|
errormax int,
|
||||||
|
cutReads bool,
|
||||||
|
) obiseq.SeqWorker {
|
||||||
|
forwardPatternDir, err := obiapat.MakeApatPattern(forward, errormax, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot compile forward primer %s : %v", forward, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
reverse_rev := obiseq.NewBioSequence("fp", []byte(reverse), "").ReverseComplement(true).String()
|
||||||
|
reveresePatternRev, err := obiapat.MakeApatPattern(reverse_rev, errormax, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot compile reverse complement reverse primer %s : %v", reverse, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
matchRead := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
|
||||||
|
var aseq obiapat.ApatSequence
|
||||||
|
var err error
|
||||||
|
var read, match *obiseq.BioSequence
|
||||||
|
|
||||||
|
aseq, err = obiapat.MakeApatSequence(sequence, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot prepare apat sequence from %s : %v", sequence.Id(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
start, end, nerr, matched := forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
|
||||||
|
|
||||||
|
if matched {
|
||||||
|
read = sequence
|
||||||
|
|
||||||
|
if cutReads {
|
||||||
|
read, err = sequence.Subsequence(start, sequence.Len(), false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot cut, on forward, forward read %s [%d,%d] : %v",
|
||||||
|
sequence.Id(), start, sequence.Len(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
read.SetAttribute("forward_primer", forward)
|
||||||
|
match, _ = sequence.Subsequence(start, end, false)
|
||||||
|
read.SetAttribute("forward_match", match.String())
|
||||||
|
read.SetAttribute("forward_error", nerr)
|
||||||
|
|
||||||
|
aseq, err = obiapat.MakeApatSequence(read, false, aseq)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot prepare apat sequence from %s : %v", sequence.Id(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
start, end, nerr, matched = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
|
||||||
|
|
||||||
|
if matched {
|
||||||
|
|
||||||
|
frread := read
|
||||||
|
|
||||||
|
if cutReads {
|
||||||
|
frread, err = read.Subsequence(0, end, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot xxx cut, on reverse, forward read %s [%d,%d] : %v",
|
||||||
|
sequence.Id(), start, read.Len(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
frread.SetAttribute("reverse_primer", reverse)
|
||||||
|
match, _ = read.Subsequence(start, end, false)
|
||||||
|
frread.SetAttribute("reverse_match", match.ReverseComplement(true).String())
|
||||||
|
frread.SetAttribute("reverse_error", nerr)
|
||||||
|
|
||||||
|
read = frread
|
||||||
|
// log.Warnf("Forward-Reverse primer matched on %s : %d\n%s", read.Id(), read.Len(),
|
||||||
|
// obiformats.FormatFasta(read, obiformats.FormatFastSeqJsonHeader))
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
start, end, nerr, matched = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
|
||||||
|
|
||||||
|
if matched {
|
||||||
|
read = sequence
|
||||||
|
if cutReads {
|
||||||
|
read, err = sequence.Subsequence(0, end, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot yyy cut, on reverse, forward read %s [%d,%d] : %v",
|
||||||
|
sequence.Id(), 0, end, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
read.SetAttribute("reverse_primer", reverse)
|
||||||
|
match, _ = read.Subsequence(start, end, false)
|
||||||
|
read.SetAttribute("reverse_match", match.ReverseComplement(true).String())
|
||||||
|
read.SetAttribute("reverse_error", nerr)
|
||||||
|
} else {
|
||||||
|
read = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return read
|
||||||
|
}
|
||||||
|
|
||||||
|
w := func(sequence *obiseq.BioSequence) (result obiseq.BioSequenceSlice, err error) {
|
||||||
|
result = obiseq.MakeBioSequenceSlice()
|
||||||
|
|
||||||
|
paired := sequence.PairedWith()
|
||||||
|
sequence.UnPair()
|
||||||
|
|
||||||
|
read := matchRead(sequence)
|
||||||
|
|
||||||
|
if read == nil {
|
||||||
|
sequence = sequence.ReverseComplement(true)
|
||||||
|
read = matchRead(sequence)
|
||||||
|
}
|
||||||
|
|
||||||
|
if read != nil {
|
||||||
|
result = append(result, read)
|
||||||
|
}
|
||||||
|
|
||||||
|
if paired != nil {
|
||||||
|
read = matchRead(paired)
|
||||||
|
|
||||||
|
if read == nil {
|
||||||
|
read = matchRead(paired.ReverseComplement(true))
|
||||||
|
}
|
||||||
|
|
||||||
|
if read != nil {
|
||||||
|
result = append(result, read)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
return w
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExtractOnPatterns(iter obiiter.IBioSequence,
|
||||||
|
forward, reverse string,
|
||||||
|
errormax int,
|
||||||
|
cutReads bool,
|
||||||
|
) obiseq.BioSequenceSlice {
|
||||||
|
|
||||||
|
matched := iter.MakeIWorker(
|
||||||
|
BuildFilterOnPatternReadPairWorker(forward, reverse, errormax, cutReads),
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
|
||||||
|
rep := obiseq.MakeBioSequenceSlice()
|
||||||
|
|
||||||
|
for matched.Next() {
|
||||||
|
frgs := matched.Get()
|
||||||
|
rep = append(rep, frgs.Slice()...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return rep
|
||||||
|
}
|
||||||
|
|
||||||
|
func BuildPCRProduct(seqs obiseq.BioSequenceSlice,
|
||||||
|
consensus_id string,
|
||||||
|
kmer_size int,
|
||||||
|
forward, reverse string,
|
||||||
|
backtrack bool,
|
||||||
|
save_graph bool, dirname string) (*obiseq.BioSequence, error) {
|
||||||
|
|
||||||
|
from := obiseq.NewBioSequence("forward", []byte(forward), "")
|
||||||
|
to := obiseq.NewBioSequence("reverse", []byte(CLIReversePrimer()), "").ReverseComplement(true)
|
||||||
|
|
||||||
|
if backtrack {
|
||||||
|
from, to = to, from
|
||||||
|
}
|
||||||
|
|
||||||
|
if seqs.Len() == 0 {
|
||||||
|
return nil, fmt.Errorf("no sequence provided")
|
||||||
|
}
|
||||||
|
|
||||||
|
if save_graph {
|
||||||
|
if dirname == "" {
|
||||||
|
dirname = "."
|
||||||
|
}
|
||||||
|
|
||||||
|
if stat, err := os.Stat(dirname); err != nil || !stat.IsDir() {
|
||||||
|
// path does not exist or is not directory
|
||||||
|
os.RemoveAll(dirname)
|
||||||
|
err := os.Mkdir(dirname, 0755)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Panicf("Cannot create directory %s for saving graphs", dirname)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fasta, err := os.Create(path.Join(dirname, fmt.Sprintf("%s_consensus.fasta", consensus_id)))
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
defer fasta.Close()
|
||||||
|
fasta.Write(obiformats.FormatFastaBatch(obiiter.MakeBioSequenceBatch(
|
||||||
|
fmt.Sprintf("%s_consensus", consensus_id),
|
||||||
|
0,
|
||||||
|
seqs,
|
||||||
|
),
|
||||||
|
obiformats.FormatFastSeqJsonHeader, false).Bytes())
|
||||||
|
fasta.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("Number of reads : %d\n", len(seqs))
|
||||||
|
|
||||||
|
if kmer_size < 0 {
|
||||||
|
longest := make([]int, len(seqs))
|
||||||
|
|
||||||
|
for i, seq := range seqs {
|
||||||
|
s := obiseq.BioSequenceSlice{seq}
|
||||||
|
sa := obisuffix.BuildSuffixArray(&s)
|
||||||
|
longest[i] = slices.Max(sa.CommonSuffix())
|
||||||
|
}
|
||||||
|
|
||||||
|
// spectrum := map[int]int{}
|
||||||
|
// for _, s := range longest {
|
||||||
|
// spectrum[s]++
|
||||||
|
// }
|
||||||
|
|
||||||
|
// log.Warnf("spectum kmer size : %v", spectrum)
|
||||||
|
|
||||||
|
kmer_size = slices.Max(longest) + 1
|
||||||
|
log.Infof("estimated kmer size : %d", kmer_size)
|
||||||
|
}
|
||||||
|
|
||||||
|
var graph *obikmer.DeBruijnGraph
|
||||||
|
|
||||||
|
var hp []uint64
|
||||||
|
var err error
|
||||||
|
var starts []uint64
|
||||||
|
var stops []uint64
|
||||||
|
|
||||||
|
for {
|
||||||
|
graph = obikmer.MakeDeBruijnGraph(kmer_size)
|
||||||
|
|
||||||
|
for _, s := range seqs {
|
||||||
|
graph.Push(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !backtrack {
|
||||||
|
starts = graph.Search(from, CLIAllowedMismatch())
|
||||||
|
stops = graph.BackSearch(to, CLIAllowedMismatch())
|
||||||
|
} else {
|
||||||
|
starts = graph.BackSearch(from, CLIAllowedMismatch())
|
||||||
|
stops = graph.Search(to, CLIAllowedMismatch())
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Found %d starts", len(starts))
|
||||||
|
pweight := map[int]int{}
|
||||||
|
for _, s := range starts {
|
||||||
|
w := graph.Weight(s)
|
||||||
|
pweight[w]++
|
||||||
|
log.Warnf("Starts : %s (%d)\n", graph.DecodeNode(s), w)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Found %d stops", len(stops))
|
||||||
|
for _, s := range stops {
|
||||||
|
w := graph.Weight(s)
|
||||||
|
pweight[w]++
|
||||||
|
log.Warnf("Stop : %s (%d)\n", graph.DecodeNode(s), w)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Weight spectrum : %v", pweight)
|
||||||
|
|
||||||
|
wmax := 0
|
||||||
|
sw := 0
|
||||||
|
for w := range pweight {
|
||||||
|
sw += w
|
||||||
|
if w > wmax {
|
||||||
|
wmax = w
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
graph.FilterMinWeight(int(sw / len(pweight)))
|
||||||
|
graph.FilterMaxWeight(int(wmax * 2))
|
||||||
|
|
||||||
|
log.Infof("Minimum coverage : %d", int(sw/len(pweight)))
|
||||||
|
log.Infof("Maximum coverage : %d", int(wmax*2))
|
||||||
|
|
||||||
|
if !graph.HasCycleInDegree() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
kmer_size++
|
||||||
|
|
||||||
|
if kmer_size > 31 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
SetKmerSize(kmer_size)
|
||||||
|
log.Warnf("Cycle detected, increasing kmer size to %d\n", kmer_size)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !backtrack {
|
||||||
|
starts = graph.Search(from, CLIAllowedMismatch())
|
||||||
|
stops = graph.BackSearch(to, CLIAllowedMismatch())
|
||||||
|
} else {
|
||||||
|
starts = graph.BackSearch(from, CLIAllowedMismatch())
|
||||||
|
stops = graph.Search(to, CLIAllowedMismatch())
|
||||||
|
}
|
||||||
|
|
||||||
|
hp, err = graph.HaviestPath(starts, stops, backtrack)
|
||||||
|
|
||||||
|
log.Debugf("Graph size : %d\n", graph.Len())
|
||||||
|
|
||||||
|
maxw := graph.MaxWeight()
|
||||||
|
modew := graph.WeightMode()
|
||||||
|
meanw := graph.WeightMean()
|
||||||
|
specw := graph.WeightSpectrum()
|
||||||
|
kmer := graph.KmerSize()
|
||||||
|
|
||||||
|
log.Warnf("Weigh mode: %d Weigth mean : %4.1f Weigth max : %d, kmer = %d", modew, meanw, maxw, kmer)
|
||||||
|
log.Warn(specw)
|
||||||
|
|
||||||
|
if save_graph {
|
||||||
|
|
||||||
|
file, err := os.Create(path.Join(dirname,
|
||||||
|
fmt.Sprintf("%s_consensus.gml", consensus_id)))
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
} else {
|
||||||
|
file.WriteString(graph.Gml())
|
||||||
|
file.Close()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
s := graph.DecodePath(hp)
|
||||||
|
|
||||||
|
seq := obiseq.NewBioSequence(consensus_id, []byte(s), "")
|
||||||
|
|
||||||
|
total_kmer := graph.Len()
|
||||||
|
sumCount := 0
|
||||||
|
|
||||||
|
if seq != nil {
|
||||||
|
for _, s := range seqs {
|
||||||
|
sumCount += s.Count()
|
||||||
|
}
|
||||||
|
seq.SetAttribute("obiconsensus_consensus", true)
|
||||||
|
seq.SetAttribute("obiconsensus_weight", sumCount)
|
||||||
|
seq.SetAttribute("obiconsensus_seq_length", seq.Len())
|
||||||
|
seq.SetAttribute("obiconsensus_kmer_size", kmer_size)
|
||||||
|
seq.SetAttribute("obiconsensus_kmer_max_occur", graph.MaxWeight())
|
||||||
|
seq.SetAttribute("obiconsensus_filtered_graph_size", graph.Len())
|
||||||
|
seq.SetAttribute("obiconsensus_full_graph_size", total_kmer)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Warnf("Consensus sequence : \n%s", obiformats.FormatFasta(seq, obiformats.FormatFastSeqJsonHeader))
|
||||||
|
|
||||||
|
return seq, nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIAssemblePCR() *obiseq.BioSequence {
|
||||||
|
|
||||||
|
pairs, err := CLIPairedSequence()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
matched := ExtractOnPatterns(pairs,
|
||||||
|
CLIForwardPrimer(),
|
||||||
|
CLIReversePrimer(),
|
||||||
|
CLIAllowedMismatch(),
|
||||||
|
true,
|
||||||
|
)
|
||||||
|
|
||||||
|
seq, err := BuildPCRProduct(
|
||||||
|
matched,
|
||||||
|
CLIGraphFilesDirectory(),
|
||||||
|
CLIKmerSize(),
|
||||||
|
CLIForwardPrimer(),
|
||||||
|
CLIReversePrimer(),
|
||||||
|
false,
|
||||||
|
CLISaveGraphToFiles(),
|
||||||
|
CLIGraphFilesDirectory())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot build the consensus sequence : %v", err)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
forwardPatternDir, err := obiapat.MakeApatPattern(
|
||||||
|
CLIForwardPrimer(),
|
||||||
|
CLIAllowedMismatch(),
|
||||||
|
false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot compile forward primer %s : %v", CLIForwardPrimer(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
reverse_rev := obiseq.NewBioSequence("fp", []byte(CLIReversePrimer()), "").ReverseComplement(true).String()
|
||||||
|
reveresePatternRev, err := obiapat.MakeApatPattern(reverse_rev, CLIAllowedMismatch(), false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot compile reverse complement reverse primer %s : %v", CLIReversePrimer(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
aseq, err := obiapat.MakeApatSequence(seq, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot build apat sequence: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fstart, fend, fnerr, hasfw := forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
|
||||||
|
rstart, rend, rnerr, hasrev := reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
|
||||||
|
|
||||||
|
for hasfw && !hasrev {
|
||||||
|
var rseq *obiseq.BioSequence
|
||||||
|
rseq, err = BuildPCRProduct(
|
||||||
|
matched,
|
||||||
|
CLIGraphFilesDirectory(),
|
||||||
|
CLIKmerSize(),
|
||||||
|
CLIForwardPrimer(),
|
||||||
|
CLIReversePrimer(),
|
||||||
|
true,
|
||||||
|
CLISaveGraphToFiles(),
|
||||||
|
CLIGraphFilesDirectory())
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot build Reverse PCR sequence: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
kmerSize, _ := seq.GetIntAttribute("obiconsensus_kmer_size")
|
||||||
|
fp, _ := seq.Subsequence(seq.Len()-kmerSize, seq.Len(), false)
|
||||||
|
rp, _ := rseq.Subsequence(0, kmerSize, false)
|
||||||
|
rp = rp.ReverseComplement(true)
|
||||||
|
|
||||||
|
pairs, err := CLIPairedSequence()
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Cannot open file (%v)", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
nmatched := ExtractOnPatterns(pairs,
|
||||||
|
fp.String(),
|
||||||
|
rp.String(),
|
||||||
|
CLIAllowedMismatch(),
|
||||||
|
true,
|
||||||
|
)
|
||||||
|
|
||||||
|
in := map[string]bool{}
|
||||||
|
|
||||||
|
for _, s := range matched {
|
||||||
|
in[s.String()] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, s := range nmatched {
|
||||||
|
if !in[s.String()] {
|
||||||
|
matched = append(matched, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
seq, err = BuildPCRProduct(
|
||||||
|
matched,
|
||||||
|
CLIGraphFilesDirectory(),
|
||||||
|
CLIKmerSize(),
|
||||||
|
CLIForwardPrimer(),
|
||||||
|
CLIReversePrimer(),
|
||||||
|
false,
|
||||||
|
CLISaveGraphToFiles(),
|
||||||
|
CLIGraphFilesDirectory())
|
||||||
|
|
||||||
|
aseq, err := obiapat.MakeApatSequence(seq, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot build apat sequence: %v", err)
|
||||||
|
}
|
||||||
|
fstart, fend, fnerr, hasfw = forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
|
||||||
|
rstart, rend, rnerr, hasrev = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
marker, _ := seq.Subsequence(fstart, rend, false)
|
||||||
|
|
||||||
|
marker.SetAttribute("forward_primer", CLIForwardPrimer())
|
||||||
|
match, _ := seq.Subsequence(fstart, fend, false)
|
||||||
|
marker.SetAttribute("forward_match", match.String())
|
||||||
|
marker.SetAttribute("forward_error", fnerr)
|
||||||
|
|
||||||
|
marker.SetAttribute("reverse_primer", CLIReversePrimer())
|
||||||
|
match, _ = seq.Subsequence(rstart, rend, false)
|
||||||
|
marker.SetAttribute("reverse_match", match.ReverseComplement(true).String())
|
||||||
|
marker.SetAttribute("reverse_error", rnerr)
|
||||||
|
|
||||||
|
return marker
|
||||||
|
}
|
||||||
139
pkg/obitools/obimicroasm/options.go
Normal file
139
pkg/obitools/obimicroasm/options.go
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
package obimicroasm
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
|
"github.com/DavidGamba/go-getoptions"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Package-level storage for the obimicroasm command-line options.
// MicroAsmOptionSet binds each variable to its flag.
var (
	// Read files bound to --forward-reads and --reverse-reads.
	_ForwardFile = ""
	_ReverseFile = ""

	// Primer sequences bound to --forward and --reverse.
	_ForwardPrimer string
	_ReversePrimer string

	// Mismatch budget bound to --allowed-mismatches.
	_AllowedMismatch = 0

	// Kmer size bound to --kmer-size; the initial value is -1.
	_kmerSize = -1

	// Value bound to --save-graph. The initial value is a sentinel that
	// CLISaveGraphToFiles compares against.
	_saveGraph = "__@@NOSAVE@@__"
)
|
||||||
|
|
||||||
|
func MicroAsmOptionSet(options *getoptions.GetOpt) {
|
||||||
|
options.StringVar(&_ForwardFile, "forward-reads", "",
|
||||||
|
options.Alias("F"),
|
||||||
|
options.ArgName("FILENAME_F"),
|
||||||
|
options.Required("You must provide at a forward file"),
|
||||||
|
options.Description("The file names containing the forward reads"))
|
||||||
|
options.StringVar(&_ReverseFile, "reverse-reads", "",
|
||||||
|
options.Alias("R"),
|
||||||
|
options.ArgName("FILENAME_R"),
|
||||||
|
options.Required("You must provide a reverse file"),
|
||||||
|
options.Description("The file names containing the reverse reads"))
|
||||||
|
options.StringVar(&_ForwardPrimer, "forward", "",
|
||||||
|
options.Required("You must provide a forward primer"),
|
||||||
|
options.Description("The forward primer used for the electronic PCR."))
|
||||||
|
|
||||||
|
options.StringVar(&_ReversePrimer, "reverse", "",
|
||||||
|
options.Required("You must provide a reverse primer"),
|
||||||
|
options.Description("The reverse primer used for the electronic PCR."))
|
||||||
|
|
||||||
|
options.IntVar(&_AllowedMismatch, "allowed-mismatches", 0,
|
||||||
|
options.Alias("e"),
|
||||||
|
options.Description("Maximum number of mismatches allowed for each primer."))
|
||||||
|
options.IntVar(&_kmerSize, "kmer-size", _kmerSize,
|
||||||
|
options.ArgName("SIZE"),
|
||||||
|
options.Description("The size of the kmer used to build the consensus. "+
|
||||||
|
"Default value = -1, which means that the kmer size is estimated from the data"),
|
||||||
|
)
|
||||||
|
|
||||||
|
options.StringVar(&_saveGraph, "save-graph", _saveGraph,
|
||||||
|
options.Description("Creates a directory containing the set of DAG used by the obiclean clustering algorithm. "+
|
||||||
|
"The graph files follow the graphml format."),
|
||||||
|
)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
|
obiconvert.OptionSet(options)
|
||||||
|
MicroAsmOptionSet(options)
|
||||||
|
}
|
||||||
|
|
||||||
|
// CLIForwardPrimer returns the sequence of the forward primer as indicated by the
|
||||||
|
// --forward command line option
|
||||||
|
func CLIForwardPrimer() string {
|
||||||
|
pattern, err := obiapat.MakeApatPattern(_ForwardPrimer, _AllowedMismatch, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("%+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pattern.Free()
|
||||||
|
|
||||||
|
return _ForwardPrimer
|
||||||
|
}
|
||||||
|
|
||||||
|
// CLIReversePrimer returns the sequence of the reverse primer as indicated by the
|
||||||
|
// --reverse command line option
|
||||||
|
func CLIReversePrimer() string {
|
||||||
|
pattern, err := obiapat.MakeApatPattern(_ReversePrimer, _AllowedMismatch, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("%+v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pattern.Free()
|
||||||
|
|
||||||
|
return _ReversePrimer
|
||||||
|
}
|
||||||
|
|
||||||
|
// CLIAllowedMismatch returns the allowed mistmatch count between each
|
||||||
|
// primer and the sequences as indicated by the
|
||||||
|
// --allowed-mismatches|-e command line option
|
||||||
|
func CLIAllowedMismatch() int {
|
||||||
|
return _AllowedMismatch
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIPairedSequence() (obiiter.IBioSequence, error) {
|
||||||
|
forward, err := obiconvert.CLIReadBioSequences(_ForwardFile)
|
||||||
|
if err != nil {
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
|
||||||
|
reverse, err := obiconvert.CLIReadBioSequences(_ReverseFile)
|
||||||
|
if err != nil {
|
||||||
|
return obiiter.NilIBioSequence, err
|
||||||
|
}
|
||||||
|
|
||||||
|
paired := forward.PairTo(reverse)
|
||||||
|
|
||||||
|
return paired, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIForwardFile() string {
|
||||||
|
return _ForwardFile
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true it the obliclean graphs must be saved
|
||||||
|
func CLISaveGraphToFiles() bool {
|
||||||
|
return _saveGraph != "__@@NOSAVE@@__"
|
||||||
|
}
|
||||||
|
|
||||||
|
// It returns the directory where the graph files are saved
|
||||||
|
func CLIGraphFilesDirectory() string {
|
||||||
|
return _saveGraph
|
||||||
|
}
|
||||||
|
|
||||||
|
// CLIKmerSize returns the value of the kmer size to use for building the consensus.
|
||||||
|
//
|
||||||
|
// The value of the kmer size is set by the user with the `-k` flag.
|
||||||
|
// The value -1 means that the kmer size is estimated as the minimum value that
|
||||||
|
// insure that no kmer are present more than one time in a sequence.
|
||||||
|
//
|
||||||
|
// No parameters.
|
||||||
|
// Returns an integer value.
|
||||||
|
func CLIKmerSize() int {
|
||||||
|
return _kmerSize
|
||||||
|
}
|
||||||
|
|
||||||
|
func SetKmerSize(kmerSize int) {
|
||||||
|
_kmerSize = kmerSize
|
||||||
|
}
|
||||||
@@ -42,9 +42,10 @@ func MatchDistanceIndex(taxonomy *obitax.Taxonomy, distance int, distanceIdx map
|
|||||||
if i == len(keys) || distance > keys[len(keys)-1] {
|
if i == len(keys) || distance > keys[len(keys)-1] {
|
||||||
taxon = taxonomy.Root()
|
taxon = taxonomy.Root()
|
||||||
} else {
|
} else {
|
||||||
taxon = taxonomy.Taxon(distanceIdx[keys[i]])
|
var err error
|
||||||
if taxon == nil {
|
taxon, _, err = taxonomy.Taxon(distanceIdx[keys[i]])
|
||||||
log.Panicf("Cannot identify taxon %s in %s", distanceIdx[keys[i]], taxonomy.Name())
|
if err != nil {
|
||||||
|
log.Panicf("Cannot identify taxon %s in %s (%v)", distanceIdx[keys[i]], taxonomy.Name(), err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -196,9 +197,9 @@ func Identify(sequence *obiseq.BioSequence,
|
|||||||
log.Panic("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
|
log.Panic("Problem in identification line : ", best.Id(), "idx:", idx, "distance:", d)
|
||||||
}
|
}
|
||||||
|
|
||||||
match_taxon := taxo.Taxon(identification)
|
match_taxon, _, err := taxo.Taxon(identification)
|
||||||
|
|
||||||
if taxon != nil {
|
if err == nil {
|
||||||
taxon, _ = taxon.LCA(match_taxon)
|
taxon, _ = taxon.LCA(match_taxon)
|
||||||
} else {
|
} else {
|
||||||
taxon = match_taxon
|
taxon = match_taxon
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import (
|
|||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
@@ -43,7 +42,6 @@ func TagOptionSet(options *getoptions.GetOpt) {
|
|||||||
// the obiuniq command
|
// the obiuniq command
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.OptionSet(options)
|
obiconvert.OptionSet(options)
|
||||||
obioptions.LoadTaxonomyOptionSet(options, true, false)
|
|
||||||
TagOptionSet(options)
|
TagOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,15 @@
|
|||||||
package obitaxonomy
|
package obitaxonomy
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiitercsv"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
@@ -73,3 +78,18 @@ func CLICSVTaxaIterator(iterator *obitax.ITaxon) *obiitercsv.ICSVRecord {
|
|||||||
func CLICSVTaxaWriter(iterator *obitax.ITaxon, terminalAction bool) *obiitercsv.ICSVRecord {
|
func CLICSVTaxaWriter(iterator *obitax.ITaxon, terminalAction bool) *obiitercsv.ICSVRecord {
|
||||||
return obicsv.CLICSVWriter(CLICSVTaxaIterator(iterator), terminalAction)
|
return obicsv.CLICSVWriter(CLICSVTaxaIterator(iterator), terminalAction)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIDownloadNCBITaxdump() error {
|
||||||
|
now := time.Now()
|
||||||
|
dateStr := now.Format("20060102") // In Go, this specific date is used as reference for formatting
|
||||||
|
|
||||||
|
filename := fmt.Sprintf("ncbitaxo_%s.tgz", dateStr)
|
||||||
|
|
||||||
|
if obiconvert.CLIOutPutFileName() != "-" {
|
||||||
|
filename = obiconvert.CLIOutPutFileName()
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Downloading NCBI Taxdump to %s", filename)
|
||||||
|
return obiutils.DownloadFile("https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz", filename)
|
||||||
|
|
||||||
|
}
|
||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
|
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
|
||||||
|
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -22,6 +23,8 @@ var __taxid_path__ = "NA"
|
|||||||
var __taxid_sons__ = "NA"
|
var __taxid_sons__ = "NA"
|
||||||
var __restrict_rank__ = ""
|
var __restrict_rank__ = ""
|
||||||
var __to_dump__ = ""
|
var __to_dump__ = ""
|
||||||
|
var __download_ncbi__ = false
|
||||||
|
var __extract_taxonomy__ = false
|
||||||
|
|
||||||
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
||||||
options.BoolVar(&__rank_list__, "rank-list", false,
|
options.BoolVar(&__rank_list__, "rank-list", false,
|
||||||
@@ -34,7 +37,8 @@ func FilterTaxonomyOptionSet(options *getoptions.GetOpt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obioptions.LoadTaxonomyOptionSet(options, true, true)
|
obioptions.LoadTaxonomyOptionSet(options, false, true)
|
||||||
|
obiconvert.OutputModeOptionSet(options, false)
|
||||||
FilterTaxonomyOptionSet(options)
|
FilterTaxonomyOptionSet(options)
|
||||||
options.BoolVar(&__fixed_pattern__, "fixed", false,
|
options.BoolVar(&__fixed_pattern__, "fixed", false,
|
||||||
options.Alias("F"),
|
options.Alias("F"),
|
||||||
@@ -70,6 +74,12 @@ func OptionSet(options *getoptions.GetOpt) {
|
|||||||
options.ArgName("TAXID"),
|
options.ArgName("TAXID"),
|
||||||
options.Description("Dump a sub-taxonomy corresponding to the precised clade"),
|
options.Description("Dump a sub-taxonomy corresponding to the precised clade"),
|
||||||
)
|
)
|
||||||
|
options.BoolVar(&__download_ncbi__, "download-ncbi", __download_ncbi__,
|
||||||
|
options.Description("Download the current NCBI taxonomy taxdump"),
|
||||||
|
)
|
||||||
|
options.BoolVar(&__extract_taxonomy__, "extract-taxonomy", __extract_taxonomy__,
|
||||||
|
options.Description("Extract taxonomy from a sequence file"),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
||||||
@@ -81,13 +91,14 @@ func CLITaxonomicalRestrictions() (*obitax.TaxonSet, error) {
|
|||||||
|
|
||||||
ts := taxonomy.NewTaxonSet()
|
ts := taxonomy.NewTaxonSet()
|
||||||
for _, taxid := range __taxonomical_restriction__ {
|
for _, taxid := range __taxonomical_restriction__ {
|
||||||
tx := taxonomy.Taxon(taxid)
|
tx, _, err := taxonomy.Taxon(taxid)
|
||||||
|
|
||||||
if tx == nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf(
|
return nil, fmt.Errorf(
|
||||||
"cannot find taxon %s in taxonomy %s",
|
"cannot find taxon %s in taxonomy %s (%v)",
|
||||||
taxid,
|
taxid,
|
||||||
taxonomy.Name(),
|
taxonomy.Name(),
|
||||||
|
err,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -144,3 +155,11 @@ func CLIDumpSubtaxonomy() bool {
|
|||||||
func CLISubTaxonomyNode() string {
|
func CLISubTaxonomyNode() string {
|
||||||
return __to_dump__
|
return __to_dump__
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIDownloadNCBI() bool {
|
||||||
|
return __download_ncbi__
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIExtractTaxonomy() bool {
|
||||||
|
return __extract_taxonomy__
|
||||||
|
}
|
||||||
|
|||||||
@@ -93,3 +93,145 @@ func MapToMapInterface(m interface{}) map[string]interface{} {
|
|||||||
log.Panic("Invalid map type")
|
log.Panic("Invalid map type")
|
||||||
return make(map[string]interface{})
|
return make(map[string]interface{})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// InterfaceToInt converts a interface{} to an integer value if possible.
|
||||||
|
// If not a "NotAnInteger" error is returned via the err
|
||||||
|
// return value and val is set to 0.
|
||||||
|
func InterfaceToInt(i interface{}) (val int, err error) {
|
||||||
|
|
||||||
|
err = nil
|
||||||
|
val = 0
|
||||||
|
|
||||||
|
switch t := i.(type) {
|
||||||
|
case int:
|
||||||
|
val = t
|
||||||
|
case int8:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case int16:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case int32:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case int64:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case float32:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case float64:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case uint8:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case uint16:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case uint32:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
case uint64:
|
||||||
|
val = int(t) // standardizes across systems
|
||||||
|
default:
|
||||||
|
err = &NotAnInteger{"value attribute cannot be casted to an integer"}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// InterfaceToInt converts a interface{} to an integer value if possible.
|
||||||
|
// If not a "NotAnInteger" error is returned via the err
|
||||||
|
// return value and val is set to 0.
|
||||||
|
func InterfaceToFloat64(i interface{}) (val float64, err error) {
|
||||||
|
|
||||||
|
err = nil
|
||||||
|
val = 0
|
||||||
|
|
||||||
|
switch t := i.(type) {
|
||||||
|
case int:
|
||||||
|
val = float64(t)
|
||||||
|
case int8:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case int16:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case int32:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case int64:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case float32:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case float64:
|
||||||
|
val = t // standardizes across systems
|
||||||
|
case uint8:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case uint16:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case uint32:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
case uint64:
|
||||||
|
val = float64(t) // standardizes across systems
|
||||||
|
default:
|
||||||
|
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func InterfaceToIntMap(i interface{}) (val map[string]int, err error) {
|
||||||
|
err = nil
|
||||||
|
|
||||||
|
switch i := i.(type) {
|
||||||
|
case map[string]int:
|
||||||
|
val = i
|
||||||
|
case map[string]interface{}:
|
||||||
|
val = make(map[string]int, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k], err = InterfaceToInt(v)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case map[string]float64:
|
||||||
|
val = make(map[string]int, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k] = int(v)
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
|
||||||
|
err = nil
|
||||||
|
|
||||||
|
switch i := i.(type) {
|
||||||
|
case map[string]string:
|
||||||
|
val = i
|
||||||
|
case map[string]interface{}:
|
||||||
|
val = make(map[string]string, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k], err = InterfaceToString(v)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func InterfaceToStringSlice(i interface{}) (val []string, err error) {
|
||||||
|
err = nil
|
||||||
|
|
||||||
|
switch i := i.(type) {
|
||||||
|
case []string:
|
||||||
|
val = i
|
||||||
|
case []interface{}:
|
||||||
|
val = make([]string, len(i))
|
||||||
|
for k, v := range i {
|
||||||
|
val[k], err = InterfaceToString(v)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
err = &NotAMapInt{"value attribute cannot be casted to a []string"}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
45
pkg/obiutils/download.go
Normal file
45
pkg/obiutils/download.go
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
package obiutils
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/schollz/progressbar/v3"
|
||||||
|
)
|
||||||
|
|
||||||
|
func DownloadFile(url string, filepath string) error {
|
||||||
|
// Get the data
|
||||||
|
resp, err := http.Get(url)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
// Check server response
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return fmt.Errorf("bad status: %s", resp.Status)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the file
|
||||||
|
out, err := os.Create(filepath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer out.Close()
|
||||||
|
|
||||||
|
// Create progress bar
|
||||||
|
bar := progressbar.DefaultBytes(
|
||||||
|
resp.ContentLength,
|
||||||
|
"downloading",
|
||||||
|
)
|
||||||
|
|
||||||
|
// Write the body to file while updating the progress bar
|
||||||
|
_, err = io.Copy(io.MultiWriter(out, bar), resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
@@ -25,43 +25,6 @@ func (m *NotAnInteger) Error() string {
|
|||||||
return m.message
|
return m.message
|
||||||
}
|
}
|
||||||
|
|
||||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
|
||||||
// If not a "NotAnInteger" error is returned via the err
|
|
||||||
// return value and val is set to 0.
|
|
||||||
func InterfaceToInt(i interface{}) (val int, err error) {
|
|
||||||
|
|
||||||
err = nil
|
|
||||||
val = 0
|
|
||||||
|
|
||||||
switch t := i.(type) {
|
|
||||||
case int:
|
|
||||||
val = t
|
|
||||||
case int8:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case int16:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case int32:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case int64:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case float32:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case float64:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case uint8:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case uint16:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case uint32:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
case uint64:
|
|
||||||
val = int(t) // standardizes across systems
|
|
||||||
default:
|
|
||||||
err = &NotAnInteger{"value attribute cannot be casted to an integer"}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// NotAnInteger defines a new type of Error : "NotAnInteger"
|
// NotAnInteger defines a new type of Error : "NotAnInteger"
|
||||||
type NotAnFloat64 struct {
|
type NotAnFloat64 struct {
|
||||||
message string
|
message string
|
||||||
@@ -74,43 +37,6 @@ func (m *NotAnFloat64) Error() string {
|
|||||||
return m.message
|
return m.message
|
||||||
}
|
}
|
||||||
|
|
||||||
// InterfaceToInt converts a interface{} to an integer value if possible.
|
|
||||||
// If not a "NotAnInteger" error is returned via the err
|
|
||||||
// return value and val is set to 0.
|
|
||||||
func InterfaceToFloat64(i interface{}) (val float64, err error) {
|
|
||||||
|
|
||||||
err = nil
|
|
||||||
val = 0
|
|
||||||
|
|
||||||
switch t := i.(type) {
|
|
||||||
case int:
|
|
||||||
val = float64(t)
|
|
||||||
case int8:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case int16:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case int32:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case int64:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case float32:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case float64:
|
|
||||||
val = t // standardizes across systems
|
|
||||||
case uint8:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case uint16:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case uint32:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
case uint64:
|
|
||||||
val = float64(t) // standardizes across systems
|
|
||||||
default:
|
|
||||||
err = &NotAnFloat64{"value attribute cannot be casted to a float value"}
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// NotABoolean defines a new type of Error : "NotAMapInt"
|
// NotABoolean defines a new type of Error : "NotAMapInt"
|
||||||
type NotAMapInt struct {
|
type NotAMapInt struct {
|
||||||
message string
|
message string
|
||||||
@@ -123,53 +49,6 @@ func (m *NotAMapInt) Error() string {
|
|||||||
return m.message
|
return m.message
|
||||||
}
|
}
|
||||||
|
|
||||||
func InterfaceToIntMap(i interface{}) (val map[string]int, err error) {
|
|
||||||
err = nil
|
|
||||||
|
|
||||||
switch i := i.(type) {
|
|
||||||
case map[string]int:
|
|
||||||
val = i
|
|
||||||
case map[string]interface{}:
|
|
||||||
val = make(map[string]int, len(i))
|
|
||||||
for k, v := range i {
|
|
||||||
val[k], err = InterfaceToInt(v)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case map[string]float64:
|
|
||||||
val = make(map[string]int, len(i))
|
|
||||||
for k, v := range i {
|
|
||||||
val[k] = int(v)
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func InterfaceToStringMap(i interface{}) (val map[string]string, err error) {
|
|
||||||
err = nil
|
|
||||||
|
|
||||||
switch i := i.(type) {
|
|
||||||
case map[string]string:
|
|
||||||
val = i
|
|
||||||
case map[string]interface{}:
|
|
||||||
val = make(map[string]string, len(i))
|
|
||||||
for k, v := range i {
|
|
||||||
val[k], err = InterfaceToString(v)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
err = &NotAMapInt{"value attribute cannot be casted to a map[string]int"}
|
|
||||||
}
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// NotABoolean defines a new type of Error : "NotAMapInt"
|
// NotABoolean defines a new type of Error : "NotAMapInt"
|
||||||
type NotAMapFloat64 struct {
|
type NotAMapFloat64 struct {
|
||||||
message string
|
message string
|
||||||
|
|||||||
20
pkg/obiutils/unique.go
Normal file
20
pkg/obiutils/unique.go
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
package obiutils
|
||||||
|
|
||||||
|
// Unique returns a new slice containing each distinct value of the input
// slice exactly once.
//
// Values are returned in the order of their first occurrence in the input,
// so the result is deterministic from one call to the next.
//
// Parameters:
//   - slice: The input slice containing potentially duplicate values
//
// Returns:
//   - A new slice containing only unique values; it is empty when the input
//     is nil or empty
func Unique[T comparable](slice []T) []T {
	// Values already emitted; sized for the worst case of no duplicates.
	seen := make(map[T]struct{}, len(slice))
	unique := make([]T, 0, len(slice))

	for _, v := range slice {
		if _, dup := seen[v]; dup {
			continue
		}
		seen[v] = struct{}{}
		unique = append(unique, v)
	}

	return unique
}
|
||||||
Reference in New Issue
Block a user