A first version of obimicroasm...

This commit is contained in:
Eric Coissac
2025-02-16 21:47:49 +01:00
parent 4774438644
commit d70b0a5b42
11 changed files with 1234 additions and 27 deletions

View File

@ -0,0 +1,42 @@
package main
import (
"os"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimicroasm"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
func main() {
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
// f, err := os.Create("cpu.pprof")
// if err != nil {
// log.Fatal(err)
// }
// pprof.StartCPUProfile(f)
// defer pprof.StopCPUProfile()
// go tool trace cpu.trace
// ftrace, err := os.Create("cpu.trace")
// if err != nil {
// log.Fatal(err)
// }
// trace.Start(ftrace)
// defer trace.Stop()
optionParser := obioptions.GenerateOptionParser(obimicroasm.OptionSet)
optionParser(os.Args)
obidefault.SetStrictReadWorker(2)
obidefault.SetStrictWriteWorker(2)
seq := obimicroasm.CLIAssemblePCR()
println(obiformats.FormatFasta(seq, obiformats.FormatFastSeqJsonHeader))
obiutils.WaitForLastPipe()
}

6
go.mod
View File

@ -14,7 +14,7 @@ require (
github.com/rrethy/ahocorasick v1.0.0
github.com/schollz/progressbar/v3 v3.13.1
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.8.4
github.com/stretchr/testify v1.10.0
github.com/tevino/abool/v2 v2.1.0
github.com/yuin/gopher-lua v1.1.1
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
@ -29,11 +29,13 @@ require (
github.com/buger/jsonparser v1.1.1 // indirect
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/ef-ds/deque/v2 v2.0.2 // indirect
github.com/goombaio/orderedmap v0.0.0-20180924084748-ba921b7e2419 // indirect
github.com/kr/pretty v0.3.0 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rogpeppe/go-internal v1.6.1 // indirect
go.etcd.io/bbolt v1.4.0 // indirect
)
require (
@ -47,7 +49,7 @@ require (
github.com/shopspring/decimal v1.3.1 // indirect
github.com/ulikunitz/xz v0.5.11
golang.org/x/net v0.17.0 // indirect
golang.org/x/sys v0.17.0 // indirect
golang.org/x/sys v0.29.0 // indirect
golang.org/x/term v0.13.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
)

8
go.sum
View File

@ -25,6 +25,8 @@ github.com/dlclark/regexp2 v1.11.4/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cn
github.com/dsnet/compress v0.0.1 h1:PlZu0n3Tuv04TzpfPbrnI0HW/YwodEXDS+oPKahKF0Q=
github.com/dsnet/compress v0.0.1/go.mod h1:Aw8dCMJ7RioblQeTqt88akK31OvO8Dhf5JflhBbQEHo=
github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
github.com/ef-ds/deque/v2 v2.0.2 h1:GQtDK1boBMu/qsNbSLQsqzwNptaioxZI39X3UxT5ALA=
github.com/ef-ds/deque/v2 v2.0.2/go.mod h1:hoZy4VooWLhRT4uS+sSCilfgBQUNptJU2FGqr08a5sc=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
@ -77,6 +79,8 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tevino/abool/v2 v2.1.0 h1:7w+Vf9f/5gmKT4m4qkayb33/92M+Um45F2BkHOR+L/c=
github.com/tevino/abool/v2 v2.1.0/go.mod h1:+Lmlqk6bHDWHqN1cbxqhwEAwMPXgc8I1SDEamtseuXY=
github.com/ulikunitz/xz v0.5.6/go.mod h1:2bypXElzHzzJZwzH67Y6wb67pO62Rzfn7BSiF4ABRW8=
@ -84,6 +88,8 @@ github.com/ulikunitz/xz v0.5.11 h1:kpFauv27b6ynzBNT/Xy+1k+fK4WswhN/6PN5WhFAGw8=
github.com/ulikunitz/xz v0.5.11/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk=
go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
@ -93,6 +99,8 @@ golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=

View File

@ -2,12 +2,9 @@ git.sr.ht/~sbinet/gg v0.3.1 h1:LNhjNn8DerC8f9DHLz6lS0YYul/b602DUxDgGkd/Aik=
git.sr.ht/~sbinet/gg v0.3.1/go.mod h1:KGYtlADtqsqANL9ueOFkWymvzUvLMQllU5Ixo+8v3pc=
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b h1:slYM766cy2nI3BwyRiyQj/Ud48djTMtMebDqepE95rw=
github.com/ajstarks/svgo v0.0.0-20211024235047-1546f124cd8b/go.mod h1:1KcenG0jGWcpt8ov532z81sp/kMMUG485J2InIOyADM=
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/logex v1.2.0 h1:+eqR0HfOetur4tgnC8ftU5imRnhi4te+BadWS95c5AM=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/chzyer/test v0.0.0-20210722231415-061457976a23 h1:dZ0/VyGgQdVGAss6Ju0dt5P0QltE0SFY5Woh6hbIfiQ=
@ -29,16 +26,21 @@ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2 h1:rcanfLhLDA8nozr/K289V1zcntHr3V+SHlXwzz1ZI2g=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg=
github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
github.com/kr/pty v1.1.1 h1:VkoXIwSboBpnk99O/KFauAEILuNHv5DVFKZMBN/gUgw=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng=
github.com/smallnest/goroutine v1.1.1/go.mod h1:Fp8f6ZReubfdj0m4+NcUnW4IsAqKa+Pnrv9opEiD43E=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
go.etcd.io/gofail v0.2.0/go.mod h1:nL3ILMGfkXTekKI3clMBNazKnjUZjYLKmBHzsVAnC1o=
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
golang.org/x/image v0.6.0 h1:bR8b5okrPI3g/gyZakLZHeWxAR8Dn5CyxXv1hLH5g/4=
@ -46,6 +48,7 @@ golang.org/x/image v0.6.0/go.mod h1:MXLdDR43H7cDJq5GEGXEVeeNhPgi+YYEQ2pC1byI1x0=
golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY=
golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc=

View File

@ -8,9 +8,12 @@ import (
"math/bits"
"os"
"slices"
"sort"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
"github.com/ef-ds/deque/v2"
log "github.com/sirupsen/logrus"
)
@ -89,12 +92,18 @@ type DeBruijnGraph struct {
//
// *DeBruijnGraph - a pointer to the created De Bruijn's Graph
func MakeDeBruijnGraph(kmersize int) *DeBruijnGraph {
if kmersize > 31 {
log.Panicf("k-mer size %d is too large", kmersize)
}
kmermask := (^uint64(0) << (uint64(kmersize) * 2))
g := DeBruijnGraph{
kmersize: kmersize,
kmermask: ^(^uint64(0) << (uint64(kmersize) * 2)), // k-mer mask used to set to 0 the bits that are not in the k-mer
prevc: uint64(1) << (uint64(kmersize-1) * 2),
prevg: uint64(2) << (uint64(kmersize-1) * 2),
prevt: uint64(3) << (uint64(kmersize-1) * 2),
kmermask: kmermask, // k-mer mask used to set to 1 the bits that are not in the k-mer
prevc: (uint64(1) << (uint64(kmersize-1) * 2)) | kmermask,
prevg: (uint64(2) << (uint64(kmersize-1) * 2)) | kmermask,
prevt: (uint64(3) << (uint64(kmersize-1) * 2)) | kmermask,
graph: make(map[uint64]uint),
}
@ -161,19 +170,34 @@ func (g *DeBruijnGraph) FilterMinWeight(min int) {
}
}
// FilterMinWeight filters the DeBruijnGraph by removing nodes with weight less than the specified minimum.
//
// min: an integer representing the minimum count threshold.
func (g *DeBruijnGraph) FilterMaxWeight(min int) {
umin := uint(min)
for idx, count := range g.graph {
if count > umin {
delete(g.graph, idx)
}
}
}
func (g *DeBruijnGraph) Previouses(index uint64) []uint64 {
if _, ok := g.graph[index]; !ok {
log.Panicf("k-mer %s (index %d) is not in graph", g.DecodeNode(index), index)
}
rep := make([]uint64, 0, 4)
index &= ^g.kmermask
index >>= 2
if _, ok := g.graph[index]; ok {
rep = append(rep, index)
key := index | g.kmermask
if _, ok := g.graph[key]; ok {
rep = append(rep, key)
}
key := index | g.prevc
key = index | g.prevc
if _, ok := g.graph[key]; ok {
rep = append(rep, key)
}
@ -197,7 +221,7 @@ func (g *DeBruijnGraph) Nexts(index uint64) []uint64 {
}
rep := make([]uint64, 0, 4)
index = (index << 2) & g.kmermask
index = (index << 2) | g.kmermask
if _, ok := g.graph[index]; ok {
rep = append(rep, index)
@ -268,6 +292,33 @@ func (g *DeBruijnGraph) MaxHead() (uint64, int, bool) {
return rep, int(max), found
}
func (g *DeBruijnGraph) Terminals() []uint64 {
rep := make([]uint64, 0, 10)
for k := range g.graph {
if len(g.Nexts(k)) == 0 {
rep = append(rep, k)
}
}
return rep
}
func (g *DeBruijnGraph) MaxTerminal() (uint64, int, bool) {
rep := uint64(0)
max := uint(0)
found := false
for k, w := range g.graph {
if len(g.Nexts(k)) == 0 && w > max {
rep = k
max = w
found = true
}
}
return rep, int(max), found
}
func (g *DeBruijnGraph) MaxPath() []uint64 {
path := make([]uint64, 0, 1000)
ok := false
@ -318,7 +369,11 @@ func (g *DeBruijnGraph) LongestConsensus(id string, min_cov float64) (*obiseq.Bi
return nil, fmt.Errorf("graph is empty")
}
//path := g.LongestPath(max_length)
path := g.HaviestPath()
path, err := g.HaviestPath(nil, nil, false)
if err != nil {
return nil, err
}
spath := path
@ -481,7 +536,7 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int)
}
current <<= 2
current &= graph.kmermask
current |= graph.kmermask
b := iupac[sequence[0]]
current |= b[0]
graph.graph[current] = uint(graph.Weight(current) + weight)
@ -495,6 +550,36 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int)
}
}
// func (graph *DeBruijnGraph) search(current uint64, extension []byte, path []uint64, error,errormax int) ([]uint64,error) {
// path = append(path, current)
// if len(extension) == 0 {
// return path,nil
// }
// current <<= 2
// current &= graph.kmermask
// b := iupac[extension[0]]
// newPath := path
// if len(b) > 1 {
// newPath = slices.Clone(path)
// }
// current |= b[0]
// _, ok := graph.graph[current]
// if ok {
// newPath = append(newPath, current)
// }
// rep, err := graph.search(current, extension[1:], newPath, error,errormax)
// if err != nil {
// return path,err
// }
// }
// Push appends a BioSequence to the DeBruijnGraph.
//
// Parameters:
@ -523,6 +608,7 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
initFirstKmer(start+1, key)
}
} else {
key |= graph.kmermask
graph.graph[key] = uint(graph.Weight(key) + w)
graph.append(s[graph.kmersize:], key, w)
}
@ -533,6 +619,110 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
}
}
func (graph *DeBruijnGraph) search(sequence []byte, mismatch, errormax int) []uint64 {
var initFirstKmer func(start int, key uint64) []uint64
initFirstKmer = func(start int, key uint64) []uint64 {
if start == graph.kmersize {
key |= graph.kmermask
if _, ok := graph.graph[key]; ok {
return []uint64{key}
} else {
return []uint64{}
}
}
keys := make([]uint64, 0, 1000)
if start == 0 {
key = 0
}
key <<= 2
b := iupac[sequence[start]]
for _, code := range b {
key &= ^uint64(3)
key |= code
keys = append(keys, initFirstKmer(start+1, key)...)
}
// w := []string{}
// for _, k := range keys {
// w = append(w, graph.DecodeNode(k))
// }
// // log.Warnf("For %s found %d matches : %v", sequence, len(keys), w)
return keys
}
rep := initFirstKmer(0, 0)
return rep
}
func (graph *DeBruijnGraph) Search(sequence *obiseq.BioSequence, errormax int) []uint64 {
s := sequence.Sequence() // Get the sequence as a byte slice
if len(s) < graph.KmerSize() {
s = slices.Clone(s)
for len(s) < graph.KmerSize() {
s = append(s, 'n')
}
}
log.Warnf("searching for %s", s)
keys := graph.search(s, 0, errormax)
for mismatch := 1; mismatch <= errormax; mismatch++ {
log.Warnf("searching with %d error for %s", mismatch, s)
for probe := range IterateOneError(s[0:graph.kmersize]) {
keys = append(keys,
graph.search(probe, mismatch, errormax)...,
)
}
}
keys = obiutils.Unique(keys)
return keys
}
func (graph *DeBruijnGraph) BackSearch(sequence *obiseq.BioSequence, errormax int) []uint64 {
lkmer := graph.KmerSize()
s := sequence.Sequence() // Get the sequence as a byte slice
if len(s) < lkmer {
sn := []byte{}
ls := len(s)
for ls < lkmer {
sn = append(sn, 'n')
ls++
}
s = append(sn, s...)
} else {
s = s[(len(s) - lkmer):]
}
log.Warnf("back-searching for %s", s)
keys := graph.search(s, 0, errormax)
for mismatch := 1; mismatch <= errormax; mismatch++ {
log.Warnf("searching with %d error for %s", mismatch, s)
for probe := range IterateOneError(s[0:graph.kmersize]) {
// log.Warnf("searching with %d error for %s", mismatch, probe)
keys = append(keys,
graph.search(probe, mismatch, errormax)...,
)
}
}
keys = obiutils.Unique(keys)
return keys
}
func (graph *DeBruijnGraph) Gml() string {
buffer := bytes.NewBuffer(make([]byte, 0, 1000))
@ -614,7 +804,7 @@ func (graph *DeBruijnGraph) WriteGml(filename string) error {
func (g *DeBruijnGraph) HammingDistance(kmer1, kmer2 uint64) int {
ident := ^((kmer1 & kmer2) | (^kmer1 & ^kmer2))
ident |= (ident >> 1)
ident &= 0x5555555555555555 & g.kmermask
ident &= 0x5555555555555555 & ^g.kmermask
return bits.OnesCount64(ident)
}
@ -638,11 +828,23 @@ func (h *UInt64Heap) Pop() any {
return x
}
func (g *DeBruijnGraph) HaviestPath() []uint64 {
func (g *DeBruijnGraph) HaviestPath(starts, stops []uint64, backPath bool) ([]uint64, error) {
if g.HasCycle() {
return nil
// if g.HasCycle() {
// return nil, fmt.Errorf("graph has a cycle")
// }
following := g.Nexts
if backPath {
following = g.Previouses
}
stopNodes := make(map[uint64]bool, len(stops))
for _, n := range stops {
stopNodes[n] = true
}
// Initialize the distance array and visited set
distances := make(map[uint64]int)
visited := make(map[uint64]bool)
@ -654,7 +856,11 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
heap.Init(queue)
startNodes := make(map[uint64]struct{})
for _, n := range g.Heads() {
if starts == nil {
starts = g.Heads()
}
for _, n := range starts {
startNodes[n] = struct{}{}
heap.Push(queue, n)
distances[n] = g.Weight(n)
@ -686,7 +892,11 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
log.Warn("current node is 0")
}
// Update the distance of the neighbors
nextNodes := g.Nexts(currentNode)
nextNodes := following(currentNode)
if _, ok := stopNodes[currentNode]; ok {
nextNodes = []uint64{}
}
for _, nextNode := range nextNodes {
if nextNode == 0 {
log.Warn("next node is 0")
@ -718,16 +928,178 @@ func (g *DeBruijnGraph) HaviestPath() []uint64 {
}
if slices.Contains(heaviestPath, currentNode) {
log.Panicf("Cycle detected %v -> %v (%v) len(%v), graph: %v", heaviestPath, currentNode, startNodes, len(heaviestPath), g.Len())
return nil
return nil, fmt.Errorf("cycle detected in heaviest path")
}
heaviestPath = append(heaviestPath, currentNode)
// Reverse the path
if !backPath {
slices.Reverse(heaviestPath)
}
return heaviestPath
return heaviestPath, nil
}
func (g *DeBruijnGraph) HaviestPathDSU(starts, stops []uint64, backPath bool) ([]uint64, error) {
// Collect and sort edges
type Edge struct {
weight float64
u, v uint64
}
edges := make([]Edge, 0)
// Function to get next nodes (either Nexts or Previouses based on backPath)
following := g.Nexts
previouses := g.Previouses
if backPath {
following = g.Previouses
previouses = g.Nexts
}
// Collect all edges
for u := range g.graph {
for _, v := range following(u) {
edges = append(edges, Edge{
weight: float64(min(g.Weight(u), g.Weight(v))),
u: u,
v: v,
})
}
}
// Sort edges by weight in descending order
sort.Slice(edges, func(i, j int) bool {
return edges[i].weight > edges[j].weight
})
// Initialize disjoint set data structure
parent := make(map[uint64]uint64)
for u := range g.graph {
parent[u] = u
}
// Find with path compression
var find func(uint64) uint64
find = func(node uint64) uint64 {
if parent[node] != node {
parent[node] = find(parent[node])
}
return parent[node]
}
// Union function that returns true if cycle is detected
union := func(u, v uint64) bool {
rootU := find(u)
rootV := find(v)
if rootU == rootV {
return true // Cycle detected
}
parent[rootV] = rootU
return false
}
// If no specific starts provided, use graph heads
if starts == nil {
if !backPath {
starts = g.Heads()
} else {
starts = g.Terminals()
}
}
// If no specific stops provided, use graph terminals
if stops == nil {
if !backPath {
stops = g.Terminals()
} else {
stops = g.Heads()
}
}
// Convert stops to a map for O(1) lookup
stopNodes := make(map[uint64]bool)
for _, stop := range stops {
stopNodes[stop] = false
}
var path []uint64
maxCapacity := math.Inf(-1)
stopEdge := []Edge{}
// Process edges in descending order of weight
for _, edge := range edges {
if stopNodes[edge.u] {
continue // Skip edges from stop nodes
}
if in, ok := stopNodes[edge.v]; ok {
if !in {
stopEdge = append(stopEdge, edge)
stopNodes[edge.v] = true
}
}
if union(edge.u, edge.v) {
continue // Skip if creates cycle
}
pathFound := false
for _, sedge := range stopEdge {
// Check if any start-stop pair is connected
fv := find(sedge.v)
for _, s := range starts {
fs := find(s)
// log.Warnf("Start: %d, Stop: %d", fs, fv)
if fs == fv {
pathFound = true
maxCapacity = edge.weight
// Reconstruct path
current := sedge.v
path = []uint64{current}
for current != s {
oldcurrent := current
// log.Warnf("Start: %d, Current: %d, Previous: %v", s, current, previouses(current))
for _, prev := range previouses(current) {
if find(prev) == fs {
path = append(path, prev)
current = prev
break
}
}
if current == oldcurrent {
log.Fatalf("We are stuck")
}
}
// log.Warnf("Built path: %v", path)
break
}
}
if pathFound {
break
}
}
if pathFound {
break
}
}
// log.Warnf("Stop edge: %v", stopEdge)
// Process edges in descending order of weight
if path == nil {
return nil, fmt.Errorf("no valid path found")
}
if !backPath {
slices.Reverse(path)
}
log.Warnf("Max capacity: %5.0f: %v", maxCapacity, g.DecodePath(path))
return path, nil
}
func (g *DeBruijnGraph) HasCycle() bool {
@ -765,3 +1137,59 @@ func (g *DeBruijnGraph) HasCycle() bool {
}
return false
}
// HasCycleInDegree détecte la présence d'un cycle dans le graphe en utilisant la méthode des degrés entrants.
// Cette méthode est basée sur le tri topologique : si on ne peut pas trier tous les nœuds,
// alors il y a un cycle.
//
// Returns:
// - bool: true si le graphe contient un cycle, false sinon
func (g *DeBruijnGraph) HasCycleInDegree() bool {
// Créer une map pour stocker les degrés entrants de chaque nœud
inDegree := make(map[uint64]int)
// Initialiser les degrés entrants à 0 pour tous les nœuds
for node := range g.graph {
inDegree[node] = 0
}
// Calculer les degrés entrants
for node := range g.graph {
for _, next := range g.Nexts(node) {
inDegree[next]++
}
}
// Créer une deque pour stocker les nœuds avec un degré entrant de 0
queue := deque.Deque[uint64]{}
// Ajouter tous les nœuds avec un degré entrant de 0 à la deque
for node := range g.graph {
if inDegree[node] == 0 {
queue.PushBack(node)
}
}
visited := 0 // Compteur de nœuds visités
// Parcours BFS
for queue.Len() > 0 {
// Retirer le premier nœud de la deque
node, _ := queue.PopFront()
visited++
// Pour chaque nœud adjacent
for _, next := range g.Nexts(node) {
// Réduire son degré entrant
inDegree[next]--
// Si le degré entrant devient 0, l'ajouter à la deque
if inDegree[next] == 0 {
queue.PushBack(next)
}
}
}
// S'il y a un cycle, on n'aura pas pu visiter tous les nœuds
return visited != len(g.graph)
}

45
pkg/obikmer/oneerror.go Normal file
View File

@ -0,0 +1,45 @@
package obikmer
import (
"iter"
"slices"
)
var baseError = map[byte]byte{
'a': 'b',
'c': 'd',
'g': 'h',
't': 'v',
'r': 'y',
'y': 'r',
's': 'w',
'w': 's',
'k': 'm',
'm': 'k',
'd': 'c',
'v': 't',
'h': 'g',
'b': 'a',
}
type BytesItem []byte
func IterateOneError(kmer []byte) iter.Seq[BytesItem] {
lkmer := len(kmer)
return func(yield func(BytesItem) bool) {
for p := 0; p < lkmer; p++ {
for p < lkmer && kmer[p] == 'n' {
p++
}
if p < lkmer {
nkmer := slices.Clone(kmer)
nkmer[p] = baseError[kmer[p]]
if !yield(nkmer) {
return
}
}
}
}
}

View File

@ -8,7 +8,7 @@ import (
// corresponds to the last commit, and not the one when the file will be
// commited
var _Commit = "6a8061c"
var _Commit = "4774438"
var _Version = "Release 4.2.0"
// Version returns the version of the obitools package.

View File

@ -25,7 +25,7 @@ func (s *BioSequence) UnPair() {
}
func (s *BioSequenceSlice) IsPaired() bool {
return (*s)[0].paired != nil
return s != nil && s.Len() > 0 && (*s)[0].paired != nil
}
func (s *BioSequenceSlice) PairedWith() *BioSequenceSlice {

View File

@ -0,0 +1,520 @@
package obimicroasm
import (
"fmt"
"os"
"path"
"slices"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obisuffix"
)
func BuildFilterOnPatternReadPairWorker(
forward, reverse string,
errormax int,
cutReads bool,
) obiseq.SeqWorker {
forwardPatternDir, err := obiapat.MakeApatPattern(forward, errormax, false)
if err != nil {
log.Fatalf("Cannot compile forward primer %s : %v", forward, err)
}
reverse_rev := obiseq.NewBioSequence("fp", []byte(reverse), "").ReverseComplement(true).String()
reveresePatternRev, err := obiapat.MakeApatPattern(reverse_rev, errormax, false)
if err != nil {
log.Fatalf("Cannot compile reverse complement reverse primer %s : %v", reverse, err)
}
matchRead := func(sequence *obiseq.BioSequence) *obiseq.BioSequence {
var aseq obiapat.ApatSequence
var err error
var read, match *obiseq.BioSequence
aseq, err = obiapat.MakeApatSequence(sequence, false)
if err != nil {
log.Fatalf("Cannot prepare apat sequence from %s : %v", sequence.Id(), err)
}
start, end, nerr, matched := forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
if matched {
read = sequence
if cutReads {
read, err = sequence.Subsequence(start, sequence.Len(), false)
if err != nil {
log.Fatalf("Cannot cut, on forward, forward read %s [%d,%d] : %v",
sequence.Id(), start, sequence.Len(), err)
}
}
read.SetAttribute("forward_primer", forward)
match, _ = sequence.Subsequence(start, end, false)
read.SetAttribute("forward_match", match.String())
read.SetAttribute("forward_error", nerr)
aseq, err = obiapat.MakeApatSequence(read, false, aseq)
if err != nil {
log.Fatalf("Cannot prepare apat sequence from %s : %v", sequence.Id(), err)
}
start, end, nerr, matched = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
if matched {
frread := read
if cutReads {
frread, err = read.Subsequence(0, end, false)
if err != nil {
log.Fatalf("Cannot xxx cut, on reverse, forward read %s [%d,%d] : %v",
sequence.Id(), start, read.Len(), err)
}
}
frread.SetAttribute("reverse_primer", reverse)
match, _ = read.Subsequence(start, end, false)
frread.SetAttribute("reverse_match", match.ReverseComplement(true).String())
frread.SetAttribute("reverse_error", nerr)
read = frread
// log.Warnf("Forward-Reverse primer matched on %s : %d\n%s", read.Id(), read.Len(),
// obiformats.FormatFasta(read, obiformats.FormatFastSeqJsonHeader))
}
} else {
start, end, nerr, matched = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
if matched {
read = sequence
if cutReads {
read, err = sequence.Subsequence(0, end, false)
if err != nil {
log.Fatalf("Cannot yyy cut, on reverse, forward read %s [%d,%d] : %v",
sequence.Id(), 0, end, err)
}
}
read.SetAttribute("reverse_primer", reverse)
match, _ = read.Subsequence(start, end, false)
read.SetAttribute("reverse_match", match.ReverseComplement(true).String())
read.SetAttribute("reverse_error", nerr)
} else {
read = nil
}
}
return read
}
w := func(sequence *obiseq.BioSequence) (result obiseq.BioSequenceSlice, err error) {
result = obiseq.MakeBioSequenceSlice()
paired := sequence.PairedWith()
sequence.UnPair()
read := matchRead(sequence)
if read == nil {
sequence = sequence.ReverseComplement(true)
read = matchRead(sequence)
}
if read != nil {
result = append(result, read)
}
if paired != nil {
read = matchRead(paired)
if read == nil {
read = matchRead(paired.ReverseComplement(true))
}
if read != nil {
result = append(result, read)
}
}
return
}
return w
}
func ExtractOnPatterns(iter obiiter.IBioSequence,
forward, reverse string,
errormax int,
cutReads bool,
) obiseq.BioSequenceSlice {
matched := iter.MakeIWorker(
BuildFilterOnPatternReadPairWorker(forward, reverse, errormax, cutReads),
false,
)
rep := obiseq.MakeBioSequenceSlice()
for matched.Next() {
frgs := matched.Get()
rep = append(rep, frgs.Slice()...)
}
return rep
}
func BuildPCRProduct(seqs obiseq.BioSequenceSlice,
consensus_id string,
kmer_size int,
forward, reverse string,
backtrack bool,
save_graph bool, dirname string) (*obiseq.BioSequence, error) {
from := obiseq.NewBioSequence("forward", []byte(forward), "")
to := obiseq.NewBioSequence("reverse", []byte(CLIReversePrimer()), "").ReverseComplement(true)
if backtrack {
from, to = to, from
}
if seqs.Len() == 0 {
return nil, fmt.Errorf("no sequence provided")
}
if save_graph {
if dirname == "" {
dirname = "."
}
if stat, err := os.Stat(dirname); err != nil || !stat.IsDir() {
// path does not exist or is not directory
os.RemoveAll(dirname)
err := os.Mkdir(dirname, 0755)
if err != nil {
log.Panicf("Cannot create directory %s for saving graphs", dirname)
}
}
fasta, err := os.Create(path.Join(dirname, fmt.Sprintf("%s_consensus.fasta", consensus_id)))
if err == nil {
defer fasta.Close()
fasta.Write(obiformats.FormatFastaBatch(obiiter.MakeBioSequenceBatch(
fmt.Sprintf("%s_consensus", consensus_id),
0,
seqs,
),
obiformats.FormatFastSeqJsonHeader, false).Bytes())
fasta.Close()
}
}
log.Debugf("Number of reads : %d\n", len(seqs))
if kmer_size < 0 {
longest := make([]int, len(seqs))
for i, seq := range seqs {
s := obiseq.BioSequenceSlice{seq}
sa := obisuffix.BuildSuffixArray(&s)
longest[i] = slices.Max(sa.CommonSuffix())
}
// spectrum := map[int]int{}
// for _, s := range longest {
// spectrum[s]++
// }
// log.Warnf("spectum kmer size : %v", spectrum)
kmer_size = slices.Max(longest) + 1
log.Infof("estimated kmer size : %d", kmer_size)
}
var graph *obikmer.DeBruijnGraph
var hp []uint64
var err error
var starts []uint64
var stops []uint64
for {
graph = obikmer.MakeDeBruijnGraph(kmer_size)
for _, s := range seqs {
graph.Push(s)
}
if !backtrack {
starts = graph.Search(from, CLIAllowedMismatch())
stops = graph.BackSearch(to, CLIAllowedMismatch())
} else {
starts = graph.BackSearch(from, CLIAllowedMismatch())
stops = graph.Search(to, CLIAllowedMismatch())
}
log.Infof("Found %d starts", len(starts))
pweight := map[int]int{}
for _, s := range starts {
w := graph.Weight(s)
pweight[w]++
log.Warnf("Starts : %s (%d)\n", graph.DecodeNode(s), w)
}
log.Infof("Found %d stops", len(stops))
for _, s := range stops {
w := graph.Weight(s)
pweight[w]++
log.Warnf("Stop : %s (%d)\n", graph.DecodeNode(s), w)
}
log.Infof("Weight spectrum : %v", pweight)
wmax := 0
sw := 0
for w := range pweight {
sw += w
if w > wmax {
wmax = w
}
}
graph.FilterMinWeight(int(sw / len(pweight)))
graph.FilterMaxWeight(int(wmax * 2))
log.Infof("Minimum coverage : %d", int(sw/len(pweight)))
log.Infof("Maximum coverage : %d", int(wmax*2))
if !graph.HasCycleInDegree() {
break
}
kmer_size++
if kmer_size > 31 {
break
}
SetKmerSize(kmer_size)
log.Warnf("Cycle detected, increasing kmer size to %d\n", kmer_size)
}
if !backtrack {
starts = graph.Search(from, CLIAllowedMismatch())
stops = graph.BackSearch(to, CLIAllowedMismatch())
} else {
starts = graph.BackSearch(from, CLIAllowedMismatch())
stops = graph.Search(to, CLIAllowedMismatch())
}
hp, err = graph.HaviestPath(starts, stops, backtrack)
log.Debugf("Graph size : %d\n", graph.Len())
maxw := graph.MaxWeight()
modew := graph.WeightMode()
meanw := graph.WeightMean()
specw := graph.WeightSpectrum()
kmer := graph.KmerSize()
log.Warnf("Weigh mode: %d Weigth mean : %4.1f Weigth max : %d, kmer = %d", modew, meanw, maxw, kmer)
log.Warn(specw)
if save_graph {
file, err := os.Create(path.Join(dirname,
fmt.Sprintf("%s_consensus.gml", consensus_id)))
if err != nil {
fmt.Println(err)
} else {
file.WriteString(graph.Gml())
file.Close()
}
}
if err == nil {
s := graph.DecodePath(hp)
seq := obiseq.NewBioSequence(consensus_id, []byte(s), "")
total_kmer := graph.Len()
sumCount := 0
if seq != nil {
for _, s := range seqs {
sumCount += s.Count()
}
seq.SetAttribute("obiconsensus_consensus", true)
seq.SetAttribute("obiconsensus_weight", sumCount)
seq.SetAttribute("obiconsensus_seq_length", seq.Len())
seq.SetAttribute("obiconsensus_kmer_size", kmer_size)
seq.SetAttribute("obiconsensus_kmer_max_occur", graph.MaxWeight())
seq.SetAttribute("obiconsensus_filtered_graph_size", graph.Len())
seq.SetAttribute("obiconsensus_full_graph_size", total_kmer)
}
log.Warnf("Consensus sequence : \n%s", obiformats.FormatFasta(seq, obiformats.FormatFastSeqJsonHeader))
return seq, nil
}
return nil, err
}
func CLIAssemblePCR() *obiseq.BioSequence {
pairs, err := CLIPairedSequence()
if err != nil {
log.Errorf("Cannot open file (%v)", err)
os.Exit(1)
}
matched := ExtractOnPatterns(pairs,
CLIForwardPrimer(),
CLIReversePrimer(),
CLIAllowedMismatch(),
true,
)
seq, err := BuildPCRProduct(
matched,
CLIGraphFilesDirectory(),
CLIKmerSize(),
CLIForwardPrimer(),
CLIReversePrimer(),
false,
CLISaveGraphToFiles(),
CLIGraphFilesDirectory())
if err != nil {
log.Fatalf("Cannot build the consensus sequence : %v", err)
}
forwardPatternDir, err := obiapat.MakeApatPattern(
CLIForwardPrimer(),
CLIAllowedMismatch(),
false)
if err != nil {
log.Fatalf("Cannot compile forward primer %s : %v", CLIForwardPrimer(), err)
}
reverse_rev := obiseq.NewBioSequence("fp", []byte(CLIReversePrimer()), "").ReverseComplement(true).String()
reveresePatternRev, err := obiapat.MakeApatPattern(reverse_rev, CLIAllowedMismatch(), false)
if err != nil {
log.Fatalf("Cannot compile reverse complement reverse primer %s : %v", CLIReversePrimer(), err)
}
aseq, err := obiapat.MakeApatSequence(seq, false)
if err != nil {
log.Fatalf("Cannot build apat sequence: %v", err)
}
fstart, fend, fnerr, hasfw := forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
rstart, rend, rnerr, hasrev := reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
for hasfw && !hasrev {
var rseq *obiseq.BioSequence
rseq, err = BuildPCRProduct(
matched,
CLIGraphFilesDirectory(),
CLIKmerSize(),
CLIForwardPrimer(),
CLIReversePrimer(),
true,
CLISaveGraphToFiles(),
CLIGraphFilesDirectory())
if err != nil {
log.Fatalf("Cannot build Reverse PCR sequence: %v", err)
}
kmerSize, _ := seq.GetIntAttribute("obiconsensus_kmer_size")
fp, _ := seq.Subsequence(seq.Len()-kmerSize, seq.Len(), false)
rp, _ := rseq.Subsequence(0, kmerSize, false)
rp = rp.ReverseComplement(true)
pairs, err := CLIPairedSequence()
if err != nil {
log.Errorf("Cannot open file (%v)", err)
os.Exit(1)
}
nmatched := ExtractOnPatterns(pairs,
fp.String(),
rp.String(),
CLIAllowedMismatch(),
true,
)
in := map[string]bool{}
for _, s := range matched {
in[s.String()] = true
}
for _, s := range nmatched {
if !in[s.String()] {
matched = append(matched, s)
}
}
seq, err = BuildPCRProduct(
matched,
CLIGraphFilesDirectory(),
CLIKmerSize(),
CLIForwardPrimer(),
CLIReversePrimer(),
false,
CLISaveGraphToFiles(),
CLIGraphFilesDirectory())
aseq, err := obiapat.MakeApatSequence(seq, false)
if err != nil {
log.Fatalf("Cannot build apat sequence: %v", err)
}
fstart, fend, fnerr, hasfw = forwardPatternDir.BestMatch(aseq, 0, aseq.Len())
rstart, rend, rnerr, hasrev = reveresePatternRev.BestMatch(aseq, 0, aseq.Len())
}
marker, _ := seq.Subsequence(fstart, rend, false)
marker.SetAttribute("forward_primer", CLIForwardPrimer())
match, _ := seq.Subsequence(fstart, fend, false)
marker.SetAttribute("forward_match", match.String())
marker.SetAttribute("forward_error", fnerr)
marker.SetAttribute("reverse_primer", CLIReversePrimer())
match, _ = seq.Subsequence(rstart, rend, false)
marker.SetAttribute("reverse_match", match.ReverseComplement(true).String())
marker.SetAttribute("reverse_error", rnerr)
return marker
}

View File

@ -0,0 +1,139 @@
package obimicroasm
import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
"github.com/DavidGamba/go-getoptions"
log "github.com/sirupsen/logrus"
)
var _ForwardFile = ""
var _ReverseFile = ""
var _ForwardPrimer string
var _ReversePrimer string
var _AllowedMismatch = 0
var _kmerSize = -1
var _saveGraph = "__@@NOSAVE@@__"
func MicroAsmOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_ForwardFile, "forward-reads", "",
options.Alias("F"),
options.ArgName("FILENAME_F"),
options.Required("You must provide at a forward file"),
options.Description("The file names containing the forward reads"))
options.StringVar(&_ReverseFile, "reverse-reads", "",
options.Alias("R"),
options.ArgName("FILENAME_R"),
options.Required("You must provide a reverse file"),
options.Description("The file names containing the reverse reads"))
options.StringVar(&_ForwardPrimer, "forward", "",
options.Required("You must provide a forward primer"),
options.Description("The forward primer used for the electronic PCR."))
options.StringVar(&_ReversePrimer, "reverse", "",
options.Required("You must provide a reverse primer"),
options.Description("The reverse primer used for the electronic PCR."))
options.IntVar(&_AllowedMismatch, "allowed-mismatches", 0,
options.Alias("e"),
options.Description("Maximum number of mismatches allowed for each primer."))
options.IntVar(&_kmerSize, "kmer-size", _kmerSize,
options.ArgName("SIZE"),
options.Description("The size of the kmer used to build the consensus. "+
"Default value = -1, which means that the kmer size is estimated from the data"),
)
options.StringVar(&_saveGraph, "save-graph", _saveGraph,
options.Description("Creates a directory containing the set of DAG used by the obiclean clustering algorithm. "+
"The graph files follow the graphml format."),
)
}
func OptionSet(options *getoptions.GetOpt) {
obiconvert.OptionSet(options)
MicroAsmOptionSet(options)
}
// CLIForwardPrimer returns the sequence of the forward primer as indicated by the
// --forward command line option
func CLIForwardPrimer() string {
pattern, err := obiapat.MakeApatPattern(_ForwardPrimer, _AllowedMismatch, false)
if err != nil {
log.Fatalf("%+v", err)
}
pattern.Free()
return _ForwardPrimer
}
// CLIReversePrimer returns the sequence of the reverse primer as indicated by the
// --reverse command line option
func CLIReversePrimer() string {
pattern, err := obiapat.MakeApatPattern(_ReversePrimer, _AllowedMismatch, false)
if err != nil {
log.Fatalf("%+v", err)
}
pattern.Free()
return _ReversePrimer
}
// CLIAllowedMismatch returns the allowed mistmatch count between each
// primer and the sequences as indicated by the
// --allowed-mismatches|-e command line option
func CLIAllowedMismatch() int {
return _AllowedMismatch
}
func CLIPairedSequence() (obiiter.IBioSequence, error) {
forward, err := obiconvert.CLIReadBioSequences(_ForwardFile)
if err != nil {
return obiiter.NilIBioSequence, err
}
reverse, err := obiconvert.CLIReadBioSequences(_ReverseFile)
if err != nil {
return obiiter.NilIBioSequence, err
}
paired := forward.PairTo(reverse)
return paired, nil
}
func CLIForwardFile() string {
return _ForwardFile
}
// Returns true it the obliclean graphs must be saved
func CLISaveGraphToFiles() bool {
return _saveGraph != "__@@NOSAVE@@__"
}
// It returns the directory where the graph files are saved
func CLIGraphFilesDirectory() string {
return _saveGraph
}
// CLIKmerSize returns the value of the kmer size to use for building the consensus.
//
// The value of the kmer size is set by the user with the `-k` flag.
// The value -1 means that the kmer size is estimated as the minimum value that
// insure that no kmer are present more than one time in a sequence.
//
// No parameters.
// Returns an integer value.
func CLIKmerSize() int {
return _kmerSize
}
func SetKmerSize(kmerSize int) {
_kmerSize = kmerSize
}

20
pkg/obiutils/unique.go Normal file
View File

@ -0,0 +1,20 @@
package obiutils
// Unique returns a new slice containing only unique values from the input slice.
// The order of elements in the output slice is not guaranteed to match the input order.
//
// Parameters:
// - slice: The input slice containing potentially duplicate values
//
// Returns:
// - A new slice containing only unique values
func Unique[T comparable](slice []T) []T {
// Create a map to track unique values
seen := Set[T]{}
for _, v := range slice {
seen.Add(v)
}
return seen.Members()
}