mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
few bug in the graph algorithm
Former-commit-id: b61bdd9f671e2f5e90d32c1beac1ed84efa5e05c
This commit is contained in:
@ -3,6 +3,7 @@ package obikmer
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
@ -166,9 +167,10 @@ func (g *DeBruijnGraph) MaxNext(index uint64) (uint64, bool) {
|
|||||||
max := uint(0)
|
max := uint(0)
|
||||||
rep := uint64(0)
|
rep := uint64(0)
|
||||||
for _, idx := range ns {
|
for _, idx := range ns {
|
||||||
w, _ := g.graph[idx]
|
w := g.graph[idx]
|
||||||
if w > max {
|
if w > max {
|
||||||
rep = idx
|
rep = idx
|
||||||
|
max = w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -191,33 +193,34 @@ func (g *DeBruijnGraph) MaxPath() []uint64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (g *DeBruijnGraph) LongestPath() []uint64 {
|
func (g *DeBruijnGraph) LongestPath() []uint64 {
|
||||||
var path []uint64
|
var path []uint64
|
||||||
wmax := uint(0)
|
wmax := uint(0)
|
||||||
ok := true
|
ok := true
|
||||||
|
|
||||||
starts:= g.Heads()
|
starts := g.Heads()
|
||||||
|
log.Println(starts)
|
||||||
for _,idx := range starts {
|
for _, idx := range starts {
|
||||||
lp := make([]uint64, 0, 1000)
|
lp := make([]uint64, 0, 1000)
|
||||||
|
ok = true
|
||||||
w := uint(0)
|
w := uint(0)
|
||||||
for ok {
|
for ok {
|
||||||
nw:= g.graph[idx]
|
nw := g.graph[idx]
|
||||||
w+=nw
|
w += nw
|
||||||
lp = append(lp, idx)
|
lp = append(lp, idx)
|
||||||
idx, ok = g.MaxNext(idx)
|
idx, ok = g.MaxNext(idx)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Printf("max : %d current %d", wmax, w)
|
||||||
if w > wmax {
|
if w > wmax {
|
||||||
path=lp
|
path = lp
|
||||||
wmax=w
|
wmax = w
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
return path
|
return path
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence,error) {
|
func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence, error) {
|
||||||
path := g.LongestPath()
|
path := g.LongestPath()
|
||||||
s := g.DecodePath(path)
|
s := g.DecodePath(path)
|
||||||
|
|
||||||
@ -228,13 +231,12 @@ func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence,error)
|
|||||||
"",
|
"",
|
||||||
)
|
)
|
||||||
|
|
||||||
return &seq,nil
|
return &seq, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil,fmt.Errorf("cannot identify optimum path")
|
return nil, fmt.Errorf("cannot identify optimum path")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func (g *DeBruijnGraph) Heads() []uint64 {
|
func (g *DeBruijnGraph) Heads() []uint64 {
|
||||||
rep := make([]uint64, 0, 10)
|
rep := make([]uint64, 0, 10)
|
||||||
|
|
||||||
@ -281,13 +283,13 @@ func (g *DeBruijnGraph) DecodePath(path []uint64) string {
|
|||||||
|
|
||||||
for _, idx := range path[1:] {
|
for _, idx := range path[1:] {
|
||||||
buf.WriteByte(decode[idx&3])
|
buf.WriteByte(decode[idx&3])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return buf.String()
|
return buf.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *DeBruijnGraph) BestConsensus(id string) (*obiseq.BioSequence,error) {
|
func (g *DeBruijnGraph) BestConsensus(id string) (*obiseq.BioSequence, error) {
|
||||||
path := g.MaxPath()
|
path := g.MaxPath()
|
||||||
s := g.DecodePath(path)
|
s := g.DecodePath(path)
|
||||||
|
|
||||||
@ -298,10 +300,10 @@ func (g *DeBruijnGraph) BestConsensus(id string) (*obiseq.BioSequence,error) {
|
|||||||
"",
|
"",
|
||||||
)
|
)
|
||||||
|
|
||||||
return &seq,nil
|
return &seq, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil,fmt.Errorf("cannot identify optimum path")
|
return nil, fmt.Errorf("cannot identify optimum path")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *DeBruijnGraph) Weight(index uint64) int {
|
func (g *DeBruijnGraph) Weight(index uint64) int {
|
||||||
@ -374,3 +376,51 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (graph *DeBruijnGraph) GML() string {
|
||||||
|
buffer := bytes.NewBuffer(make([]byte, 0, 1000))
|
||||||
|
|
||||||
|
buffer.WriteString(
|
||||||
|
`graph [
|
||||||
|
comment "De Bruijn graph"
|
||||||
|
directed 1
|
||||||
|
|
||||||
|
`)
|
||||||
|
|
||||||
|
for idx := range graph.graph {
|
||||||
|
node := graph.DecodeNode(idx)
|
||||||
|
buffer.WriteString(
|
||||||
|
fmt.Sprintf("node [ id \"%s\" ]\n", node),
|
||||||
|
)
|
||||||
|
n := graph.Nexts(uint64(idx))
|
||||||
|
if len(n) == 0 {
|
||||||
|
idx <<= 2
|
||||||
|
idx &= graph.kmermask
|
||||||
|
node := graph.DecodeNode(idx)
|
||||||
|
buffer.WriteString(
|
||||||
|
fmt.Sprintf("node [ id \"%s\" \n label \"%s\" ]\n", node, node),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for idx, weight := range graph.graph {
|
||||||
|
src := graph.DecodeNode(idx)
|
||||||
|
label := decode[idx&3]
|
||||||
|
idx <<= 2
|
||||||
|
idx &= graph.kmermask
|
||||||
|
dst := graph.DecodeNode(idx)
|
||||||
|
|
||||||
|
buffer.WriteString(
|
||||||
|
fmt.Sprintf(`edge [ source "%s"
|
||||||
|
target "%s"
|
||||||
|
color "#00FF00"
|
||||||
|
label "%c[%d]"
|
||||||
|
]
|
||||||
|
|
||||||
|
`, src, dst, label, weight),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
buffer.WriteString("]\n")
|
||||||
|
return buffer.String()
|
||||||
|
|
||||||
|
}
|
||||||
|
@ -41,7 +41,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice, quorum float64) (*obiseq.BioSe
|
|||||||
spectrum := graph.LinkSpectrum()
|
spectrum := graph.LinkSpectrum()
|
||||||
cum := make(map[int]int)
|
cum := make(map[int]int)
|
||||||
|
|
||||||
spectrum[1]=0
|
spectrum[1] = 0
|
||||||
for i := 2; i < len(spectrum); i++ {
|
for i := 2; i < len(spectrum); i++ {
|
||||||
spectrum[i] += spectrum[i-1]
|
spectrum[i] += spectrum[i-1]
|
||||||
cum[spectrum[i]]++
|
cum[spectrum[i]]++
|
||||||
@ -63,26 +63,36 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice, quorum float64) (*obiseq.BioSe
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
threshold /= 2
|
||||||
graph.FilterMin(threshold)
|
graph.FilterMin(threshold)
|
||||||
log.Printf("Graph size : %d\n", graph.Len())
|
log.Printf("Graph size : %d\n", graph.Len())
|
||||||
|
|
||||||
seq,err := graph.LongestConsensus(seqs[0].Source())
|
// file, err := os.Create(
|
||||||
|
// fmt.Sprintf("%s.gml", seqs[0].Source()))
|
||||||
|
|
||||||
|
// if err != nil {
|
||||||
|
// fmt.Println(err)
|
||||||
|
// } else {
|
||||||
|
// file.WriteString(graph.GML())
|
||||||
|
// file.Close()
|
||||||
|
// }
|
||||||
|
|
||||||
|
seq, err := graph.LongestConsensus(seqs[0].Source())
|
||||||
|
|
||||||
seq.SetCount(len(seqs))
|
seq.SetCount(len(seqs))
|
||||||
seq.SetAttribute("seq_length",seq.Len())
|
seq.SetAttribute("seq_length", seq.Len())
|
||||||
seq.SetAttribute("kmer_size",kmersize)
|
seq.SetAttribute("kmer_size", kmersize)
|
||||||
seq.SetAttribute("kmer_min_occur",threshold)
|
seq.SetAttribute("kmer_min_occur", threshold)
|
||||||
seq.SetAttribute("kmer_max_occur",graph.MaxLink())
|
seq.SetAttribute("kmer_max_occur", graph.MaxLink())
|
||||||
seq.SetAttribute("filtered_graph_size",graph.Len())
|
seq.SetAttribute("filtered_graph_size", graph.Len())
|
||||||
seq.SetAttribute("full_graph_size",total_kmer)
|
seq.SetAttribute("full_graph_size", total_kmer)
|
||||||
|
|
||||||
return seq,err
|
return seq, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func Consensus(iterator obiiter.IBioSequence, quorum float64) obiiter.IBioSequence {
|
func Consensus(iterator obiiter.IBioSequence, quorum float64) obiiter.IBioSequence {
|
||||||
newIter := obiiter.MakeIBioSequence()
|
newIter := obiiter.MakeIBioSequence()
|
||||||
size:=10
|
size := 10
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
@ -97,7 +107,7 @@ func Consensus(iterator obiiter.IBioSequence, quorum float64) obiiter.IBioSequen
|
|||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
seqs := iterator.Get()
|
seqs := iterator.Get()
|
||||||
consensus,err := BuildConsensus(seqs.Slice(),quorum)
|
consensus, err := BuildConsensus(seqs.Slice(), quorum)
|
||||||
|
|
||||||
if err == nil {
|
if err == nil {
|
||||||
buffer = append(buffer, consensus)
|
buffer = append(buffer, consensus)
|
||||||
|
Reference in New Issue
Block a user