diff --git a/pkg/obikmer/debruijn.go b/pkg/obikmer/debruijn.go index ac41dbd..4017d0d 100644 --- a/pkg/obikmer/debruijn.go +++ b/pkg/obikmer/debruijn.go @@ -3,6 +3,7 @@ package obikmer import ( "bytes" "fmt" + "log" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) @@ -166,9 +167,10 @@ func (g *DeBruijnGraph) MaxNext(index uint64) (uint64, bool) { max := uint(0) rep := uint64(0) for _, idx := range ns { - w, _ := g.graph[idx] + w := g.graph[idx] if w > max { rep = idx + max = w } } @@ -191,33 +193,34 @@ func (g *DeBruijnGraph) MaxPath() []uint64 { } func (g *DeBruijnGraph) LongestPath() []uint64 { - var path []uint64 + var path []uint64 wmax := uint(0) ok := true - starts:= g.Heads() - - for _,idx := range starts { + starts := g.Heads() + log.Println(starts) + for _, idx := range starts { lp := make([]uint64, 0, 1000) + ok = true w := uint(0) for ok { - nw:= g.graph[idx] - w+=nw + nw := g.graph[idx] + w += nw lp = append(lp, idx) idx, ok = g.MaxNext(idx) } - + + log.Printf("max : %d current %d", wmax, w) if w > wmax { - path=lp - wmax=w + path = lp + wmax = w } } - return path } -func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence,error) { +func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence, error) { path := g.LongestPath() s := g.DecodePath(path) @@ -228,13 +231,12 @@ func (g *DeBruijnGraph) LongestConsensus(id string) (*obiseq.BioSequence,error) "", ) - return &seq,nil + return &seq, nil } - return nil,fmt.Errorf("cannot identify optimum path") + return nil, fmt.Errorf("cannot identify optimum path") } - func (g *DeBruijnGraph) Heads() []uint64 { rep := make([]uint64, 0, 10) @@ -281,13 +283,13 @@ func (g *DeBruijnGraph) DecodePath(path []uint64) string { for _, idx := range path[1:] { buf.WriteByte(decode[idx&3]) - } + } } return buf.String() } -func (g *DeBruijnGraph) BestConsensus(id string) (*obiseq.BioSequence,error) { +func (g *DeBruijnGraph) BestConsensus(id string) (*obiseq.BioSequence, error) { path := g.MaxPath() s := g.DecodePath(path) @@ -298,10 +300,10 @@ func (g *DeBruijnGraph) BestConsensus(id string) (*obiseq.BioSequence,error) { "", ) - return &seq,nil + return &seq, nil } - return nil,fmt.Errorf("cannot identify optimum path") + return nil, fmt.Errorf("cannot identify optimum path") } func (g *DeBruijnGraph) Weight(index uint64) int { @@ -374,3 +376,51 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) { } } + +func (graph *DeBruijnGraph) GML() string { + buffer := bytes.NewBuffer(make([]byte, 0, 1000)) + + buffer.WriteString( + `graph [ + comment "De Bruijn graph" + directed 1 + + `) + + for idx := range graph.graph { + node := graph.DecodeNode(idx) + buffer.WriteString( + fmt.Sprintf("node [ id \"%s\" ]\n", node), + ) + n := graph.Nexts(uint64(idx)) + if len(n) == 0 { + idx <<= 2 + idx &= graph.kmermask + node := graph.DecodeNode(idx) + buffer.WriteString( + fmt.Sprintf("node [ id \"%s\" \n label \"%s\" ]\n", node, node), + ) + } + } + + for idx, weight := range graph.graph { + src := graph.DecodeNode(idx) + label := decode[idx&3] + idx <<= 2 + idx &= graph.kmermask + dst := graph.DecodeNode(idx) + + buffer.WriteString( + fmt.Sprintf(`edge [ source "%s" + target "%s" + color "#00FF00" + label "%c[%d]" + ] + + `, src, dst, label, weight), + ) + } + buffer.WriteString("]\n") + return buffer.String() + +} diff --git a/pkg/obitools/obiconsensus/obiconsensus.go b/pkg/obitools/obiconsensus/obiconsensus.go index de21d7d..c000fc0 100644 --- a/pkg/obitools/obiconsensus/obiconsensus.go +++ b/pkg/obitools/obiconsensus/obiconsensus.go @@ -41,7 +41,7 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice, quorum float64) (*obiseq.BioSe spectrum := graph.LinkSpectrum() cum := make(map[int]int) - spectrum[1]=0 + spectrum[1] = 0 for i := 2; i < len(spectrum); i++ { spectrum[i] += spectrum[i-1] cum[spectrum[i]]++ @@ -63,26 +63,36 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice, quorum float64) (*obiseq.BioSe break } } - + threshold /= 2 graph.FilterMin(threshold) log.Printf("Graph size : %d\n", graph.Len()) - seq,err := graph.LongestConsensus(seqs[0].Source()) + // file, err := os.Create( + // fmt.Sprintf("%s.gml", seqs[0].Source())) + + // if err != nil { + // fmt.Println(err) + // } else { + // file.WriteString(graph.GML()) + // file.Close() + // } + + seq, err := graph.LongestConsensus(seqs[0].Source()) seq.SetCount(len(seqs)) - seq.SetAttribute("seq_length",seq.Len()) - seq.SetAttribute("kmer_size",kmersize) - seq.SetAttribute("kmer_min_occur",threshold) - seq.SetAttribute("kmer_max_occur",graph.MaxLink()) - seq.SetAttribute("filtered_graph_size",graph.Len()) - seq.SetAttribute("full_graph_size",total_kmer) + seq.SetAttribute("seq_length", seq.Len()) + seq.SetAttribute("kmer_size", kmersize) + seq.SetAttribute("kmer_min_occur", threshold) + seq.SetAttribute("kmer_max_occur", graph.MaxLink()) + seq.SetAttribute("filtered_graph_size", graph.Len()) + seq.SetAttribute("full_graph_size", total_kmer) - return seq,err + return seq, err } func Consensus(iterator obiiter.IBioSequence, quorum float64) obiiter.IBioSequence { newIter := obiiter.MakeIBioSequence() - size:=10 + size := 10 newIter.Add(1) @@ -97,7 +107,7 @@ func Consensus(iterator obiiter.IBioSequence, quorum float64) obiiter.IBioSequen for iterator.Next() { seqs := iterator.Get() - consensus,err := BuildConsensus(seqs.Slice(),quorum) + consensus, err := BuildConsensus(seqs.Slice(), quorum) if err == nil { buffer = append(buffer, consensus)