Make obiconsensus use the sequence counts

Former-commit-id: 7fc5292aeb225843a86cd85591a5405e35125e3d
This commit is contained in:
Eric Coissac
2024-04-03 12:58:32 +02:00
parent d68210ef94
commit 3d1d9f32df
3 changed files with 19 additions and 8 deletions

View File

@ -64,7 +64,7 @@ func MakeOptions(setters []WithOption) Options {
csv_keys: make([]string, 0),
csv_auto: false,
paired_filename: "",
source: "",
source: "unknown",
with_feature_table: false,
}

View File

@ -315,7 +315,7 @@ func (g *DeBruijnGraph) Weight(index uint64) int {
return int(val)
}
func (graph *DeBruijnGraph) append(sequence []byte, current uint64) {
func (graph *DeBruijnGraph) append(sequence []byte, current uint64, weight int) {
for i := 0; i < len(sequence); i++ {
current <<= 2
@ -323,14 +323,14 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64) {
b := iupac[sequence[i]]
if len(b) == 1 {
current |= b[0]
graph.graph[current] = uint(graph.Weight(current) + 1)
graph.graph[current] = uint(graph.Weight(current) + weight)
} else {
for j := 0; j < len(b); j++ {
current &= ^uint64(3)
current |= b[j]
graph.graph[current] = uint(graph.Weight(current) + 1)
graph.append(sequence[(i+1):], current)
graph.graph[current] = uint(graph.Weight(current) + weight)
graph.append(sequence[(i+1):], current, weight)
}
return
}
@ -341,6 +341,7 @@ func (graph *DeBruijnGraph) append(sequence []byte, current uint64) {
func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
key := uint64(0)
s := sequence.Sequence()
w := sequence.Count()
init := make([]uint64, 0, 16)
var f func(start int, key uint64)
f = func(start int, key uint64) {
@ -365,7 +366,7 @@ func (graph *DeBruijnGraph) Push(sequence *obiseq.BioSequence) {
f(0, key)
for _, idx := range init {
graph.append(s[graph.kmersize:], idx)
graph.append(s[graph.kmersize:], idx, w)
}
}
}

View File

@ -99,9 +99,19 @@ func BuildConsensus(seqs obiseq.BioSequenceSlice,
}
}
seq, err := graph.LongestConsensus(seqs[0].Source())
id := seqs[0].Source()
if id == "" {
id = seqs[0].Id()
}
seq, err := graph.LongestConsensus(id)
seq.SetCount(len(seqs))
sumCount := 0
for _, s := range seqs {
sumCount += s.Count()
}
seq.SetCount(sumCount)
seq.SetAttribute("seq_length", seq.Len())
seq.SetAttribute("kmer_size", kmer_size)
seq.SetAttribute("kmer_min_occur", threshold)