debug of obimultiplex

Former-commit-id: 1cf276840feb9d6135d96bd1bf63627d3085ae6e
This commit is contained in:
Eric Coissac
2024-06-18 23:38:33 +02:00
parent 44533fb61e
commit a812fe8b72
9 changed files with 468 additions and 50 deletions

View File

@ -52,10 +52,12 @@ func _samenuc(a, b byte) bool {
// Returns:
// - The score of the LCS.
// - The length of the LCS.
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int) {
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int, int) {
lA := len(bA)
lB := len(bB)
end := 0
pend := 0
// Ensure that A is the longest
if lA < lB {
@ -75,7 +77,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// The difference of length is larger than the maximum allowed errors
if delta > maxError {
return -1, -1
return -1, -1, -1
}
// // BEGINNING OF DEBUG CODE //
@ -121,7 +123,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
}
previous[extra+even-1] = encodeValues(0, 1, false) // Initialise cell 1,0
N := lB + ((delta) >> 1)
N := lB + (delta >> 1)
// log.Debugln("N = ", N, " delta = ", delta, " extra = ", extra, " maxError = ", maxError)
@ -147,6 +149,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
switch {
case i == 0:
// We are setting the gaps of the first row
Sup = _notavail
Sdiag = _notavail
if endgapfree {
@ -155,10 +158,12 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
Sleft = encodeValues(0, j, false)
}
case j == 0:
// We are setting the gaps of the first column
Sup = encodeValues(0, i, false)
Sdiag = _notavail
Sleft = _notavail
default:
// We are in the middle of the matrix
Sdiag = _incpath(previous[x])
if _samenuc(bA[j-1], bB[i-1]) {
Sdiag = _incscore(Sdiag)
@ -187,6 +192,15 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
score = Sup
default:
score = Sleft
if endgapfree && i == lB {
_, l, o := decodeValues(Sleft)
if l > pend && o == false {
pend = l
end = j
}
}
}
// I suppose the bug was here
@ -271,6 +285,14 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
score = Sup
default:
score = Sleft
if endgapfree && i == lB {
_, l, o := decodeValues(Sleft)
if l > pend && o == false {
pend = l
end = j
}
}
}
// I suppose the bug was here
@ -331,10 +353,10 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// // end OF DEBUG CODE //
if o {
return -1, -1
return -1, -1, -1
}
return s, l
return s, l, end
}
// FastLCSEGFScore calculates the score of the longest common subsequence between two bio sequences in end-gap-free mode.
@ -356,7 +378,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
// Returns:
// - The score of the longest common subsequence.
// - The length of the shortest alignment corresponding to the LCS.
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int, int) {
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, true, buffer)
}
@ -380,5 +402,7 @@ func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uin
// - The score of the longest common subsequence.
// - The length of the shortest alignment corresponding to the LCS.
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
score, alilen, _ := FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
return score, alilen
}

View File

@ -0,0 +1,104 @@
package obialign
// buffIndex converts matrix coordinates into an offset inside a flat,
// row-major buffer whose rows hold width cells.
//
// Both coordinates are shifted by one, so that row -1 and column -1 (the
// boundary row and column of the alignment matrix) are addressable: the cell
// (-1, -1) maps to offset 0.
func buffIndex(i, j, width int) int {
	row := i + 1
	col := j + 1
	return row*width + col
}
// LocatePattern aligns pattern against sequence by dynamic programming and
// reports where the pattern lies on the sequence.
//
// The matrix has one row per sequence position and one column per pattern
// position, plus a boundary row and a boundary column at index -1. A match
// (as decided by _samenuc) scores 0; a mismatch or a gap scores -1. Two kinds
// of gap are free, which lets the pattern sit anywhere along the sequence:
//   - the boundary column is set to 0 on every row, so sequence positions
//     located before the pattern cost nothing;
//   - moving up inside the last pattern column costs nothing, so sequence
//     positions located after the pattern cost nothing.
//
// When several moves give the same score, the left move is preferred, then
// the diagonal one, then the up one.
//
// Returns, in this order:
//   - the sequence index at which the backtracking stopped. This is the row
//     reached when the walk arrives in the first pattern column; callers
//     presumably use it as the start of the match (TODO confirm). It is -1
//     when the walk reaches the beginning of the sequence before the first
//     pattern column (for instance with an empty sequence).
//   - the index following the last sequence position aligned with the end of
//     the pattern (0 when no such position exists).
//   - the number of errors, i.e. the negated score of the bottom-right cell.
//
// An empty pattern returns (0, 0, 0) instead of panicking. A one-base pattern
// is reported on the row where its single column is aligned.
func LocatePattern(pattern, sequence []byte) (int, int, int) {
	// Without this guard jmax would be -1 and pattern[jmax] would panic as
	// soon as the sequence is not empty.
	if len(pattern) == 0 {
		return 0, 0, 0
	}

	// Moves recorded in the path matrix.
	const (
		stepLeft = -1
		stepDiag = 0
		stepUp   = 1
	)

	width := len(pattern) + 1
	size := width * (len(sequence) + 1)
	buffer := make([]int, size)
	path := make([]int, size)

	// Boundary row (-1): every pattern position left unaligned costs one.
	for j := range pattern {
		idx := buffIndex(-1, j, width)
		buffer[idx] = -j - 1
		path[idx] = stepLeft
	}

	// Boundary column (-1): skipping leading sequence positions is free.
	for i := -1; i < len(sequence); i++ {
		idx := buffIndex(i, -1, width)
		buffer[idx] = 0
		path[idx] = stepUp
	}

	// Corner cell (-1, -1).
	path[0] = stepDiag

	jmax := len(pattern) - 1

	// Fill the matrix row by row. Every cell only depends on cells of the
	// previous row and of the previous column, which are already computed.
	for i := range sequence {
		for j := 0; j <= jmax; j++ {
			match := -1
			if _samenuc(pattern[j], sequence[i]) {
				match = 0
			}

			// Moving up is free in the last column only.
			upCost := 1
			if j == jmax {
				upCost = 0
			}

			diag := buffer[buffIndex(i-1, j-1, width)] + match
			left := buffer[buffIndex(i, j-1, width)] - 1
			up := buffer[buffIndex(i-1, j, width)] - upCost
			score := max(diag, up, left)

			idx := buffIndex(i, j, width)
			buffer[idx] = score

			// The order of the cases is the tie-breaking rule.
			switch {
			case score == left:
				path[idx] = stepLeft
			case score == diag:
				path[idx] = stepDiag
			case score == up:
				path[idx] = stepUp
			}
		}
	}

	// Backtracking, first phase: climb the free up moves of the last column
	// to reach the row where the end of the pattern is aligned.
	i := len(sequence) - 1
	for i > -1 && path[buffIndex(i, jmax, width)] == stepUp {
		i--
	}
	end := i

	// Backtracking, second phase: follow the path until the first pattern
	// column or the beginning of the sequence is reached.
	j := jmax
	for i > -1 && j > 0 {
		switch path[buffIndex(i, j, width)] {
		case stepDiag:
			j--
			i--
		case stepUp:
			i--
		case stepLeft:
			j--
		}
	}

	return i, end + 1, -buffer[buffIndex(len(sequence)-1, jmax, width)]
}