mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
debug of obimultiplex
Former-commit-id: 1cf276840feb9d6135d96bd1bf63627d3085ae6e
This commit is contained in:
@ -52,10 +52,12 @@ func _samenuc(a, b byte) bool {
|
||||
// Returns:
|
||||
// - The score of the LCS.
|
||||
// - The length of the LCS.
|
||||
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int) {
|
||||
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int, int) {
|
||||
|
||||
lA := len(bA)
|
||||
lB := len(bB)
|
||||
end := 0
|
||||
pend := 0
|
||||
|
||||
// Ensure that A is the longest
|
||||
if lA < lB {
|
||||
@ -75,7 +77,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
|
||||
// The difference in length is larger than the maximum allowed number of errors
|
||||
if delta > maxError {
|
||||
return -1, -1
|
||||
return -1, -1, -1
|
||||
}
|
||||
|
||||
// // BEGINNING OF DEBUG CODE //
|
||||
@ -121,7 +123,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
}
|
||||
previous[extra+even-1] = encodeValues(0, 1, false) // Initialise cell 1,0
|
||||
|
||||
N := lB + ((delta) >> 1)
|
||||
N := lB + (delta >> 1)
|
||||
|
||||
// log.Debugln("N = ", N, " delta = ", delta, " extra = ", extra, " maxError = ", maxError)
|
||||
|
||||
@ -147,6 +149,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
switch {
|
||||
|
||||
case i == 0:
|
||||
// We are setting the gaps of the first row
|
||||
Sup = _notavail
|
||||
Sdiag = _notavail
|
||||
if endgapfree {
|
||||
@ -155,10 +158,12 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
Sleft = encodeValues(0, j, false)
|
||||
}
|
||||
case j == 0:
|
||||
// We are setting the gaps of the first column
|
||||
Sup = encodeValues(0, i, false)
|
||||
Sdiag = _notavail
|
||||
Sleft = _notavail
|
||||
default:
|
||||
// We are in the middle of the matrix
|
||||
Sdiag = _incpath(previous[x])
|
||||
if _samenuc(bA[j-1], bB[i-1]) {
|
||||
Sdiag = _incscore(Sdiag)
|
||||
@ -187,6 +192,15 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
score = Sup
|
||||
default:
|
||||
score = Sleft
|
||||
|
||||
if endgapfree && i == lB {
|
||||
_, l, o := decodeValues(Sleft)
|
||||
|
||||
if l > pend && o == false {
|
||||
pend = l
|
||||
end = j
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// I suppose the bug was here
|
||||
@ -271,6 +285,14 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
score = Sup
|
||||
default:
|
||||
score = Sleft
|
||||
if endgapfree && i == lB {
|
||||
_, l, o := decodeValues(Sleft)
|
||||
|
||||
if l > pend && o == false {
|
||||
pend = l
|
||||
end = j
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// I suppose the bug was here
|
||||
@ -331,10 +353,10 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
// // end OF DEBUG CODE //
|
||||
|
||||
if o {
|
||||
return -1, -1
|
||||
return -1, -1, -1
|
||||
}
|
||||
|
||||
return s, l
|
||||
return s, l, end
|
||||
}
|
||||
|
||||
// FastLCSEGFScore calculates the score of the longest common subsequence between two bio sequences in end-gap-free mode.
|
||||
@ -356,7 +378,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
// Returns:
|
||||
// - The score of the longest common subsequence.
|
||||
// - The length of the shortest alignment corresponding to the LCS.
|
||||
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int, int) {
|
||||
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, true, buffer)
|
||||
}
|
||||
|
||||
@ -380,5 +402,7 @@ func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uin
|
||||
// - The score of the longest common subsequence.
|
||||
// - The length of the shortest alignment corresponding to the LCS.
|
||||
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
|
||||
score, alilen, _ := FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
|
||||
|
||||
return score, alilen
|
||||
}
|
||||
|
104
pkg/obialign/locatepattern.go
Normal file
104
pkg/obialign/locatepattern.go
Normal file
@ -0,0 +1,104 @@
|
||||
package obialign
|
||||
|
||||
// buffIndex converts the matrix coordinates (i, j) into an offset in a flat,
// row-major buffer whose rows are width cells wide. Both coordinates are
// shifted by one, so the cell (-1, -1) is stored at offset 0.
func buffIndex(i, j, width int) int {
	row := i + 1
	col := j + 1
	return row*width + col
}
|
||||
func LocatePattern(pattern, sequence []byte) (int, int, int) {
|
||||
width := len(pattern) + 1
|
||||
buffsize := (len(pattern) + 1) * (len(sequence) + 1)
|
||||
buffer := make([]int, buffsize)
|
||||
path := make([]int, buffsize)
|
||||
|
||||
for j := 0; j < len(pattern); j++ {
|
||||
idx := buffIndex(-1, j, width)
|
||||
buffer[idx] = -j - 1
|
||||
path[idx] = -1
|
||||
}
|
||||
|
||||
for i := -1; i < len(sequence); i++ {
|
||||
idx := buffIndex(i, -1, width)
|
||||
buffer[idx] = 0
|
||||
path[idx] = +1
|
||||
}
|
||||
|
||||
path[0] = 0
|
||||
jmax := len(pattern) - 1
|
||||
for i := 0; i < len(sequence); i++ {
|
||||
for j := 0; j < jmax; j++ {
|
||||
match := -1
|
||||
if _samenuc(pattern[j], sequence[i]) {
|
||||
match = 0
|
||||
}
|
||||
|
||||
idx := buffIndex(i, j, width)
|
||||
|
||||
diag := buffer[buffIndex(i-1, j-1, width)] + match
|
||||
left := buffer[buffIndex(i, j-1, width)] - 1
|
||||
up := buffer[buffIndex(i-1, j, width)] - 1
|
||||
|
||||
score := max(diag, up, left)
|
||||
|
||||
buffer[idx] = score
|
||||
|
||||
switch {
|
||||
case score == left:
|
||||
path[idx] = -1
|
||||
case score == diag:
|
||||
path[idx] = 0
|
||||
case score == up:
|
||||
path[idx] = +1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < len(sequence); i++ {
|
||||
idx := buffIndex(i, jmax, width)
|
||||
|
||||
match := -1
|
||||
if _samenuc(pattern[jmax], sequence[i]) {
|
||||
match = 0
|
||||
}
|
||||
|
||||
diag := buffer[buffIndex(i-1, jmax-1, width)] + match
|
||||
left := buffer[buffIndex(i, jmax-1, width)] - 1
|
||||
up := buffer[buffIndex(i-1, jmax, width)]
|
||||
|
||||
score := max(diag, up, left)
|
||||
buffer[idx] = score
|
||||
switch {
|
||||
case score == left:
|
||||
path[idx] = -1
|
||||
case score == diag:
|
||||
path[idx] = 0
|
||||
case score == up:
|
||||
path[idx] = +1
|
||||
}
|
||||
}
|
||||
|
||||
i := len(sequence) - 1
|
||||
j := jmax
|
||||
end := -1
|
||||
lali := 0
|
||||
for i > -1 && j > 0 {
|
||||
lali++
|
||||
switch path[buffIndex(i, j, width)] {
|
||||
case 0:
|
||||
j--
|
||||
if end == -1 {
|
||||
end = i
|
||||
lali = 1
|
||||
}
|
||||
i--
|
||||
case 1:
|
||||
i--
|
||||
case -1:
|
||||
j--
|
||||
if end == -1 {
|
||||
end = i
|
||||
lali = 1
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)]
|
||||
}
|
Reference in New Issue
Block a user