mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Add the possibility to look for patterns allowing indels.
Former-commit-id: 0599c2b0ad16df086dbdb08e491503870d8904be
This commit is contained in:
@ -1,9 +1,9 @@
|
|||||||
package obialign
|
package obialign
|
||||||
|
|
||||||
import (
|
// import (
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
// "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
// "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
// )
|
||||||
|
|
||||||
const wsize = 16
|
const wsize = 16
|
||||||
const dwsize = wsize * 2
|
const dwsize = wsize * 2
|
||||||
@ -58,182 +58,182 @@ var _empty = encodeValues(0, 0, false)
|
|||||||
var _out = encodeValues(0, 30000, true)
|
var _out = encodeValues(0, 30000, true)
|
||||||
var _notavail = encodeValues(0, 30000, false)
|
var _notavail = encodeValues(0, 30000, false)
|
||||||
|
|
||||||
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
// func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||||
|
|
||||||
lA := seqA.Len()
|
// lA := seqA.Len()
|
||||||
lB := seqB.Len()
|
// lB := seqB.Len()
|
||||||
|
|
||||||
// Ensure that A is the longest
|
// // Ensure that A is the longest
|
||||||
if lA < lB {
|
// if lA < lB {
|
||||||
seqA, seqB = seqB, seqA
|
// seqA, seqB = seqB, seqA
|
||||||
lA, lB = lB, lA
|
// lA, lB = lB, lA
|
||||||
}
|
// }
|
||||||
|
|
||||||
if maxError == -1 {
|
// if maxError == -1 {
|
||||||
maxError = lA * 2
|
// maxError = lA * 2
|
||||||
}
|
// }
|
||||||
|
|
||||||
delta := lA - lB
|
// delta := lA - lB
|
||||||
|
|
||||||
// The difference of length is larger the maximum allowed errors
|
// // The difference of length is larger the maximum allowed errors
|
||||||
if delta > maxError {
|
// if delta > maxError {
|
||||||
return -1, -1
|
// return -1, -1
|
||||||
}
|
// }
|
||||||
|
|
||||||
// Doit-on vraiment diviser par deux ??? pas certain
|
// // Doit-on vraiment diviser par deux ??? pas certain
|
||||||
extra := (maxError - delta) + 1
|
// extra := (maxError - delta) + 1
|
||||||
|
|
||||||
even := 1 + delta + 2*extra
|
// even := 1 + delta + 2*extra
|
||||||
width := 2*even - 1
|
// width := 2*even - 1
|
||||||
|
|
||||||
if buffer == nil {
|
// if buffer == nil {
|
||||||
var local []uint64
|
// var local []uint64
|
||||||
buffer = &local
|
// buffer = &local
|
||||||
}
|
// }
|
||||||
|
|
||||||
if cap(*buffer) < 2*width {
|
// if cap(*buffer) < 2*width {
|
||||||
*buffer = make([]uint64, 3*width)
|
// *buffer = make([]uint64, 3*width)
|
||||||
}
|
// }
|
||||||
|
|
||||||
previous := (*buffer)[0:width]
|
// previous := (*buffer)[0:width]
|
||||||
current := (*buffer)[width:(2 * width)]
|
// current := (*buffer)[width:(2 * width)]
|
||||||
|
|
||||||
previous[extra] = _empty
|
// previous[extra] = _empty
|
||||||
previous[extra+even] = encodeValues(0, 1, false)
|
// previous[extra+even] = encodeValues(0, 1, false)
|
||||||
previous[extra+even-1] = encodeValues(0, 1, false)
|
// previous[extra+even-1] = encodeValues(0, 1, false)
|
||||||
|
|
||||||
N := lB + ((delta) >> 1)
|
// N := lB + ((delta) >> 1)
|
||||||
|
|
||||||
bA := seqA.Sequence()
|
// bA := seqA.Sequence()
|
||||||
bB := seqB.Sequence()
|
// bB := seqB.Sequence()
|
||||||
|
|
||||||
// log.Println("N = ", N)
|
// // log.Println("N = ", N)
|
||||||
|
|
||||||
for y := 1; y <= N; y++ {
|
// for y := 1; y <= N; y++ {
|
||||||
// in_matrix := false
|
// // in_matrix := false
|
||||||
x1 := y - lB + extra
|
// x1 := y - lB + extra
|
||||||
x2 := extra - y
|
// x2 := extra - y
|
||||||
xs := goutils.MaxInt(goutils.MaxInt(x1, x2), 0)
|
// xs := goutils.MaxInt(goutils.MaxInt(x1, x2), 0)
|
||||||
|
|
||||||
x1 = y + extra
|
// x1 = y + extra
|
||||||
x2 = lA + extra - y
|
// x2 = lA + extra - y
|
||||||
xf := goutils.MinInt(goutils.MinInt(x1, x2), even-1) + 1
|
// xf := goutils.MinInt(goutils.MinInt(x1, x2), even-1) + 1
|
||||||
|
|
||||||
for x := xs; x < xf; x++ {
|
// for x := xs; x < xf; x++ {
|
||||||
|
|
||||||
i := y - x + extra
|
// i := y - x + extra
|
||||||
j := y + x - extra
|
// j := y + x - extra
|
||||||
|
|
||||||
var Sdiag, Sleft, Sup uint64
|
// var Sdiag, Sleft, Sup uint64
|
||||||
|
|
||||||
switch {
|
// switch {
|
||||||
|
|
||||||
case i == 0:
|
// case i == 0:
|
||||||
Sup = _notavail
|
// Sup = _notavail
|
||||||
Sdiag = _notavail
|
// Sdiag = _notavail
|
||||||
Sleft = encodeValues(0, j-1, false)
|
// Sleft = encodeValues(0, j-1, false)
|
||||||
case j == 0:
|
// case j == 0:
|
||||||
Sup = encodeValues(0, i-1, false)
|
// Sup = encodeValues(0, i-1, false)
|
||||||
Sdiag = _notavail
|
// Sdiag = _notavail
|
||||||
Sleft = _notavail
|
// Sleft = _notavail
|
||||||
default:
|
// default:
|
||||||
Sdiag = previous[x]
|
// Sdiag = previous[x]
|
||||||
|
|
||||||
if bA[j-1] == bB[i-1] {
|
// if bA[j-1] == bB[i-1] {
|
||||||
Sdiag = _incscore(Sdiag)
|
// Sdiag = _incscore(Sdiag)
|
||||||
}
|
// }
|
||||||
|
|
||||||
if x < (even - 1) {
|
// if x < (even - 1) {
|
||||||
Sup = previous[x+even]
|
// Sup = previous[x+even]
|
||||||
} else {
|
// } else {
|
||||||
Sup = _out
|
// Sup = _out
|
||||||
}
|
// }
|
||||||
if x > 0 {
|
// if x > 0 {
|
||||||
Sleft = previous[x+even-1]
|
// Sleft = previous[x+even-1]
|
||||||
} else {
|
// } else {
|
||||||
Sleft = _out
|
// Sleft = _out
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
var score uint64
|
// var score uint64
|
||||||
switch {
|
// switch {
|
||||||
case Sdiag >= Sup && Sdiag >= Sleft:
|
// case Sdiag >= Sup && Sdiag >= Sleft:
|
||||||
score = Sdiag
|
// score = Sdiag
|
||||||
case Sup >= Sleft:
|
// case Sup >= Sleft:
|
||||||
score = Sup
|
// score = Sup
|
||||||
default:
|
// default:
|
||||||
score = Sleft
|
// score = Sleft
|
||||||
}
|
// }
|
||||||
|
|
||||||
if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
|
// if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
|
||||||
score = _setout(score)
|
// score = _setout(score)
|
||||||
}
|
// }
|
||||||
|
|
||||||
current[x] = _incpath(score)
|
// current[x] = _incpath(score)
|
||||||
}
|
// }
|
||||||
// . 9 10 + 2 - 1
|
// // . 9 10 + 2 - 1
|
||||||
x1 = y - lB + extra + even
|
// x1 = y - lB + extra + even
|
||||||
x2 = extra - y + even - 1
|
// x2 = extra - y + even - 1
|
||||||
xs = goutils.MaxInt(goutils.MaxInt(x1, x2), even)
|
// xs = goutils.MaxInt(goutils.MaxInt(x1, x2), even)
|
||||||
|
|
||||||
x1 = y + extra + even
|
// x1 = y + extra + even
|
||||||
x2 = lA + extra - y + even - 1
|
// x2 = lA + extra - y + even - 1
|
||||||
xf = goutils.MinInt(goutils.MinInt(x1, x2), width-1) + 1
|
// xf = goutils.MinInt(goutils.MinInt(x1, x2), width-1) + 1
|
||||||
|
|
||||||
for x := xs; x < xf; x++ {
|
// for x := xs; x < xf; x++ {
|
||||||
|
|
||||||
i := y - x + extra + even
|
// i := y - x + extra + even
|
||||||
j := y + x - extra - even + 1
|
// j := y + x - extra - even + 1
|
||||||
|
|
||||||
var Sdiag, Sleft, Sup uint64
|
// var Sdiag, Sleft, Sup uint64
|
||||||
|
|
||||||
switch {
|
// switch {
|
||||||
|
|
||||||
case i == 0:
|
// case i == 0:
|
||||||
Sup = _notavail
|
// Sup = _notavail
|
||||||
Sdiag = _notavail
|
// Sdiag = _notavail
|
||||||
Sleft = encodeValues(0, j-1, false)
|
// Sleft = encodeValues(0, j-1, false)
|
||||||
case j == 0:
|
// case j == 0:
|
||||||
Sup = encodeValues(0, i-1, false)
|
// Sup = encodeValues(0, i-1, false)
|
||||||
Sdiag = _notavail
|
// Sdiag = _notavail
|
||||||
Sleft = _notavail
|
// Sleft = _notavail
|
||||||
default:
|
// default:
|
||||||
Sdiag = previous[x]
|
// Sdiag = previous[x]
|
||||||
if bA[j-1] == bB[i-1] {
|
// if bA[j-1] == bB[i-1] {
|
||||||
Sdiag = _incscore(Sdiag)
|
// Sdiag = _incscore(Sdiag)
|
||||||
}
|
// }
|
||||||
|
|
||||||
Sleft = current[x-even]
|
// Sleft = current[x-even]
|
||||||
Sup = current[x-even+1]
|
// Sup = current[x-even+1]
|
||||||
|
|
||||||
}
|
// }
|
||||||
|
|
||||||
var score uint64
|
// var score uint64
|
||||||
switch {
|
// switch {
|
||||||
case Sdiag >= Sup && Sdiag >= Sleft:
|
// case Sdiag >= Sup && Sdiag >= Sleft:
|
||||||
score = Sdiag
|
// score = Sdiag
|
||||||
case Sup >= Sleft:
|
// case Sup >= Sleft:
|
||||||
score = Sup
|
// score = Sup
|
||||||
default:
|
// default:
|
||||||
score = Sleft
|
// score = Sleft
|
||||||
}
|
// }
|
||||||
|
|
||||||
if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
|
// if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
|
||||||
score = _setout(score)
|
// score = _setout(score)
|
||||||
}
|
// }
|
||||||
|
|
||||||
current[x] = _incpath(score)
|
// current[x] = _incpath(score)
|
||||||
}
|
// }
|
||||||
|
|
||||||
previous, current = current, previous
|
// previous, current = current, previous
|
||||||
|
|
||||||
}
|
// }
|
||||||
|
|
||||||
s, l, o := decodeValues(previous[(delta%2)*even+extra+(delta>>1)])
|
// s, l, o := decodeValues(previous[(delta%2)*even+extra+(delta>>1)])
|
||||||
|
|
||||||
if o {
|
// if o {
|
||||||
return -1, -1
|
// return -1, -1
|
||||||
}
|
// }
|
||||||
|
|
||||||
return s, l
|
// return s, l
|
||||||
}
|
// }
|
||||||
|
319
pkg/obialign/fastlcsegf.go
Normal file
319
pkg/obialign/fastlcsegf.go
Normal file
@ -0,0 +1,319 @@
|
|||||||
|
package obialign
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _iupac = [26]byte{
|
||||||
|
// a b c d e f
|
||||||
|
1, 14, 2, 13, 0, 0,
|
||||||
|
// g h i j k l
|
||||||
|
4, 11, 0, 0, 12, 0,
|
||||||
|
// m n o p q r
|
||||||
|
3, 15, 0, 0, 0, 5,
|
||||||
|
// s t u v w x
|
||||||
|
6, 8, 8, 13, 9, 0,
|
||||||
|
// y z
|
||||||
|
10, 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
func _samenuc(a, b byte) bool {
|
||||||
|
if (a >= 'A') && (a <= 'Z') {
|
||||||
|
a |= 32
|
||||||
|
}
|
||||||
|
if (b >= 'A') && (b <= 'Z') {
|
||||||
|
b |= 32
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a >= 'a') && (a <= 'z') && (b >= 'a') && (b <= 'z') {
|
||||||
|
return (_iupac[a-'a'] & _iupac[b-'a']) > 0
|
||||||
|
}
|
||||||
|
return a == b
|
||||||
|
}
|
||||||
|
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int) {
|
||||||
|
|
||||||
|
lA := len(bA)
|
||||||
|
lB := len(bB)
|
||||||
|
|
||||||
|
// Ensure that A is the longest
|
||||||
|
if lA < lB {
|
||||||
|
bA, bB = bB, bA
|
||||||
|
lA, lB = lB, lA
|
||||||
|
}
|
||||||
|
|
||||||
|
if maxError == -1 {
|
||||||
|
maxError = lA * 2
|
||||||
|
}
|
||||||
|
|
||||||
|
delta := lA - lB
|
||||||
|
|
||||||
|
// The difference of length is larger the maximum allowed errors
|
||||||
|
if delta > maxError {
|
||||||
|
return -1, -1
|
||||||
|
}
|
||||||
|
|
||||||
|
// // BEGINNING OF DEBUG CODE //
|
||||||
|
// debug_scores := make([][]int, lB+1)
|
||||||
|
// for i := range debug_scores {
|
||||||
|
// debug_scores[i] = make([]int, lA+1)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// debug_path := make([][]int, lB+1)
|
||||||
|
// for i := range debug_path {
|
||||||
|
// debug_path[i] = make([]int, lA+1)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// debug_out := make([][]bool, lB+1)
|
||||||
|
// for i := range debug_out {
|
||||||
|
// debug_out[i] = make([]bool, lA+1)
|
||||||
|
// }
|
||||||
|
// // END OF DEBUG CODE //
|
||||||
|
|
||||||
|
// Doit-on vraiment diviser par deux ??? pas certain
|
||||||
|
extra := (maxError - delta) + 1
|
||||||
|
|
||||||
|
even := 1 + delta + 2*extra
|
||||||
|
width := 2*even - 1
|
||||||
|
|
||||||
|
if buffer == nil {
|
||||||
|
var local []uint64
|
||||||
|
buffer = &local
|
||||||
|
}
|
||||||
|
|
||||||
|
if cap(*buffer) < 2*width {
|
||||||
|
*buffer = make([]uint64, 3*width)
|
||||||
|
}
|
||||||
|
|
||||||
|
previous := (*buffer)[0:width]
|
||||||
|
current := (*buffer)[width:(2 * width)]
|
||||||
|
|
||||||
|
previous[extra] = _empty // Initialise cell 0,0
|
||||||
|
if endgapfree { // Initialise cell 0,1
|
||||||
|
previous[extra+even] = encodeValues(0, 0, false)
|
||||||
|
} else {
|
||||||
|
previous[extra+even] = encodeValues(0, 1, false)
|
||||||
|
}
|
||||||
|
previous[extra+even-1] = encodeValues(0, 1, false) // Initialise cell 1,0
|
||||||
|
|
||||||
|
N := lB + ((delta) >> 1)
|
||||||
|
|
||||||
|
log.Debugln("N = ", N, " delta = ", delta, " extra = ", extra, " maxError = ", maxError)
|
||||||
|
|
||||||
|
for y := 1; y <= N; y++ {
|
||||||
|
// in_matrix := false
|
||||||
|
x1 := y - lB + extra
|
||||||
|
x2 := extra - y
|
||||||
|
xs := goutils.MaxInt(goutils.MaxInt(x1, x2), 0)
|
||||||
|
|
||||||
|
x1 = y + extra
|
||||||
|
x2 = lA + extra - y
|
||||||
|
xf := goutils.MinInt(goutils.MinInt(x1, x2), even-1) + 1
|
||||||
|
|
||||||
|
for x := xs; x < xf; x++ {
|
||||||
|
|
||||||
|
// i span along B and j along A
|
||||||
|
i := y - x + extra
|
||||||
|
j := y + x - extra
|
||||||
|
|
||||||
|
// log.Debugln("Coord : ", i, j)
|
||||||
|
var Sdiag, Sleft, Sup uint64
|
||||||
|
|
||||||
|
switch {
|
||||||
|
|
||||||
|
case i == 0:
|
||||||
|
Sup = _notavail
|
||||||
|
Sdiag = _notavail
|
||||||
|
if endgapfree {
|
||||||
|
Sleft = encodeValues(0, 0, false)
|
||||||
|
} else {
|
||||||
|
Sleft = encodeValues(0, j, false)
|
||||||
|
}
|
||||||
|
case j == 0:
|
||||||
|
Sup = encodeValues(0, i, false)
|
||||||
|
Sdiag = _notavail
|
||||||
|
Sleft = _notavail
|
||||||
|
default:
|
||||||
|
Sdiag = _incpath(previous[x])
|
||||||
|
if _samenuc(bA[j-1], bB[i-1]) {
|
||||||
|
Sdiag = _incscore(Sdiag)
|
||||||
|
}
|
||||||
|
|
||||||
|
if x < (even - 1) {
|
||||||
|
Sup = _incpath(previous[x+even])
|
||||||
|
} else {
|
||||||
|
Sup = _out
|
||||||
|
}
|
||||||
|
if x > 0 {
|
||||||
|
Sleft = previous[x+even-1]
|
||||||
|
if (i > 0 && i < lB) || !endgapfree {
|
||||||
|
Sleft = _incpath(Sleft)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Sleft = _out
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var score uint64
|
||||||
|
switch {
|
||||||
|
case Sdiag >= Sup && Sdiag >= Sleft:
|
||||||
|
score = Sdiag
|
||||||
|
case Sup >= Sleft:
|
||||||
|
score = Sup
|
||||||
|
default:
|
||||||
|
score = Sleft
|
||||||
|
}
|
||||||
|
|
||||||
|
// I supose the bug was here
|
||||||
|
// if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
|
||||||
|
// score = _setout(score)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// // BEGINNING OF DEBUG CODE //
|
||||||
|
// if i < 2 && j < 5 {
|
||||||
|
// log.Debugf("[%d,%d]\n",i,j)
|
||||||
|
// s, p, o := decodeValues(Sdiag)
|
||||||
|
// log.Debugf("+Sdiag (%v) : %d, %d, %v\n",Sdiag,s,p,o)
|
||||||
|
// s, p, o = decodeValues(Sup)
|
||||||
|
// log.Debugf("+Sup (%v) : %d, %d, %v\n",Sup,s,p,o)
|
||||||
|
// s, p, o = decodeValues(Sleft)
|
||||||
|
// log.Debugf("+Sleft (%v) : %d, %d, %v\n",Sleft,s,p,o)
|
||||||
|
// s, p, o = decodeValues(score)
|
||||||
|
// log.Debugf("+score (%v) : %d, %d, %v\n",score,s,p,o)
|
||||||
|
// log.Debugln("-------------------")
|
||||||
|
// }
|
||||||
|
// s, p, o := decodeValues(score)
|
||||||
|
// debug_scores[i][j] = s
|
||||||
|
// debug_path[i][j] = p
|
||||||
|
// debug_out[i][j] = o
|
||||||
|
// // END OF DEBUG CODE //
|
||||||
|
|
||||||
|
current[x] = score
|
||||||
|
}
|
||||||
|
// . 9 10 + 2 - 1
|
||||||
|
x1 = y - lB + extra + even
|
||||||
|
x2 = extra - y + even - 1
|
||||||
|
xs = goutils.MaxInt(goutils.MaxInt(x1, x2), even)
|
||||||
|
|
||||||
|
x1 = y + extra + even
|
||||||
|
x2 = lA + extra - y + even - 1
|
||||||
|
xf = goutils.MinInt(goutils.MinInt(x1, x2), width-1) + 1
|
||||||
|
|
||||||
|
for x := xs; x < xf; x++ {
|
||||||
|
|
||||||
|
i := y - x + extra + even
|
||||||
|
j := y + x - extra - even + 1
|
||||||
|
|
||||||
|
//log.Debugln("Coord : ", i, j)
|
||||||
|
var Sdiag, Sleft, Sup uint64
|
||||||
|
|
||||||
|
switch {
|
||||||
|
|
||||||
|
case i == 0:
|
||||||
|
Sup = _notavail
|
||||||
|
Sdiag = _notavail
|
||||||
|
if endgapfree {
|
||||||
|
Sleft = encodeValues(0, 0, false)
|
||||||
|
} else {
|
||||||
|
Sleft = encodeValues(0, j, false)
|
||||||
|
}
|
||||||
|
case j == 0:
|
||||||
|
Sup = encodeValues(0, i, false)
|
||||||
|
Sdiag = _notavail
|
||||||
|
Sleft = _notavail
|
||||||
|
default:
|
||||||
|
Sdiag = _incpath(previous[x])
|
||||||
|
if _samenuc(bA[j-1], bB[i-1]) {
|
||||||
|
Sdiag = _incscore(Sdiag)
|
||||||
|
}
|
||||||
|
|
||||||
|
Sleft = current[x-even]
|
||||||
|
if (i > 0 && i < lB) || !endgapfree {
|
||||||
|
Sleft = _incpath(Sleft)
|
||||||
|
}
|
||||||
|
Sup = _incpath(current[x-even+1])
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
var score uint64
|
||||||
|
switch {
|
||||||
|
case Sdiag >= Sup && Sdiag >= Sleft:
|
||||||
|
score = Sdiag
|
||||||
|
case Sup >= Sleft:
|
||||||
|
score = Sup
|
||||||
|
default:
|
||||||
|
score = Sleft
|
||||||
|
}
|
||||||
|
|
||||||
|
// I supose the bug was here
|
||||||
|
// if _isout(Sdiag) || _isout(Sup) || _isout(Sleft) {
|
||||||
|
// score = _setout(score)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// // BEGINNING OF DEBUG CODE //
|
||||||
|
// if i < 2 && j < 5 {
|
||||||
|
// log.Debugf("[%d,%d]\n",i,j)
|
||||||
|
// s, p, o := decodeValues(Sdiag)
|
||||||
|
// log.Debugf("-Sdiag (%v) : %d, %d, %v\n",Sdiag,s,p,o)
|
||||||
|
// s, p, o = decodeValues(Sup)
|
||||||
|
// log.Debugf("-Sup (%v) : %d, %d, %v\n",Sup,s,p,o)
|
||||||
|
// s, p, o = decodeValues(Sleft)
|
||||||
|
// log.Debugf("-Sleft (%v) : %d, %d, %v\n",Sleft,s,p,o)
|
||||||
|
// s, p, o = decodeValues(score)
|
||||||
|
// log.Debugf("-score (%v) : %d, %d, %v\n",score,s,p,o)
|
||||||
|
// log.Debugln("-------------------")
|
||||||
|
// }
|
||||||
|
// s, p, o := decodeValues(score)
|
||||||
|
// debug_scores[i][j] = s
|
||||||
|
// debug_path[i][j] = p
|
||||||
|
// debug_out[i][j] = o
|
||||||
|
// // END OF DEBUG CODE //
|
||||||
|
current[x] = score
|
||||||
|
}
|
||||||
|
|
||||||
|
previous, current = current, previous
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
s, l, o := decodeValues(previous[(delta%2)*even+extra+(delta>>1)])
|
||||||
|
|
||||||
|
// // BEGINNING OF DEBUG CODE //
|
||||||
|
// fmt.Printf("%2c\t", ' ')
|
||||||
|
// for j := 0; j <= lA; j++ {
|
||||||
|
// if j > 0 {
|
||||||
|
// fmt.Printf("%11c\t", bA[j-1])
|
||||||
|
// } else {
|
||||||
|
// fmt.Printf("%11c\t", '-')
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// fmt.Printf("\n")
|
||||||
|
// for i := 0; i <= lB; i++ {
|
||||||
|
// if i > 0 {
|
||||||
|
// fmt.Printf("%2c\t", bB[i-1])
|
||||||
|
// } else {
|
||||||
|
// fmt.Printf("%2c\t", '-')
|
||||||
|
// }
|
||||||
|
|
||||||
|
// for j := 0; j <= lA; j++ {
|
||||||
|
// fmt.Printf("%2d:%2d:%v\t", debug_scores[i][j],
|
||||||
|
// debug_path[i][j], debug_out[i][j])
|
||||||
|
// }
|
||||||
|
// fmt.Printf("\n")
|
||||||
|
// }
|
||||||
|
// // end OF DEBUG CODE //
|
||||||
|
|
||||||
|
if o {
|
||||||
|
return -1, -1
|
||||||
|
}
|
||||||
|
|
||||||
|
return s, l
|
||||||
|
}
|
||||||
|
|
||||||
|
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||||
|
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, true, buffer)
|
||||||
|
}
|
||||||
|
|
||||||
|
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||||
|
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
|
||||||
|
}
|
@ -151,10 +151,7 @@ int32_t ManberNoErr (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t beg
|
|||||||
int32_t ManberSub (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
|
int32_t ManberSub (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
|
||||||
int32_t ManberIndel (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
|
int32_t ManberIndel (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
|
||||||
int32_t ManberAll (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
|
int32_t ManberAll (Seq *pseq , Pattern *ppat, int32_t patnum,int32_t begin,int32_t length);
|
||||||
int32_t NwsPatAlign (Seq *pseq , Pattern *ppat, int32_t nerr ,
|
int32_t NwsPatAlign (Seq *pseq , Pattern *ppat, int32_t nerr, int32_t begin, int32_t *reslen, int32_t *reserr); /* apat_sys.c */
|
||||||
int32_t *reslen , int32_t *reserr);
|
|
||||||
|
|
||||||
/* apat_sys.c */
|
|
||||||
|
|
||||||
float UserCpuTime (int32_t reset);
|
float UserCpuTime (int32_t reset);
|
||||||
float SysCpuTime (int32_t reset);
|
float SysCpuTime (int32_t reset);
|
||||||
|
@ -23,7 +23,8 @@
|
|||||||
#define TOPCURS CursiToTop
|
#define TOPCURS CursiToTop
|
||||||
#define DOWNREAD ReadiDown
|
#define DOWNREAD ReadiDown
|
||||||
|
|
||||||
#define KRONECK(x, msk) ((~x & msk) ? 0 : 1)
|
//#define KRONECK(x, msk) ((~x & msk) ? 0 : 1)
|
||||||
|
#define KRONECK(x, msk) ((x & msk) ? 0 : 1)
|
||||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||||
|
|
||||||
/* -------------------------------------------- */
|
/* -------------------------------------------- */
|
||||||
@ -192,8 +193,8 @@ int32_t ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
|
|||||||
{
|
{
|
||||||
int e, emax, found;
|
int e, emax, found;
|
||||||
uint32_t pos;
|
uint32_t pos;
|
||||||
uint32_t smask, cmask, sindx;
|
patword_t smask, cmask, sindx;
|
||||||
uint32_t *pr, r[2 * MAX_PAT_ERR + 2];
|
patword_t *pr, r[2 * MAX_PAT_ERR + 2];
|
||||||
uint8_t *data;
|
uint8_t *data;
|
||||||
StackiPtr *stkpos, *stkerr;
|
StackiPtr *stkpos, *stkerr;
|
||||||
uint32_t end;
|
uint32_t end;
|
||||||
@ -220,6 +221,9 @@ int32_t ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
|
|||||||
stkpos = pseq->hitpos + patnum;
|
stkpos = pseq->hitpos + patnum;
|
||||||
stkerr = pseq->hiterr + patnum;
|
stkerr = pseq->hiterr + patnum;
|
||||||
|
|
||||||
|
EmptyStacki(stkpos[0]);
|
||||||
|
EmptyStacki(stkerr[0]);
|
||||||
|
|
||||||
/* loop on text data */
|
/* loop on text data */
|
||||||
|
|
||||||
for (pos = begin ; pos < end ; pos++) {
|
for (pos = begin ; pos < end ; pos++) {
|
||||||
@ -256,12 +260,14 @@ int32_t ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
|
|||||||
/* -------------------------------------------- */
|
/* -------------------------------------------- */
|
||||||
int32_t ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
|
int32_t ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
|
||||||
{
|
{
|
||||||
if (ppat->maxerr == 0)
|
if (ppat->maxerr == 0){
|
||||||
return ManberNoErr(pseq, ppat, patnum, begin, length);
|
return ManberNoErr(pseq, ppat, patnum, begin, length);}
|
||||||
else if (ppat->hasIndel)
|
else if (ppat->hasIndel) {
|
||||||
return ManberIndel(pseq, ppat, patnum, begin, length);
|
return ManberIndel(pseq, ppat, patnum, begin, length);
|
||||||
else
|
}
|
||||||
|
else {
|
||||||
return ManberSub(pseq, ppat, patnum, begin, length);
|
return ManberSub(pseq, ppat, patnum, begin, length);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -271,11 +277,9 @@ int32_t ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
|
|||||||
/* (avec substitution obligatoire aux bords) */
|
/* (avec substitution obligatoire aux bords) */
|
||||||
/* -------------------------------------------- */
|
/* -------------------------------------------- */
|
||||||
|
|
||||||
int32_t NwsPatAlign(pseq, ppat, nerr, reslen, reserr)
|
int32_t NwsPatAlign(Seq *pseq, Pattern *ppat,
|
||||||
Seq *pseq;
|
int32_t nerr, int32_t begin,
|
||||||
Pattern *ppat;
|
int32_t *reslen, int32_t *reserr) {
|
||||||
int32_t nerr, *reslen, *reserr;
|
|
||||||
{
|
|
||||||
uint8_t *sseq, *px;
|
uint8_t *sseq, *px;
|
||||||
int32_t i, j, lseq, lpat, npos, dindel, dsub,
|
int32_t i, j, lseq, lpat, npos, dindel, dsub,
|
||||||
*pc, *pi, *pd, *ps;
|
*pc, *pi, *pd, *ps;
|
||||||
@ -283,7 +287,9 @@ int32_t NwsPatAlign(pseq, ppat, nerr, reslen, reserr)
|
|||||||
|
|
||||||
static int32_t sTab[(MAX_PAT_LEN+MAX_PAT_ERR+1) * (MAX_PAT_LEN+1)];
|
static int32_t sTab[(MAX_PAT_LEN+MAX_PAT_ERR+1) * (MAX_PAT_LEN+1)];
|
||||||
|
|
||||||
lseq = pseq->seqlen;
|
lpat = ppat->patlen;
|
||||||
|
lseq = MIN(lpat + MAX_PAT_ERR+1, pseq->seqlen - begin);
|
||||||
|
sseq = pseq->data + begin - 1;
|
||||||
|
|
||||||
pc = sTab; /* |----|----| --> i */
|
pc = sTab; /* |----|----| --> i */
|
||||||
pi = pc - 1; /* | ps | pd | | */
|
pi = pc - 1; /* | ps | pd | | */
|
||||||
@ -291,36 +297,39 @@ int32_t NwsPatAlign(pseq, ppat, nerr, reslen, reserr)
|
|||||||
ps = pd - 1; /* | pi | pc | v j */
|
ps = pd - 1; /* | pi | pc | v j */
|
||||||
/* |---------| */
|
/* |---------| */
|
||||||
|
|
||||||
lseq = pseq->seqlen;
|
|
||||||
lpat = ppat->patlen;
|
|
||||||
|
|
||||||
sseq = pseq->data - 1;
|
|
||||||
|
|
||||||
amask = ONEMASK >> lpat;
|
//amask = ONEMASK >> lpat;
|
||||||
|
amask = 0x1L << (ppat->patlen);
|
||||||
|
|
||||||
for (j = 0 ; j <= lpat ; j++) {
|
for (j = 0 ; j <= lpat ; j++) {
|
||||||
|
|
||||||
for (i = 0 , px = sseq ; i <= lseq ; i++, px++) {
|
for (i = 0 , px = sseq ; i <= lseq ; i++, px++) {
|
||||||
|
|
||||||
if (i && j) {
|
if (i && j) {
|
||||||
dindel = MIN(*pi, *pd) + 1;
|
dindel = MIN(*pi, *pd) + 1;
|
||||||
|
if (j == lpat) dindel--;
|
||||||
dsub = *ps + KRONECK(ppat->smat[*px], amask);
|
dsub = *ps + KRONECK(ppat->smat[*px], amask);
|
||||||
|
// fprintf(stderr, "mismatch : %d %d %d %d\n",j,*px,KRONECK(ppat->smat[*px], amask),amask);
|
||||||
*pc = MIN(dindel, dsub);
|
*pc = MIN(dindel, dsub);
|
||||||
}
|
}
|
||||||
else if (i) /* j == 0 */
|
else if (i) /* j == 0 */
|
||||||
*pc = *pi + 1;
|
*pc = *pi + 1;
|
||||||
else if (j) /* i == 0 */
|
else if (j) /* i == 0 */
|
||||||
*pc = *pd + 1;
|
*pc = *pd;
|
||||||
else /* root */
|
else /* root */
|
||||||
*pc = 0;
|
*pc = 0;
|
||||||
|
|
||||||
|
fprintf(stderr," %02d",*pc);
|
||||||
pc++;
|
pc++;
|
||||||
pi++;
|
pi++;
|
||||||
pd++;
|
pd++;
|
||||||
ps++;
|
ps++;
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"\n");
|
||||||
|
|
||||||
amask <<= 1;
|
// amask <<= 1;
|
||||||
|
amask >>= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
pc--;
|
pc--;
|
||||||
@ -331,6 +340,7 @@ int32_t NwsPatAlign(pseq, ppat, nerr, reslen, reserr)
|
|||||||
*reserr++ = *pc;
|
*reserr++ = *pc;
|
||||||
npos++;
|
npos++;
|
||||||
}
|
}
|
||||||
|
fprintf(stderr,"i=%d *pc = %d<%d, reserr = %d npos = %d\n",i,*pc,nerr,*(reserr-1),npos);
|
||||||
}
|
}
|
||||||
|
|
||||||
return npos;
|
return npos;
|
||||||
|
@ -337,7 +337,7 @@ int32_t delete_apatseq(SeqPtr pseq,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
PatternPtr buildPattern(const char *pat, int32_t error_max,
|
PatternPtr buildPattern(const char *pat, int32_t error_max, uint8_t hasIndel,
|
||||||
int *errno, char **errmsg)
|
int *errno, char **errmsg)
|
||||||
{
|
{
|
||||||
PatternPtr pattern;
|
PatternPtr pattern;
|
||||||
@ -355,7 +355,7 @@ PatternPtr buildPattern(const char *pat, int32_t error_max,
|
|||||||
errno,errmsg);
|
errno,errmsg);
|
||||||
|
|
||||||
pattern->ok = true;
|
pattern->ok = true;
|
||||||
pattern->hasIndel= false;
|
pattern->hasIndel= hasIndel;
|
||||||
pattern->maxerr = error_max;
|
pattern->maxerr = error_max;
|
||||||
|
|
||||||
pattern->cpat = (char*)pattern + sizeof(Pattern);
|
pattern->cpat = (char*)pattern + sizeof(Pattern);
|
||||||
|
@ -118,7 +118,7 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
|
|||||||
|
|
||||||
int32_t delete_apatseq(SeqPtr pseq,
|
int32_t delete_apatseq(SeqPtr pseq,
|
||||||
int *errno, char **errmsg);
|
int *errno, char **errmsg);
|
||||||
PatternPtr buildPattern(const char *pat, int32_t error_max, int *errno, char **errmsg);
|
PatternPtr buildPattern(const char *pat, int32_t error_max, uint8_t hasIndel, int *errno, char **errmsg);
|
||||||
PatternPtr complementPattern(PatternPtr pat, int *errno, char **errmsg);
|
PatternPtr complementPattern(PatternPtr pat, int *errno, char **errmsg);
|
||||||
|
|
||||||
SeqPtr new_apatseq(const char *in,int32_t circular, int32_t seqlen,
|
SeqPtr new_apatseq(const char *in,int32_t circular, int32_t seqlen,
|
||||||
|
@ -13,6 +13,8 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -65,14 +67,20 @@ var NilApatSequence = ApatSequence{nil}
|
|||||||
// the errormax parameter. Some positions can be marked as not
|
// the errormax parameter. Some positions can be marked as not
|
||||||
// allowed for mismatches. They have to be signaled using a '#'
|
// allowed for mismatches. They have to be signaled using a '#'
|
||||||
// sign after the corresponding nucleotide.
|
// sign after the corresponding nucleotide.
|
||||||
func MakeApatPattern(pattern string, errormax int) (ApatPattern, error) {
|
func MakeApatPattern(pattern string, errormax int, allowsIndel bool) (ApatPattern, error) {
|
||||||
cpattern := C.CString(pattern)
|
cpattern := C.CString(pattern)
|
||||||
defer C.free(unsafe.Pointer(cpattern))
|
defer C.free(unsafe.Pointer(cpattern))
|
||||||
cerrormax := C.int32_t(errormax)
|
cerrormax := C.int32_t(errormax)
|
||||||
|
|
||||||
|
callosindel := C.uint8_t(0)
|
||||||
|
if allowsIndel {
|
||||||
|
callosindel = C.uint8_t(1)
|
||||||
|
}
|
||||||
|
|
||||||
var errno C.int32_t
|
var errno C.int32_t
|
||||||
var errmsg *C.char
|
var errmsg *C.char
|
||||||
|
|
||||||
apc := C.buildPattern(cpattern, cerrormax, &errno, &errmsg)
|
apc := C.buildPattern(cpattern, cerrormax, callosindel, &errno, &errmsg)
|
||||||
|
|
||||||
if apc == nil {
|
if apc == nil {
|
||||||
message := C.GoString(errmsg)
|
message := C.GoString(errmsg)
|
||||||
@ -281,16 +289,13 @@ func (sequence ApatSequence) Free() {
|
|||||||
// values of the [3]int indicate respectively the start and the end position of
|
// values of the [3]int indicate respectively the start and the end position of
|
||||||
// the match. Following the GO convention the end position is not included in the
|
// the match. Following the GO convention the end position is not included in the
|
||||||
// match. The third value indicates the number of error detected for this occurrence.
|
// match. The third value indicates the number of error detected for this occurrence.
|
||||||
func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, limits ...int) (loc [][3]int) {
|
func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
||||||
begin := 0
|
if begin < 0 {
|
||||||
length := sequence.Len()
|
begin = 0
|
||||||
|
|
||||||
if len(limits) > 0 {
|
|
||||||
begin = limits[0]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(limits) > 1 {
|
if length < 0 {
|
||||||
length = limits[1]
|
length = sequence.Len()
|
||||||
}
|
}
|
||||||
|
|
||||||
nhits := int(C.ManberAll(sequence.pointer.pointer,
|
nhits := int(C.ManberAll(sequence.pointer.pointer,
|
||||||
@ -310,13 +315,70 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, limits ...int) (l
|
|||||||
for i := 0; i < nhits; i++ {
|
for i := 0; i < nhits; i++ {
|
||||||
start := int(stktmp[i])
|
start := int(stktmp[i])
|
||||||
err := int(errtmp[i])
|
err := int(errtmp[i])
|
||||||
|
log.Debugln(C.GoString(pattern.pointer.pointer.cpat), start, err)
|
||||||
loc = append(loc, [3]int{start, start + patlen, err})
|
loc = append(loc, [3]int{start, start + patlen, err})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Debugln("------------")
|
||||||
return loc
|
return loc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) {
|
||||||
|
res := pattern.FindAllIndex(sequence, begin, length)
|
||||||
|
|
||||||
|
sbuffer := [(int(C.MAX_PAT_LEN) + int(C.MAX_PAT_ERR) + 1) * (int(C.MAX_PAT_LEN) + 1)]uint64{}
|
||||||
|
buffer := sbuffer[:]
|
||||||
|
|
||||||
|
if len(res) == 0 {
|
||||||
|
matched = false
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
matched = true
|
||||||
|
|
||||||
|
best := [3]int{0, 0, 10000}
|
||||||
|
for _, m := range res {
|
||||||
|
if m[2] < best[2] {
|
||||||
|
best = m
|
||||||
|
log.Debugln(best)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nerr = best[2]
|
||||||
|
end = best[1]
|
||||||
|
|
||||||
|
if nerr == 0 || !pattern.pointer.pointer.hasIndel {
|
||||||
|
start = best[0]
|
||||||
|
log.Debugln("No nws ", start, nerr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
start = best[0] - nerr
|
||||||
|
end = best[0] + int(pattern.pointer.pointer.patlen) + nerr
|
||||||
|
start = goutils.MaxInt(start, 0)
|
||||||
|
end = goutils.MinInt(end, sequence.Len())
|
||||||
|
|
||||||
|
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
|
||||||
|
cseq := (*[1 << 30]byte)(unsafe.Pointer(sequence.pointer.pointer.cseq))
|
||||||
|
|
||||||
|
log.Debugln(
|
||||||
|
string((*cseq)[start:end]),
|
||||||
|
string((*cpattern)[0:int(pattern.pointer.pointer.patlen)]),
|
||||||
|
best[0], nerr, int(pattern.pointer.pointer.patlen),
|
||||||
|
sequence.Len(), start, end)
|
||||||
|
|
||||||
|
score, lali := obialign.FastLCSEGFScoreByte(
|
||||||
|
(*cseq)[start:end],
|
||||||
|
(*cpattern)[0:int(pattern.pointer.pointer.patlen)],
|
||||||
|
nerr*2, true, &buffer)
|
||||||
|
|
||||||
|
nerr = lali - score
|
||||||
|
start = best[0] + int(pattern.pointer.pointer.patlen) - lali
|
||||||
|
log.Println("results", score, lali, start, nerr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// tagaacaggctcctctag
|
||||||
// func AllocatedApaSequences() int {
|
// func AllocatedApaSequences() int {
|
||||||
// return int(_AllocatedApaSequences)
|
// return int(_AllocatedApaSequences)
|
||||||
// }
|
// }
|
||||||
|
@ -133,7 +133,7 @@ func OptionForwardPrimer(primer string, max int) WithOption {
|
|||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opt.pointer.forward, err = MakeApatPattern(primer, max)
|
opt.pointer.forward, err = MakeApatPattern(primer, max, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("error : %v\n", err)
|
log.Fatalf("error : %v\n", err)
|
||||||
}
|
}
|
||||||
@ -155,7 +155,7 @@ func OptionReversePrimer(primer string, max int) WithOption {
|
|||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
opt.pointer.reverse, err = MakeApatPattern(primer, max)
|
opt.pointer.reverse, err = MakeApatPattern(primer, max, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("error : %v\n", err)
|
log.Fatalf("error : %v\n", err)
|
||||||
}
|
}
|
||||||
@ -210,7 +210,7 @@ func _Pcr(seq ApatSequence,
|
|||||||
reverse := opt.pointer.reverse
|
reverse := opt.pointer.reverse
|
||||||
crev := opt.pointer.crev
|
crev := opt.pointer.crev
|
||||||
|
|
||||||
forwardMatches := forward.FindAllIndex(seq)
|
forwardMatches := forward.FindAllIndex(seq,0,-1)
|
||||||
|
|
||||||
if len(forwardMatches) > 0 {
|
if len(forwardMatches) > 0 {
|
||||||
|
|
||||||
@ -284,7 +284,7 @@ func _Pcr(seq ApatSequence,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
forwardMatches = reverse.FindAllIndex(seq)
|
forwardMatches = reverse.FindAllIndex(seq,0,-1)
|
||||||
if forwardMatches != nil {
|
if forwardMatches != nil {
|
||||||
|
|
||||||
begin := forwardMatches[0][0]
|
begin := forwardMatches[0][0]
|
||||||
|
@ -27,11 +27,11 @@ type DemultiplexMatch struct {
|
|||||||
Error error
|
Error error
|
||||||
}
|
}
|
||||||
|
|
||||||
func (library *NGSLibrary) Compile(maxError int) error {
|
func (library *NGSLibrary) Compile(maxError int, allowsIndel bool) error {
|
||||||
for primers, marker := range *library {
|
for primers, marker := range *library {
|
||||||
err := marker.Compile(primers.Forward,
|
err := marker.Compile(primers.Forward,
|
||||||
primers.Reverse,
|
primers.Reverse,
|
||||||
maxError)
|
maxError, allowsIndel)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -57,15 +57,13 @@ func (library *NGSLibrary) ExtractBarcode(sequence *obiseq.BioSequence, inplace
|
|||||||
return match.ExtractBarcode(sequence, inplace)
|
return match.ExtractBarcode(sequence, inplace)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (marker *Marker) Compile(forward, reverse string, maxError int) error {
|
func (marker *Marker) Compile(forward, reverse string, maxError int, allowsIndel bool) error {
|
||||||
var err error
|
var err error
|
||||||
marker.forward, err = obiapat.MakeApatPattern(forward,
|
marker.forward, err = obiapat.MakeApatPattern(forward, maxError, allowsIndel)
|
||||||
maxError)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
marker.reverse, err = obiapat.MakeApatPattern(reverse,
|
marker.reverse, err = obiapat.MakeApatPattern(reverse, maxError, allowsIndel)
|
||||||
maxError)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -105,33 +103,34 @@ func (marker *Marker) Compile(forward, reverse string, maxError int) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
||||||
aseq, _ := obiapat.MakeApatSequence(sequence, false)
|
aseq, _ := obiapat.MakeApatSequence(sequence, false)
|
||||||
match := marker.forward.FindAllIndex(aseq, marker.taglength)
|
start, end, nerr ,matched := marker.forward.BestMatch(aseq, marker.taglength,-1)
|
||||||
|
if matched {
|
||||||
if len(match) > 0 {
|
|
||||||
sseq := sequence.String()
|
sseq := sequence.String()
|
||||||
direct := sseq[match[0][0]:match[0][1]]
|
direct := sseq[start:end]
|
||||||
ftag := strings.ToLower(sseq[(match[0][0] - marker.taglength):match[0][0]])
|
ftag := strings.ToLower(sseq[(start - marker.taglength):start])
|
||||||
|
|
||||||
m := DemultiplexMatch{
|
m := DemultiplexMatch{
|
||||||
ForwardMatch: direct,
|
ForwardMatch: direct,
|
||||||
ForwardTag: ftag,
|
ForwardTag: ftag,
|
||||||
BarcodeStart: match[0][1],
|
BarcodeStart: end,
|
||||||
ForwardMismatches: match[0][2],
|
ForwardMismatches: nerr,
|
||||||
IsDirect: true,
|
IsDirect: true,
|
||||||
Error: nil,
|
Error: nil,
|
||||||
}
|
}
|
||||||
|
|
||||||
rmatch := marker.creverse.FindAllIndex(aseq, match[0][1])
|
start, end, nerr ,matched = marker.creverse.BestMatch(aseq, start,-1)
|
||||||
|
|
||||||
if len(rmatch) > 0 {
|
if matched {
|
||||||
|
|
||||||
// extracting primer matches
|
// extracting primer matches
|
||||||
reverse, _ := sequence.Subsequence(rmatch[0][0], rmatch[0][1], false)
|
reverse, _ := sequence.Subsequence(start,end, false)
|
||||||
defer reverse.Recycle()
|
defer reverse.Recycle()
|
||||||
reverse = reverse.ReverseComplement(true)
|
reverse = reverse.ReverseComplement(true)
|
||||||
rtag, err := sequence.Subsequence(rmatch[0][1], rmatch[0][1]+marker.taglength, false)
|
rtag, err := sequence.Subsequence(end, end+marker.taglength, false)
|
||||||
defer rtag.Recycle()
|
defer rtag.Recycle()
|
||||||
srtag := ""
|
srtag := ""
|
||||||
|
|
||||||
@ -143,8 +142,8 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
|||||||
}
|
}
|
||||||
|
|
||||||
m.ReverseMatch = strings.ToLower(reverse.String())
|
m.ReverseMatch = strings.ToLower(reverse.String())
|
||||||
m.ReverseMismatches = rmatch[0][2]
|
m.ReverseMismatches = nerr
|
||||||
m.BarcodeEnd = rmatch[0][0]
|
m.BarcodeEnd = start
|
||||||
m.ReverseTag = srtag
|
m.ReverseTag = srtag
|
||||||
|
|
||||||
sample, ok := marker.samples[TagPair{ftag, srtag}]
|
sample, ok := marker.samples[TagPair{ftag, srtag}]
|
||||||
@ -162,32 +161,32 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
|||||||
return &m
|
return &m
|
||||||
}
|
}
|
||||||
|
|
||||||
match = marker.reverse.FindAllIndex(aseq, marker.taglength)
|
start, end, nerr ,matched = marker.reverse.BestMatch(aseq, marker.taglength,-1)
|
||||||
|
|
||||||
if len(match) > 0 {
|
if matched {
|
||||||
sseq := sequence.String()
|
sseq := sequence.String()
|
||||||
|
|
||||||
reverse := strings.ToLower(sseq[match[0][0]:match[0][1]])
|
reverse := strings.ToLower(sseq[start:end])
|
||||||
rtag := strings.ToLower(sseq[(match[0][0] - marker.taglength):match[0][0]])
|
rtag := strings.ToLower(sseq[(start - marker.taglength):start])
|
||||||
|
|
||||||
m := DemultiplexMatch{
|
m := DemultiplexMatch{
|
||||||
ReverseMatch: reverse,
|
ReverseMatch: reverse,
|
||||||
ReverseTag: rtag,
|
ReverseTag: rtag,
|
||||||
BarcodeStart: match[0][1],
|
BarcodeStart: end,
|
||||||
ReverseMismatches: match[0][2],
|
ReverseMismatches: nerr,
|
||||||
IsDirect: false,
|
IsDirect: false,
|
||||||
Error: nil,
|
Error: nil,
|
||||||
}
|
}
|
||||||
|
|
||||||
rmatch := marker.cforward.FindAllIndex(aseq, match[0][1])
|
start, end, nerr ,matched = marker.cforward.BestMatch(aseq, end,-1)
|
||||||
|
|
||||||
if len(rmatch) > 0 {
|
if matched {
|
||||||
|
|
||||||
direct, _ := sequence.Subsequence(rmatch[0][0], rmatch[0][1], false)
|
direct, _ := sequence.Subsequence(start,end, false)
|
||||||
defer direct.Recycle()
|
defer direct.Recycle()
|
||||||
direct = direct.ReverseComplement(true)
|
direct = direct.ReverseComplement(true)
|
||||||
|
|
||||||
ftag, err := sequence.Subsequence(rmatch[0][1], rmatch[0][1]+marker.taglength, false)
|
ftag, err := sequence.Subsequence(end,end+marker.taglength, false)
|
||||||
defer ftag.Recycle()
|
defer ftag.Recycle()
|
||||||
sftag := ""
|
sftag := ""
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -200,8 +199,8 @@ func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
|
|||||||
|
|
||||||
m.ForwardMatch = direct.String()
|
m.ForwardMatch = direct.String()
|
||||||
m.ForwardTag = sftag
|
m.ForwardTag = sftag
|
||||||
m.ReverseMismatches = rmatch[0][2]
|
m.ReverseMismatches = nerr
|
||||||
m.BarcodeEnd = rmatch[0][0]
|
m.BarcodeEnd = start
|
||||||
|
|
||||||
sample, ok := marker.samples[TagPair{sftag, rtag}]
|
sample, ok := marker.samples[TagPair{sftag, rtag}]
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ type _Options struct {
|
|||||||
discardErrors bool
|
discardErrors bool
|
||||||
unidentified string
|
unidentified string
|
||||||
allowedMismatch int
|
allowedMismatch int
|
||||||
|
allowsIndel bool
|
||||||
withProgressBar bool
|
withProgressBar bool
|
||||||
parallelWorkers int
|
parallelWorkers int
|
||||||
batchSize int
|
batchSize int
|
||||||
@ -55,6 +56,14 @@ func OptionAllowedMismatches(count int) WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func OptionAllowedIndel(allowed bool) WithOption {
|
||||||
|
f := WithOption(func(opt Options) {
|
||||||
|
opt.pointer.allowsIndel = allowed
|
||||||
|
})
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
// OptionParallelWorkers sets how many search
|
// OptionParallelWorkers sets how many search
|
||||||
// jobs will be run in parallel.
|
// jobs will be run in parallel.
|
||||||
func OptionParallelWorkers(nworkers int) WithOption {
|
func OptionParallelWorkers(nworkers int) WithOption {
|
||||||
@ -87,6 +96,10 @@ func (options Options) AllowedMismatch() int {
|
|||||||
return options.pointer.allowedMismatch
|
return options.pointer.allowedMismatch
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (options Options) AllowsIndel() bool {
|
||||||
|
return options.pointer.allowsIndel
|
||||||
|
}
|
||||||
|
|
||||||
func (options Options) WithProgressBar() bool {
|
func (options Options) WithProgressBar() bool {
|
||||||
return options.pointer.withProgressBar
|
return options.pointer.withProgressBar
|
||||||
}
|
}
|
||||||
@ -110,6 +123,7 @@ func MakeOptions(setters []WithOption) Options {
|
|||||||
discardErrors: true,
|
discardErrors: true,
|
||||||
unidentified: "",
|
unidentified: "",
|
||||||
allowedMismatch: 0,
|
allowedMismatch: 0,
|
||||||
|
allowsIndel: false,
|
||||||
withProgressBar: false,
|
withProgressBar: false,
|
||||||
parallelWorkers: 4,
|
parallelWorkers: 4,
|
||||||
batchSize: 1000,
|
batchSize: 1000,
|
||||||
@ -145,7 +159,7 @@ func ExtractBarcodeSlice(ngslibrary NGSLibrary,
|
|||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
ngslibrary.Compile(opt.AllowedMismatch())
|
ngslibrary.Compile(opt.AllowedMismatch(),opt.AllowsIndel())
|
||||||
|
|
||||||
return _ExtractBarcodeSlice(ngslibrary, sequences, opt)
|
return _ExtractBarcodeSlice(ngslibrary, sequences, opt)
|
||||||
}
|
}
|
||||||
@ -155,7 +169,7 @@ func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
|
|||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
ngslibrary.Compile(opt.AllowedMismatch())
|
ngslibrary.Compile(opt.AllowedMismatch(),opt.AllowsIndel())
|
||||||
|
|
||||||
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||||
return _ExtractBarcodeSlice(ngslibrary, sequences, opt)
|
return _ExtractBarcodeSlice(ngslibrary, sequences, opt)
|
||||||
|
46
pkg/obiseq/compare.go
Normal file
46
pkg/obiseq/compare.go
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
package obiseq
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Compare func(a, b *BioSequence) int
|
||||||
|
|
||||||
|
func CompareSequence(a, b *BioSequence) int {
|
||||||
|
return bytes.Compare(a.sequence, b.sequence)
|
||||||
|
}
|
||||||
|
|
||||||
|
func CompareQuality(a, b *BioSequence) int {
|
||||||
|
return bytes.Compare(a.qualities, b.qualities)
|
||||||
|
}
|
||||||
|
|
||||||
|
// func CompareAttributeBuillder(key string) Compare {
|
||||||
|
// f := func(a, b *BioSequence) int {
|
||||||
|
// ak, oka := a.GetAttribute(key)
|
||||||
|
// bk, okb := b.GetAttribute(key)
|
||||||
|
|
||||||
|
// switch {
|
||||||
|
// case !oka && !okb:
|
||||||
|
// return 0
|
||||||
|
// case !oka:
|
||||||
|
// return -1
|
||||||
|
// case !okb:
|
||||||
|
// return +1
|
||||||
|
// }
|
||||||
|
|
||||||
|
// //av,oka := ak.(constraints.Ordered)
|
||||||
|
// //bv,okb := bk.(constraints.Ordered)
|
||||||
|
|
||||||
|
// switch {
|
||||||
|
// case !oka && !okb:
|
||||||
|
// return 0
|
||||||
|
// case !oka:
|
||||||
|
// return -1
|
||||||
|
// case !okb:
|
||||||
|
// return +1
|
||||||
|
// }
|
||||||
|
|
||||||
|
// }
|
||||||
|
|
||||||
|
// return f
|
||||||
|
// }
|
@ -16,6 +16,7 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error
|
|||||||
|
|
||||||
opts = append(opts,
|
opts = append(opts,
|
||||||
obingslibrary.OptionAllowedMismatches(CLIAllowedMismatch()),
|
obingslibrary.OptionAllowedMismatches(CLIAllowedMismatch()),
|
||||||
|
obingslibrary.OptionAllowedIndel(CLIAllowsIndel()),
|
||||||
obingslibrary.OptionUnidentified(CLIUnidentifiedFileName()),
|
obingslibrary.OptionUnidentified(CLIUnidentifiedFileName()),
|
||||||
obingslibrary.OptionDiscardErrors(!CLIConservedErrors()),
|
obingslibrary.OptionDiscardErrors(!CLIConservedErrors()),
|
||||||
obingslibrary.OptionParallelWorkers(obioptions.CLIParallelWorkers()),
|
obingslibrary.OptionParallelWorkers(obioptions.CLIParallelWorkers()),
|
||||||
|
@ -13,6 +13,7 @@ import (
|
|||||||
var _NGSFilterFile = ""
|
var _NGSFilterFile = ""
|
||||||
var _UnidentifiedFile = ""
|
var _UnidentifiedFile = ""
|
||||||
var _AllowedMismatch = int(2)
|
var _AllowedMismatch = int(2)
|
||||||
|
var _AllowsIndel = false
|
||||||
var _ConservedError = false
|
var _ConservedError = false
|
||||||
|
|
||||||
// PCROptionSet defines every options related to a simulated PCR.
|
// PCROptionSet defines every options related to a simulated PCR.
|
||||||
@ -33,6 +34,9 @@ func MultiplexOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.BoolVar(&_ConservedError, "keep-errors", _ConservedError,
|
options.BoolVar(&_ConservedError, "keep-errors", _ConservedError,
|
||||||
options.Description("Prints symbol counts."))
|
options.Description("Prints symbol counts."))
|
||||||
|
|
||||||
|
options.BoolVar(&_AllowsIndel, "with-indels", _AllowsIndel,
|
||||||
|
options.Description("Allows for indels during the primers matching."))
|
||||||
|
|
||||||
options.StringVar(&_UnidentifiedFile, "unidentified", _UnidentifiedFile,
|
options.StringVar(&_UnidentifiedFile, "unidentified", _UnidentifiedFile,
|
||||||
options.Alias("u"),
|
options.Alias("u"),
|
||||||
options.Description("Filename used to store the sequences unassigned to any sample."))
|
options.Description("Filename used to store the sequences unassigned to any sample."))
|
||||||
@ -52,6 +56,9 @@ func CLIAllowedMismatch() int {
|
|||||||
return _AllowedMismatch
|
return _AllowedMismatch
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLIAllowsIndel() bool {
|
||||||
|
return _AllowsIndel
|
||||||
|
}
|
||||||
func CLIUnidentifiedFileName() string {
|
func CLIUnidentifiedFileName() string {
|
||||||
return _UnidentifiedFile
|
return _UnidentifiedFile
|
||||||
}
|
}
|
||||||
|
@ -59,7 +59,7 @@ func OptionSet(options *getoptions.GetOpt) {
|
|||||||
// ForwardPrimer returns the sequence of the forward primer as indicated by the
|
// ForwardPrimer returns the sequence of the forward primer as indicated by the
|
||||||
// --forward command line option
|
// --forward command line option
|
||||||
func ForwardPrimer() string {
|
func ForwardPrimer() string {
|
||||||
pattern, err := obiapat.MakeApatPattern(_ForwardPrimer, _AllowedMismatch)
|
pattern, err := obiapat.MakeApatPattern(_ForwardPrimer, _AllowedMismatch, false)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("%+v", err)
|
log.Fatalf("%+v", err)
|
||||||
@ -73,7 +73,7 @@ func ForwardPrimer() string {
|
|||||||
// ReversePrimer returns the sequence of the reverse primer as indicated by the
|
// ReversePrimer returns the sequence of the reverse primer as indicated by the
|
||||||
// --reverse command line option
|
// --reverse command line option
|
||||||
func ReversePrimer() string {
|
func ReversePrimer() string {
|
||||||
pattern, err := obiapat.MakeApatPattern(_ReversePrimer, _AllowedMismatch)
|
pattern, err := obiapat.MakeApatPattern(_ReversePrimer, _AllowedMismatch,false)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("%+v", err)
|
log.Fatalf("%+v", err)
|
||||||
|
Reference in New Issue
Block a user