mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 08:40:26 +00:00
First commit
This commit is contained in:
167
pkg/obialign/alignment.go
Normal file
167
pkg/obialign/alignment.go
Normal file
@@ -0,0 +1,167 @@
|
||||
package obialign
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
type __build_align_arena__ struct {
|
||||
bufferA []byte
|
||||
bufferB []byte
|
||||
}
|
||||
|
||||
type BuildAlignArena struct {
|
||||
pointer *__build_align_arena__
|
||||
}
|
||||
|
||||
var NilBuildAlignArena = BuildAlignArena{nil}
|
||||
|
||||
func MakeBuildAlignArena(lseqA, lseqB int) BuildAlignArena {
|
||||
a := __build_align_arena__{
|
||||
bufferA: make([]byte, lseqA+lseqB),
|
||||
bufferB: make([]byte, lseqA+lseqB),
|
||||
}
|
||||
|
||||
return BuildAlignArena{&a}
|
||||
}
|
||||
|
||||
func __build_alignment__(seqA, seqB []byte, path []int, gap byte,
|
||||
bufferA, bufferB *[]byte) ([]byte, []byte) {
|
||||
|
||||
if bufferA == nil {
|
||||
b := make([]byte, 0, len(seqA)+len(seqB))
|
||||
bufferA = &b
|
||||
}
|
||||
|
||||
if bufferB == nil {
|
||||
b := make([]byte, 0, len(seqA)+len(seqB))
|
||||
bufferB = &b
|
||||
}
|
||||
|
||||
*bufferA = (*bufferA)[:0]
|
||||
*bufferB = (*bufferB)[:0]
|
||||
|
||||
lp := len(path)
|
||||
|
||||
pos_a := 0
|
||||
pos_b := 0
|
||||
for i := 0; i < lp; i++ {
|
||||
step := path[i]
|
||||
if step < 0 {
|
||||
*bufferA = append(*bufferA, seqA[pos_a:(pos_a-step)]...)
|
||||
for j := 0; j < -step; j++ {
|
||||
*bufferB = append(*bufferB, gap)
|
||||
}
|
||||
pos_a -= step
|
||||
}
|
||||
if step > 0 {
|
||||
*bufferB = append(*bufferB, seqB[pos_b:(pos_b+step)]...)
|
||||
for j := 0; j < step; j++ {
|
||||
*bufferA = append(*bufferA, gap)
|
||||
}
|
||||
pos_b += step
|
||||
}
|
||||
|
||||
i++
|
||||
step = path[i]
|
||||
if step > 0 {
|
||||
*bufferA = append(*bufferA, seqA[pos_a:(pos_a+step)]...)
|
||||
*bufferB = append(*bufferB, seqB[pos_b:(pos_b+step)]...)
|
||||
pos_a += step
|
||||
pos_b += step
|
||||
}
|
||||
}
|
||||
|
||||
return *bufferA, *bufferB
|
||||
}
|
||||
|
||||
func BuildAlignment(seqA, seqB obiseq.BioSequence,
|
||||
path []int, gap byte, arena BuildAlignArena) (obiseq.BioSequence, obiseq.BioSequence) {
|
||||
|
||||
if arena.pointer == nil {
|
||||
arena = MakeBuildAlignArena(seqA.Length(), seqB.Length())
|
||||
}
|
||||
|
||||
A, B := __build_alignment__(seqA.Sequence(), seqB.Sequence(), path, gap,
|
||||
&arena.pointer.bufferA,
|
||||
&arena.pointer.bufferB)
|
||||
|
||||
seqA = obiseq.MakeBioSequence(seqA.Id(),
|
||||
A,
|
||||
seqA.Definition())
|
||||
|
||||
seqB = obiseq.MakeBioSequence(seqB.Id(),
|
||||
B,
|
||||
seqB.Definition())
|
||||
|
||||
return seqA, seqB
|
||||
}
|
||||
|
||||
func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int,
|
||||
arena1, arena2 BuildAlignArena) (obiseq.BioSequence, int) {
|
||||
|
||||
if arena1.pointer == nil {
|
||||
arena1 = MakeBuildAlignArena(seqA.Length(), seqB.Length())
|
||||
}
|
||||
if arena2.pointer == nil {
|
||||
arena2 = MakeBuildAlignArena(seqA.Length(), seqB.Length())
|
||||
}
|
||||
|
||||
sA, sB := __build_alignment__(seqA.Sequence(), seqB.Sequence(), path, ' ',
|
||||
&arena1.pointer.bufferA,
|
||||
&arena1.pointer.bufferB)
|
||||
|
||||
qsA, qsB := __build_alignment__(seqA.Qualities(), seqB.Qualities(), path, byte(0),
|
||||
&arena2.pointer.bufferA,
|
||||
&arena2.pointer.bufferB)
|
||||
|
||||
consensus := make([]byte, 0, len(sA))
|
||||
qualities := make([]byte, 0, len(sA))
|
||||
|
||||
var qA, qB byte
|
||||
var qM, qm byte
|
||||
var i int
|
||||
|
||||
match := 0
|
||||
|
||||
for i, qA = range qsA {
|
||||
qB = qsB[i]
|
||||
|
||||
if qA > qB {
|
||||
consensus = append(consensus, sA[i])
|
||||
qM = qA
|
||||
qm = qB
|
||||
}
|
||||
if qB > qA {
|
||||
consensus = append(consensus, sB[i])
|
||||
qM = qB
|
||||
qm = qA
|
||||
}
|
||||
if qB == qA {
|
||||
nuc := __four_bits_base_code__[sA[i]&31] | __four_bits_base_code__[sB[i]&31]
|
||||
consensus = append(consensus, __four_bits_base_decode__[nuc])
|
||||
}
|
||||
|
||||
q := qA + qB
|
||||
|
||||
if qA > 0 && qB > 0 {
|
||||
if sA[i] != sB[i] {
|
||||
q = qM - byte(math.Log10(1-math.Pow(10, -float64(qm)/30))*10+0.5)
|
||||
}
|
||||
if sA[i] == sB[i] {
|
||||
match++
|
||||
}
|
||||
}
|
||||
|
||||
if q > 90 {
|
||||
q = 90
|
||||
}
|
||||
qualities = append(qualities, q)
|
||||
}
|
||||
|
||||
seq := obiseq.MakeBioSequence(seqA.Id(), consensus, seqA.Definition())
|
||||
seq.SetQualities(qualities)
|
||||
|
||||
return seq, match
|
||||
}
|
||||
95
pkg/obialign/backtracking.go
Normal file
95
pkg/obialign/backtracking.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package obialign
|
||||
|
||||
func __backtracking__(path_matrix []int, lseqA, lseqB int, path *[]int) []int {
|
||||
|
||||
needed := (lseqA + lseqB) * 2
|
||||
|
||||
if needed > cap(*path) {
|
||||
*path = make([]int, 0, needed)
|
||||
}
|
||||
|
||||
*path = (*path)[:cap(*path)]
|
||||
p := cap(*path)
|
||||
|
||||
i := lseqA - 1
|
||||
j := lseqB - 1
|
||||
|
||||
ldiag := 0
|
||||
lup := 0
|
||||
lleft := 0
|
||||
|
||||
for i > -1 || j > -1 {
|
||||
step := __get_matrix__(&path_matrix, lseqA, i, j)
|
||||
// log.Printf("I: %d J:%d -> %d\n", i, j, step)
|
||||
|
||||
switch {
|
||||
case step == 0:
|
||||
if lleft != 0 {
|
||||
p--
|
||||
(*path)[p] = ldiag
|
||||
p--
|
||||
(*path)[p] = lleft
|
||||
lleft = 0
|
||||
ldiag = 0
|
||||
}
|
||||
if lup != 0 {
|
||||
p--
|
||||
(*path)[p] = ldiag
|
||||
p--
|
||||
(*path)[p] = lup
|
||||
lup = 0
|
||||
ldiag = 0
|
||||
}
|
||||
ldiag++
|
||||
i--
|
||||
j--
|
||||
case step > 0:
|
||||
if lup != 0 {
|
||||
p--
|
||||
(*path)[p] = ldiag
|
||||
p--
|
||||
(*path)[p] = lup
|
||||
lup = 0
|
||||
ldiag = 0
|
||||
}
|
||||
lleft += step
|
||||
j -= step
|
||||
case step < 0:
|
||||
if lleft != 0 {
|
||||
p--
|
||||
(*path)[p] = ldiag
|
||||
p--
|
||||
(*path)[p] = lleft
|
||||
lleft = 0
|
||||
ldiag = 0
|
||||
}
|
||||
lup += step
|
||||
i += step
|
||||
}
|
||||
}
|
||||
|
||||
if lleft != 0 {
|
||||
p--
|
||||
(*path)[p] = ldiag
|
||||
p--
|
||||
(*path)[p] = lleft
|
||||
ldiag = 0
|
||||
}
|
||||
if lup != 0 {
|
||||
p--
|
||||
(*path)[p] = ldiag
|
||||
p--
|
||||
(*path)[p] = lup
|
||||
ldiag = 0
|
||||
}
|
||||
if ldiag != 0 {
|
||||
p--
|
||||
(*path)[p] = ldiag
|
||||
p--
|
||||
(*path)[p] = 0
|
||||
}
|
||||
|
||||
*path = (*path)[p:cap((*path))]
|
||||
|
||||
return *path
|
||||
}
|
||||
100
pkg/obialign/dnamatrix.go
Normal file
100
pkg/obialign/dnamatrix.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package obialign
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
var __four_bits_count__ = []float64{
|
||||
0, // 0000
|
||||
1, // 0001
|
||||
1, // 0010
|
||||
2, // 0011
|
||||
1, // 0100
|
||||
2, // 0101
|
||||
2, // 0110
|
||||
3, // 0111
|
||||
1, // 1000
|
||||
2, // 1001
|
||||
2, // 1010
|
||||
3, // 1011
|
||||
2, // 1100
|
||||
3, // 1101
|
||||
3, // 1110
|
||||
4, // 1111
|
||||
}
|
||||
|
||||
var __initialized_dna_score__ = false
|
||||
|
||||
var __nuc_part_match__ [32][32]float64
|
||||
var __nuc_score_part_match_match__ [100][100]int
|
||||
var __nuc_score_part_match_mismatch__ [100][100]int
|
||||
|
||||
func __match_ratio__(a, b byte) float64 {
|
||||
// count of common bits
|
||||
cm := __four_bits_count__[a&b&15]
|
||||
|
||||
ca := __four_bits_count__[a&15]
|
||||
cb := __four_bits_count__[b&15]
|
||||
|
||||
if cm == 0 || ca == 0 || cb == 0 {
|
||||
return float64(0)
|
||||
}
|
||||
|
||||
return float64(cm) / float64(ca) / float64(cb)
|
||||
}
|
||||
|
||||
func __logaddexp__(a, b float64) float64 {
|
||||
if a > b {
|
||||
a, b = b, a
|
||||
}
|
||||
|
||||
return b + math.Log1p(math.Exp(a-b))
|
||||
}
|
||||
|
||||
func __match_score_ratio__(a, b byte) (float64, float64) {
|
||||
|
||||
l2 := math.Log(2)
|
||||
l3 := math.Log(3)
|
||||
l4 := math.Log(4)
|
||||
l10 := math.Log(10)
|
||||
lE1 := -float64(a)/10*l10 - l4
|
||||
lE2 := -float64(b)/10*l10 - l4
|
||||
lO1 := math.Log1p(-math.Exp(lE1 + l3))
|
||||
lO2 := math.Log1p(-math.Exp(lE2 + l3))
|
||||
lO1O2 := lO1 + lO2
|
||||
lE1E2 := lE1 + lE2
|
||||
lO1E2 := lO1 + lE2
|
||||
lO2E1 := lO2 + lE1
|
||||
|
||||
MM := __logaddexp__(lO1O2, lE1E2+l3) + l4
|
||||
Mm := __logaddexp__(__logaddexp__(lO1E2, lO2E1), lE1E2+l2) + l4
|
||||
|
||||
return MM, Mm
|
||||
}
|
||||
|
||||
func __init_nuc_part_match__() {
|
||||
|
||||
for i, a := range __four_bits_base_code__ {
|
||||
for j, b := range __four_bits_base_code__ {
|
||||
__nuc_part_match__[i][j] = __match_ratio__(a, b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func __init_nuc_score_part_match__() {
|
||||
for i := 0; i < 100; i++ {
|
||||
for j := 0; j < 100; j++ {
|
||||
MM, Mm := __match_score_ratio__(byte(i), byte(j))
|
||||
__nuc_score_part_match_match__[i][j] = int(MM*10 + 0.5)
|
||||
__nuc_score_part_match_mismatch__[i][j] = int(Mm*10 + 0.5)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func InitDNAScoreMatrix() {
|
||||
if !__initialized_dna_score__ {
|
||||
__init_nuc_part_match__()
|
||||
__init_nuc_score_part_match__()
|
||||
__initialized_dna_score__ = true
|
||||
}
|
||||
}
|
||||
74
pkg/obialign/fourbitsencode.go
Normal file
74
pkg/obialign/fourbitsencode.go
Normal file
@@ -0,0 +1,74 @@
|
||||
package obialign
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
var __four_bits_base_code__ = []byte{0b0000,
|
||||
// IUPAC nucleotide code Base
|
||||
0b0001, // A Adenine
|
||||
0b1110, // B C or G or T
|
||||
0b0010, // C Cytosine
|
||||
0b1101, // D A or G or T
|
||||
0b0000, // E not a nucleotide
|
||||
0b0000, // F not a nucleotide
|
||||
0b0100, // G Guanine
|
||||
0b1011, // H A or C or T
|
||||
0b0000, // I not a nucleotide
|
||||
0b0000, // J not a nucleotide
|
||||
0b1100, // K G or T
|
||||
0b0000, // L not a nucleotide
|
||||
0b0011, // M A or C
|
||||
0b1111, // N any base
|
||||
0b0000, // O not a nucleotide
|
||||
0b0000, // P not a nucleotide
|
||||
0b0000, // Q not a nucleotide
|
||||
0b0101, // R A or G
|
||||
0b0110, // S G or C
|
||||
0b1000, // T Thymine
|
||||
0b1000, // U Uracil
|
||||
0b0111, // V A or C or G
|
||||
0b1001, // W A or T
|
||||
0b0000, // X not a nucleotide
|
||||
0b1010, // Y C or T
|
||||
0b0000, // Z not a nucleotide
|
||||
0b0000,
|
||||
0b0000,
|
||||
0b0000,
|
||||
0b0000,
|
||||
0b0000}
|
||||
|
||||
var __four_bits_base_decode__ = []byte{
|
||||
// 0b0000 0b0001 0b0010 0b0011
|
||||
'.', 'a', 'c', 'm',
|
||||
// 0b0100 0b0101 0b0110 0b0111
|
||||
'g', 'r', 's', 'v',
|
||||
// 0b1000 0b1001 0b1010 0b1011
|
||||
't', 'w', 'y', 'h',
|
||||
// 0b1100 0b1101 0b1110 0b1111
|
||||
'k', 'd', 'b', 'n',
|
||||
}
|
||||
|
||||
func Encode4bits(seq obiseq.BioSequence, buffer []byte) []byte {
|
||||
length := seq.Length()
|
||||
rawseq := seq.Sequence()
|
||||
|
||||
if buffer == nil {
|
||||
buffer = make([]byte, 0, length)
|
||||
} else {
|
||||
buffer = buffer[:0]
|
||||
}
|
||||
|
||||
var code byte
|
||||
|
||||
for _, nuc := range rawseq {
|
||||
if nuc == '.' || nuc == '-' {
|
||||
code = 0
|
||||
} else {
|
||||
code = __four_bits_base_code__[nuc&31]
|
||||
}
|
||||
buffer = append(buffer, code)
|
||||
}
|
||||
|
||||
return buffer
|
||||
}
|
||||
366
pkg/obialign/pairedendalign.go
Normal file
366
pkg/obialign/pairedendalign.go
Normal file
@@ -0,0 +1,366 @@
|
||||
package obialign
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obikmer"
|
||||
"git.metabarcoding.org/lecasofts/go/oa2/pkg/obiseq"
|
||||
)
|
||||
|
||||
type __pe_align_arena__ struct {
|
||||
score_matrix []int
|
||||
path_matrix []int
|
||||
path []int
|
||||
fast_index [][]int
|
||||
fast_buffer []byte
|
||||
}
|
||||
|
||||
type PEAlignArena struct {
|
||||
pointer *__pe_align_arena__
|
||||
}
|
||||
|
||||
var NilPEAlignArena = PEAlignArena{nil}
|
||||
|
||||
func MakePEAlignArena(lseqA, lseqB int) PEAlignArena {
|
||||
a := __pe_align_arena__{
|
||||
score_matrix: make([]int, 0, (lseqA+1)*(lseqB+1)),
|
||||
path_matrix: make([]int, 0, (lseqA+1)*(lseqB+1)),
|
||||
path: make([]int, 2*(lseqA+lseqB)),
|
||||
fast_index: make([][]int, 256),
|
||||
fast_buffer: make([]byte, 0, lseqA),
|
||||
}
|
||||
|
||||
return PEAlignArena{&a}
|
||||
}
|
||||
|
||||
func __set_matrices__(matrixA, matrixB *[]int, lenA, a, b, valueA, valueB int) {
|
||||
i := (b+1)*(lenA+1) + a + 1
|
||||
(*matrixA)[i] = valueA
|
||||
(*matrixB)[i] = valueB
|
||||
}
|
||||
|
||||
func __get_matrix__(matrix *[]int, lenA, a, b int) int {
|
||||
return (*matrix)[(b+1)*(lenA+1)+a+1]
|
||||
}
|
||||
|
||||
func __get_matrix_from__(matrix *[]int, lenA, a, b int) (int, int, int) {
|
||||
i := (b+1)*(lenA+1) + a
|
||||
j := i - lenA
|
||||
m := *matrix
|
||||
return m[j], m[j-1], m[i]
|
||||
}
|
||||
|
||||
func __pairing_score_pe_align__(baseA, qualA, baseB, qualB byte) int {
|
||||
part_match := __nuc_part_match__[baseA&31][baseB&31]
|
||||
// log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB)
|
||||
switch {
|
||||
case part_match == 1:
|
||||
// log.Printf("match\n")
|
||||
return __nuc_score_part_match_match__[qualA][qualB]
|
||||
case part_match == 0:
|
||||
return __nuc_score_part_match_mismatch__[qualA][qualB]
|
||||
default:
|
||||
return int(part_match*float64(__nuc_score_part_match_match__[qualA][qualB]) +
|
||||
(1-part_match)*float64(__nuc_score_part_match_mismatch__[qualA][qualB]) + 0.5)
|
||||
}
|
||||
}
|
||||
|
||||
func __fill_matrix_pe_left_align__(seqA, qualA, seqB, qualB []byte, gap int,
|
||||
score_matrix, path_matrix *[]int) int {
|
||||
|
||||
la := len(seqA)
|
||||
lb := len(seqB)
|
||||
|
||||
// The actual gap score is the gap score times the mismatch between
|
||||
// two bases with a score of 40
|
||||
gap = gap * __nuc_score_part_match_mismatch__[40][40]
|
||||
|
||||
needed := (la + 1) * (lb + 1)
|
||||
|
||||
if needed > cap(*score_matrix) {
|
||||
*score_matrix = make([]int, needed)
|
||||
}
|
||||
|
||||
if needed > cap(*path_matrix) {
|
||||
*path_matrix = make([]int, needed)
|
||||
}
|
||||
|
||||
*score_matrix = (*score_matrix)[:needed]
|
||||
*path_matrix = (*path_matrix)[:needed]
|
||||
|
||||
__set_matrices__(score_matrix, path_matrix, la, -1, -1, 0, 0)
|
||||
|
||||
// Fills the first column with score 0
|
||||
for i := 0; i < la; i++ {
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, -1, 0, -1)
|
||||
}
|
||||
|
||||
la1 := la - 1
|
||||
|
||||
for j := 0; j < lb; j++ {
|
||||
|
||||
__set_matrices__(score_matrix, path_matrix, la, -1, j, (j+1)*gap, 1)
|
||||
|
||||
for i := 0; i < la1; i++ {
|
||||
left, diag, top := __get_matrix_from__(score_matrix, la, i, j)
|
||||
diag += __pairing_score_pe_align__(seqA[i], qualA[i], seqB[j], qualB[j])
|
||||
left += gap
|
||||
top += gap
|
||||
switch {
|
||||
case diag > left && diag > top:
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, j, diag, 0)
|
||||
case left > diag && left > top:
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, j, left, +1)
|
||||
default:
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, j, top, -1)
|
||||
}
|
||||
}
|
||||
|
||||
// Special case for the last line Left gap are free
|
||||
|
||||
left, diag, top := __get_matrix_from__(score_matrix, la, la1, j)
|
||||
diag += __pairing_score_pe_align__(seqA[la1], qualA[la1], seqB[j], qualB[j])
|
||||
top += gap
|
||||
|
||||
switch {
|
||||
case diag > left && diag > top:
|
||||
__set_matrices__(score_matrix, path_matrix, la, la1, j, diag, 0)
|
||||
case left > diag && left > top:
|
||||
__set_matrices__(score_matrix, path_matrix, la, la1, j, left, +1)
|
||||
default:
|
||||
__set_matrices__(score_matrix, path_matrix, la, la1, j, top, -1)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return __get_matrix__(score_matrix, la, la1, lb-1)
|
||||
}
|
||||
|
||||
func __fill_matrix_pe_right_align__(seqA, qualA, seqB, qualB []byte, gap int,
|
||||
score_matrix, path_matrix *[]int) int {
|
||||
|
||||
la := len(seqA)
|
||||
lb := len(seqB)
|
||||
|
||||
// The actual gap score is the gap score times the mismatch between
|
||||
// two bases with a score of 40
|
||||
gap = gap * __nuc_score_part_match_mismatch__[40][40]
|
||||
|
||||
needed := (la + 1) * (lb + 1)
|
||||
|
||||
if needed > cap(*score_matrix) {
|
||||
*score_matrix = make([]int, needed)
|
||||
}
|
||||
|
||||
if needed > cap(*path_matrix) {
|
||||
*path_matrix = make([]int, needed)
|
||||
}
|
||||
|
||||
*score_matrix = (*score_matrix)[:needed]
|
||||
*path_matrix = (*path_matrix)[:needed]
|
||||
|
||||
__set_matrices__(score_matrix, path_matrix, la, -1, -1, 0, 0)
|
||||
|
||||
// Fills the first column with score 0
|
||||
for i := 0; i < la; i++ {
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, -1, (i+1)*gap, -1)
|
||||
}
|
||||
|
||||
lb1 := lb - 1
|
||||
|
||||
for j := 0; j < lb1; j++ {
|
||||
|
||||
__set_matrices__(score_matrix, path_matrix, la, -1, j, 0, 1)
|
||||
|
||||
for i := 0; i < la; i++ {
|
||||
left, diag, top := __get_matrix_from__(score_matrix, la, i, j)
|
||||
|
||||
diag += __pairing_score_pe_align__(seqA[i], qualA[i], seqB[j], qualB[j])
|
||||
left += gap
|
||||
top += gap
|
||||
|
||||
switch {
|
||||
case diag > left && left > top:
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, j, diag, 0)
|
||||
case left > diag && left > top:
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, j, left, +1)
|
||||
default:
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, j, top, -1)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Special case for the last colump Up gap are free
|
||||
__set_matrices__(score_matrix, path_matrix, la, -1, lb1, 0, 1)
|
||||
|
||||
for i := 0; i < la; i++ {
|
||||
|
||||
left, diag, top := __get_matrix_from__(score_matrix, la, i, lb1)
|
||||
diag += __pairing_score_pe_align__(seqA[i], qualA[i], seqB[lb1], qualB[lb1])
|
||||
left += gap
|
||||
|
||||
switch {
|
||||
case diag > left && diag > top:
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, lb1, diag, 0)
|
||||
case left > diag && left > top:
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, lb1, left, +1)
|
||||
default:
|
||||
__set_matrices__(score_matrix, path_matrix, la, i, lb1, top, -1)
|
||||
}
|
||||
}
|
||||
|
||||
return __get_matrix__(score_matrix, la, la-1, lb1)
|
||||
}
|
||||
|
||||
func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (int, []int) {
|
||||
|
||||
if !__initialized_dna_score__ {
|
||||
log.Println("Initializing the DNA Scoring matrix")
|
||||
InitDNAScoreMatrix()
|
||||
}
|
||||
|
||||
if arena.pointer == nil {
|
||||
arena = MakePEAlignArena(seqA.Length(), seqB.Length())
|
||||
}
|
||||
|
||||
score := __fill_matrix_pe_left_align__(seqA.Sequence(), seqA.Qualities(),
|
||||
seqB.Sequence(), seqB.Qualities(), gap,
|
||||
&arena.pointer.score_matrix,
|
||||
&arena.pointer.path_matrix)
|
||||
|
||||
arena.pointer.path = __backtracking__(arena.pointer.path_matrix,
|
||||
seqA.Length(), seqB.Length(),
|
||||
&arena.pointer.path)
|
||||
|
||||
return score, arena.pointer.path
|
||||
}
|
||||
|
||||
func PERightAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (int, []int) {
|
||||
|
||||
if !__initialized_dna_score__ {
|
||||
log.Println("Initializing the DNA Scoring matrix")
|
||||
InitDNAScoreMatrix()
|
||||
}
|
||||
|
||||
if arena.pointer == nil {
|
||||
arena = MakePEAlignArena(seqA.Length(), seqB.Length())
|
||||
}
|
||||
|
||||
score := __fill_matrix_pe_right_align__(seqA.Sequence(), seqA.Qualities(),
|
||||
seqB.Sequence(), seqB.Qualities(), gap,
|
||||
&arena.pointer.score_matrix,
|
||||
&arena.pointer.path_matrix)
|
||||
|
||||
arena.pointer.path = __backtracking__(arena.pointer.path_matrix,
|
||||
seqA.Length(), seqB.Length(),
|
||||
&arena.pointer.path)
|
||||
|
||||
return score, arena.pointer.path
|
||||
}
|
||||
|
||||
func PEAlign(seqA, seqB obiseq.BioSequence,
|
||||
gap, delta int,
|
||||
arena PEAlignArena) (int, []int) {
|
||||
var score, shift int
|
||||
var startA, startB int
|
||||
var part_len, over int
|
||||
var raw_seqA, qual_seqA []byte
|
||||
var raw_seqB, qual_seqB []byte
|
||||
var extra5, extra3 int
|
||||
|
||||
if !__initialized_dna_score__ {
|
||||
log.Println("Initializing the DNA Scoring matrix")
|
||||
InitDNAScoreMatrix()
|
||||
}
|
||||
|
||||
index := obikmer.Index4mer(seqA,
|
||||
&arena.pointer.fast_index,
|
||||
&arena.pointer.fast_buffer)
|
||||
|
||||
shift, fast_score := obikmer.FastShiftFourMer(index, seqB, nil)
|
||||
|
||||
if shift > 0 {
|
||||
over = seqA.Length() - shift
|
||||
} else {
|
||||
over = seqB.Length() + shift
|
||||
}
|
||||
|
||||
if fast_score+3 < over {
|
||||
if shift > 0 {
|
||||
startA = shift - delta
|
||||
if startA < 0 {
|
||||
startA = 0
|
||||
}
|
||||
extra5 = -startA
|
||||
startB = 0
|
||||
raw_seqA = seqA.Sequence()[startA:]
|
||||
qual_seqA = seqA.Qualities()[startA:]
|
||||
part_len = len(raw_seqA)
|
||||
raw_seqB = seqB.Sequence()[0:part_len]
|
||||
qual_seqB = seqB.Qualities()[0:part_len]
|
||||
extra3 = seqB.Length() - part_len
|
||||
score = __fill_matrix_pe_left_align__(
|
||||
raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap,
|
||||
&arena.pointer.score_matrix,
|
||||
&arena.pointer.path_matrix)
|
||||
} else {
|
||||
startA = 0
|
||||
startB = -shift - delta
|
||||
if startB < 0 {
|
||||
startB = 0
|
||||
}
|
||||
extra5 = startB
|
||||
raw_seqB = seqB.Sequence()[startB:]
|
||||
qual_seqB = seqB.Qualities()[startB:]
|
||||
part_len = len(raw_seqB)
|
||||
raw_seqA = seqA.Sequence()[:part_len]
|
||||
qual_seqA = seqA.Qualities()[:part_len]
|
||||
extra3 = part_len - seqA.Length()
|
||||
score = __fill_matrix_pe_right_align__(
|
||||
raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap,
|
||||
&arena.pointer.score_matrix,
|
||||
&arena.pointer.path_matrix)
|
||||
}
|
||||
|
||||
arena.pointer.path = __backtracking__(arena.pointer.path_matrix,
|
||||
len(raw_seqA), len(raw_seqB),
|
||||
&arena.pointer.path)
|
||||
|
||||
} else {
|
||||
if shift > 0 {
|
||||
startA = shift
|
||||
startB = 0
|
||||
extra5 = -startA
|
||||
qual_seqA = seqA.Qualities()[startA:]
|
||||
part_len = len(qual_seqA)
|
||||
qual_seqB = seqB.Qualities()[0:part_len]
|
||||
extra3 = seqB.Length() - part_len
|
||||
score = 0
|
||||
} else {
|
||||
startA = 0
|
||||
startB = -shift
|
||||
extra5 = startB
|
||||
qual_seqB = seqB.Qualities()[startB:]
|
||||
part_len = len(qual_seqB)
|
||||
extra3 = part_len - seqA.Length()
|
||||
qual_seqA = seqA.Qualities()[:part_len]
|
||||
}
|
||||
score = 0
|
||||
for i, qualA := range qual_seqA {
|
||||
qualB := qual_seqB[i]
|
||||
score += __nuc_score_part_match_match__[qualA][qualB]
|
||||
}
|
||||
arena.pointer.path = arena.pointer.path[:0]
|
||||
arena.pointer.path = append(arena.pointer.path, 0, part_len)
|
||||
}
|
||||
|
||||
arena.pointer.path[0] += extra5
|
||||
if arena.pointer.path[len(arena.pointer.path)-1] == 0 {
|
||||
arena.pointer.path[len(arena.pointer.path)-2] += extra3
|
||||
} else {
|
||||
arena.pointer.path = append(arena.pointer.path, extra3, 0)
|
||||
}
|
||||
|
||||
return score, arena.pointer.path
|
||||
}
|
||||
Reference in New Issue
Block a user