Code refactoring

This commit is contained in:
2022-01-14 15:46:36 +01:00
parent 2163db94d0
commit 45a1765a03
3 changed files with 95 additions and 87 deletions

View File

@ -11,7 +11,7 @@ type _BuildAlignArena struct {
bufferB []byte bufferB []byte
} }
// BuildAlignArena define memory arena usable by the // BuildAlignArena defines memory arena usable by the
// BuildAlignment function. The same arena can be reused // BuildAlignment function. The same arena can be reused
// from alignment to alignment to limit memory allocation // from alignment to alignment to limit memory allocation
// and desallocation process. // and desallocation process.
@ -23,7 +23,7 @@ type BuildAlignArena struct {
// type. // type.
var NilBuildAlignArena = BuildAlignArena{nil} var NilBuildAlignArena = BuildAlignArena{nil}
// MakeBuildAlignArena make a new arena for aligning two sequences // MakeBuildAlignArena makes a new arena for aligning two sequences
// of maximum length indicated by lseqA and lseqB. // of maximum length indicated by lseqA and lseqB.
func MakeBuildAlignArena(lseqA, lseqB int) BuildAlignArena { func MakeBuildAlignArena(lseqA, lseqB int) BuildAlignArena {
a := _BuildAlignArena{ a := _BuildAlignArena{

View File

@ -19,7 +19,7 @@ func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int {
lleft := 0 lleft := 0
for i > -1 || j > -1 { for i > -1 || j > -1 {
step := __get_matrix__(&pathMatrix, lseqA, i, j) step := _GetMatrix(&pathMatrix, lseqA, i, j)
// log.Printf("I: %d J:%d -> %d\n", i, j, step) // log.Printf("I: %d J:%d -> %d\n", i, j, step)
switch { switch {

View File

@ -7,66 +7,74 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
type __pe_align_arena__ struct { type _PeAlignArena struct {
score_matrix []int scoreMatrix []int
path_matrix []int pathMatrix []int
path []int path []int
fast_index [][]int fastIndex [][]int
fast_buffer []byte fastBuffer []byte
} }
// PEAlignArena defines memory arena usable by the
// Paired-End alignment related functions. The same arena can be reused
// from alignment to alignment to limit memory allocation
// and desallocation process.
type PEAlignArena struct { type PEAlignArena struct {
pointer *__pe_align_arena__ pointer *_PeAlignArena
} }
// NilPEAlignArena is the nil instance of the PEAlignArena
// type.
var NilPEAlignArena = PEAlignArena{nil} var NilPEAlignArena = PEAlignArena{nil}
// MakePEAlignArena makes a new arena for the alignment of two paired sequences
// of maximum length indicated by lseqA and lseqB.
func MakePEAlignArena(lseqA, lseqB int) PEAlignArena { func MakePEAlignArena(lseqA, lseqB int) PEAlignArena {
a := __pe_align_arena__{ a := _PeAlignArena{
score_matrix: make([]int, 0, (lseqA+1)*(lseqB+1)), scoreMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)),
path_matrix: make([]int, 0, (lseqA+1)*(lseqB+1)), pathMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)),
path: make([]int, 2*(lseqA+lseqB)), path: make([]int, 2*(lseqA+lseqB)),
fast_index: make([][]int, 256), fastIndex: make([][]int, 256),
fast_buffer: make([]byte, 0, lseqA), fastBuffer: make([]byte, 0, lseqA),
} }
return PEAlignArena{&a} return PEAlignArena{&a}
} }
func __set_matrices__(matrixA, matrixB *[]int, lenA, a, b, valueA, valueB int) { func _SetMatrices(matrixA, matrixB *[]int, lenA, a, b, valueA, valueB int) {
i := (b+1)*(lenA+1) + a + 1 i := (b+1)*(lenA+1) + a + 1
(*matrixA)[i] = valueA (*matrixA)[i] = valueA
(*matrixB)[i] = valueB (*matrixB)[i] = valueB
} }
func __get_matrix__(matrix *[]int, lenA, a, b int) int { func _GetMatrix(matrix *[]int, lenA, a, b int) int {
return (*matrix)[(b+1)*(lenA+1)+a+1] return (*matrix)[(b+1)*(lenA+1)+a+1]
} }
func __get_matrix_from__(matrix *[]int, lenA, a, b int) (int, int, int) { func _GetMatrixFrom(matrix *[]int, lenA, a, b int) (int, int, int) {
i := (b+1)*(lenA+1) + a i := (b+1)*(lenA+1) + a
j := i - lenA j := i - lenA
m := *matrix m := *matrix
return m[j], m[j-1], m[i] return m[j], m[j-1], m[i]
} }
func __pairing_score_pe_align__(baseA, qualA, baseB, qualB byte) int { func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int {
part_match := _NucPartMatch[baseA&31][baseB&31] partMatch := _NucPartMatch[baseA&31][baseB&31]
// log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB) // log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB)
switch { switch {
case part_match == 1: case partMatch == 1:
// log.Printf("match\n") // log.Printf("match\n")
return _NucScorePartMatchMatch[qualA][qualB] return _NucScorePartMatchMatch[qualA][qualB]
case part_match == 0: case partMatch == 0:
return _NucScorePartMatchMismatch[qualA][qualB] return _NucScorePartMatchMismatch[qualA][qualB]
default: default:
return int(part_match*float64(_NucScorePartMatchMatch[qualA][qualB]) + return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) +
(1-part_match)*float64(_NucScorePartMatchMismatch[qualA][qualB]) + 0.5) (1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) + 0.5)
} }
} }
func __fill_matrix_pe_left_align__(seqA, qualA, seqB, qualB []byte, gap int, func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap int,
score_matrix, path_matrix *[]int) int { scoreMatrix, pathMatrix *[]int) int {
la := len(seqA) la := len(seqA)
lb := len(seqB) lb := len(seqB)
@ -77,67 +85,67 @@ func __fill_matrix_pe_left_align__(seqA, qualA, seqB, qualB []byte, gap int,
needed := (la + 1) * (lb + 1) needed := (la + 1) * (lb + 1)
if needed > cap(*score_matrix) { if needed > cap(*scoreMatrix) {
*score_matrix = make([]int, needed) *scoreMatrix = make([]int, needed)
} }
if needed > cap(*path_matrix) { if needed > cap(*pathMatrix) {
*path_matrix = make([]int, needed) *pathMatrix = make([]int, needed)
} }
*score_matrix = (*score_matrix)[:needed] *scoreMatrix = (*scoreMatrix)[:needed]
*path_matrix = (*path_matrix)[:needed] *pathMatrix = (*pathMatrix)[:needed]
__set_matrices__(score_matrix, path_matrix, la, -1, -1, 0, 0) _SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
// Fills the first column with score 0 // Fills the first column with score 0
for i := 0; i < la; i++ { for i := 0; i < la; i++ {
__set_matrices__(score_matrix, path_matrix, la, i, -1, 0, -1) _SetMatrices(scoreMatrix, pathMatrix, la, i, -1, 0, -1)
} }
la1 := la - 1 la1 := la - 1
for j := 0; j < lb; j++ { for j := 0; j < lb; j++ {
__set_matrices__(score_matrix, path_matrix, la, -1, j, (j+1)*gap, 1) _SetMatrices(scoreMatrix, pathMatrix, la, -1, j, (j+1)*gap, 1)
for i := 0; i < la1; i++ { for i := 0; i < la1; i++ {
left, diag, top := __get_matrix_from__(score_matrix, la, i, j) left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
diag += __pairing_score_pe_align__(seqA[i], qualA[i], seqB[j], qualB[j]) diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j])
left += gap left += gap
top += gap top += gap
switch { switch {
case diag > left && diag > top: case diag > left && diag > top:
__set_matrices__(score_matrix, path_matrix, la, i, j, diag, 0) _SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
case left > diag && left > top: case left > diag && left > top:
__set_matrices__(score_matrix, path_matrix, la, i, j, left, +1) _SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1)
default: default:
__set_matrices__(score_matrix, path_matrix, la, i, j, top, -1) _SetMatrices(scoreMatrix, pathMatrix, la, i, j, top, -1)
} }
} }
// Special case for the last line Left gap are free // Special case for the last line Left gap are free
left, diag, top := __get_matrix_from__(score_matrix, la, la1, j) left, diag, top := _GetMatrixFrom(scoreMatrix, la, la1, j)
diag += __pairing_score_pe_align__(seqA[la1], qualA[la1], seqB[j], qualB[j]) diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j])
top += gap top += gap
switch { switch {
case diag > left && diag > top: case diag > left && diag > top:
__set_matrices__(score_matrix, path_matrix, la, la1, j, diag, 0) _SetMatrices(scoreMatrix, pathMatrix, la, la1, j, diag, 0)
case left > diag && left > top: case left > diag && left > top:
__set_matrices__(score_matrix, path_matrix, la, la1, j, left, +1) _SetMatrices(scoreMatrix, pathMatrix, la, la1, j, left, +1)
default: default:
__set_matrices__(score_matrix, path_matrix, la, la1, j, top, -1) _SetMatrices(scoreMatrix, pathMatrix, la, la1, j, top, -1)
} }
} }
return __get_matrix__(score_matrix, la, la1, lb-1) return _GetMatrix(scoreMatrix, la, la1, lb-1)
} }
func __fill_matrix_pe_right_align__(seqA, qualA, seqB, qualB []byte, gap int, func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap int,
score_matrix, path_matrix *[]int) int { scoreMatrix, pathMatrix *[]int) int {
la := len(seqA) la := len(seqA)
lb := len(seqB) lb := len(seqB)
@ -148,69 +156,69 @@ func __fill_matrix_pe_right_align__(seqA, qualA, seqB, qualB []byte, gap int,
needed := (la + 1) * (lb + 1) needed := (la + 1) * (lb + 1)
if needed > cap(*score_matrix) { if needed > cap(*scoreMatrix) {
*score_matrix = make([]int, needed) *scoreMatrix = make([]int, needed)
} }
if needed > cap(*path_matrix) { if needed > cap(*pathMatrix) {
*path_matrix = make([]int, needed) *pathMatrix = make([]int, needed)
} }
*score_matrix = (*score_matrix)[:needed] *scoreMatrix = (*scoreMatrix)[:needed]
*path_matrix = (*path_matrix)[:needed] *pathMatrix = (*pathMatrix)[:needed]
__set_matrices__(score_matrix, path_matrix, la, -1, -1, 0, 0) _SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
// Fills the first column with score 0 // Fills the first column with score 0
for i := 0; i < la; i++ { for i := 0; i < la; i++ {
__set_matrices__(score_matrix, path_matrix, la, i, -1, (i+1)*gap, -1) _SetMatrices(scoreMatrix, pathMatrix, la, i, -1, (i+1)*gap, -1)
} }
lb1 := lb - 1 lb1 := lb - 1
for j := 0; j < lb1; j++ { for j := 0; j < lb1; j++ {
__set_matrices__(score_matrix, path_matrix, la, -1, j, 0, 1) _SetMatrices(scoreMatrix, pathMatrix, la, -1, j, 0, 1)
for i := 0; i < la; i++ { for i := 0; i < la; i++ {
left, diag, top := __get_matrix_from__(score_matrix, la, i, j) left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
diag += __pairing_score_pe_align__(seqA[i], qualA[i], seqB[j], qualB[j]) diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j])
left += gap left += gap
top += gap top += gap
switch { switch {
case diag > left && left > top: case diag > left && left > top:
__set_matrices__(score_matrix, path_matrix, la, i, j, diag, 0) _SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
case left > diag && left > top: case left > diag && left > top:
__set_matrices__(score_matrix, path_matrix, la, i, j, left, +1) _SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1)
default: default:
__set_matrices__(score_matrix, path_matrix, la, i, j, top, -1) _SetMatrices(scoreMatrix, pathMatrix, la, i, j, top, -1)
} }
} }
} }
// Special case for the last colump Up gap are free // Special case for the last colump Up gap are free
__set_matrices__(score_matrix, path_matrix, la, -1, lb1, 0, 1) _SetMatrices(scoreMatrix, pathMatrix, la, -1, lb1, 0, 1)
for i := 0; i < la; i++ { for i := 0; i < la; i++ {
left, diag, top := __get_matrix_from__(score_matrix, la, i, lb1) left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1)
diag += __pairing_score_pe_align__(seqA[i], qualA[i], seqB[lb1], qualB[lb1]) diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1])
left += gap left += gap
switch { switch {
case diag > left && diag > top: case diag > left && diag > top:
__set_matrices__(score_matrix, path_matrix, la, i, lb1, diag, 0) _SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, diag, 0)
case left > diag && left > top: case left > diag && left > top:
__set_matrices__(score_matrix, path_matrix, la, i, lb1, left, +1) _SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, left, +1)
default: default:
__set_matrices__(score_matrix, path_matrix, la, i, lb1, top, -1) _SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, top, -1)
} }
} }
return __get_matrix__(score_matrix, la, la-1, lb1) return _GetMatrix(scoreMatrix, la, la-1, lb1)
} }
func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (int, []int) { func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (int, []int) {
@ -224,12 +232,12 @@ func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (in
arena = MakePEAlignArena(seqA.Length(), seqB.Length()) arena = MakePEAlignArena(seqA.Length(), seqB.Length())
} }
score := __fill_matrix_pe_left_align__(seqA.Sequence(), seqA.Qualities(), score := _FillMatrixPeLeftAlign(seqA.Sequence(), seqA.Qualities(),
seqB.Sequence(), seqB.Qualities(), gap, seqB.Sequence(), seqB.Qualities(), gap,
&arena.pointer.score_matrix, &arena.pointer.scoreMatrix,
&arena.pointer.path_matrix) &arena.pointer.pathMatrix)
arena.pointer.path = _Backtracking(arena.pointer.path_matrix, arena.pointer.path = _Backtracking(arena.pointer.pathMatrix,
seqA.Length(), seqB.Length(), seqA.Length(), seqB.Length(),
&arena.pointer.path) &arena.pointer.path)
@ -247,12 +255,12 @@ func PERightAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (i
arena = MakePEAlignArena(seqA.Length(), seqB.Length()) arena = MakePEAlignArena(seqA.Length(), seqB.Length())
} }
score := __fill_matrix_pe_right_align__(seqA.Sequence(), seqA.Qualities(), score := _FillMatrixPeRightAlign(seqA.Sequence(), seqA.Qualities(),
seqB.Sequence(), seqB.Qualities(), gap, seqB.Sequence(), seqB.Qualities(), gap,
&arena.pointer.score_matrix, &arena.pointer.scoreMatrix,
&arena.pointer.path_matrix) &arena.pointer.pathMatrix)
arena.pointer.path = _Backtracking(arena.pointer.path_matrix, arena.pointer.path = _Backtracking(arena.pointer.pathMatrix,
seqA.Length(), seqB.Length(), seqA.Length(), seqB.Length(),
&arena.pointer.path) &arena.pointer.path)
@ -275,8 +283,8 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
} }
index := obikmer.Index4mer(seqA, index := obikmer.Index4mer(seqA,
&arena.pointer.fast_index, &arena.pointer.fastIndex,
&arena.pointer.fast_buffer) &arena.pointer.fastBuffer)
shift, fast_score := obikmer.FastShiftFourMer(index, seqB, nil) shift, fast_score := obikmer.FastShiftFourMer(index, seqB, nil)
@ -300,10 +308,10 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
raw_seqB = seqB.Sequence()[0:part_len] raw_seqB = seqB.Sequence()[0:part_len]
qual_seqB = seqB.Qualities()[0:part_len] qual_seqB = seqB.Qualities()[0:part_len]
extra3 = seqB.Length() - part_len extra3 = seqB.Length() - part_len
score = __fill_matrix_pe_left_align__( score = _FillMatrixPeLeftAlign(
raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap, raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap,
&arena.pointer.score_matrix, &arena.pointer.scoreMatrix,
&arena.pointer.path_matrix) &arena.pointer.pathMatrix)
} else { } else {
startA = 0 startA = 0
startB = -shift - delta startB = -shift - delta
@ -317,13 +325,13 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
raw_seqA = seqA.Sequence()[:part_len] raw_seqA = seqA.Sequence()[:part_len]
qual_seqA = seqA.Qualities()[:part_len] qual_seqA = seqA.Qualities()[:part_len]
extra3 = part_len - seqA.Length() extra3 = part_len - seqA.Length()
score = __fill_matrix_pe_right_align__( score = _FillMatrixPeRightAlign(
raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap, raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap,
&arena.pointer.score_matrix, &arena.pointer.scoreMatrix,
&arena.pointer.path_matrix) &arena.pointer.pathMatrix)
} }
arena.pointer.path = _Backtracking(arena.pointer.path_matrix, arena.pointer.path = _Backtracking(arena.pointer.pathMatrix,
len(raw_seqA), len(raw_seqB), len(raw_seqA), len(raw_seqB),
&arena.pointer.path) &arena.pointer.path)