From 45a1765a03f7f6abd85998da096bf10ea8e8e3d6 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Fri, 14 Jan 2022 15:46:36 +0100 Subject: [PATCH] Code refactoring --- pkg/obialign/alignment.go | 4 +- pkg/obialign/backtracking.go | 2 +- pkg/obialign/pairedendalign.go | 176 +++++++++++++++++---------------- 3 files changed, 95 insertions(+), 87 deletions(-) diff --git a/pkg/obialign/alignment.go b/pkg/obialign/alignment.go index d552f84..f9683e2 100644 --- a/pkg/obialign/alignment.go +++ b/pkg/obialign/alignment.go @@ -11,7 +11,7 @@ type _BuildAlignArena struct { bufferB []byte } -// BuildAlignArena define memory arena usable by the +// BuildAlignArena defines memory arena usable by the // BuildAlignment function. The same arena can be reused // from alignment to alignment to limit memory allocation // and desallocation process. @@ -23,7 +23,7 @@ type BuildAlignArena struct { // type. var NilBuildAlignArena = BuildAlignArena{nil} -// MakeBuildAlignArena make a new arena for aligning two sequences +// MakeBuildAlignArena makes a new arena for aligning two sequences // of maximum length indicated by lseqA and lseqB. 
func MakeBuildAlignArena(lseqA, lseqB int) BuildAlignArena { a := _BuildAlignArena{ diff --git a/pkg/obialign/backtracking.go b/pkg/obialign/backtracking.go index 44be309..9f4a030 100644 --- a/pkg/obialign/backtracking.go +++ b/pkg/obialign/backtracking.go @@ -19,7 +19,7 @@ func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int { lleft := 0 for i > -1 || j > -1 { - step := __get_matrix__(&pathMatrix, lseqA, i, j) + step := _GetMatrix(&pathMatrix, lseqA, i, j) // log.Printf("I: %d J:%d -> %d\n", i, j, step) switch { diff --git a/pkg/obialign/pairedendalign.go b/pkg/obialign/pairedendalign.go index 54a02bb..725c828 100644 --- a/pkg/obialign/pairedendalign.go +++ b/pkg/obialign/pairedendalign.go @@ -7,66 +7,74 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) -type __pe_align_arena__ struct { - score_matrix []int - path_matrix []int - path []int - fast_index [][]int - fast_buffer []byte +type _PeAlignArena struct { + scoreMatrix []int + pathMatrix []int + path []int + fastIndex [][]int + fastBuffer []byte } +// PEAlignArena defines memory arena usable by the +// Paired-End alignment related functions. The same arena can be reused +// from alignment to alignment to limit memory allocation +// and deallocation process. type PEAlignArena struct { - pointer *__pe_align_arena__ + pointer *_PeAlignArena } +// NilPEAlignArena is the nil instance of the PEAlignArena +// type. var NilPEAlignArena = PEAlignArena{nil} +// MakePEAlignArena makes a new arena for the alignment of two paired sequences +// of maximum length indicated by lseqA and lseqB. 
func MakePEAlignArena(lseqA, lseqB int) PEAlignArena { - a := __pe_align_arena__{ - score_matrix: make([]int, 0, (lseqA+1)*(lseqB+1)), - path_matrix: make([]int, 0, (lseqA+1)*(lseqB+1)), - path: make([]int, 2*(lseqA+lseqB)), - fast_index: make([][]int, 256), - fast_buffer: make([]byte, 0, lseqA), + a := _PeAlignArena{ + scoreMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)), + pathMatrix: make([]int, 0, (lseqA+1)*(lseqB+1)), + path: make([]int, 2*(lseqA+lseqB)), + fastIndex: make([][]int, 256), + fastBuffer: make([]byte, 0, lseqA), } return PEAlignArena{&a} } -func __set_matrices__(matrixA, matrixB *[]int, lenA, a, b, valueA, valueB int) { +func _SetMatrices(matrixA, matrixB *[]int, lenA, a, b, valueA, valueB int) { i := (b+1)*(lenA+1) + a + 1 (*matrixA)[i] = valueA (*matrixB)[i] = valueB } -func __get_matrix__(matrix *[]int, lenA, a, b int) int { +func _GetMatrix(matrix *[]int, lenA, a, b int) int { return (*matrix)[(b+1)*(lenA+1)+a+1] } -func __get_matrix_from__(matrix *[]int, lenA, a, b int) (int, int, int) { +func _GetMatrixFrom(matrix *[]int, lenA, a, b int) (int, int, int) { i := (b+1)*(lenA+1) + a j := i - lenA m := *matrix return m[j], m[j-1], m[i] } -func __pairing_score_pe_align__(baseA, qualA, baseB, qualB byte) int { - part_match := _NucPartMatch[baseA&31][baseB&31] +func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int { + partMatch := _NucPartMatch[baseA&31][baseB&31] // log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB) switch { - case part_match == 1: + case partMatch == 1: // log.Printf("match\n") return _NucScorePartMatchMatch[qualA][qualB] - case part_match == 0: + case partMatch == 0: return _NucScorePartMatchMismatch[qualA][qualB] default: - return int(part_match*float64(_NucScorePartMatchMatch[qualA][qualB]) + - (1-part_match)*float64(_NucScorePartMatchMismatch[qualA][qualB]) + 0.5) + return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) + + 
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) + 0.5) } } -func __fill_matrix_pe_left_align__(seqA, qualA, seqB, qualB []byte, gap int, - score_matrix, path_matrix *[]int) int { +func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap int, + scoreMatrix, pathMatrix *[]int) int { la := len(seqA) lb := len(seqB) @@ -77,67 +85,67 @@ func __fill_matrix_pe_left_align__(seqA, qualA, seqB, qualB []byte, gap int, needed := (la + 1) * (lb + 1) - if needed > cap(*score_matrix) { - *score_matrix = make([]int, needed) + if needed > cap(*scoreMatrix) { + *scoreMatrix = make([]int, needed) } - if needed > cap(*path_matrix) { - *path_matrix = make([]int, needed) + if needed > cap(*pathMatrix) { + *pathMatrix = make([]int, needed) } - *score_matrix = (*score_matrix)[:needed] - *path_matrix = (*path_matrix)[:needed] + *scoreMatrix = (*scoreMatrix)[:needed] + *pathMatrix = (*pathMatrix)[:needed] - __set_matrices__(score_matrix, path_matrix, la, -1, -1, 0, 0) + _SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0) // Fills the first column with score 0 for i := 0; i < la; i++ { - __set_matrices__(score_matrix, path_matrix, la, i, -1, 0, -1) + _SetMatrices(scoreMatrix, pathMatrix, la, i, -1, 0, -1) } la1 := la - 1 for j := 0; j < lb; j++ { - __set_matrices__(score_matrix, path_matrix, la, -1, j, (j+1)*gap, 1) + _SetMatrices(scoreMatrix, pathMatrix, la, -1, j, (j+1)*gap, 1) for i := 0; i < la1; i++ { - left, diag, top := __get_matrix_from__(score_matrix, la, i, j) - diag += __pairing_score_pe_align__(seqA[i], qualA[i], seqB[j], qualB[j]) + left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j) + diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j]) left += gap top += gap switch { case diag > left && diag > top: - __set_matrices__(score_matrix, path_matrix, la, i, j, diag, 0) + _SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0) case left > diag && left > top: - __set_matrices__(score_matrix, path_matrix, la, i, j, left, +1) + 
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1) default: - __set_matrices__(score_matrix, path_matrix, la, i, j, top, -1) + _SetMatrices(scoreMatrix, pathMatrix, la, i, j, top, -1) } } // Special case for the last line Left gap are free - left, diag, top := __get_matrix_from__(score_matrix, la, la1, j) - diag += __pairing_score_pe_align__(seqA[la1], qualA[la1], seqB[j], qualB[j]) + left, diag, top := _GetMatrixFrom(scoreMatrix, la, la1, j) + diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j]) top += gap switch { case diag > left && diag > top: - __set_matrices__(score_matrix, path_matrix, la, la1, j, diag, 0) + _SetMatrices(scoreMatrix, pathMatrix, la, la1, j, diag, 0) case left > diag && left > top: - __set_matrices__(score_matrix, path_matrix, la, la1, j, left, +1) + _SetMatrices(scoreMatrix, pathMatrix, la, la1, j, left, +1) default: - __set_matrices__(score_matrix, path_matrix, la, la1, j, top, -1) + _SetMatrices(scoreMatrix, pathMatrix, la, la1, j, top, -1) } } - return __get_matrix__(score_matrix, la, la1, lb-1) + return _GetMatrix(scoreMatrix, la, la1, lb-1) } -func __fill_matrix_pe_right_align__(seqA, qualA, seqB, qualB []byte, gap int, - score_matrix, path_matrix *[]int) int { +func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap int, + scoreMatrix, pathMatrix *[]int) int { la := len(seqA) lb := len(seqB) @@ -148,69 +156,69 @@ func __fill_matrix_pe_right_align__(seqA, qualA, seqB, qualB []byte, gap int, needed := (la + 1) * (lb + 1) - if needed > cap(*score_matrix) { - *score_matrix = make([]int, needed) + if needed > cap(*scoreMatrix) { + *scoreMatrix = make([]int, needed) } - if needed > cap(*path_matrix) { - *path_matrix = make([]int, needed) + if needed > cap(*pathMatrix) { + *pathMatrix = make([]int, needed) } - *score_matrix = (*score_matrix)[:needed] - *path_matrix = (*path_matrix)[:needed] + *scoreMatrix = (*scoreMatrix)[:needed] + *pathMatrix = (*pathMatrix)[:needed] - __set_matrices__(score_matrix, 
path_matrix, la, -1, -1, 0, 0) + _SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0) // Fills the first column with score 0 for i := 0; i < la; i++ { - __set_matrices__(score_matrix, path_matrix, la, i, -1, (i+1)*gap, -1) + _SetMatrices(scoreMatrix, pathMatrix, la, i, -1, (i+1)*gap, -1) } lb1 := lb - 1 for j := 0; j < lb1; j++ { - __set_matrices__(score_matrix, path_matrix, la, -1, j, 0, 1) + _SetMatrices(scoreMatrix, pathMatrix, la, -1, j, 0, 1) for i := 0; i < la; i++ { - left, diag, top := __get_matrix_from__(score_matrix, la, i, j) + left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j) - diag += __pairing_score_pe_align__(seqA[i], qualA[i], seqB[j], qualB[j]) + diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j]) left += gap top += gap switch { case diag > left && left > top: - __set_matrices__(score_matrix, path_matrix, la, i, j, diag, 0) + _SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0) case left > diag && left > top: - __set_matrices__(score_matrix, path_matrix, la, i, j, left, +1) + _SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1) default: - __set_matrices__(score_matrix, path_matrix, la, i, j, top, -1) + _SetMatrices(scoreMatrix, pathMatrix, la, i, j, top, -1) } } } // Special case for the last colump Up gap are free - __set_matrices__(score_matrix, path_matrix, la, -1, lb1, 0, 1) + _SetMatrices(scoreMatrix, pathMatrix, la, -1, lb1, 0, 1) for i := 0; i < la; i++ { - left, diag, top := __get_matrix_from__(score_matrix, la, i, lb1) - diag += __pairing_score_pe_align__(seqA[i], qualA[i], seqB[lb1], qualB[lb1]) + left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1) + diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1]) left += gap switch { case diag > left && diag > top: - __set_matrices__(score_matrix, path_matrix, la, i, lb1, diag, 0) + _SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, diag, 0) case left > diag && left > top: - __set_matrices__(score_matrix, path_matrix, la, i, lb1, left, +1) + 
_SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, left, +1) default: - __set_matrices__(score_matrix, path_matrix, la, i, lb1, top, -1) + _SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, top, -1) } } - return __get_matrix__(score_matrix, la, la-1, lb1) + return _GetMatrix(scoreMatrix, la, la-1, lb1) } func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (int, []int) { @@ -224,12 +232,12 @@ func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (in arena = MakePEAlignArena(seqA.Length(), seqB.Length()) } - score := __fill_matrix_pe_left_align__(seqA.Sequence(), seqA.Qualities(), + score := _FillMatrixPeLeftAlign(seqA.Sequence(), seqA.Qualities(), seqB.Sequence(), seqB.Qualities(), gap, - &arena.pointer.score_matrix, - &arena.pointer.path_matrix) + &arena.pointer.scoreMatrix, + &arena.pointer.pathMatrix) - arena.pointer.path = _Backtracking(arena.pointer.path_matrix, + arena.pointer.path = _Backtracking(arena.pointer.pathMatrix, seqA.Length(), seqB.Length(), &arena.pointer.path) @@ -247,12 +255,12 @@ func PERightAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (i arena = MakePEAlignArena(seqA.Length(), seqB.Length()) } - score := __fill_matrix_pe_right_align__(seqA.Sequence(), seqA.Qualities(), + score := _FillMatrixPeRightAlign(seqA.Sequence(), seqA.Qualities(), seqB.Sequence(), seqB.Qualities(), gap, - &arena.pointer.score_matrix, - &arena.pointer.path_matrix) + &arena.pointer.scoreMatrix, + &arena.pointer.pathMatrix) - arena.pointer.path = _Backtracking(arena.pointer.path_matrix, + arena.pointer.path = _Backtracking(arena.pointer.pathMatrix, seqA.Length(), seqB.Length(), &arena.pointer.path) @@ -275,8 +283,8 @@ func PEAlign(seqA, seqB obiseq.BioSequence, } index := obikmer.Index4mer(seqA, - &arena.pointer.fast_index, - &arena.pointer.fast_buffer) + &arena.pointer.fastIndex, + &arena.pointer.fastBuffer) shift, fast_score := obikmer.FastShiftFourMer(index, seqB, nil) @@ -300,10 +308,10 @@ func 
PEAlign(seqA, seqB obiseq.BioSequence, raw_seqB = seqB.Sequence()[0:part_len] qual_seqB = seqB.Qualities()[0:part_len] extra3 = seqB.Length() - part_len - score = __fill_matrix_pe_left_align__( + score = _FillMatrixPeLeftAlign( raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap, - &arena.pointer.score_matrix, - &arena.pointer.path_matrix) + &arena.pointer.scoreMatrix, + &arena.pointer.pathMatrix) } else { startA = 0 startB = -shift - delta @@ -317,13 +325,13 @@ func PEAlign(seqA, seqB obiseq.BioSequence, raw_seqA = seqA.Sequence()[:part_len] qual_seqA = seqA.Qualities()[:part_len] extra3 = part_len - seqA.Length() - score = __fill_matrix_pe_right_align__( + score = _FillMatrixPeRightAlign( raw_seqA, qual_seqA, raw_seqB, qual_seqB, gap, - &arena.pointer.score_matrix, - &arena.pointer.path_matrix) + &arena.pointer.scoreMatrix, + &arena.pointer.pathMatrix) } - arena.pointer.path = _Backtracking(arena.pointer.path_matrix, + arena.pointer.path = _Backtracking(arena.pointer.pathMatrix, len(raw_seqA), len(raw_seqB), &arena.pointer.path)