mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Code refactoring
This commit is contained in:
@ -169,8 +169,8 @@ func BuildQualityConsensus(seqA, seqB obiseq.BioSequence, path []int,
|
|||||||
qm = qA
|
qm = qA
|
||||||
}
|
}
|
||||||
if qB == qA {
|
if qB == qA {
|
||||||
nuc := __four_bits_base_code__[sA[i]&31] | __four_bits_base_code__[sB[i]&31]
|
nuc := _FourBitsBaseCode[sA[i]&31] | _FourBitsBaseCode[sB[i]&31]
|
||||||
consensus = append(consensus, __four_bits_base_decode__[nuc])
|
consensus = append(consensus, _FourBitsBaseDecode[nuc])
|
||||||
}
|
}
|
||||||
|
|
||||||
q := qA + qB
|
q := qA + qB
|
||||||
|
@ -74,8 +74,8 @@ func _MatchScoreRatio(a, b byte) (float64, float64) {
|
|||||||
|
|
||||||
func _InitNucPartMatch() {
|
func _InitNucPartMatch() {
|
||||||
|
|
||||||
for i, a := range __four_bits_base_code__ {
|
for i, a := range _FourBitsBaseCode {
|
||||||
for j, b := range __four_bits_base_code__ {
|
for j, b := range _FourBitsBaseCode {
|
||||||
_NucPartMatch[i][j] = _MatchRatio(a, b)
|
_NucPartMatch[i][j] = _MatchRatio(a, b)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ import (
|
|||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
var __four_bits_base_code__ = []byte{0b0000,
|
var _FourBitsBaseCode = []byte{0b0000,
|
||||||
// IUPAC nucleotide code Base
|
// IUPAC nucleotide code Base
|
||||||
0b0001, // A Adenine
|
0b0001, // A Adenine
|
||||||
0b1110, // B C or G or T
|
0b1110, // B C or G or T
|
||||||
@ -38,7 +38,7 @@ var __four_bits_base_code__ = []byte{0b0000,
|
|||||||
0b0000,
|
0b0000,
|
||||||
0b0000}
|
0b0000}
|
||||||
|
|
||||||
var __four_bits_base_decode__ = []byte{
|
var _FourBitsBaseDecode = []byte{
|
||||||
// 0b0000 0b0001 0b0010 0b0011
|
// 0b0000 0b0001 0b0010 0b0011
|
||||||
'.', 'a', 'c', 'm',
|
'.', 'a', 'c', 'm',
|
||||||
// 0b0100 0b0101 0b0110 0b0111
|
// 0b0100 0b0101 0b0110 0b0111
|
||||||
@ -49,6 +49,14 @@ var __four_bits_base_decode__ = []byte{
|
|||||||
'k', 'd', 'b', 'n',
|
'k', 'd', 'b', 'n',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Encode4bits encodes each nucleotide of a sequence into a binary
|
||||||
|
// code where the four low-weight bits of a byte correspond respectively
|
||||||
|
// to the four nucleotides A, C, G, T. Simple bases A, C, G, T are therefore
|
||||||
|
// represented by a code with only a single bit on, whereas ambiguous symbols
|
||||||
|
// like R, D or N have the bits corresponding to each nucleotide represented
|
||||||
|
// by the ambiguity set to 1.
|
||||||
|
// A byte slice can be provided (buffer) to prevent allocation of a new
|
||||||
|
// memory chunk by the function.
|
||||||
func Encode4bits(seq obiseq.BioSequence, buffer []byte) []byte {
|
func Encode4bits(seq obiseq.BioSequence, buffer []byte) []byte {
|
||||||
length := seq.Length()
|
length := seq.Length()
|
||||||
rawseq := seq.Sequence()
|
rawseq := seq.Sequence()
|
||||||
@ -65,7 +73,7 @@ func Encode4bits(seq obiseq.BioSequence, buffer []byte) []byte {
|
|||||||
if nuc == '.' || nuc == '-' {
|
if nuc == '.' || nuc == '-' {
|
||||||
code = 0
|
code = 0
|
||||||
} else {
|
} else {
|
||||||
code = __four_bits_base_code__[nuc&31]
|
code = _FourBitsBaseCode[nuc&31]
|
||||||
}
|
}
|
||||||
buffer = append(buffer, code)
|
buffer = append(buffer, code)
|
||||||
}
|
}
|
||||||
|
@ -51,17 +51,17 @@ func __get_matrix_from__(matrix *[]int, lenA, a, b int) (int, int, int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func __pairing_score_pe_align__(baseA, qualA, baseB, qualB byte) int {
|
func __pairing_score_pe_align__(baseA, qualA, baseB, qualB byte) int {
|
||||||
part_match := __nuc_part_match__[baseA&31][baseB&31]
|
part_match := _NucPartMatch[baseA&31][baseB&31]
|
||||||
// log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB)
|
// log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB)
|
||||||
switch {
|
switch {
|
||||||
case part_match == 1:
|
case part_match == 1:
|
||||||
// log.Printf("match\n")
|
// log.Printf("match\n")
|
||||||
return __nuc_score_part_match_match__[qualA][qualB]
|
return _NucScorePartMatchMatch[qualA][qualB]
|
||||||
case part_match == 0:
|
case part_match == 0:
|
||||||
return __nuc_score_part_match_mismatch__[qualA][qualB]
|
return _NucScorePartMatchMismatch[qualA][qualB]
|
||||||
default:
|
default:
|
||||||
return int(part_match*float64(__nuc_score_part_match_match__[qualA][qualB]) +
|
return int(part_match*float64(_NucScorePartMatchMatch[qualA][qualB]) +
|
||||||
(1-part_match)*float64(__nuc_score_part_match_mismatch__[qualA][qualB]) + 0.5)
|
(1-part_match)*float64(_NucScorePartMatchMismatch[qualA][qualB]) + 0.5)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -73,7 +73,7 @@ func __fill_matrix_pe_left_align__(seqA, qualA, seqB, qualB []byte, gap int,
|
|||||||
|
|
||||||
// The actual gap score is the gap score times the mismatch between
|
// The actual gap score is the gap score times the mismatch between
|
||||||
// two bases with a score of 40
|
// two bases with a score of 40
|
||||||
gap = gap * __nuc_score_part_match_mismatch__[40][40]
|
gap = gap * _NucScorePartMatchMismatch[40][40]
|
||||||
|
|
||||||
needed := (la + 1) * (lb + 1)
|
needed := (la + 1) * (lb + 1)
|
||||||
|
|
||||||
@ -144,7 +144,7 @@ func __fill_matrix_pe_right_align__(seqA, qualA, seqB, qualB []byte, gap int,
|
|||||||
|
|
||||||
// The actual gap score is the gap score times the mismatch between
|
// The actual gap score is the gap score times the mismatch between
|
||||||
// two bases with a score of 40
|
// two bases with a score of 40
|
||||||
gap = gap * __nuc_score_part_match_mismatch__[40][40]
|
gap = gap * _NucScorePartMatchMismatch[40][40]
|
||||||
|
|
||||||
needed := (la + 1) * (lb + 1)
|
needed := (la + 1) * (lb + 1)
|
||||||
|
|
||||||
@ -215,9 +215,9 @@ func __fill_matrix_pe_right_align__(seqA, qualA, seqB, qualB []byte, gap int,
|
|||||||
|
|
||||||
func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (int, []int) {
|
func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (int, []int) {
|
||||||
|
|
||||||
if !__initialized_dna_score__ {
|
if !_InitializedDnaScore {
|
||||||
log.Println("Initializing the DNA Scoring matrix")
|
log.Println("Initializing the DNA Scoring matrix")
|
||||||
InitDNAScoreMatrix()
|
_InitDNAScoreMatrix()
|
||||||
}
|
}
|
||||||
|
|
||||||
if arena.pointer == nil {
|
if arena.pointer == nil {
|
||||||
@ -229,7 +229,7 @@ func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (in
|
|||||||
&arena.pointer.score_matrix,
|
&arena.pointer.score_matrix,
|
||||||
&arena.pointer.path_matrix)
|
&arena.pointer.path_matrix)
|
||||||
|
|
||||||
arena.pointer.path = __backtracking__(arena.pointer.path_matrix,
|
arena.pointer.path = _Backtracking(arena.pointer.path_matrix,
|
||||||
seqA.Length(), seqB.Length(),
|
seqA.Length(), seqB.Length(),
|
||||||
&arena.pointer.path)
|
&arena.pointer.path)
|
||||||
|
|
||||||
@ -238,9 +238,9 @@ func PELeftAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (in
|
|||||||
|
|
||||||
func PERightAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (int, []int) {
|
func PERightAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (int, []int) {
|
||||||
|
|
||||||
if !__initialized_dna_score__ {
|
if !_InitializedDnaScore {
|
||||||
log.Println("Initializing the DNA Scoring matrix")
|
log.Println("Initializing the DNA Scoring matrix")
|
||||||
InitDNAScoreMatrix()
|
_InitDNAScoreMatrix()
|
||||||
}
|
}
|
||||||
|
|
||||||
if arena.pointer == nil {
|
if arena.pointer == nil {
|
||||||
@ -252,7 +252,7 @@ func PERightAlign(seqA, seqB obiseq.BioSequence, gap int, arena PEAlignArena) (i
|
|||||||
&arena.pointer.score_matrix,
|
&arena.pointer.score_matrix,
|
||||||
&arena.pointer.path_matrix)
|
&arena.pointer.path_matrix)
|
||||||
|
|
||||||
arena.pointer.path = __backtracking__(arena.pointer.path_matrix,
|
arena.pointer.path = _Backtracking(arena.pointer.path_matrix,
|
||||||
seqA.Length(), seqB.Length(),
|
seqA.Length(), seqB.Length(),
|
||||||
&arena.pointer.path)
|
&arena.pointer.path)
|
||||||
|
|
||||||
@ -269,9 +269,9 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
|
|||||||
var raw_seqB, qual_seqB []byte
|
var raw_seqB, qual_seqB []byte
|
||||||
var extra5, extra3 int
|
var extra5, extra3 int
|
||||||
|
|
||||||
if !__initialized_dna_score__ {
|
if !_InitializedDnaScore {
|
||||||
log.Println("Initializing the DNA Scoring matrix")
|
log.Println("Initializing the DNA Scoring matrix")
|
||||||
InitDNAScoreMatrix()
|
_InitDNAScoreMatrix()
|
||||||
}
|
}
|
||||||
|
|
||||||
index := obikmer.Index4mer(seqA,
|
index := obikmer.Index4mer(seqA,
|
||||||
@ -323,7 +323,7 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
|
|||||||
&arena.pointer.path_matrix)
|
&arena.pointer.path_matrix)
|
||||||
}
|
}
|
||||||
|
|
||||||
arena.pointer.path = __backtracking__(arena.pointer.path_matrix,
|
arena.pointer.path = _Backtracking(arena.pointer.path_matrix,
|
||||||
len(raw_seqA), len(raw_seqB),
|
len(raw_seqA), len(raw_seqB),
|
||||||
&arena.pointer.path)
|
&arena.pointer.path)
|
||||||
|
|
||||||
@ -349,7 +349,7 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
|
|||||||
score = 0
|
score = 0
|
||||||
for i, qualA := range qual_seqA {
|
for i, qualA := range qual_seqA {
|
||||||
qualB := qual_seqB[i]
|
qualB := qual_seqB[i]
|
||||||
score += __nuc_score_part_match_match__[qualA][qualB]
|
score += _NucScorePartMatchMatch[qualA][qualB]
|
||||||
}
|
}
|
||||||
arena.pointer.path = arena.pointer.path[:0]
|
arena.pointer.path = arena.pointer.path[:0]
|
||||||
arena.pointer.path = append(arena.pointer.path, 0, part_len)
|
arena.pointer.path = append(arena.pointer.path, 0, part_len)
|
||||||
|
Reference in New Issue
Block a user