Files
obitools4/pkg/obialign/dnamatrix.go

101 lines
2.1 KiB
Go
Raw Normal View History

2022-01-13 23:27:39 +01:00
package obialign
import (
"math"
)
2022-01-14 15:30:01 +01:00
var _FourBitsCount = []float64{
2022-01-13 23:27:39 +01:00
0, // 0000
1, // 0001
1, // 0010
2, // 0011
1, // 0100
2, // 0101
2, // 0110
3, // 0111
1, // 1000
2, // 1001
2, // 1010
3, // 1011
2, // 1100
3, // 1101
3, // 1110
4, // 1111
}
2022-01-14 15:30:01 +01:00
var _InitializedDnaScore = false
2022-01-13 23:27:39 +01:00
2022-01-14 15:30:01 +01:00
var _NucPartMatch [32][32]float64
var _NucScorePartMatchMatch [100][100]int
var _NucScorePartMatchMismatch [100][100]int
2022-01-13 23:27:39 +01:00
2022-01-14 15:30:01 +01:00
func _MatchRatio(a, b byte) float64 {
2022-01-13 23:27:39 +01:00
// count of common bits
2022-01-14 15:30:01 +01:00
cm := _FourBitsCount[a&b&15]
2022-01-13 23:27:39 +01:00
2022-01-14 15:30:01 +01:00
ca := _FourBitsCount[a&15]
cb := _FourBitsCount[b&15]
2022-01-13 23:27:39 +01:00
if cm == 0 || ca == 0 || cb == 0 {
return float64(0)
}
return float64(cm) / float64(ca) / float64(cb)
}
2022-01-14 15:30:01 +01:00
func _Logaddexp(a, b float64) float64 {
2022-01-13 23:27:39 +01:00
if a > b {
a, b = b, a
}
return b + math.Log1p(math.Exp(a-b))
}
2022-01-14 15:30:01 +01:00
func _MatchScoreRatio(a, b byte) (float64, float64) {
2022-01-13 23:27:39 +01:00
l2 := math.Log(2)
l3 := math.Log(3)
l10 := math.Log(10)
2023-01-27 10:49:28 +01:00
lalea := math.Log(4) // 1 /(change of the random model)
2023-01-25 13:22:56 +01:00
lE1 := -float64(a)/10*l10 - l3 // log proba of sequencing error on A/3
lE2 := -float64(b)/10*l10 - l3 // log proba of sequencing error on B/3
lO1 := math.Log1p(-math.Exp(lE1 + l3)) // log proba no being an error on A
lO2 := math.Log1p(-math.Exp(lE2 + l3)) // log proba no being an error on B
2022-01-13 23:27:39 +01:00
lO1O2 := lO1 + lO2
lE1E2 := lE1 + lE2
lO1E2 := lO1 + lE2
lO2E1 := lO2 + lE1
2023-01-25 13:22:56 +01:00
MM := _Logaddexp(lO1O2, lE1E2+l3) // Proba match when match observed
Mm := _Logaddexp(_Logaddexp(lO1E2, lO2E1), lE1E2+l2) // Proba match when mismatch observed
2022-01-13 23:27:39 +01:00
2023-01-25 13:22:56 +01:00
return MM + lalea, Mm + lalea
2022-01-13 23:27:39 +01:00
}
2022-01-14 15:30:01 +01:00
func _InitNucPartMatch() {
2022-01-13 23:27:39 +01:00
2022-01-14 15:36:39 +01:00
for i, a := range _FourBitsBaseCode {
for j, b := range _FourBitsBaseCode {
2022-01-14 15:30:01 +01:00
_NucPartMatch[i][j] = _MatchRatio(a, b)
2022-01-13 23:27:39 +01:00
}
}
}
2022-01-14 15:30:01 +01:00
func _InitNucScorePartMatch() {
2022-01-13 23:27:39 +01:00
for i := 0; i < 100; i++ {
for j := 0; j < 100; j++ {
2022-01-14 15:30:01 +01:00
MM, Mm := _MatchScoreRatio(byte(i), byte(j))
_NucScorePartMatchMatch[i][j] = int(MM*10 + 0.5)
_NucScorePartMatchMismatch[i][j] = int(Mm*10 + 0.5)
2022-01-13 23:27:39 +01:00
}
}
}
2022-01-14 15:30:01 +01:00
func _InitDNAScoreMatrix() {
if !_InitializedDnaScore {
_InitNucPartMatch()
_InitNucScorePartMatch()
_InitializedDnaScore = true
2022-01-13 23:27:39 +01:00
}
}