diff --git a/pkg/obialign/dnamatrix.go b/pkg/obialign/dnamatrix.go index 9ebf17e..b82b453 100644 --- a/pkg/obialign/dnamatrix.go +++ b/pkg/obialign/dnamatrix.go @@ -74,6 +74,30 @@ func _Logaddexp(a, b float64) float64 { return b + math.Log1p(math.Exp(a-b)) } +func _Log1mexp(a float64) float64 { + if a > 0 { + log.Panic("Log1mexp: a > 0") + } + + if a == 0 { + return 0 + } + + return (math.Log(-math.Expm1(a))) +} + +func _Logdiffexp(a, b float64) float64 { + if a < b { + log.Panic("Log1mexp: a < b") + } + + if a == b { + return math.Inf(-1) + } + + return a + _Log1mexp(b-a) +} + // _MatchScoreRatio calculates the match score ratio between two bytes. // // Parameters: @@ -83,25 +107,25 @@ func _Logaddexp(a, b float64) float64 { // Returns: // - float64: the match score ratio when a match is observed // - float64: the match score ratio when a mismatch is observed -func _MatchScoreRatio(a, b byte) (float64, float64) { +func _MatchScoreRatio(QF, QR byte) (float64, float64) { - l2 := math.Log(2) l3 := math.Log(3) + l4 := math.Log(4) l10 := math.Log(10) - lalea := math.Log(4) // 1 /(change of the random model) - lE1 := -float64(a)/10*l10 - l3 // log proba of sequencing error on A/3 - lE2 := -float64(b)/10*l10 - l3 // log proba of sequencing error on B/3 - lO1 := math.Log1p(-math.Exp(lE1 + l3)) // log proba no being an error on A - lO2 := math.Log1p(-math.Exp(lE2 + l3)) // log proba no being an error on B - lO1O2 := lO1 + lO2 - lE1E2 := lE1 + lE2 - lO1E2 := lO1 + lE2 - lO2E1 := lO2 + lE1 + qF := -float64(QF) / 10 * l10 + qR := -float64(QR) / 10 * l10 + term1 := _Logaddexp(qF, qR) + term2 := _Logdiffexp(term1, qF+qR) - MM := _Logaddexp(lO1O2, lE1E2+l3) // Proba match when match observed - Mm := _Logaddexp(_Logaddexp(lO1E2, lO2E1), lE1E2+l2) // Proba match when mismatch observed + // log.Warnf("MatchScoreRatio: %v, %v , %v, %v", QF, QR, term1, term2) - return MM + lalea, Mm + lalea + match_logp := _Log1mexp(term2 + l3 - l4) + match_score := match_logp - _Log1mexp(match_logp) + + mismatch_logp := term2 - l4 + mismatch_score := mismatch_logp - _Log1mexp(mismatch_logp) + + return match_score, mismatch_score } func _InitNucPartMatch() { diff --git a/pkg/obikmer/encodefourmer.go b/pkg/obikmer/encodefourmer.go index c3be879..ba1a5a1 100644 --- a/pkg/obikmer/encodefourmer.go +++ b/pkg/obikmer/encodefourmer.go @@ -2,6 +2,7 @@ package obikmer import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) var __single_base_code__ = []byte{0, @@ -131,33 +132,39 @@ func FastShiftFourMer(index [][]int, shifts *map[int]int, lindex int, seq *obise maxshift := 0 maxcount := 0 maxscore := -1.0 + maxrelscore := -1.0 for shift, count := range *shifts { delete((*shifts), shift) - score := float64(count) - if relscore { - over := -shift - switch { - case shift > 0: - over += lindex - case shift < 0: - over = seq.Len() - over - default: - over = min(lindex, seq.Len()) - } - score = score / float64(over-3) + selectscore := float64(count) + relativescore := float64(count) + over := -shift + switch { + case shift > 0: + over += lindex + case shift < 0: + over = seq.Len() - over + default: + over = min(lindex, seq.Len()) } - if score > maxscore { + relativescore = relativescore / float64(over-3) + if relscore { + selectscore = relativescore + } + + if selectscore > maxscore { maxshift = shift maxcount = count - maxscore = score + maxscore = selectscore + maxrelscore = relativescore } else { - if score == maxscore && shift < maxshift { + if selectscore == maxscore && obiutils.Abs(shift) < obiutils.Abs(maxshift) { maxshift = shift maxcount = count + maxrelscore = relativescore } } } - return maxshift, maxcount, maxscore + return maxshift, maxcount, maxrelscore } diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index b5d16ab..0d2dd9a 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "d3d15ac" +var _Commit = "4588bf8" var _Version = "Release 4.2.0" // Version returns the version of the obitools package.