mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch an aligment bug on obipairing
This commit is contained in:
@ -55,21 +55,21 @@ func _MatchScoreRatio(a, b byte) (float64, float64) {
|
||||
|
||||
l2 := math.Log(2)
|
||||
l3 := math.Log(3)
|
||||
l4 := math.Log(4)
|
||||
l10 := math.Log(10)
|
||||
lE1 := -float64(a)/10*l10 - l4
|
||||
lE2 := -float64(b)/10*l10 - l4
|
||||
lO1 := math.Log1p(-math.Exp(lE1 + l3))
|
||||
lO2 := math.Log1p(-math.Exp(lE2 + l3))
|
||||
lalea := math.Log(30) // 1 /(change of the random model)
|
||||
lE1 := -float64(a)/10*l10 - l3 // log proba of sequencing error on A/3
|
||||
lE2 := -float64(b)/10*l10 - l3 // log proba of sequencing error on B/3
|
||||
lO1 := math.Log1p(-math.Exp(lE1 + l3)) // log proba no being an error on A
|
||||
lO2 := math.Log1p(-math.Exp(lE2 + l3)) // log proba no being an error on B
|
||||
lO1O2 := lO1 + lO2
|
||||
lE1E2 := lE1 + lE2
|
||||
lO1E2 := lO1 + lE2
|
||||
lO2E1 := lO2 + lE1
|
||||
|
||||
MM := _Logaddexp(lO1O2, lE1E2+l3) + l4
|
||||
Mm := _Logaddexp(_Logaddexp(lO1E2, lO2E1), lE1E2+l2) + l4
|
||||
MM := _Logaddexp(lO1O2, lE1E2+l3) // Proba match when match observed
|
||||
Mm := _Logaddexp(_Logaddexp(lO1E2, lO2E1), lE1E2+l2) // Proba match when mismatch observed
|
||||
|
||||
return MM, Mm
|
||||
return MM + lalea, Mm + lalea
|
||||
}
|
||||
|
||||
func _InitNucPartMatch() {
|
||||
|
@ -52,6 +52,7 @@ func _GetMatrix(matrix *[]int, lenA, a, b int) int {
|
||||
}
|
||||
|
||||
func _GetMatrixFrom(matrix *[]int, lenA, a, b int) (int, int, int) {
|
||||
// Formula rechecked on 01/25/2023
|
||||
i := (b+1)*(lenA+1) + a
|
||||
j := i - lenA
|
||||
m := *matrix
|
||||
@ -68,10 +69,15 @@ func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int {
|
||||
return _NucScorePartMatchMismatch[qualA][qualB]
|
||||
default:
|
||||
return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) +
|
||||
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) + 0.5)
|
||||
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) +
|
||||
0.5)
|
||||
}
|
||||
}
|
||||
|
||||
// Gaps at the beginning of B and at the end of A are free
|
||||
// With A spanning over lines and B over columns
|
||||
// - First column gap = 0
|
||||
// - Last line gaps = 0
|
||||
func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
scoreMatrix, pathMatrix *[]int) int {
|
||||
|
||||
@ -95,6 +101,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
*scoreMatrix = (*scoreMatrix)[:needed]
|
||||
*pathMatrix = (*pathMatrix)[:needed]
|
||||
|
||||
// Sets the first position of the matrix with 0 score
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
|
||||
|
||||
// Fills the first column with score 0
|
||||
@ -102,17 +109,20 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, -1, 0, -1)
|
||||
}
|
||||
|
||||
la1 := la - 1
|
||||
la1 := la - 1 // Except the last line (gaps are free on it)
|
||||
|
||||
for j := 0; j < lb; j++ {
|
||||
|
||||
// Fill the first line with scores corresponding to a set of gaps
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, j, (j+1)*gapPenalty, 1)
|
||||
|
||||
for i := 0; i < la1; i++ {
|
||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
|
||||
|
||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j])
|
||||
left += gapPenalty
|
||||
top += gapPenalty
|
||||
|
||||
switch {
|
||||
case diag > left && diag > top:
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
|
||||
@ -143,6 +153,10 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
return _GetMatrix(scoreMatrix, la, la1, lb-1)
|
||||
}
|
||||
|
||||
// Gaps at the beginning of A and at the end of B are free
|
||||
// With A spanning over lines and B over columns
|
||||
// - First line gap = 0
|
||||
// - Last column gaps = 0
|
||||
func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
scoreMatrix, pathMatrix *[]int) int {
|
||||
|
||||
@ -166,17 +180,19 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
*scoreMatrix = (*scoreMatrix)[:needed]
|
||||
*pathMatrix = (*pathMatrix)[:needed]
|
||||
|
||||
// Sets the first position of the matrix with 0 score
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
|
||||
|
||||
// Fills the first column with score 0
|
||||
// Fills the first column with scores corresponding to a set of gaps
|
||||
for i := 0; i < la; i++ {
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, -1, (i+1)*gapPenalty, -1)
|
||||
}
|
||||
|
||||
lb1 := lb - 1
|
||||
lb1 := lb - 1 // Except the last column (gaps are free on it)
|
||||
|
||||
for j := 0; j < lb1; j++ {
|
||||
|
||||
// Fill the first line with zero score
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, j, 0, 1)
|
||||
|
||||
for i := 0; i < la; i++ {
|
||||
@ -187,7 +203,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
top += gapPenalty
|
||||
|
||||
switch {
|
||||
case diag > left && left > top:
|
||||
case diag > left && diag > top:
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
|
||||
case left > diag && left > top:
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1)
|
||||
@ -218,6 +234,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
}
|
||||
|
||||
return _GetMatrix(scoreMatrix, la, la-1, lb1)
|
||||
|
||||
}
|
||||
|
||||
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
||||
@ -295,11 +312,9 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
over = seqB.Len() + shift
|
||||
}
|
||||
|
||||
|
||||
// At least one mismatch exists in the overlaping region
|
||||
if fastScore+3 < over {
|
||||
|
||||
// At least one mismatch exists in the overlaping region
|
||||
|
||||
if shift > 0 {
|
||||
startA = shift - delta
|
||||
if startA < 0 {
|
||||
@ -319,8 +334,6 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
&arena.pointer.pathMatrix)
|
||||
} else {
|
||||
|
||||
// Both overlaping regions are identicals
|
||||
|
||||
startA = 0
|
||||
startB = -shift - delta
|
||||
if startB < 0 {
|
||||
@ -333,6 +346,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
rawSeqA = seqA.Sequence()[:partLen]
|
||||
qualSeqA = seqA.Qualities()[:partLen]
|
||||
extra3 = partLen - seqA.Len()
|
||||
|
||||
score = _FillMatrixPeRightAlign(
|
||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
|
||||
&arena.pointer.scoreMatrix,
|
||||
@ -344,6 +358,9 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
&arena.pointer.path)
|
||||
|
||||
} else {
|
||||
|
||||
// Both overlaping regions are identicals
|
||||
|
||||
if shift > 0 {
|
||||
startA = shift
|
||||
startB = 0
|
||||
|
@ -19,13 +19,19 @@ var _BioSequenceSlicePool = sync.Pool{
|
||||
}
|
||||
|
||||
// > This function returns a pointer to a new `BioSequenceSlice` object
|
||||
func NewBioSequenceSlice() *BioSequenceSlice {
|
||||
return _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
||||
func NewBioSequenceSlice(size ...int) *BioSequenceSlice {
|
||||
slice := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
||||
if len(size) > 0 {
|
||||
s := size[0]
|
||||
slice.InsureCapacity(s)
|
||||
(*slice)=(*slice)[0:s]
|
||||
}
|
||||
return slice
|
||||
}
|
||||
|
||||
// `MakeBioSequenceSlice()` returns a pointer to a new `BioSequenceSlice` struct
|
||||
func MakeBioSequenceSlice() BioSequenceSlice {
|
||||
return *NewBioSequenceSlice()
|
||||
func MakeBioSequenceSlice(size ...int) BioSequenceSlice {
|
||||
return *NewBioSequenceSlice(size...)
|
||||
}
|
||||
|
||||
func (s *BioSequenceSlice) Recycle() {
|
||||
|
@ -5,6 +5,10 @@ type SeqAnnotator func(*BioSequence)
|
||||
type SeqWorker func(*BioSequence) *BioSequence
|
||||
type SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice
|
||||
|
||||
func NilSeqWorker(seq *BioSequence) *BioSequence {
|
||||
return seq
|
||||
}
|
||||
|
||||
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
f := func(seq *BioSequence) *BioSequence {
|
||||
function(seq)
|
||||
@ -14,18 +18,76 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
}
|
||||
|
||||
func SeqToSliceWorker(worker SeqWorker, inplace bool) SeqSliceWorker {
|
||||
f := func(input BioSequenceSlice) BioSequenceSlice {
|
||||
output := input
|
||||
if (! inplace) {
|
||||
output = MakeBioSequenceSlice()
|
||||
var f SeqSliceWorker
|
||||
|
||||
if worker == nil {
|
||||
if inplace {
|
||||
f = func(input BioSequenceSlice) BioSequenceSlice {
|
||||
return input
|
||||
}
|
||||
} else {
|
||||
f = func(input BioSequenceSlice) BioSequenceSlice {
|
||||
output := MakeBioSequenceSlice(len(input))
|
||||
copy(output,input)
|
||||
return output
|
||||
}
|
||||
}
|
||||
for i,s := range(input) {
|
||||
} else {
|
||||
f = func(input BioSequenceSlice) BioSequenceSlice {
|
||||
output := input
|
||||
if !inplace {
|
||||
output = MakeBioSequenceSlice(len(input))
|
||||
}
|
||||
for i, s := range input {
|
||||
output[i] = worker(s)
|
||||
}
|
||||
|
||||
return output
|
||||
}
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func SeqToSliceConditionalWorker(worker SeqWorker,
|
||||
condition SequencePredicate,
|
||||
inplace bool) SeqSliceWorker {
|
||||
|
||||
if condition == nil {
|
||||
return SeqToSliceWorker(worker,inplace)
|
||||
}
|
||||
|
||||
f := func(input BioSequenceSlice) BioSequenceSlice {
|
||||
output := input
|
||||
if !inplace {
|
||||
output = MakeBioSequenceSlice(len(input))
|
||||
}
|
||||
for i, s := range input {
|
||||
if condition(s) {
|
||||
output[i] = worker(s)
|
||||
} else {
|
||||
output[i] = s
|
||||
}
|
||||
}
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func (worker SeqWorker) ChainWorkers(next SeqWorker) SeqWorker {
|
||||
if worker == nil {
|
||||
return next
|
||||
} else {
|
||||
if next == nil {
|
||||
return worker
|
||||
}
|
||||
}
|
||||
|
||||
f := func(seq *BioSequence) *BioSequence {
|
||||
return next(worker(seq))
|
||||
}
|
||||
|
||||
return f
|
||||
}
|
||||
|
Reference in New Issue
Block a user