mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Patch an aligment bug on obipairing
This commit is contained in:
@ -55,21 +55,21 @@ func _MatchScoreRatio(a, b byte) (float64, float64) {
|
|||||||
|
|
||||||
l2 := math.Log(2)
|
l2 := math.Log(2)
|
||||||
l3 := math.Log(3)
|
l3 := math.Log(3)
|
||||||
l4 := math.Log(4)
|
|
||||||
l10 := math.Log(10)
|
l10 := math.Log(10)
|
||||||
lE1 := -float64(a)/10*l10 - l4
|
lalea := math.Log(30) // 1 /(change of the random model)
|
||||||
lE2 := -float64(b)/10*l10 - l4
|
lE1 := -float64(a)/10*l10 - l3 // log proba of sequencing error on A/3
|
||||||
lO1 := math.Log1p(-math.Exp(lE1 + l3))
|
lE2 := -float64(b)/10*l10 - l3 // log proba of sequencing error on B/3
|
||||||
lO2 := math.Log1p(-math.Exp(lE2 + l3))
|
lO1 := math.Log1p(-math.Exp(lE1 + l3)) // log proba no being an error on A
|
||||||
|
lO2 := math.Log1p(-math.Exp(lE2 + l3)) // log proba no being an error on B
|
||||||
lO1O2 := lO1 + lO2
|
lO1O2 := lO1 + lO2
|
||||||
lE1E2 := lE1 + lE2
|
lE1E2 := lE1 + lE2
|
||||||
lO1E2 := lO1 + lE2
|
lO1E2 := lO1 + lE2
|
||||||
lO2E1 := lO2 + lE1
|
lO2E1 := lO2 + lE1
|
||||||
|
|
||||||
MM := _Logaddexp(lO1O2, lE1E2+l3) + l4
|
MM := _Logaddexp(lO1O2, lE1E2+l3) // Proba match when match observed
|
||||||
Mm := _Logaddexp(_Logaddexp(lO1E2, lO2E1), lE1E2+l2) + l4
|
Mm := _Logaddexp(_Logaddexp(lO1E2, lO2E1), lE1E2+l2) // Proba match when mismatch observed
|
||||||
|
|
||||||
return MM, Mm
|
return MM + lalea, Mm + lalea
|
||||||
}
|
}
|
||||||
|
|
||||||
func _InitNucPartMatch() {
|
func _InitNucPartMatch() {
|
||||||
|
@ -52,6 +52,7 @@ func _GetMatrix(matrix *[]int, lenA, a, b int) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func _GetMatrixFrom(matrix *[]int, lenA, a, b int) (int, int, int) {
|
func _GetMatrixFrom(matrix *[]int, lenA, a, b int) (int, int, int) {
|
||||||
|
// Formula rechecked on 01/25/2023
|
||||||
i := (b+1)*(lenA+1) + a
|
i := (b+1)*(lenA+1) + a
|
||||||
j := i - lenA
|
j := i - lenA
|
||||||
m := *matrix
|
m := *matrix
|
||||||
@ -68,10 +69,15 @@ func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int {
|
|||||||
return _NucScorePartMatchMismatch[qualA][qualB]
|
return _NucScorePartMatchMismatch[qualA][qualB]
|
||||||
default:
|
default:
|
||||||
return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) +
|
return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) +
|
||||||
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) + 0.5)
|
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) +
|
||||||
|
0.5)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Gaps at the beginning of B and at the end of A are free
|
||||||
|
// With A spanning over lines and B over columns
|
||||||
|
// - First column gap = 0
|
||||||
|
// - Last line gaps = 0
|
||||||
func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||||
scoreMatrix, pathMatrix *[]int) int {
|
scoreMatrix, pathMatrix *[]int) int {
|
||||||
|
|
||||||
@ -95,6 +101,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
|||||||
*scoreMatrix = (*scoreMatrix)[:needed]
|
*scoreMatrix = (*scoreMatrix)[:needed]
|
||||||
*pathMatrix = (*pathMatrix)[:needed]
|
*pathMatrix = (*pathMatrix)[:needed]
|
||||||
|
|
||||||
|
// Sets the first position of the matrix with 0 score
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
|
_SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
|
||||||
|
|
||||||
// Fills the first column with score 0
|
// Fills the first column with score 0
|
||||||
@ -102,17 +109,20 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
|||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, -1, 0, -1)
|
_SetMatrices(scoreMatrix, pathMatrix, la, i, -1, 0, -1)
|
||||||
}
|
}
|
||||||
|
|
||||||
la1 := la - 1
|
la1 := la - 1 // Except the last line (gaps are free on it)
|
||||||
|
|
||||||
for j := 0; j < lb; j++ {
|
for j := 0; j < lb; j++ {
|
||||||
|
|
||||||
|
// Fill the first line with scores corresponding to a set of gaps
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, j, (j+1)*gapPenalty, 1)
|
_SetMatrices(scoreMatrix, pathMatrix, la, -1, j, (j+1)*gapPenalty, 1)
|
||||||
|
|
||||||
for i := 0; i < la1; i++ {
|
for i := 0; i < la1; i++ {
|
||||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
|
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
|
||||||
|
|
||||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j])
|
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j])
|
||||||
left += gapPenalty
|
left += gapPenalty
|
||||||
top += gapPenalty
|
top += gapPenalty
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case diag > left && diag > top:
|
case diag > left && diag > top:
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
|
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
|
||||||
@ -143,6 +153,10 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
|||||||
return _GetMatrix(scoreMatrix, la, la1, lb-1)
|
return _GetMatrix(scoreMatrix, la, la1, lb-1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Gaps at the beginning of A and at the end of B are free
|
||||||
|
// With A spanning over lines and B over columns
|
||||||
|
// - First line gap = 0
|
||||||
|
// - Last column gaps = 0
|
||||||
func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||||
scoreMatrix, pathMatrix *[]int) int {
|
scoreMatrix, pathMatrix *[]int) int {
|
||||||
|
|
||||||
@ -166,17 +180,19 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
|||||||
*scoreMatrix = (*scoreMatrix)[:needed]
|
*scoreMatrix = (*scoreMatrix)[:needed]
|
||||||
*pathMatrix = (*pathMatrix)[:needed]
|
*pathMatrix = (*pathMatrix)[:needed]
|
||||||
|
|
||||||
|
// Sets the first position of the matrix with 0 score
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
|
_SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
|
||||||
|
|
||||||
// Fills the first column with score 0
|
// Fills the first column with scores corresponding to a set of gaps
|
||||||
for i := 0; i < la; i++ {
|
for i := 0; i < la; i++ {
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, -1, (i+1)*gapPenalty, -1)
|
_SetMatrices(scoreMatrix, pathMatrix, la, i, -1, (i+1)*gapPenalty, -1)
|
||||||
}
|
}
|
||||||
|
|
||||||
lb1 := lb - 1
|
lb1 := lb - 1 // Except the last column (gaps are free on it)
|
||||||
|
|
||||||
for j := 0; j < lb1; j++ {
|
for j := 0; j < lb1; j++ {
|
||||||
|
|
||||||
|
// Fill the first line with zero score
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, j, 0, 1)
|
_SetMatrices(scoreMatrix, pathMatrix, la, -1, j, 0, 1)
|
||||||
|
|
||||||
for i := 0; i < la; i++ {
|
for i := 0; i < la; i++ {
|
||||||
@ -187,7 +203,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
|||||||
top += gapPenalty
|
top += gapPenalty
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case diag > left && left > top:
|
case diag > left && diag > top:
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
|
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
|
||||||
case left > diag && left > top:
|
case left > diag && left > top:
|
||||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1)
|
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1)
|
||||||
@ -218,6 +234,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
|||||||
}
|
}
|
||||||
|
|
||||||
return _GetMatrix(scoreMatrix, la, la-1, lb1)
|
return _GetMatrix(scoreMatrix, la, la-1, lb1)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
||||||
@ -295,10 +312,8 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
over = seqB.Len() + shift
|
over = seqB.Len() + shift
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if fastScore+3 < over {
|
|
||||||
|
|
||||||
// At least one mismatch exists in the overlaping region
|
// At least one mismatch exists in the overlaping region
|
||||||
|
if fastScore+3 < over {
|
||||||
|
|
||||||
if shift > 0 {
|
if shift > 0 {
|
||||||
startA = shift - delta
|
startA = shift - delta
|
||||||
@ -319,8 +334,6 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
&arena.pointer.pathMatrix)
|
&arena.pointer.pathMatrix)
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
// Both overlaping regions are identicals
|
|
||||||
|
|
||||||
startA = 0
|
startA = 0
|
||||||
startB = -shift - delta
|
startB = -shift - delta
|
||||||
if startB < 0 {
|
if startB < 0 {
|
||||||
@ -333,6 +346,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
rawSeqA = seqA.Sequence()[:partLen]
|
rawSeqA = seqA.Sequence()[:partLen]
|
||||||
qualSeqA = seqA.Qualities()[:partLen]
|
qualSeqA = seqA.Qualities()[:partLen]
|
||||||
extra3 = partLen - seqA.Len()
|
extra3 = partLen - seqA.Len()
|
||||||
|
|
||||||
score = _FillMatrixPeRightAlign(
|
score = _FillMatrixPeRightAlign(
|
||||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
|
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
|
||||||
&arena.pointer.scoreMatrix,
|
&arena.pointer.scoreMatrix,
|
||||||
@ -344,6 +358,9 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
|||||||
&arena.pointer.path)
|
&arena.pointer.path)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
// Both overlaping regions are identicals
|
||||||
|
|
||||||
if shift > 0 {
|
if shift > 0 {
|
||||||
startA = shift
|
startA = shift
|
||||||
startB = 0
|
startB = 0
|
||||||
|
@ -19,13 +19,19 @@ var _BioSequenceSlicePool = sync.Pool{
|
|||||||
}
|
}
|
||||||
|
|
||||||
// > This function returns a pointer to a new `BioSequenceSlice` object
|
// > This function returns a pointer to a new `BioSequenceSlice` object
|
||||||
func NewBioSequenceSlice() *BioSequenceSlice {
|
func NewBioSequenceSlice(size ...int) *BioSequenceSlice {
|
||||||
return _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
slice := _BioSequenceSlicePool.Get().(*BioSequenceSlice)
|
||||||
|
if len(size) > 0 {
|
||||||
|
s := size[0]
|
||||||
|
slice.InsureCapacity(s)
|
||||||
|
(*slice)=(*slice)[0:s]
|
||||||
|
}
|
||||||
|
return slice
|
||||||
}
|
}
|
||||||
|
|
||||||
// `MakeBioSequenceSlice()` returns a pointer to a new `BioSequenceSlice` struct
|
// `MakeBioSequenceSlice()` returns a pointer to a new `BioSequenceSlice` struct
|
||||||
func MakeBioSequenceSlice() BioSequenceSlice {
|
func MakeBioSequenceSlice(size ...int) BioSequenceSlice {
|
||||||
return *NewBioSequenceSlice()
|
return *NewBioSequenceSlice(size...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BioSequenceSlice) Recycle() {
|
func (s *BioSequenceSlice) Recycle() {
|
||||||
|
@ -5,6 +5,10 @@ type SeqAnnotator func(*BioSequence)
|
|||||||
type SeqWorker func(*BioSequence) *BioSequence
|
type SeqWorker func(*BioSequence) *BioSequence
|
||||||
type SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice
|
type SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice
|
||||||
|
|
||||||
|
func NilSeqWorker(seq *BioSequence) *BioSequence {
|
||||||
|
return seq
|
||||||
|
}
|
||||||
|
|
||||||
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||||
f := func(seq *BioSequence) *BioSequence {
|
f := func(seq *BioSequence) *BioSequence {
|
||||||
function(seq)
|
function(seq)
|
||||||
@ -14,13 +18,56 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func SeqToSliceWorker(worker SeqWorker, inplace bool) SeqSliceWorker {
|
func SeqToSliceWorker(worker SeqWorker, inplace bool) SeqSliceWorker {
|
||||||
|
var f SeqSliceWorker
|
||||||
|
|
||||||
|
if worker == nil {
|
||||||
|
if inplace {
|
||||||
|
f = func(input BioSequenceSlice) BioSequenceSlice {
|
||||||
|
return input
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
f = func(input BioSequenceSlice) BioSequenceSlice {
|
||||||
|
output := MakeBioSequenceSlice(len(input))
|
||||||
|
copy(output,input)
|
||||||
|
return output
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
f = func(input BioSequenceSlice) BioSequenceSlice {
|
||||||
|
output := input
|
||||||
|
if !inplace {
|
||||||
|
output = MakeBioSequenceSlice(len(input))
|
||||||
|
}
|
||||||
|
for i, s := range input {
|
||||||
|
output[i] = worker(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
return output
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func SeqToSliceConditionalWorker(worker SeqWorker,
|
||||||
|
condition SequencePredicate,
|
||||||
|
inplace bool) SeqSliceWorker {
|
||||||
|
|
||||||
|
if condition == nil {
|
||||||
|
return SeqToSliceWorker(worker,inplace)
|
||||||
|
}
|
||||||
|
|
||||||
f := func(input BioSequenceSlice) BioSequenceSlice {
|
f := func(input BioSequenceSlice) BioSequenceSlice {
|
||||||
output := input
|
output := input
|
||||||
if (! inplace) {
|
if !inplace {
|
||||||
output = MakeBioSequenceSlice()
|
output = MakeBioSequenceSlice(len(input))
|
||||||
}
|
}
|
||||||
for i,s := range(input) {
|
for i, s := range input {
|
||||||
|
if condition(s) {
|
||||||
output[i] = worker(s)
|
output[i] = worker(s)
|
||||||
|
} else {
|
||||||
|
output[i] = s
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return output
|
return output
|
||||||
@ -29,3 +76,18 @@ func SeqToSliceWorker(worker SeqWorker, inplace bool) SeqSliceWorker {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (worker SeqWorker) ChainWorkers(next SeqWorker) SeqWorker {
|
||||||
|
if worker == nil {
|
||||||
|
return next
|
||||||
|
} else {
|
||||||
|
if next == nil {
|
||||||
|
return worker
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
f := func(seq *BioSequence) *BioSequence {
|
||||||
|
return next(worker(seq))
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user