mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
EndGapFree alignments
This commit is contained in:
@ -12,7 +12,7 @@ func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int {
|
||||
cp := cap(*path)
|
||||
(*path) = slices.Grow((*path), needed)
|
||||
if cp < cap(*path) {
|
||||
log.Infof("Resized path from %d to %d\n", cp, cap(*path))
|
||||
log.Debugf("Resized path from %d to %d\n", cp, cap(*path))
|
||||
}
|
||||
p := cap(*path)
|
||||
*path = (*path)[:p]
|
||||
|
@ -1,6 +1,8 @@
|
||||
package obialign
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
)
|
||||
@ -313,6 +315,105 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64
|
||||
|
||||
}
|
||||
|
||||
// Gaps at the beginning and at the end of seqA are free
|
||||
// With seqA spanning over lines and seqB over columns
|
||||
//
|
||||
// SeqA must be the longer sequence. If that constraint is not
|
||||
// respected, the function will panic.
|
||||
//
|
||||
// TO BE FINISHED
|
||||
// - First column gap = 0
|
||||
// - Last column gaps = 0
|
||||
//
|
||||
// Paths are encoded :
|
||||
// - 0 : for diagonal
|
||||
// - -1 : for top
|
||||
// - +1 : for left
|
||||
func _FillMatrixPeCenterAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64,
|
||||
scoreMatrix, pathMatrix *[]int) int {
|
||||
|
||||
la := len(seqA)
|
||||
lb := len(seqB)
|
||||
|
||||
if len(seqA) < len(seqB) {
|
||||
log.Panicf("len(seqA) < len(seqB) : %d < %d", len(seqA), len(seqB))
|
||||
}
|
||||
|
||||
// The actual gap score is the gap score times the mismatch between
|
||||
// two bases with a score of 40
|
||||
gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5)
|
||||
|
||||
needed := (la + 1) * (lb + 1)
|
||||
|
||||
if needed > cap(*scoreMatrix) {
|
||||
*scoreMatrix = make([]int, needed)
|
||||
}
|
||||
|
||||
if needed > cap(*pathMatrix) {
|
||||
*pathMatrix = make([]int, needed)
|
||||
}
|
||||
|
||||
*scoreMatrix = (*scoreMatrix)[:needed]
|
||||
*pathMatrix = (*pathMatrix)[:needed]
|
||||
|
||||
// Sets the first position of the matrix with 0 score
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0)
|
||||
|
||||
// Fills the first column with score 0
|
||||
for i := 0; i < la; i++ {
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, -1, 0, -1)
|
||||
}
|
||||
|
||||
// la1 := la - 1 // Except the last line (gaps are free on it)
|
||||
lb1 := lb - 1 // Except the last column (gaps are free on it)
|
||||
|
||||
for j := 0; j < lb1; j++ {
|
||||
|
||||
// Fill the first line with scores corresponding to a set of gaps
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, -1, j, (j+1)*gapPenalty, 1)
|
||||
|
||||
for i := 0; i < la; i++ {
|
||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
|
||||
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top)
|
||||
|
||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale)
|
||||
left += gapPenalty
|
||||
top += gapPenalty
|
||||
|
||||
switch {
|
||||
case diag >= left && diag >= top:
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0)
|
||||
case left >= diag && left >= top:
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1)
|
||||
default:
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, j, top, -1)
|
||||
}
|
||||
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, j, left, diag, top, _GetMatrix(scoreMatrix, la, i, j))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for i := 0; i < la; i++ {
|
||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1)
|
||||
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top)
|
||||
|
||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1], scale)
|
||||
left += gapPenalty
|
||||
|
||||
switch {
|
||||
case diag >= left && diag >= top:
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, diag, 0)
|
||||
case left >= diag && left >= top:
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, left, +1)
|
||||
default:
|
||||
_SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, top, -1)
|
||||
}
|
||||
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, j, left, diag, top, _GetMatrix(scoreMatrix, la, i, j))
|
||||
}
|
||||
|
||||
return _GetMatrix(scoreMatrix, la, la-1, lb1)
|
||||
}
|
||||
|
||||
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
||||
arena PEAlignArena) (int, []int) {
|
||||
|
||||
@ -359,6 +460,29 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
||||
return score, path
|
||||
}
|
||||
|
||||
func PECenterAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
||||
arena PEAlignArena) (int, []int) {
|
||||
|
||||
if !_InitializedDnaScore {
|
||||
_InitDNAScoreMatrix()
|
||||
}
|
||||
|
||||
if arena.pointer == nil {
|
||||
arena = MakePEAlignArena(seqA.Len(), seqB.Len())
|
||||
}
|
||||
|
||||
score := _FillMatrixPeCenterAlign(seqA.Sequence(), seqA.Qualities(),
|
||||
seqB.Sequence(), seqB.Qualities(), gap, scale,
|
||||
&arena.pointer.scoreMatrix,
|
||||
&arena.pointer.pathMatrix)
|
||||
|
||||
path := _Backtracking(arena.pointer.pathMatrix,
|
||||
seqA.Len(), seqB.Len(),
|
||||
&arena.pointer.path)
|
||||
|
||||
return score, path
|
||||
}
|
||||
|
||||
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
|
||||
arena PEAlignArena, shift_buff *map[int]int) (int, []int, int, int, float64) {
|
||||
|
@ -7,7 +7,7 @@ import (
|
||||
// TODO: The version number is extracted from git. This induces that the version
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// commited
|
||||
var _Commit = "65ae826"
|
||||
var _Commit = "05bf2bf"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
@ -48,6 +48,11 @@ func MakeKmerAlignWorker[T obifp.FPUint[T]](
|
||||
*slice = (*slice)[:0]
|
||||
|
||||
candidates := matches.Sequences()
|
||||
ks := k.Kmersize
|
||||
if k.SparseAt >= 0 {
|
||||
ks--
|
||||
}
|
||||
|
||||
n := candidates.Len()
|
||||
for _, seq := range candidates {
|
||||
idmatched_id := seq.Id()
|
||||
@ -74,6 +79,7 @@ func MakeKmerAlignWorker[T obifp.FPUint[T]](
|
||||
lcons := cons.Len()
|
||||
aliLength := lcons - _Abs(left) - _Abs(right)
|
||||
identity := float64(match) / float64(aliLength)
|
||||
residual := float64(match-int(ks)) / float64(aliLength-int(ks))
|
||||
if aliLength == 0 {
|
||||
identity = 0
|
||||
}
|
||||
@ -93,7 +99,7 @@ func MakeKmerAlignWorker[T obifp.FPUint[T]](
|
||||
rep.SetAttribute("obikmer_orientation", "reverse")
|
||||
}
|
||||
|
||||
if aliLength >= int(k.KmerSize()) && identity >= minIdentity {
|
||||
if aliLength >= int(k.KmerSize()) && residual >= minIdentity {
|
||||
if withStats {
|
||||
if left < 0 {
|
||||
rep.SetAttribute("seq_a_single", -left)
|
||||
@ -110,6 +116,8 @@ func MakeKmerAlignWorker[T obifp.FPUint[T]](
|
||||
rep.SetAttribute("seq_b_single", right)
|
||||
}
|
||||
rep.SetAttribute("obikmer_score", score)
|
||||
rep.SetAttribute("obikmer_identity", identity)
|
||||
rep.SetAttribute("obikmer_residual_id", residual)
|
||||
scoreNorm := float64(0)
|
||||
if aliLength > 0 {
|
||||
scoreNorm = math.Round(float64(match)/float64(aliLength)*1000) / 1000
|
||||
|
Reference in New Issue
Block a user