diff --git a/pkg/obialign/backtracking.go b/pkg/obialign/backtracking.go index d67b239..90ee8fe 100644 --- a/pkg/obialign/backtracking.go +++ b/pkg/obialign/backtracking.go @@ -12,7 +12,7 @@ func _Backtracking(pathMatrix []int, lseqA, lseqB int, path *[]int) []int { cp := cap(*path) (*path) = slices.Grow((*path), needed) if cp < cap(*path) { - log.Infof("Resized path from %d to %d\n", cp, cap(*path)) + log.Debugf("Resized path from %d to %d\n", cp, cap(*path)) } p := cap(*path) *path = (*path)[:p] diff --git a/pkg/obialign/pairedendalign.go b/pkg/obialign/pairedendalign.go index c03dae3..8eb7609 100644 --- a/pkg/obialign/pairedendalign.go +++ b/pkg/obialign/pairedendalign.go @@ -1,6 +1,8 @@ package obialign import ( + "log" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -313,6 +315,105 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64 } +// Gaps at the beginning and at the end of seqA are free +// With seqA spanning over lines and seqB over columns +// +// SeqA must be the longer sequence. If that constraint is not +// respected, the function will panic. +// +// TO BE FINISHED +// - First column gap = 0 +// - Last column gaps = 0 +// +// Paths are encoded : +// - 0 : for diagonal +// - -1 : for top +// - +1 : for left +func _FillMatrixPeCenterAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64, + scoreMatrix, pathMatrix *[]int) int { + + la := len(seqA) + lb := len(seqB) + + if len(seqA) < len(seqB) { + log.Panicf("len(seqA) < len(seqB) : %d < %d", len(seqA), len(seqB)) + } + + // The actual gap score is the gap score times the mismatch between + // two bases with a score of 40 + gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5) + + needed := (la + 1) * (lb + 1) + + if needed > cap(*scoreMatrix) { + *scoreMatrix = make([]int, needed) + } + + if needed > cap(*pathMatrix) { + *pathMatrix = make([]int, needed) + } + + *scoreMatrix = (*scoreMatrix)[:needed] + *pathMatrix = (*pathMatrix)[:needed] + + // Sets the first position of the matrix with 0 score + _SetMatrices(scoreMatrix, pathMatrix, la, -1, -1, 0, 0) + + // Fills the first column with score 0 + for i := 0; i < la; i++ { + _SetMatrices(scoreMatrix, pathMatrix, la, i, -1, 0, -1) + } + + // la1 := la - 1 // Except the last line (gaps are free on it) + lb1 := lb - 1 // Except the last column (gaps are free on it) + + for j := 0; j < lb1; j++ { + + // Fill the first line with scores corresponding to a set of gaps + _SetMatrices(scoreMatrix, pathMatrix, la, -1, j, (j+1)*gapPenalty, 1) + + for i := 0; i < la; i++ { + left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j) + // log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top) + + diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale) + left += gapPenalty + top += gapPenalty + + switch { + case diag >= left && diag >= top: + _SetMatrices(scoreMatrix, pathMatrix, la, i, j, diag, 0) + case left >= diag && left >= top: + _SetMatrices(scoreMatrix, pathMatrix, la, i, j, left, +1) + default: + _SetMatrices(scoreMatrix, pathMatrix, la, i, j, top, -1) + } + // log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, j, left, diag, top, _GetMatrix(scoreMatrix, la, i, j)) + } + + } + + for i := 0; i < la; i++ { + left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1) + // log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top) + + diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1], scale) + left += gapPenalty + + switch { + case diag >= left && diag >= top: + _SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, diag, 0) + case left >= diag && left >= top: + _SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, left, +1) + default: + _SetMatrices(scoreMatrix, pathMatrix, la, i, lb1, top, -1) + } + // log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, j, left, diag, top, _GetMatrix(scoreMatrix, la, i, j)) + } + + return _GetMatrix(scoreMatrix, la, la-1, lb1) +} + func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, arena PEAlignArena) (int, []int) { @@ -359,6 +460,29 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, return score, path } +func PECenterAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, + arena PEAlignArena) (int, []int) { + + if !_InitializedDnaScore { + _InitDNAScoreMatrix() + } + + if arena.pointer == nil { + arena = MakePEAlignArena(seqA.Len(), seqB.Len()) + } + + score := _FillMatrixPeCenterAlign(seqA.Sequence(), seqA.Qualities(), + seqB.Sequence(), seqB.Qualities(), gap, scale, + &arena.pointer.scoreMatrix, + &arena.pointer.pathMatrix) + + path := _Backtracking(arena.pointer.pathMatrix, + seqA.Len(), seqB.Len(), + &arena.pointer.path) + + return score, path +} + func PEAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, fastAlign bool, delta int, fastScoreRel bool, arena PEAlignArena, shift_buff *map[int]int) (int, []int, int, int, float64) { diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 075fddf..d111bc8 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -7,7 +7,7 @@ import ( // TODO: The version number is extracted from git. This induces that the version // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "65ae826" +var _Commit = "05bf2bf" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obitools/obikmersim/obikmersim.go b/pkg/obitools/obikmersim/obikmersim.go index b0b4a7c..e96862d 100644 --- a/pkg/obitools/obikmersim/obikmersim.go +++ b/pkg/obitools/obikmersim/obikmersim.go @@ -48,6 +48,11 @@ func MakeKmerAlignWorker[T obifp.FPUint[T]]( *slice = (*slice)[:0] candidates := matches.Sequences() + ks := k.Kmersize + if k.SparseAt >= 0 { + ks-- + } + n := candidates.Len() for _, seq := range candidates { idmatched_id := seq.Id() @@ -74,6 +79,7 @@ func MakeKmerAlignWorker[T obifp.FPUint[T]]( lcons := cons.Len() aliLength := lcons - _Abs(left) - _Abs(right) identity := float64(match) / float64(aliLength) + residual := float64(match-int(ks)) / float64(aliLength-int(ks)) if aliLength == 0 { identity = 0 } @@ -93,7 +99,7 @@ func MakeKmerAlignWorker[T obifp.FPUint[T]]( rep.SetAttribute("obikmer_orientation", "reverse") } - if aliLength >= int(k.KmerSize()) && identity >= minIdentity { + if aliLength >= int(k.KmerSize()) && residual >= minIdentity { if withStats { if left < 0 { rep.SetAttribute("seq_a_single", -left) @@ -110,6 +116,8 @@ func MakeKmerAlignWorker[T obifp.FPUint[T]]( rep.SetAttribute("seq_b_single", right) } rep.SetAttribute("obikmer_score", score) + rep.SetAttribute("obikmer_identity", identity) + rep.SetAttribute("obikmer_residual_id", residual) scoreNorm := float64(0) if aliLength > 0 { scoreNorm = math.Round(float64(match)/float64(aliLength)*1000) / 1000