diff --git a/cmd/obitools/obipairing/main.go b/cmd/obitools/obipairing/main.go index 299cfcf..9948a3c 100644 --- a/cmd/obitools/obipairing/main.go +++ b/cmd/obitools/obipairing/main.go @@ -41,6 +41,7 @@ func main() { paired := obipairing.IAssemblePESequencesBatch(pairs, obipairing.CLIGapPenality(), + obipairing.CLIPenalityScale(), obipairing.CLIDelta(), obipairing.CLIMinOverlap(), obipairing.CLIMinIdentity(), diff --git a/cmd/obitools/obitagpcr/main.go b/cmd/obitools/obitagpcr/main.go index 1f5250c..bafa137 100644 --- a/cmd/obitools/obitagpcr/main.go +++ b/cmd/obitools/obitagpcr/main.go @@ -44,6 +44,7 @@ func main() { paired := obitagpcr.IPCRTagPESequencesBatch(pairs, obipairing.CLIGapPenality(), + obipairing.CLIPenalityScale(), obipairing.CLIDelta(), obipairing.CLIMinOverlap(), obipairing.CLIMinIdentity(), diff --git a/pkg/obialign/pairedendalign.go b/pkg/obialign/pairedendalign.go index e779043..dbe429e 100644 --- a/pkg/obialign/pairedendalign.go +++ b/pkg/obialign/pairedendalign.go @@ -111,17 +111,17 @@ func _GetMatrixFrom(matrix *[]int, lenA, a, b int) (int, int, int) { return m[i_left], m[i_diag], m[i_top] } -func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int { +func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte, scale float64) int { partMatch := _NucPartMatch[baseA&31][baseB&31] // log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB) switch int(partMatch * 100) { case 100: return _NucScorePartMatchMatch[qualA][qualB] case 0: - return _NucScorePartMatchMismatch[qualA][qualB] + return int(float64(_NucScorePartMatchMismatch[qualA][qualB])*scale + 0.5) default: return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) + - (1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) + + (1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB])*scale + 0.5) } } @@ -135,7 +135,7 @@ func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int { // - 0 : for diagonal // - -1 : for top // - +1 : for left -func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64, +func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64, scoreMatrix, pathMatrix *[]int) int { la := len(seqA) @@ -143,7 +143,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64, // The actual gap score is the gap score times the mismatch between // two bases with a score of 40 - gapPenalty := int(gap * float64(_NucScorePartMatchMismatch[40][40])) + gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5) needed := (la + 1) * (lb + 1) @@ -177,7 +177,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64, left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j) // log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top) - diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j]) + diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale) left += gapPenalty top += gapPenalty @@ -195,7 +195,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64, // Special case for the last line Left gap are free left, diag, top := _GetMatrixFrom(scoreMatrix, la, la1, j) - diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j]) + diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j], scale) top += gapPenalty switch { @@ -218,7 +218,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64, // With A spanning over lines and B over columns // - First line gap = 0 // - Last column gaps = 0 -func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64, +func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64, scoreMatrix, pathMatrix *[]int) int { la := len(seqA) @@ -226,7 +226,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64, // The actual gap score is the gap score times the mismatch between // two bases with a score of 40 - gapPenalty := int(gap * float64(_NucScorePartMatchMismatch[40][40])) + gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5) needed := (la + 1) * (lb + 1) @@ -259,7 +259,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64, for i := 0; i < la; i++ { left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j) - diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j]) + diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale) left += gapPenalty top += gapPenalty @@ -283,7 +283,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64, for i := 0; i < la; i++ { left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1) - diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1]) + diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1], scale) left += gapPenalty // log.Infof("LR: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, lb1, left, diag, top, _GetMatrix(scoreMatrix, la, i, lb1)) @@ -302,7 +302,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64, } -func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64, +func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, arena PEAlignArena) (int, []int) { if !_InitializedDnaScore { @@ -315,7 +315,7 @@ func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64, } score := _FillMatrixPeLeftAlign(seqA.Sequence(), seqA.Qualities(), - seqB.Sequence(), seqB.Qualities(), gap, + seqB.Sequence(), seqB.Qualities(), gap, scale, &arena.pointer.scoreMatrix, &arena.pointer.pathMatrix) @@ -326,7 +326,7 @@ func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64, return score, arena.pointer.path } -func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64, +func PERightAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64, arena PEAlignArena) (int, []int) { if !_InitializedDnaScore { @@ -339,7 +339,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64, } score := _FillMatrixPeRightAlign(seqA.Sequence(), seqA.Qualities(), - seqB.Sequence(), seqB.Qualities(), gap, + seqB.Sequence(), seqB.Qualities(), gap, scale, &arena.pointer.scoreMatrix, &arena.pointer.pathMatrix) @@ -351,7 +351,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64, } func PEAlign(seqA, seqB *obiseq.BioSequence, - gap float64, fastAlign bool, delta int, fastScoreRel bool, + gap, scale float64, fastAlign bool, delta int, fastScoreRel bool, arena PEAlignArena) (int, []int, int, int, float64) { var score, shift int var startA, startB int @@ -403,7 +403,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, qualSeqB = seqB.Qualities()[0:partLen] extra3 = seqB.Len() - partLen score = _FillMatrixPeLeftAlign( - rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, + rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale, &arena.pointer.scoreMatrix, &arena.pointer.pathMatrix) } else { @@ -425,7 +425,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, extra3 = partLen - seqA.Len() score = _FillMatrixPeRightAlign( - rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, + rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale, &arena.pointer.scoreMatrix, &arena.pointer.pathMatrix) } @@ -482,7 +482,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, qualSeqB = seqB.Qualities() scoreR := _FillMatrixPeRightAlign( - rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, + rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale, &arena.pointer.scoreMatrix, &arena.pointer.pathMatrix) @@ -491,7 +491,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence, &arena.pointer.path) scoreL := _FillMatrixPeLeftAlign( - rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, + rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale, &arena.pointer.scoreMatrix, &arena.pointer.pathMatrix) diff --git a/pkg/obitools/obipairing/options.go b/pkg/obitools/obipairing/options.go index 5bc2983..a2b0666 100644 --- a/pkg/obitools/obipairing/options.go +++ b/pkg/obitools/obipairing/options.go @@ -10,11 +10,12 @@ var _ForwardFile = "" var _ReverseFile = "" var _Delta = 5 var _MinOverlap = 20 -var _GapPenality = float64(2.0) +var _GapPenality = 2.0 var _WithoutStats = false var _MinIdentity = 0.9 var _NoFastAlign = false var _FastScoreAbs = false +var _PenalityScale = 1.0 func PairingOptionSet(options *getoptions.GetOpt) { options.StringVar(&_ForwardFile, "forward-reads", "", @@ -38,6 +39,8 @@ func PairingOptionSet(options *getoptions.GetOpt) { options.Float64Var(&_GapPenality, "gap-penality", _GapPenality, options.Alias("G"), options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2).")) + options.Float64Var(&_PenalityScale, "penality-scale", _PenalityScale, + options.Description("Scale factor applied to the mismatch score and the gap penality (default 1).")) options.BoolVar(&_WithoutStats, "without-stat", _WithoutStats, options.Alias("S"), options.Description("Remove alignment statistics from the produced consensus sequences.")) @@ -85,6 +88,10 @@ func CLIGapPenality() float64 { return _GapPenality } +func CLIPenalityScale() float64 { + return _PenalityScale +} + func CLIWithStats() bool { return !_WithoutStats } diff --git a/pkg/obitools/obipairing/pairing.go b/pkg/obitools/obipairing/pairing.go index c51e521..8589e20 100644 --- a/pkg/obitools/obipairing/pairing.go +++ b/pkg/obitools/obipairing/pairing.go @@ -106,11 +106,14 @@ func JoinPairedSequence(seqA, seqB *obiseq.BioSequence, inplace bool) *obiseq.Bi // An obiseq.BioSequence corresponding to the assembling of the both // input sequence. func AssemblePESequences(seqA, seqB *obiseq.BioSequence, - gap float64, delta, minOverlap int, minIdentity float64, withStats bool, + gap, scale float64, delta, minOverlap int, minIdentity float64, withStats bool, inplace bool, fastAlign, fastModeRel bool, arenaAlign obialign.PEAlignArena) *obiseq.BioSequence { - score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB, gap, fastAlign, delta, fastModeRel, arenaAlign) + score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB, + gap, scale, + fastAlign, delta, fastModeRel, + arenaAlign) cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true) left := path[0] @@ -210,7 +213,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence, // The function returns an iterator over batches of obiseq.Biosequence object. // each pair of processed sequences produces one sequence in the result iterator. func IAssemblePESequencesBatch(iterator obiiter.IBioSequence, - gap float64, delta, minOverlap int, + gap, scale float64, delta, minOverlap int, minIdentity float64, fastAlign, fastModeRel, withStats bool, sizes ...int) obiiter.IBioSequence { @@ -241,7 +244,9 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence, cons := make(obiseq.BioSequenceSlice, len(batch.Slice())) for i, A := range batch.Slice() { B := A.PairedWith() - cons[i] = AssemblePESequences(A, B.ReverseComplement(true), gap, delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena) + cons[i] = AssemblePESequences(A, B.ReverseComplement(true), + gap, scale, + delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena) } newIter.Push(obiiter.MakeBioSequenceBatch( batch.Order(), diff --git a/pkg/obitools/obitagpcr/pcrtag.go b/pkg/obitools/obitagpcr/pcrtag.go index 8e565f0..d8cbd40 100644 --- a/pkg/obitools/obitagpcr/pcrtag.go +++ b/pkg/obitools/obitagpcr/pcrtag.go @@ -13,7 +13,7 @@ import ( ) func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence, - gap float64, delta, minOverlap int, + gap, scale float64, delta, minOverlap int, minIdentity float64, fastAlign, fastScoreRel, withStats bool) obiiter.IBioSequence { @@ -50,7 +50,8 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence, B := A.PairedWith() consensus := obipairing.AssemblePESequences( A.Copy(), B.ReverseComplement(false), - gap, delta, minOverlap, minIdentity, withStats, true, + gap, scale, + delta, minOverlap, minIdentity, withStats, true, fastAlign, fastScoreRel, arena, )