Adds the option --pairing-scale to obipairing

Former-commit-id: c60416708467f5e818e70e08b3c512014b6212f0
This commit is contained in:
2023-12-07 12:28:16 +01:00
parent 008f33aee4
commit 37c3e16d5d
6 changed files with 42 additions and 27 deletions

View File

@ -41,6 +41,7 @@ func main() {
paired := obipairing.IAssemblePESequencesBatch(pairs,
obipairing.CLIGapPenality(),
obipairing.CLIPenalityScale(),
obipairing.CLIDelta(),
obipairing.CLIMinOverlap(),
obipairing.CLIMinIdentity(),

View File

@ -44,6 +44,7 @@ func main() {
paired := obitagpcr.IPCRTagPESequencesBatch(pairs,
obipairing.CLIGapPenality(),
obipairing.CLIPenalityScale(),
obipairing.CLIDelta(),
obipairing.CLIMinOverlap(),
obipairing.CLIMinIdentity(),

View File

@ -111,17 +111,17 @@ func _GetMatrixFrom(matrix *[]int, lenA, a, b int) (int, int, int) {
return m[i_left], m[i_diag], m[i_top]
}
func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int {
func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte, scale float64) int {
partMatch := _NucPartMatch[baseA&31][baseB&31]
// log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB)
switch int(partMatch * 100) {
case 100:
return _NucScorePartMatchMatch[qualA][qualB]
case 0:
return _NucScorePartMatchMismatch[qualA][qualB]
return int(float64(_NucScorePartMatchMismatch[qualA][qualB])*scale + 0.5)
default:
return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) +
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) +
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB])*scale +
0.5)
}
}
@ -135,7 +135,7 @@ func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int {
// - 0 : for diagonal
// - -1 : for top
// - +1 : for left
func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64,
scoreMatrix, pathMatrix *[]int) int {
la := len(seqA)
@ -143,7 +143,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
// The actual gap score is the gap score times the mismatch between
// two bases with a score of 40
gapPenalty := int(gap * float64(_NucScorePartMatchMismatch[40][40]))
gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5)
needed := (la + 1) * (lb + 1)
@ -177,7 +177,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top)
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j])
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale)
left += gapPenalty
top += gapPenalty
@ -195,7 +195,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
// Special case for the last line Left gap are free
left, diag, top := _GetMatrixFrom(scoreMatrix, la, la1, j)
diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j])
diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j], scale)
top += gapPenalty
switch {
@ -218,7 +218,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
// With A spanning over lines and B over columns
// - First line gap = 0
// - Last column gaps = 0
func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64,
scoreMatrix, pathMatrix *[]int) int {
la := len(seqA)
@ -226,7 +226,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
// The actual gap score is the gap score times the mismatch between
// two bases with a score of 40
gapPenalty := int(gap * float64(_NucScorePartMatchMismatch[40][40]))
gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5)
needed := (la + 1) * (lb + 1)
@ -259,7 +259,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
for i := 0; i < la; i++ {
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j])
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale)
left += gapPenalty
top += gapPenalty
@ -283,7 +283,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
for i := 0; i < la; i++ {
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1)
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1])
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1], scale)
left += gapPenalty
// log.Infof("LR: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, lb1, left, diag, top, _GetMatrix(scoreMatrix, la, i, lb1))
@ -302,7 +302,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
}
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
arena PEAlignArena) (int, []int) {
if !_InitializedDnaScore {
@ -315,7 +315,7 @@ func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
}
score := _FillMatrixPeLeftAlign(seqA.Sequence(), seqA.Qualities(),
seqB.Sequence(), seqB.Qualities(), gap,
seqB.Sequence(), seqB.Qualities(), gap, scale,
&arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix)
@ -326,7 +326,7 @@ func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
return score, arena.pointer.path
}
func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64,
func PERightAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
arena PEAlignArena) (int, []int) {
if !_InitializedDnaScore {
@ -339,7 +339,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64,
}
score := _FillMatrixPeRightAlign(seqA.Sequence(), seqA.Qualities(),
seqB.Sequence(), seqB.Qualities(), gap,
seqB.Sequence(), seqB.Qualities(), gap, scale,
&arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix)
@ -351,7 +351,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64,
}
func PEAlign(seqA, seqB *obiseq.BioSequence,
gap float64, fastAlign bool, delta int, fastScoreRel bool,
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
arena PEAlignArena) (int, []int, int, int, float64) {
var score, shift int
var startA, startB int
@ -403,7 +403,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
qualSeqB = seqB.Qualities()[0:partLen]
extra3 = seqB.Len() - partLen
score = _FillMatrixPeLeftAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix)
} else {
@ -425,7 +425,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
extra3 = partLen - seqA.Len()
score = _FillMatrixPeRightAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix)
}
@ -482,7 +482,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
qualSeqB = seqB.Qualities()
scoreR := _FillMatrixPeRightAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix)
@ -491,7 +491,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
&arena.pointer.path)
scoreL := _FillMatrixPeLeftAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix)

View File

@ -10,11 +10,12 @@ var _ForwardFile = ""
var _ReverseFile = ""
var _Delta = 5
var _MinOverlap = 20
var _GapPenality = float64(2.0)
var _GapPenality = 2.0
var _WithoutStats = false
var _MinIdentity = 0.9
var _NoFastAlign = false
var _FastScoreAbs = false
var _PenalityScale = 1.0
func PairingOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_ForwardFile, "forward-reads", "",
@ -38,6 +39,8 @@ func PairingOptionSet(options *getoptions.GetOpt) {
options.Float64Var(&_GapPenality, "gap-penality", _GapPenality,
options.Alias("G"),
options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2)."))
options.Float64Var(&_PenalityScale, "penality-scale", _PenalityScale,
options.Description("Scale factor applied to the mismatch score and the gap penality (default 1)."))
options.BoolVar(&_WithoutStats, "without-stat", _WithoutStats,
options.Alias("S"),
options.Description("Remove alignment statistics from the produced consensus sequences."))
@ -85,6 +88,10 @@ func CLIGapPenality() float64 {
return _GapPenality
}
func CLIPenalityScale() float64 {
return _PenalityScale
}
func CLIWithStats() bool {
return !_WithoutStats
}

View File

@ -106,11 +106,14 @@ func JoinPairedSequence(seqA, seqB *obiseq.BioSequence, inplace bool) *obiseq.Bi
// An obiseq.BioSequence corresponding to the assembling of the both
// input sequence.
func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
gap float64, delta, minOverlap int, minIdentity float64, withStats bool,
gap, scale float64, delta, minOverlap int, minIdentity float64, withStats bool,
inplace bool, fastAlign, fastModeRel bool,
arenaAlign obialign.PEAlignArena) *obiseq.BioSequence {
score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB, gap, fastAlign, delta, fastModeRel, arenaAlign)
score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB,
gap, scale,
fastAlign, delta, fastModeRel,
arenaAlign)
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true)
left := path[0]
@ -210,7 +213,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
// The function returns an iterator over batches of obiseq.Biosequence object.
// each pair of processed sequences produces one sequence in the result iterator.
func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
gap float64, delta, minOverlap int,
gap, scale float64, delta, minOverlap int,
minIdentity float64, fastAlign, fastModeRel,
withStats bool, sizes ...int) obiiter.IBioSequence {
@ -241,7 +244,9 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
cons := make(obiseq.BioSequenceSlice, len(batch.Slice()))
for i, A := range batch.Slice() {
B := A.PairedWith()
cons[i] = AssemblePESequences(A, B.ReverseComplement(true), gap, delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena)
cons[i] = AssemblePESequences(A, B.ReverseComplement(true),
gap, scale,
delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena)
}
newIter.Push(obiiter.MakeBioSequenceBatch(
batch.Order(),

View File

@ -13,7 +13,7 @@ import (
)
func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
gap float64, delta, minOverlap int,
gap, scale float64, delta, minOverlap int,
minIdentity float64, fastAlign, fastScoreRel,
withStats bool) obiiter.IBioSequence {
@ -50,7 +50,8 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
B := A.PairedWith()
consensus := obipairing.AssemblePESequences(
A.Copy(), B.ReverseComplement(false),
gap, delta, minOverlap, minIdentity, withStats, true,
gap, scale,
delta, minOverlap, minIdentity, withStats, true,
fastAlign, fastScoreRel, arena,
)