Adds the option --penality-scale to obipairing

Former-commit-id: c60416708467f5e818e70e08b3c512014b6212f0
This commit is contained in:
2023-12-07 12:28:16 +01:00
parent 008f33aee4
commit 37c3e16d5d
6 changed files with 42 additions and 27 deletions

View File

@ -41,6 +41,7 @@ func main() {
paired := obipairing.IAssemblePESequencesBatch(pairs, paired := obipairing.IAssemblePESequencesBatch(pairs,
obipairing.CLIGapPenality(), obipairing.CLIGapPenality(),
obipairing.CLIPenalityScale(),
obipairing.CLIDelta(), obipairing.CLIDelta(),
obipairing.CLIMinOverlap(), obipairing.CLIMinOverlap(),
obipairing.CLIMinIdentity(), obipairing.CLIMinIdentity(),

View File

@ -44,6 +44,7 @@ func main() {
paired := obitagpcr.IPCRTagPESequencesBatch(pairs, paired := obitagpcr.IPCRTagPESequencesBatch(pairs,
obipairing.CLIGapPenality(), obipairing.CLIGapPenality(),
obipairing.CLIPenalityScale(),
obipairing.CLIDelta(), obipairing.CLIDelta(),
obipairing.CLIMinOverlap(), obipairing.CLIMinOverlap(),
obipairing.CLIMinIdentity(), obipairing.CLIMinIdentity(),

View File

@ -111,17 +111,17 @@ func _GetMatrixFrom(matrix *[]int, lenA, a, b int) (int, int, int) {
return m[i_left], m[i_diag], m[i_top] return m[i_left], m[i_diag], m[i_top]
} }
func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int { func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte, scale float64) int {
partMatch := _NucPartMatch[baseA&31][baseB&31] partMatch := _NucPartMatch[baseA&31][baseB&31]
// log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB) // log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB)
switch int(partMatch * 100) { switch int(partMatch * 100) {
case 100: case 100:
return _NucScorePartMatchMatch[qualA][qualB] return _NucScorePartMatchMatch[qualA][qualB]
case 0: case 0:
return _NucScorePartMatchMismatch[qualA][qualB] return int(float64(_NucScorePartMatchMismatch[qualA][qualB])*scale + 0.5)
default: default:
return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) + return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) +
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) + (1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB])*scale +
0.5) 0.5)
} }
} }
@ -135,7 +135,7 @@ func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int {
// - 0 : for diagonal // - 0 : for diagonal
// - -1 : for top // - -1 : for top
// - +1 : for left // - +1 : for left
func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64, func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64,
scoreMatrix, pathMatrix *[]int) int { scoreMatrix, pathMatrix *[]int) int {
la := len(seqA) la := len(seqA)
@ -143,7 +143,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
// The actual gap score is the gap score times the mismatch between // The actual gap score is the gap score times the mismatch between
// two bases with a score of 40 // two bases with a score of 40
gapPenalty := int(gap * float64(_NucScorePartMatchMismatch[40][40])) gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5)
needed := (la + 1) * (lb + 1) needed := (la + 1) * (lb + 1)
@ -177,7 +177,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j) left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top) // log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top)
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j]) diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale)
left += gapPenalty left += gapPenalty
top += gapPenalty top += gapPenalty
@ -195,7 +195,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
// Special case for the last line Left gap are free // Special case for the last line Left gap are free
left, diag, top := _GetMatrixFrom(scoreMatrix, la, la1, j) left, diag, top := _GetMatrixFrom(scoreMatrix, la, la1, j)
diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j]) diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j], scale)
top += gapPenalty top += gapPenalty
switch { switch {
@ -218,7 +218,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
// With A spanning over lines and B over columns // With A spanning over lines and B over columns
// - First line gap = 0 // - First line gap = 0
// - Last column gaps = 0 // - Last column gaps = 0
func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64, func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64,
scoreMatrix, pathMatrix *[]int) int { scoreMatrix, pathMatrix *[]int) int {
la := len(seqA) la := len(seqA)
@ -226,7 +226,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
// The actual gap score is the gap score times the mismatch between // The actual gap score is the gap score times the mismatch between
// two bases with a score of 40 // two bases with a score of 40
gapPenalty := int(gap * float64(_NucScorePartMatchMismatch[40][40])) gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5)
needed := (la + 1) * (lb + 1) needed := (la + 1) * (lb + 1)
@ -259,7 +259,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
for i := 0; i < la; i++ { for i := 0; i < la; i++ {
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j) left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j]) diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale)
left += gapPenalty left += gapPenalty
top += gapPenalty top += gapPenalty
@ -283,7 +283,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
for i := 0; i < la; i++ { for i := 0; i < la; i++ {
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1) left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1)
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1]) diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1], scale)
left += gapPenalty left += gapPenalty
// log.Infof("LR: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, lb1, left, diag, top, _GetMatrix(scoreMatrix, la, i, lb1)) // log.Infof("LR: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, lb1, left, diag, top, _GetMatrix(scoreMatrix, la, i, lb1))
@ -302,7 +302,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
} }
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64, func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
arena PEAlignArena) (int, []int) { arena PEAlignArena) (int, []int) {
if !_InitializedDnaScore { if !_InitializedDnaScore {
@ -315,7 +315,7 @@ func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
} }
score := _FillMatrixPeLeftAlign(seqA.Sequence(), seqA.Qualities(), score := _FillMatrixPeLeftAlign(seqA.Sequence(), seqA.Qualities(),
seqB.Sequence(), seqB.Qualities(), gap, seqB.Sequence(), seqB.Qualities(), gap, scale,
&arena.pointer.scoreMatrix, &arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix) &arena.pointer.pathMatrix)
@ -326,7 +326,7 @@ func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
return score, arena.pointer.path return score, arena.pointer.path
} }
func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64, func PERightAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
arena PEAlignArena) (int, []int) { arena PEAlignArena) (int, []int) {
if !_InitializedDnaScore { if !_InitializedDnaScore {
@ -339,7 +339,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64,
} }
score := _FillMatrixPeRightAlign(seqA.Sequence(), seqA.Qualities(), score := _FillMatrixPeRightAlign(seqA.Sequence(), seqA.Qualities(),
seqB.Sequence(), seqB.Qualities(), gap, seqB.Sequence(), seqB.Qualities(), gap, scale,
&arena.pointer.scoreMatrix, &arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix) &arena.pointer.pathMatrix)
@ -351,7 +351,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64,
} }
func PEAlign(seqA, seqB *obiseq.BioSequence, func PEAlign(seqA, seqB *obiseq.BioSequence,
gap float64, fastAlign bool, delta int, fastScoreRel bool, gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
arena PEAlignArena) (int, []int, int, int, float64) { arena PEAlignArena) (int, []int, int, int, float64) {
var score, shift int var score, shift int
var startA, startB int var startA, startB int
@ -403,7 +403,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
qualSeqB = seqB.Qualities()[0:partLen] qualSeqB = seqB.Qualities()[0:partLen]
extra3 = seqB.Len() - partLen extra3 = seqB.Len() - partLen
score = _FillMatrixPeLeftAlign( score = _FillMatrixPeLeftAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix, &arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix) &arena.pointer.pathMatrix)
} else { } else {
@ -425,7 +425,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
extra3 = partLen - seqA.Len() extra3 = partLen - seqA.Len()
score = _FillMatrixPeRightAlign( score = _FillMatrixPeRightAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix, &arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix) &arena.pointer.pathMatrix)
} }
@ -482,7 +482,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
qualSeqB = seqB.Qualities() qualSeqB = seqB.Qualities()
scoreR := _FillMatrixPeRightAlign( scoreR := _FillMatrixPeRightAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix, &arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix) &arena.pointer.pathMatrix)
@ -491,7 +491,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
&arena.pointer.path) &arena.pointer.path)
scoreL := _FillMatrixPeLeftAlign( scoreL := _FillMatrixPeLeftAlign(
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
&arena.pointer.scoreMatrix, &arena.pointer.scoreMatrix,
&arena.pointer.pathMatrix) &arena.pointer.pathMatrix)

View File

@ -10,11 +10,12 @@ var _ForwardFile = ""
var _ReverseFile = "" var _ReverseFile = ""
var _Delta = 5 var _Delta = 5
var _MinOverlap = 20 var _MinOverlap = 20
var _GapPenality = float64(2.0) var _GapPenality = 2.0
var _WithoutStats = false var _WithoutStats = false
var _MinIdentity = 0.9 var _MinIdentity = 0.9
var _NoFastAlign = false var _NoFastAlign = false
var _FastScoreAbs = false var _FastScoreAbs = false
var _PenalityScale = 1.0
func PairingOptionSet(options *getoptions.GetOpt) { func PairingOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_ForwardFile, "forward-reads", "", options.StringVar(&_ForwardFile, "forward-reads", "",
@ -38,6 +39,8 @@ func PairingOptionSet(options *getoptions.GetOpt) {
options.Float64Var(&_GapPenality, "gap-penality", _GapPenality, options.Float64Var(&_GapPenality, "gap-penality", _GapPenality,
options.Alias("G"), options.Alias("G"),
options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2).")) options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2)."))
options.Float64Var(&_PenalityScale, "penality-scale", _PenalityScale,
options.Description("Scale factor applied to the mismatch score and the gap penality (default 1)."))
options.BoolVar(&_WithoutStats, "without-stat", _WithoutStats, options.BoolVar(&_WithoutStats, "without-stat", _WithoutStats,
options.Alias("S"), options.Alias("S"),
options.Description("Remove alignment statistics from the produced consensus sequences.")) options.Description("Remove alignment statistics from the produced consensus sequences."))
@ -85,6 +88,10 @@ func CLIGapPenality() float64 {
return _GapPenality return _GapPenality
} }
func CLIPenalityScale() float64 {
return _PenalityScale
}
func CLIWithStats() bool { func CLIWithStats() bool {
return !_WithoutStats return !_WithoutStats
} }

View File

@ -106,11 +106,14 @@ func JoinPairedSequence(seqA, seqB *obiseq.BioSequence, inplace bool) *obiseq.Bi
// An obiseq.BioSequence corresponding to the assembling of the both // An obiseq.BioSequence corresponding to the assembling of the both
// input sequence. // input sequence.
func AssemblePESequences(seqA, seqB *obiseq.BioSequence, func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
gap float64, delta, minOverlap int, minIdentity float64, withStats bool, gap, scale float64, delta, minOverlap int, minIdentity float64, withStats bool,
inplace bool, fastAlign, fastModeRel bool, inplace bool, fastAlign, fastModeRel bool,
arenaAlign obialign.PEAlignArena) *obiseq.BioSequence { arenaAlign obialign.PEAlignArena) *obiseq.BioSequence {
score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB, gap, fastAlign, delta, fastModeRel, arenaAlign) score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB,
gap, scale,
fastAlign, delta, fastModeRel,
arenaAlign)
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true) cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true)
left := path[0] left := path[0]
@ -210,7 +213,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
// The function returns an iterator over batches of obiseq.Biosequence object. // The function returns an iterator over batches of obiseq.Biosequence object.
// each pair of processed sequences produces one sequence in the result iterator. // each pair of processed sequences produces one sequence in the result iterator.
func IAssemblePESequencesBatch(iterator obiiter.IBioSequence, func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
gap float64, delta, minOverlap int, gap, scale float64, delta, minOverlap int,
minIdentity float64, fastAlign, fastModeRel, minIdentity float64, fastAlign, fastModeRel,
withStats bool, sizes ...int) obiiter.IBioSequence { withStats bool, sizes ...int) obiiter.IBioSequence {
@ -241,7 +244,9 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
cons := make(obiseq.BioSequenceSlice, len(batch.Slice())) cons := make(obiseq.BioSequenceSlice, len(batch.Slice()))
for i, A := range batch.Slice() { for i, A := range batch.Slice() {
B := A.PairedWith() B := A.PairedWith()
cons[i] = AssemblePESequences(A, B.ReverseComplement(true), gap, delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena) cons[i] = AssemblePESequences(A, B.ReverseComplement(true),
gap, scale,
delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena)
} }
newIter.Push(obiiter.MakeBioSequenceBatch( newIter.Push(obiiter.MakeBioSequenceBatch(
batch.Order(), batch.Order(),

View File

@ -13,7 +13,7 @@ import (
) )
func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence, func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
gap float64, delta, minOverlap int, gap, scale float64, delta, minOverlap int,
minIdentity float64, fastAlign, fastScoreRel, minIdentity float64, fastAlign, fastScoreRel,
withStats bool) obiiter.IBioSequence { withStats bool) obiiter.IBioSequence {
@ -50,7 +50,8 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
B := A.PairedWith() B := A.PairedWith()
consensus := obipairing.AssemblePESequences( consensus := obipairing.AssemblePESequences(
A.Copy(), B.ReverseComplement(false), A.Copy(), B.ReverseComplement(false),
gap, delta, minOverlap, minIdentity, withStats, true, gap, scale,
delta, minOverlap, minIdentity, withStats, true,
fastAlign, fastScoreRel, arena, fastAlign, fastScoreRel, arena,
) )