mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Adds the option --pairing-scale to obipairing
Former-commit-id: c60416708467f5e818e70e08b3c512014b6212f0
This commit is contained in:
@ -41,6 +41,7 @@ func main() {
|
||||
|
||||
paired := obipairing.IAssemblePESequencesBatch(pairs,
|
||||
obipairing.CLIGapPenality(),
|
||||
obipairing.CLIPenalityScale(),
|
||||
obipairing.CLIDelta(),
|
||||
obipairing.CLIMinOverlap(),
|
||||
obipairing.CLIMinIdentity(),
|
||||
|
@ -44,6 +44,7 @@ func main() {
|
||||
|
||||
paired := obitagpcr.IPCRTagPESequencesBatch(pairs,
|
||||
obipairing.CLIGapPenality(),
|
||||
obipairing.CLIPenalityScale(),
|
||||
obipairing.CLIDelta(),
|
||||
obipairing.CLIMinOverlap(),
|
||||
obipairing.CLIMinIdentity(),
|
||||
|
@ -111,17 +111,17 @@ func _GetMatrixFrom(matrix *[]int, lenA, a, b int) (int, int, int) {
|
||||
return m[i_left], m[i_diag], m[i_top]
|
||||
}
|
||||
|
||||
func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int {
|
||||
func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte, scale float64) int {
|
||||
partMatch := _NucPartMatch[baseA&31][baseB&31]
|
||||
// log.Printf("id : %f A : %s %d B : %s %d\n", part_match, string(baseA), qualA, string(baseB), qualB)
|
||||
switch int(partMatch * 100) {
|
||||
case 100:
|
||||
return _NucScorePartMatchMatch[qualA][qualB]
|
||||
case 0:
|
||||
return _NucScorePartMatchMismatch[qualA][qualB]
|
||||
return int(float64(_NucScorePartMatchMismatch[qualA][qualB])*scale + 0.5)
|
||||
default:
|
||||
return int(partMatch*float64(_NucScorePartMatchMatch[qualA][qualB]) +
|
||||
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB]) +
|
||||
(1-partMatch)*float64(_NucScorePartMatchMismatch[qualA][qualB])*scale +
|
||||
0.5)
|
||||
}
|
||||
}
|
||||
@ -135,7 +135,7 @@ func _PairingScorePeAlign(baseA, qualA, baseB, qualB byte) int {
|
||||
// - 0 : for diagonal
|
||||
// - -1 : for top
|
||||
// - +1 : for left
|
||||
func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64,
|
||||
scoreMatrix, pathMatrix *[]int) int {
|
||||
|
||||
la := len(seqA)
|
||||
@ -143,7 +143,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
|
||||
// The actual gap score is the gap score times the mismatch between
|
||||
// two bases with a score of 40
|
||||
gapPenalty := int(gap * float64(_NucScorePartMatchMismatch[40][40]))
|
||||
gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5)
|
||||
|
||||
needed := (la + 1) * (lb + 1)
|
||||
|
||||
@ -177,7 +177,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
|
||||
// log.Infof("LA: i : %d j : %d left : %d diag : %d top : %d\n", i, j, left, diag, top)
|
||||
|
||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j])
|
||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale)
|
||||
left += gapPenalty
|
||||
top += gapPenalty
|
||||
|
||||
@ -195,7 +195,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
// Special case for the last line Left gap are free
|
||||
|
||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, la1, j)
|
||||
diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j])
|
||||
diag += _PairingScorePeAlign(seqA[la1], qualA[la1], seqB[j], qualB[j], scale)
|
||||
top += gapPenalty
|
||||
|
||||
switch {
|
||||
@ -218,7 +218,7 @@ func _FillMatrixPeLeftAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
// With A spanning over lines and B over columns
|
||||
// - First line gap = 0
|
||||
// - Last column gaps = 0
|
||||
func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap, scale float64,
|
||||
scoreMatrix, pathMatrix *[]int) int {
|
||||
|
||||
la := len(seqA)
|
||||
@ -226,7 +226,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
|
||||
// The actual gap score is the gap score times the mismatch between
|
||||
// two bases with a score of 40
|
||||
gapPenalty := int(gap * float64(_NucScorePartMatchMismatch[40][40]))
|
||||
gapPenalty := int(scale*gap*float64(_NucScorePartMatchMismatch[40][40]) + 0.5)
|
||||
|
||||
needed := (la + 1) * (lb + 1)
|
||||
|
||||
@ -259,7 +259,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
for i := 0; i < la; i++ {
|
||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, j)
|
||||
|
||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j])
|
||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[j], qualB[j], scale)
|
||||
left += gapPenalty
|
||||
top += gapPenalty
|
||||
|
||||
@ -283,7 +283,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
for i := 0; i < la; i++ {
|
||||
|
||||
left, diag, top := _GetMatrixFrom(scoreMatrix, la, i, lb1)
|
||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1])
|
||||
diag += _PairingScorePeAlign(seqA[i], qualA[i], seqB[lb1], qualB[lb1], scale)
|
||||
left += gapPenalty
|
||||
|
||||
// log.Infof("LR: i : %d j : %d left : %d diag : %d top : %d [%d]\n", i, lb1, left, diag, top, _GetMatrix(scoreMatrix, la, i, lb1))
|
||||
@ -302,7 +302,7 @@ func _FillMatrixPeRightAlign(seqA, qualA, seqB, qualB []byte, gap float64,
|
||||
|
||||
}
|
||||
|
||||
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
||||
func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
||||
arena PEAlignArena) (int, []int) {
|
||||
|
||||
if !_InitializedDnaScore {
|
||||
@ -315,7 +315,7 @@ func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
||||
}
|
||||
|
||||
score := _FillMatrixPeLeftAlign(seqA.Sequence(), seqA.Qualities(),
|
||||
seqB.Sequence(), seqB.Qualities(), gap,
|
||||
seqB.Sequence(), seqB.Qualities(), gap, scale,
|
||||
&arena.pointer.scoreMatrix,
|
||||
&arena.pointer.pathMatrix)
|
||||
|
||||
@ -326,7 +326,7 @@ func PELeftAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
||||
return score, arena.pointer.path
|
||||
}
|
||||
|
||||
func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
||||
func PERightAlign(seqA, seqB *obiseq.BioSequence, gap, scale float64,
|
||||
arena PEAlignArena) (int, []int) {
|
||||
|
||||
if !_InitializedDnaScore {
|
||||
@ -339,7 +339,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
||||
}
|
||||
|
||||
score := _FillMatrixPeRightAlign(seqA.Sequence(), seqA.Qualities(),
|
||||
seqB.Sequence(), seqB.Qualities(), gap,
|
||||
seqB.Sequence(), seqB.Qualities(), gap, scale,
|
||||
&arena.pointer.scoreMatrix,
|
||||
&arena.pointer.pathMatrix)
|
||||
|
||||
@ -351,7 +351,7 @@ func PERightAlign(seqA, seqB *obiseq.BioSequence, gap float64,
|
||||
}
|
||||
|
||||
func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
gap float64, fastAlign bool, delta int, fastScoreRel bool,
|
||||
gap, scale float64, fastAlign bool, delta int, fastScoreRel bool,
|
||||
arena PEAlignArena) (int, []int, int, int, float64) {
|
||||
var score, shift int
|
||||
var startA, startB int
|
||||
@ -403,7 +403,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
qualSeqB = seqB.Qualities()[0:partLen]
|
||||
extra3 = seqB.Len() - partLen
|
||||
score = _FillMatrixPeLeftAlign(
|
||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
|
||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||
&arena.pointer.scoreMatrix,
|
||||
&arena.pointer.pathMatrix)
|
||||
} else {
|
||||
@ -425,7 +425,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
extra3 = partLen - seqA.Len()
|
||||
|
||||
score = _FillMatrixPeRightAlign(
|
||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
|
||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||
&arena.pointer.scoreMatrix,
|
||||
&arena.pointer.pathMatrix)
|
||||
}
|
||||
@ -482,7 +482,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
qualSeqB = seqB.Qualities()
|
||||
|
||||
scoreR := _FillMatrixPeRightAlign(
|
||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
|
||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||
&arena.pointer.scoreMatrix,
|
||||
&arena.pointer.pathMatrix)
|
||||
|
||||
@ -491,7 +491,7 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
|
||||
&arena.pointer.path)
|
||||
|
||||
scoreL := _FillMatrixPeLeftAlign(
|
||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap,
|
||||
rawSeqA, qualSeqA, rawSeqB, qualSeqB, gap, scale,
|
||||
&arena.pointer.scoreMatrix,
|
||||
&arena.pointer.pathMatrix)
|
||||
|
||||
|
@ -10,11 +10,12 @@ var _ForwardFile = ""
|
||||
var _ReverseFile = ""
|
||||
var _Delta = 5
|
||||
var _MinOverlap = 20
|
||||
var _GapPenality = float64(2.0)
|
||||
var _GapPenality = 2.0
|
||||
var _WithoutStats = false
|
||||
var _MinIdentity = 0.9
|
||||
var _NoFastAlign = false
|
||||
var _FastScoreAbs = false
|
||||
var _PenalityScale = 1.0
|
||||
|
||||
func PairingOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringVar(&_ForwardFile, "forward-reads", "",
|
||||
@ -38,6 +39,8 @@ func PairingOptionSet(options *getoptions.GetOpt) {
|
||||
options.Float64Var(&_GapPenality, "gap-penality", _GapPenality,
|
||||
options.Alias("G"),
|
||||
options.Description("Gap penality expressed as the multiply factor applied to the mismatch score between two nucleotides with a quality of 40 (default 2)."))
|
||||
options.Float64Var(&_PenalityScale, "penality-scale", _PenalityScale,
|
||||
options.Description("Scale factor applied to the mismatch score and the gap penality (default 1)."))
|
||||
options.BoolVar(&_WithoutStats, "without-stat", _WithoutStats,
|
||||
options.Alias("S"),
|
||||
options.Description("Remove alignment statistics from the produced consensus sequences."))
|
||||
@ -85,6 +88,10 @@ func CLIGapPenality() float64 {
|
||||
return _GapPenality
|
||||
}
|
||||
|
||||
func CLIPenalityScale() float64 {
|
||||
return _PenalityScale
|
||||
}
|
||||
|
||||
func CLIWithStats() bool {
|
||||
return !_WithoutStats
|
||||
}
|
||||
|
@ -106,11 +106,14 @@ func JoinPairedSequence(seqA, seqB *obiseq.BioSequence, inplace bool) *obiseq.Bi
|
||||
// An obiseq.BioSequence corresponding to the assembling of the both
|
||||
// input sequence.
|
||||
func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
||||
gap float64, delta, minOverlap int, minIdentity float64, withStats bool,
|
||||
gap, scale float64, delta, minOverlap int, minIdentity float64, withStats bool,
|
||||
inplace bool, fastAlign, fastModeRel bool,
|
||||
arenaAlign obialign.PEAlignArena) *obiseq.BioSequence {
|
||||
|
||||
score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB, gap, fastAlign, delta, fastModeRel, arenaAlign)
|
||||
score, path, fastcount, over, fastscore := obialign.PEAlign(seqA, seqB,
|
||||
gap, scale,
|
||||
fastAlign, delta, fastModeRel,
|
||||
arenaAlign)
|
||||
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path, true)
|
||||
|
||||
left := path[0]
|
||||
@ -210,7 +213,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
||||
// The function returns an iterator over batches of obiseq.Biosequence object.
|
||||
// each pair of processed sequences produces one sequence in the result iterator.
|
||||
func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
||||
gap float64, delta, minOverlap int,
|
||||
gap, scale float64, delta, minOverlap int,
|
||||
minIdentity float64, fastAlign, fastModeRel,
|
||||
withStats bool, sizes ...int) obiiter.IBioSequence {
|
||||
|
||||
@ -241,7 +244,9 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
||||
cons := make(obiseq.BioSequenceSlice, len(batch.Slice()))
|
||||
for i, A := range batch.Slice() {
|
||||
B := A.PairedWith()
|
||||
cons[i] = AssemblePESequences(A, B.ReverseComplement(true), gap, delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena)
|
||||
cons[i] = AssemblePESequences(A, B.ReverseComplement(true),
|
||||
gap, scale,
|
||||
delta, minOverlap, minIdentity, withStats, true, fastAlign, fastModeRel, arena)
|
||||
}
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(
|
||||
batch.Order(),
|
||||
|
@ -13,7 +13,7 @@ import (
|
||||
)
|
||||
|
||||
func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
|
||||
gap float64, delta, minOverlap int,
|
||||
gap, scale float64, delta, minOverlap int,
|
||||
minIdentity float64, fastAlign, fastScoreRel,
|
||||
withStats bool) obiiter.IBioSequence {
|
||||
|
||||
@ -50,7 +50,8 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
|
||||
B := A.PairedWith()
|
||||
consensus := obipairing.AssemblePESequences(
|
||||
A.Copy(), B.ReverseComplement(false),
|
||||
gap, delta, minOverlap, minIdentity, withStats, true,
|
||||
gap, scale,
|
||||
delta, minOverlap, minIdentity, withStats, true,
|
||||
fastAlign, fastScoreRel, arena,
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user