Stabilize the obipairing output. whant run twice on the same dataset results are identical

This commit is contained in:
2023-02-05 11:06:31 +01:00
parent a6af0abdd8
commit 4117cbdd08
3 changed files with 33 additions and 22 deletions

View File

@ -322,9 +322,13 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
}
extra5 = -startA
startB = 0
rawSeqA = seqA.Sequence()[startA:]
qualSeqA = seqA.Qualities()[startA:]
partLen = len(rawSeqA)
if partLen > seqB.Len() {
partLen = seqB.Len()
}
rawSeqB = seqB.Sequence()[0:partLen]
qualSeqB = seqB.Qualities()[0:partLen]
extra3 = seqB.Len() - partLen
@ -343,6 +347,9 @@ func PEAlign(seqA, seqB *obiseq.BioSequence,
rawSeqB = seqB.Sequence()[startB:]
qualSeqB = seqB.Qualities()[startB:]
partLen = len(rawSeqB)
if partLen > seqA.Len() {
partLen = seqA.Len()
}
rawSeqA = seqA.Sequence()[:partLen]
qualSeqA = seqA.Qualities()[:partLen]
extra3 = partLen - seqA.Len()

View File

@ -122,6 +122,10 @@ func FastShiftFourMer(index [][]int, seq *obiseq.BioSequence, buffer *[]byte) (i
if count > maxcount {
maxshift = shift
maxcount = count
} else {
if count == maxcount && shift < maxshift {
maxshift = shift
}
}
}

View File

@ -121,13 +121,14 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
lcons := cons.Len()
aliLength := lcons - _Abs(left) - _Abs(right)
identity := float64(match) / float64(aliLength)
if aliLength == 0 {
identity = 0
}
annot := cons.Annotations()
if aliLength >= minOverlap && identity >= minIdentity {
annot["mode"] = "alignment"
if withStats {
annot := cons.Annotations()
annot["mode"] = "alignment"
annot["score"] = score
if left < 0 {
annot["seq_a_single"] = -left
annot["ali_dir"] = "left"
@ -142,29 +143,28 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
} else {
annot["seq_b_single"] = right
}
scoreNorm := float64(0)
if aliLength > 0 {
scoreNorm = math.Round(float64(match)/float64(aliLength)*1000) / 1000
}
annot["ali_length"] = aliLength
annot["seq_ab_match"] = match
annot["score_norm"] = scoreNorm
if inplace {
seqA.Recycle()
seqB.Recycle()
}
}
if inplace {
seqA.Recycle()
seqB.Recycle()
}
} else {
cons = JoinPairedSequence(seqA, seqB, inplace)
annot = cons.Annotations()
annot["mode"] = "join"
}
if withStats {
annot := cons.Annotations()
annot["mode"] = "join"
if withStats {
annot["score"] = score
scoreNorm := float64(0)
if aliLength > 0 {
scoreNorm = math.Round(float64(match)/float64(aliLength)*1000) / 1000
} else {
scoreNorm = 0
}
annot["ali_length"] = aliLength
annot["seq_ab_match"] = match
annot["score_norm"] = scoreNorm
}
return cons