mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Several bug in annotation management
This commit is contained in:
@ -5,7 +5,9 @@
|
||||
package obialign
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
@ -113,7 +115,7 @@ func BuildAlignment(seqA, seqB *obiseq.BioSequence,
|
||||
// In that case arenas will be allocated by the function but, they will not
|
||||
// be reusable for other alignments and desallocated at the BuildQualityConsensus
|
||||
// return.
|
||||
func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int) (*obiseq.BioSequence, int) {
|
||||
func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMismatch bool) (*obiseq.BioSequence, int) {
|
||||
|
||||
bufferSA := obiseq.GetSlice(seqA.Length())
|
||||
bufferSB := obiseq.GetSlice(seqB.Length())
|
||||
@ -142,6 +144,8 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int) (*obiseq.
|
||||
var qM, qm byte
|
||||
var i int
|
||||
|
||||
mismatches := make(map[string]int)
|
||||
|
||||
match := 0
|
||||
|
||||
for i, qA = range bufferQA {
|
||||
@ -149,6 +153,10 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int) (*obiseq.
|
||||
nB := bufferSB[i]
|
||||
qB = bufferQB[i]
|
||||
|
||||
if statOnMismatch && nA != nB && nA != ' ' && nB != ' ' {
|
||||
mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1
|
||||
}
|
||||
|
||||
if qA > qB {
|
||||
qM = qA
|
||||
qm = qB
|
||||
@ -188,5 +196,39 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int) (*obiseq.
|
||||
)
|
||||
consSeq.SetQualities(bufferQA)
|
||||
|
||||
if statOnMismatch && len(mismatches) > 0 {
|
||||
consSeq.SetAttribute("pairing_mismatches", mismatches)
|
||||
}
|
||||
|
||||
return consSeq, match
|
||||
}
|
||||
|
||||
// func BuildCigar(seqA, seqB *obiseq.BioSequence, path []int) string {
|
||||
|
||||
// lp := len(path)
|
||||
|
||||
// posA := 0
|
||||
// posB := 0
|
||||
// oldStep := ' '
|
||||
// kstep := ' '
|
||||
// for i := 0; i < lp; i++ {
|
||||
// step := path[i]
|
||||
|
||||
// if step < 0 {
|
||||
// kstep='D'
|
||||
// posA -= step
|
||||
// }
|
||||
// if step > 0 {
|
||||
// kstep='I'
|
||||
// posB += step
|
||||
// }
|
||||
|
||||
// i++
|
||||
// step = path[i]
|
||||
// if step > 0 {
|
||||
// kstep = 'M'
|
||||
// posA += step
|
||||
// posB += step
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
@ -275,7 +275,7 @@ func _Pcr(seq ApatSequence,
|
||||
(opt.MaxLength() == 0 || length <= opt.MaxLength()) {
|
||||
amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular)
|
||||
annot := amplicon.Annotations()
|
||||
goutils.CopyMap(annot, sequence.Annotations())
|
||||
goutils.MustFillMap(annot, sequence.Annotations())
|
||||
annot["forward_primer"] = forward.String()
|
||||
|
||||
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||
@ -350,7 +350,7 @@ func _Pcr(seq ApatSequence,
|
||||
amplicon = amplicon.ReverseComplement(true)
|
||||
|
||||
annot := amplicon.Annotations()
|
||||
goutils.CopyMap(annot, sequence.Annotations())
|
||||
goutils.MustFillMap(annot, sequence.Annotations())
|
||||
annot["forward_primer"] = forward.String()
|
||||
|
||||
match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||
|
@ -53,6 +53,7 @@ func (library *NGSLibrary) Match(sequence *obiseq.BioSequence) *DemultiplexMatch
|
||||
|
||||
func (library *NGSLibrary) ExtractBarcode(sequence *obiseq.BioSequence, inplace bool) (*obiseq.BioSequence, error) {
|
||||
match := library.Match(sequence)
|
||||
|
||||
return match.ExtractBarcode(sequence, inplace)
|
||||
}
|
||||
|
||||
@ -245,6 +246,7 @@ func (match *DemultiplexMatch) ExtractBarcode(sequence *obiseq.BioSequence, inpl
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if !match.IsDirect {
|
||||
sequence.ReverseComplement(true)
|
||||
}
|
||||
|
@ -182,5 +182,3 @@ func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
|
||||
|
||||
return worker
|
||||
}
|
||||
|
||||
|
||||
|
@ -257,6 +257,20 @@ func (s *BioSequence) GetBool(key string) (bool, bool) {
|
||||
return val, ok
|
||||
}
|
||||
|
||||
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
|
||||
var val map[string]int
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = goutils.InterfaceToIntMap(v)
|
||||
ok = err == nil
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// Returning the MD5 hash of the sequence.
|
||||
func (s *BioSequence) MD5() [16]byte {
|
||||
return md5.Sum(s.sequence)
|
||||
|
@ -41,7 +41,7 @@ func GetSlice(capacity int) []byte {
|
||||
|
||||
func CopySlice(src []byte) []byte {
|
||||
sl := GetSlice(len(src))
|
||||
copy(sl,src)
|
||||
copy(sl, src)
|
||||
return sl
|
||||
}
|
||||
|
||||
@ -69,7 +69,7 @@ func GetAnnotation(values ...Annotation) Annotation {
|
||||
}
|
||||
|
||||
if len(values) > 0 {
|
||||
goutils.CopyMap(a, values[0])
|
||||
goutils.MustFillMap(a, values[0])
|
||||
}
|
||||
|
||||
return a
|
||||
|
@ -1,7 +1,7 @@
|
||||
package obiseq
|
||||
|
||||
// ".ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
|
||||
var __revcmp_dna__ = []byte(".TVGHEFCDIJMLKNOPQYSAABWXRZ#!][")
|
||||
var _revcmpDNA = []byte(".TVGHEFCDIJMLKNOPQYSAABWXRZ#!][")
|
||||
|
||||
// Reverse complements a DNA sequence.
|
||||
// If the inplace parametter is true, that operation is done in place.
|
||||
@ -15,8 +15,11 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
|
||||
|
||||
for i, j := sequence.Length()-1, 0; i >= j; i-- {
|
||||
|
||||
s[j], s[i] = __revcmp_dna__[s[i]&31]|(s[i]&0x20),
|
||||
__revcmp_dna__[s[j]&31]|(s[j]&0x20)
|
||||
// ASCII code & 31 -> builds an index in witch (a|A) is 1
|
||||
// ASCII code & 0x20 -> Foce lower case
|
||||
|
||||
s[j], s[i] = _revcmpDNA[s[i]&31]|(s[i]&0x20),
|
||||
_revcmpDNA[s[j]&31]|(s[j]&0x20)
|
||||
j++
|
||||
}
|
||||
|
||||
@ -28,5 +31,36 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
|
||||
}
|
||||
}
|
||||
|
||||
return sequence._revcmpMutation()
|
||||
}
|
||||
|
||||
func (sequence *BioSequence) _revcmpMutation() *BioSequence {
|
||||
|
||||
rev := func(m string) string {
|
||||
b := []byte(m)
|
||||
|
||||
// Echange and reverse complement symboles
|
||||
b[1], b[9] = _revcmpDNA[b[9]&31]|(b[9]&0x20),
|
||||
_revcmpDNA[b[1]&31]|(b[1]&0x20)
|
||||
|
||||
// Exchange sequencing scores
|
||||
b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4]
|
||||
|
||||
return string(b)
|
||||
}
|
||||
|
||||
lseq := sequence.Length()
|
||||
|
||||
mut, ok := sequence.GetIntMap("pairing_mismatches")
|
||||
if ok && len(mut) > 0 {
|
||||
cmut := make(map[string]int, len(mut))
|
||||
|
||||
for m, p := range mut {
|
||||
cmut[rev(m)] = lseq - p + 1
|
||||
}
|
||||
|
||||
sequence.SetAttribute("pairing_mismatches", cmut)
|
||||
}
|
||||
|
||||
return sequence
|
||||
}
|
||||
|
@ -43,9 +43,30 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
|
||||
|
||||
}
|
||||
|
||||
if len(sequence.Annotations()) > 0 {
|
||||
if sequence.HasAnnotation() {
|
||||
newSeq.annotations = GetAnnotation(sequence.Annotations())
|
||||
}
|
||||
|
||||
return newSeq, nil
|
||||
return newSeq._subseqMutation(from), nil
|
||||
}
|
||||
|
||||
func (sequence *BioSequence) _subseqMutation(shift int) *BioSequence {
|
||||
|
||||
lseq := sequence.Length()
|
||||
|
||||
mut, ok := sequence.GetIntMap("pairing_mismatches")
|
||||
if ok && len(mut) > 0 {
|
||||
cmut := make(map[string]int, len(mut))
|
||||
|
||||
for m, p := range mut {
|
||||
if p < lseq {
|
||||
cmut[m] = p - shift
|
||||
}
|
||||
}
|
||||
|
||||
sequence.SetAttribute("pairing_mismatches", cmut)
|
||||
}
|
||||
|
||||
return sequence
|
||||
|
||||
}
|
||||
|
@ -110,7 +110,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
||||
arenaAlign obialign.PEAlignArena) *obiseq.BioSequence {
|
||||
|
||||
score, path := obialign.PEAlign(seqA, seqB, gap, delta, arenaAlign)
|
||||
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path)
|
||||
cons, match := obialign.BuildQualityConsensus(seqA, seqB, path,true)
|
||||
|
||||
left := path[0]
|
||||
right := 0
|
||||
|
Reference in New Issue
Block a user