Fix several bugs in annotation management

This commit is contained in:
2022-10-12 23:01:47 +02:00
parent aae3398701
commit f8df48338d
9 changed files with 124 additions and 13 deletions

View File

@ -257,6 +257,20 @@ func (s *BioSequence) GetBool(key string) (bool, bool) {
return val, ok
}
// GetIntMap returns the attribute stored under key converted to a
// map[string]int.
//
// The boolean result is false when the attribute is absent, or when
// goutils.InterfaceToIntMap reports an error while converting it; in the
// latter case the map is whatever that conversion returned.
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		// No such attribute: nothing to convert.
		return nil, false
	}

	counts, err := goutils.InterfaceToIntMap(raw)
	return counts, err == nil
}
// Returning the MD5 hash of the sequence.
func (s *BioSequence) MD5() [16]byte {
return md5.Sum(s.sequence)

View File

@ -41,7 +41,7 @@ func GetSlice(capacity int) []byte {
// CopySlice returns a slice obtained from GetSlice(len(src)) after copying
// src into it.
//
// NOTE(review): the built-in copy transfers at most len(sl) bytes, so this
// only duplicates src if GetSlice returns a slice whose length (not merely
// its capacity) is len(src) — confirm against GetSlice.
func CopySlice(src []byte) []byte {
sl := GetSlice(len(src))
copy(sl,src)
// Same call as the line above; the two differ only in spacing.
copy(sl, src)
return sl
}
@ -69,7 +69,7 @@ func GetAnnotation(values ...Annotation) Annotation {
}
if len(values) > 0 {
goutils.CopyMap(a, values[0])
goutils.MustFillMap(a, values[0])
}
return a

View File

@ -1,7 +1,7 @@
package obiseq
// ".ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
var __revcmp_dna__ = []byte(".TVGHEFCDIJMLKNOPQYSAABWXRZ#!][")
var _revcmpDNA = []byte(".TVGHEFCDIJMLKNOPQYSAABWXRZ#!][")
// Reverse complements a DNA sequence.
// If the inplace parameter is true, that operation is done in place.
@ -15,8 +15,11 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
for i, j := sequence.Length()-1, 0; i >= j; i-- {
s[j], s[i] = __revcmp_dna__[s[i]&31]|(s[i]&0x20),
__revcmp_dna__[s[j]&31]|(s[j]&0x20)
// ASCII code & 31 -> builds an index in which (a|A) is 1
// ASCII code & 0x20 -> extracts the case bit, OR-ed back so lower-case input stays lower case
s[j], s[i] = _revcmpDNA[s[i]&31]|(s[i]&0x20),
_revcmpDNA[s[j]&31]|(s[j]&0x20)
j++
}
@ -28,5 +31,36 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence {
}
}
return sequence._revcmpMutation()
}
// _revcmpMutation rewrites the "pairing_mismatches" annotation so that it
// describes the reverse-complemented sequence, and returns the receiver.
//
// For every entry of the map:
//   - the symbols at byte offsets 1 and 9 of the key are exchanged and
//     complemented through _revcmpDNA (letter case is preserved);
//   - the two-byte fields at offsets 3-4 and 11-12 of the key are exchanged;
//   - the position p becomes Length() - p + 1.
//
// Keys shorter than 13 bytes cannot hold those fields and are kept unchanged
// instead of causing an index-out-of-range panic. The annotation is left
// untouched when it is absent, empty, or not convertible to map[string]int.
//
// NOTE(review): the offsets suggest keys shaped like "(A:30)->(G:25)", and
// the position formula is the mirror of a 1-based coordinate — confirm both
// against the code that writes "pairing_mismatches".
func (sequence *BioSequence) _revcmpMutation() *BioSequence {
	// Highest offset touched below is 12, hence a minimum length of 13.
	const minKeyLen = 13

	rev := func(m string) string {
		if len(m) < minKeyLen {
			// Malformed key: leave it as is rather than panic.
			return m
		}

		b := []byte(m)

		// Exchange and reverse complement the symbols.
		b[1], b[9] = _revcmpDNA[b[9]&31]|(b[9]&0x20),
			_revcmpDNA[b[1]&31]|(b[1]&0x20)

		// Exchange the sequencing scores.
		b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4]

		return string(b)
	}

	lseq := sequence.Length()

	mut, ok := sequence.GetIntMap("pairing_mismatches")
	if ok && len(mut) > 0 {
		cmut := make(map[string]int, len(mut))

		for m, p := range mut {
			cmut[rev(m)] = lseq - p + 1
		}

		sequence.SetAttribute("pairing_mismatches", cmut)
	}

	return sequence
}

View File

@ -43,9 +43,30 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque
}
if len(sequence.Annotations()) > 0 {
if sequence.HasAnnotation() {
newSeq.annotations = GetAnnotation(sequence.Annotations())
}
return newSeq, nil
return newSeq._subseqMutation(from), nil
}
// _subseqMutation re-bases the "pairing_mismatches" annotation of a freshly
// extracted subsequence and returns the receiver.
//
// shift is the offset of the subsequence start in the parent sequence; the
// call site visible in Subsequence passes its from argument. Each position p
// is translated to p - shift, and the entry is kept only when the translated
// position falls inside the subsequence. The range test is applied to the
// translated position: testing the untranslated p against the new length
// would both keep mismatches located before the window (with a negative
// position) and drop mismatches inside a window that starts beyond lseq.
//
// The annotation is left untouched when it is absent, empty, or not
// convertible to map[string]int.
//
// NOTE(review): positions are treated as 1-based (valid range 1..Length()),
// consistent with the Length() - p + 1 mirroring in _revcmpMutation — confirm
// against the code that writes "pairing_mismatches". Circular extractions
// that wrap around the parent's end are not re-based specially here.
func (sequence *BioSequence) _subseqMutation(shift int) *BioSequence {
	lseq := sequence.Length()

	mut, ok := sequence.GetIntMap("pairing_mismatches")
	if !ok || len(mut) == 0 {
		return sequence
	}

	cmut := make(map[string]int, len(mut))
	for m, p := range mut {
		shifted := p - shift
		if shifted >= 1 && shifted <= lseq {
			cmut[m] = shifted
		}
	}

	sequence.SetAttribute("pairing_mismatches", cmut)

	return sequence
}