From f8df48338d5d69e75d6514d33abe2f3d7afbd2f5 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Wed, 12 Oct 2022 23:01:47 +0200 Subject: [PATCH] Several bug in annotation management --- pkg/obialign/alignment.go | 44 +++++++++++++++++++++++++++++- pkg/obiapat/pcr.go | 4 +-- pkg/obingslibrary/match.go | 2 ++ pkg/obingslibrary/worker.go | 2 -- pkg/obiseq/biosequence.go | 14 ++++++++++ pkg/obiseq/pool.go | 4 +-- pkg/obiseq/revcomp.go | 40 +++++++++++++++++++++++++-- pkg/obiseq/subseq.go | 25 +++++++++++++++-- pkg/obitools/obipairing/pairing.go | 2 +- 9 files changed, 124 insertions(+), 13 deletions(-) diff --git a/pkg/obialign/alignment.go b/pkg/obialign/alignment.go index 0b1bc80..13b6714 100644 --- a/pkg/obialign/alignment.go +++ b/pkg/obialign/alignment.go @@ -5,7 +5,9 @@ package obialign import ( + "fmt" "math" + "strings" "sync" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" @@ -113,7 +115,7 @@ func BuildAlignment(seqA, seqB *obiseq.BioSequence, // In that case arenas will be allocated by the function but, they will not // be reusable for other alignments and desallocated at the BuildQualityConsensus // return. -func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int) (*obiseq.BioSequence, int) { +func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int, statOnMismatch bool) (*obiseq.BioSequence, int) { bufferSA := obiseq.GetSlice(seqA.Length()) bufferSB := obiseq.GetSlice(seqB.Length()) @@ -142,6 +144,8 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int) (*obiseq. var qM, qm byte var i int + mismatches := make(map[string]int) + match := 0 for i, qA = range bufferQA { @@ -149,6 +153,10 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int) (*obiseq. nB := bufferSB[i] qB = bufferQB[i] + if statOnMismatch && nA != nB && nA != ' ' && nB != ' ' { + mismatches[strings.ToUpper(fmt.Sprintf("(%c:%02d)->(%c:%02d)", nA, qA, nB, qB))] = i + 1 + } + if qA > qB { qM = qA qm = qB @@ -188,5 +196,39 @@ func BuildQualityConsensus(seqA, seqB *obiseq.BioSequence, path []int) (*obiseq. ) consSeq.SetQualities(bufferQA) + if statOnMismatch && len(mismatches) > 0 { + consSeq.SetAttribute("pairing_mismatches", mismatches) + } + return consSeq, match } + +// func BuildCigar(seqA, seqB *obiseq.BioSequence, path []int) string { + +// lp := len(path) + +// posA := 0 +// posB := 0 +// oldStep := ' ' +// kstep := ' ' +// for i := 0; i < lp; i++ { +// step := path[i] + +// if step < 0 { +// kstep='D' +// posA -= step +// } +// if step > 0 { +// kstep='I' +// posB += step +// } + +// i++ +// step = path[i] +// if step > 0 { +// kstep = 'M' +// posA += step +// posB += step +// } +// } +// } diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index 847c808..b6b498e 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -275,7 +275,7 @@ func _Pcr(seq ApatSequence, (opt.MaxLength() == 0 || length <= opt.MaxLength()) { amplicon, _ := sequence.Subsequence(fm[1], rm[0], opt.pointer.circular) annot := amplicon.Annotations() - goutils.CopyMap(annot, sequence.Annotations()) + goutils.MustFillMap(annot, sequence.Annotations()) annot["forward_primer"] = forward.String() match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular) @@ -350,7 +350,7 @@ func _Pcr(seq ApatSequence, amplicon = amplicon.ReverseComplement(true) annot := amplicon.Annotations() - goutils.CopyMap(annot, sequence.Annotations()) + goutils.MustFillMap(annot, sequence.Annotations()) annot["forward_primer"] = forward.String() match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular) diff --git a/pkg/obingslibrary/match.go b/pkg/obingslibrary/match.go index d188308..1ee2590 100644 --- a/pkg/obingslibrary/match.go +++ b/pkg/obingslibrary/match.go @@ -53,6 +53,7 @@ func (library *NGSLibrary) Match(sequence *obiseq.BioSequence) *DemultiplexMatch func (library *NGSLibrary) ExtractBarcode(sequence *obiseq.BioSequence, inplace bool) (*obiseq.BioSequence, error) { match := library.Match(sequence) + return match.ExtractBarcode(sequence, inplace) } @@ -245,6 +246,7 @@ func (match *DemultiplexMatch) ExtractBarcode(sequence *obiseq.BioSequence, inpl } } + if !match.IsDirect { sequence.ReverseComplement(true) } diff --git a/pkg/obingslibrary/worker.go b/pkg/obingslibrary/worker.go index dd6ecf0..515f9df 100644 --- a/pkg/obingslibrary/worker.go +++ b/pkg/obingslibrary/worker.go @@ -182,5 +182,3 @@ func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary, return worker } - - diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index 8392374..0855e21 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -257,6 +257,20 @@ func (s *BioSequence) GetBool(key string) (bool, bool) { return val, ok } +func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) { + var val map[string]int + var err error + + v, ok := s.GetAttribute(key) + + if ok { + val, err = goutils.InterfaceToIntMap(v) + ok = err == nil + } + + return val, ok +} + // Returning the MD5 hash of the sequence. func (s *BioSequence) MD5() [16]byte { return md5.Sum(s.sequence) diff --git a/pkg/obiseq/pool.go b/pkg/obiseq/pool.go index 5db9c95..6bbcb09 100644 --- a/pkg/obiseq/pool.go +++ b/pkg/obiseq/pool.go @@ -41,7 +41,7 @@ func GetSlice(capacity int) []byte { func CopySlice(src []byte) []byte { sl := GetSlice(len(src)) - copy(sl,src) + copy(sl, src) return sl } @@ -69,7 +69,7 @@ func GetAnnotation(values ...Annotation) Annotation { } if len(values) > 0 { - goutils.CopyMap(a, values[0]) + goutils.MustFillMap(a, values[0]) } return a diff --git a/pkg/obiseq/revcomp.go b/pkg/obiseq/revcomp.go index e44b987..190d1c0 100644 --- a/pkg/obiseq/revcomp.go +++ b/pkg/obiseq/revcomp.go @@ -1,7 +1,7 @@ package obiseq // ".ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]" -var __revcmp_dna__ = []byte(".TVGHEFCDIJMLKNOPQYSAABWXRZ#!][") +var _revcmpDNA = []byte(".TVGHEFCDIJMLKNOPQYSAABWXRZ#!][") // Reverse complements a DNA sequence. // If the inplace parametter is true, that operation is done in place. @@ -15,8 +15,11 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence { for i, j := sequence.Length()-1, 0; i >= j; i-- { - s[j], s[i] = __revcmp_dna__[s[i]&31]|(s[i]&0x20), - __revcmp_dna__[s[j]&31]|(s[j]&0x20) + // ASCII code & 31 -> builds an index in witch (a|A) is 1 + // ASCII code & 0x20 -> Foce lower case + + s[j], s[i] = _revcmpDNA[s[i]&31]|(s[i]&0x20), + _revcmpDNA[s[j]&31]|(s[j]&0x20) j++ } @@ -28,5 +31,36 @@ func (sequence *BioSequence) ReverseComplement(inplace bool) *BioSequence { } } + return sequence._revcmpMutation() +} + +func (sequence *BioSequence) _revcmpMutation() *BioSequence { + + rev := func(m string) string { + b := []byte(m) + + // Echange and reverse complement symboles + b[1], b[9] = _revcmpDNA[b[9]&31]|(b[9]&0x20), + _revcmpDNA[b[1]&31]|(b[1]&0x20) + + // Exchange sequencing scores + b[3], b[4], b[11], b[12] = b[11], b[12], b[3], b[4] + + return string(b) + } + + lseq := sequence.Length() + + mut, ok := sequence.GetIntMap("pairing_mismatches") + if ok && len(mut) > 0 { + cmut := make(map[string]int, len(mut)) + + for m, p := range mut { + cmut[rev(m)] = lseq - p + 1 + } + + sequence.SetAttribute("pairing_mismatches", cmut) + } + return sequence } diff --git a/pkg/obiseq/subseq.go b/pkg/obiseq/subseq.go index f7bd68d..4ba1265 100644 --- a/pkg/obiseq/subseq.go +++ b/pkg/obiseq/subseq.go @@ -43,9 +43,30 @@ func (sequence *BioSequence) Subsequence(from, to int, circular bool) (*BioSeque } - if len(sequence.Annotations()) > 0 { + if sequence.HasAnnotation() { newSeq.annotations = GetAnnotation(sequence.Annotations()) } - return newSeq, nil + return newSeq._subseqMutation(from), nil +} + +func (sequence *BioSequence) _subseqMutation(shift int) *BioSequence { + + lseq := sequence.Length() + + mut, ok := sequence.GetIntMap("pairing_mismatches") + if ok && len(mut) > 0 { + cmut := make(map[string]int, len(mut)) + + for m, p := range mut { + if p < lseq { + cmut[m] = p - shift + } + } + + sequence.SetAttribute("pairing_mismatches", cmut) + } + + return sequence + } diff --git a/pkg/obitools/obipairing/pairing.go b/pkg/obitools/obipairing/pairing.go index a061256..aad96f7 100644 --- a/pkg/obitools/obipairing/pairing.go +++ b/pkg/obitools/obipairing/pairing.go @@ -110,7 +110,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence, arenaAlign obialign.PEAlignArena) *obiseq.BioSequence { score, path := obialign.PEAlign(seqA, seqB, gap, delta, arenaAlign) - cons, match := obialign.BuildQualityConsensus(seqA, seqB, path) + cons, match := obialign.BuildQualityConsensus(seqA, seqB, path,true) left := path[0] right := 0