mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
debug of obimultiplex
Former-commit-id: 1cf276840feb9d6135d96bd1bf63627d3085ae6e
This commit is contained in:
@ -52,10 +52,12 @@ func _samenuc(a, b byte) bool {
|
||||
// Returns:
|
||||
// - The score of the LCS.
|
||||
// - The length of the LCS.
|
||||
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int) {
|
||||
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int, int) {
|
||||
|
||||
lA := len(bA)
|
||||
lB := len(bB)
|
||||
end := 0
|
||||
pend := 0
|
||||
|
||||
// Ensure that A is the longest
|
||||
if lA < lB {
|
||||
@ -75,7 +77,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
|
||||
// The difference of length is larger the maximum allowed errors
|
||||
if delta > maxError {
|
||||
return -1, -1
|
||||
return -1, -1, -1
|
||||
}
|
||||
|
||||
// // BEGINNING OF DEBUG CODE //
|
||||
@ -121,7 +123,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
}
|
||||
previous[extra+even-1] = encodeValues(0, 1, false) // Initialise cell 1,0
|
||||
|
||||
N := lB + ((delta) >> 1)
|
||||
N := lB + (delta >> 1)
|
||||
|
||||
// log.Debugln("N = ", N, " delta = ", delta, " extra = ", extra, " maxError = ", maxError)
|
||||
|
||||
@ -147,6 +149,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
switch {
|
||||
|
||||
case i == 0:
|
||||
// We are setting the gaps of the first row
|
||||
Sup = _notavail
|
||||
Sdiag = _notavail
|
||||
if endgapfree {
|
||||
@ -155,10 +158,12 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
Sleft = encodeValues(0, j, false)
|
||||
}
|
||||
case j == 0:
|
||||
// We are setting the gaps of the first column
|
||||
Sup = encodeValues(0, i, false)
|
||||
Sdiag = _notavail
|
||||
Sleft = _notavail
|
||||
default:
|
||||
// We are in the middle of the matrix
|
||||
Sdiag = _incpath(previous[x])
|
||||
if _samenuc(bA[j-1], bB[i-1]) {
|
||||
Sdiag = _incscore(Sdiag)
|
||||
@ -187,6 +192,15 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
score = Sup
|
||||
default:
|
||||
score = Sleft
|
||||
|
||||
if endgapfree && i == lB {
|
||||
_, l, o := decodeValues(Sleft)
|
||||
|
||||
if l > pend && o == false {
|
||||
pend = l
|
||||
end = j
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// I supose the bug was here
|
||||
@ -271,6 +285,14 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
score = Sup
|
||||
default:
|
||||
score = Sleft
|
||||
if endgapfree && i == lB {
|
||||
_, l, o := decodeValues(Sleft)
|
||||
|
||||
if l > pend && o == false {
|
||||
pend = l
|
||||
end = j
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// I supose the bug was here
|
||||
@ -331,10 +353,10 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
// // end OF DEBUG CODE //
|
||||
|
||||
if o {
|
||||
return -1, -1
|
||||
return -1, -1, -1
|
||||
}
|
||||
|
||||
return s, l
|
||||
return s, l, end
|
||||
}
|
||||
|
||||
// FastLCSEGFScore calculates the score of the longest common subsequence between two bio sequences in end-gap-free mode.
|
||||
@ -356,7 +378,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
||||
// Returns:
|
||||
// - The score of the longest common subsequence.
|
||||
// - The length of the shortest alignment corresponding to the LCS.
|
||||
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int, int) {
|
||||
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, true, buffer)
|
||||
}
|
||||
|
||||
@ -380,5 +402,7 @@ func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uin
|
||||
// - The score of the longest common subsequence.
|
||||
// - The length of the shortest alignment corresponding to the LCS.
|
||||
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
|
||||
score, alilen, _ := FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
|
||||
|
||||
return score, alilen
|
||||
}
|
||||
|
104
pkg/obialign/locatepattern.go
Normal file
104
pkg/obialign/locatepattern.go
Normal file
@ -0,0 +1,104 @@
|
||||
package obialign
|
||||
|
||||
func buffIndex(i, j, width int) int {
|
||||
return (i+1)*width + (j + 1)
|
||||
}
|
||||
func LocatePattern(pattern, sequence []byte) (int, int, int) {
|
||||
width := len(pattern) + 1
|
||||
buffsize := (len(pattern) + 1) * (len(sequence) + 1)
|
||||
buffer := make([]int, buffsize)
|
||||
path := make([]int, buffsize)
|
||||
|
||||
for j := 0; j < len(pattern); j++ {
|
||||
idx := buffIndex(-1, j, width)
|
||||
buffer[idx] = -j - 1
|
||||
path[idx] = -1
|
||||
}
|
||||
|
||||
for i := -1; i < len(sequence); i++ {
|
||||
idx := buffIndex(i, -1, width)
|
||||
buffer[idx] = 0
|
||||
path[idx] = +1
|
||||
}
|
||||
|
||||
path[0] = 0
|
||||
jmax := len(pattern) - 1
|
||||
for i := 0; i < len(sequence); i++ {
|
||||
for j := 0; j < jmax; j++ {
|
||||
match := -1
|
||||
if _samenuc(pattern[j], sequence[i]) {
|
||||
match = 0
|
||||
}
|
||||
|
||||
idx := buffIndex(i, j, width)
|
||||
|
||||
diag := buffer[buffIndex(i-1, j-1, width)] + match
|
||||
left := buffer[buffIndex(i, j-1, width)] - 1
|
||||
up := buffer[buffIndex(i-1, j, width)] - 1
|
||||
|
||||
score := max(diag, up, left)
|
||||
|
||||
buffer[idx] = score
|
||||
|
||||
switch {
|
||||
case score == left:
|
||||
path[idx] = -1
|
||||
case score == diag:
|
||||
path[idx] = 0
|
||||
case score == up:
|
||||
path[idx] = +1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < len(sequence); i++ {
|
||||
idx := buffIndex(i, jmax, width)
|
||||
|
||||
match := -1
|
||||
if _samenuc(pattern[jmax], sequence[i]) {
|
||||
match = 0
|
||||
}
|
||||
|
||||
diag := buffer[buffIndex(i-1, jmax-1, width)] + match
|
||||
left := buffer[buffIndex(i, jmax-1, width)] - 1
|
||||
up := buffer[buffIndex(i-1, jmax, width)]
|
||||
|
||||
score := max(diag, up, left)
|
||||
buffer[idx] = score
|
||||
switch {
|
||||
case score == left:
|
||||
path[idx] = -1
|
||||
case score == diag:
|
||||
path[idx] = 0
|
||||
case score == up:
|
||||
path[idx] = +1
|
||||
}
|
||||
}
|
||||
|
||||
i := len(sequence) - 1
|
||||
j := jmax
|
||||
end := -1
|
||||
lali := 0
|
||||
for i > -1 && j > 0 {
|
||||
lali++
|
||||
switch path[buffIndex(i, j, width)] {
|
||||
case 0:
|
||||
j--
|
||||
if end == -1 {
|
||||
end = i
|
||||
lali = 1
|
||||
}
|
||||
i--
|
||||
case 1:
|
||||
i--
|
||||
case -1:
|
||||
j--
|
||||
if end == -1 {
|
||||
end = i
|
||||
lali = 1
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)]
|
||||
}
|
@ -317,9 +317,6 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int
|
||||
func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) {
|
||||
res := pattern.FindAllIndex(sequence, begin, length)
|
||||
|
||||
sbuffer := [(int(C.MAX_PAT_LEN) + int(C.MAX_PAT_ERR) + 1) * (int(C.MAX_PAT_LEN) + 1)]uint64{}
|
||||
buffer := sbuffer[:]
|
||||
|
||||
if len(res) == 0 {
|
||||
matched = false
|
||||
return
|
||||
@ -358,18 +355,51 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
|
||||
best[0], nerr, int(pattern.pointer.pointer.patlen),
|
||||
sequence.Len(), start, end)
|
||||
|
||||
score, lali := obialign.FastLCSEGFScoreByte(
|
||||
frg,
|
||||
(*cpattern)[0:int(pattern.pointer.pointer.patlen)],
|
||||
nerr, true, &buffer)
|
||||
from, to, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
|
||||
|
||||
nerr = lali - score
|
||||
start = best[0] + int(pattern.pointer.pointer.patlen) - lali
|
||||
end = start + lali
|
||||
log.Debugln("results", score, lali, start, nerr)
|
||||
// olderr := m[2]
|
||||
|
||||
nerr = score
|
||||
start = start + from
|
||||
end = start + to
|
||||
log.Debugln("results", score, start, nerr)
|
||||
return
|
||||
}
|
||||
|
||||
func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
||||
res := pattern.FindAllIndex(sequence, begin, length)
|
||||
filtered := make([][3]int, 0, len(res))
|
||||
|
||||
best := [3]int{0, 0, 10000}
|
||||
for _, m := range res {
|
||||
// log.Warnf("Current : Begin : %d End : %d Err : %d", m[0], m[1], m[2])
|
||||
// log.Warnf("Best : Begin : %d End : %d Err : %d", best[0], best[1], best[2])
|
||||
if (m[0] - m[2]) < best[1]+best[2] {
|
||||
// match are overlapping
|
||||
// log.Warnln("overlap")
|
||||
if m[2] < best[2] {
|
||||
// log.Warnln("better")
|
||||
best = m
|
||||
}
|
||||
} else if best[2] < 10000 {
|
||||
// no overlap
|
||||
// log.Warnln("no overlap")
|
||||
filtered = append(filtered, best)
|
||||
best = m
|
||||
}
|
||||
}
|
||||
|
||||
if best[2] < 10000 {
|
||||
filtered = append(filtered, best)
|
||||
}
|
||||
|
||||
if len(filtered) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return filtered
|
||||
}
|
||||
|
||||
// tagaacaggctcctctag
|
||||
// func AllocatedApaSequences() int {
|
||||
// return int(_AllocatedApaSequences)
|
||||
@ -392,18 +422,15 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
|
||||
// the match. Following the GO convention the end position is not included in the
|
||||
// match. The third value indicates the number of error detected for this occurrence.
|
||||
func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
||||
res := pattern.FindAllIndex(sequence, begin, length)
|
||||
|
||||
sbuffer := [(int(C.MAX_PAT_LEN) + int(C.MAX_PAT_ERR) + 1) * (int(C.MAX_PAT_LEN) + 1)]uint64{}
|
||||
buffer := sbuffer[:]
|
||||
res := pattern.FilterBestMatch(sequence, begin, length)
|
||||
|
||||
for _, m := range res {
|
||||
// Recompute the start and end position of the match
|
||||
// when the pattern allows for indels
|
||||
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
|
||||
start := m[0] - m[2]
|
||||
end := m[0] + int(pattern.pointer.pointer.patlen) + m[2]
|
||||
start = max(start, 0)
|
||||
end := start + int(pattern.pointer.pointer.patlen) + 2*m[2]
|
||||
end = min(end, sequence.Len())
|
||||
// 1 << 30 = 1,073,741,824 = 1Gb
|
||||
// It's a virtual array mapping the sequence to the pattern
|
||||
@ -412,20 +439,18 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
|
||||
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
|
||||
frg := sequence.pointer.reference.Sequence()[start:end]
|
||||
|
||||
score, lali := obialign.FastLCSEGFScoreByte(
|
||||
frg,
|
||||
(*cpattern)[0:int(pattern.pointer.pointer.patlen)],
|
||||
m[2], true, &buffer)
|
||||
begin, end, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
|
||||
|
||||
// log.Debugf("seq[%d] : %s %d, %d", i, sequence.pointer.reference.Id(), score, lali)
|
||||
|
||||
m[2] = lali - score
|
||||
m[0] = m[0] + int(pattern.pointer.pointer.patlen) - lali
|
||||
m[1] = m[0] + lali
|
||||
// olderr := m[2]
|
||||
m[2] = score
|
||||
m[0] = start + begin
|
||||
m[1] = start + end
|
||||
// log.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score,
|
||||
// frg, (*cpattern)[0:int(pattern.pointer.pointer.patlen)], sequence.pointer.reference.Sequence()[m[0]:m[1]])
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("All matches : %v", res)
|
||||
// log.Debugf("All matches : %v", res)
|
||||
|
||||
return res
|
||||
}
|
||||
|
@ -400,6 +400,58 @@ var library_parameter = map[string]func(library *obingslibrary.NGSLibrary, value
|
||||
log.Fatalln("Invalid value for @reverse_primer_error parameter")
|
||||
}
|
||||
},
|
||||
"tag_indels": func(library *obingslibrary.NGSLibrary, values ...string) {
|
||||
switch len(values) {
|
||||
case 0:
|
||||
log.Fatalln("Missing value for @tag_indels parameter")
|
||||
case 1:
|
||||
indels, err := strconv.Atoi(values[0])
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid value %s for @tag_indels parameter", values[0])
|
||||
}
|
||||
library.SetTagIndels(indels)
|
||||
case 2:
|
||||
indels, err := strconv.Atoi(values[1])
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid value %s for @tag_indels parameter", values[1])
|
||||
}
|
||||
|
||||
library.SetTagIndelsFor(values[0], indels)
|
||||
}
|
||||
},
|
||||
|
||||
"forward_tag_indels": func(library *obingslibrary.NGSLibrary, values ...string) {
|
||||
switch len(values) {
|
||||
case 0:
|
||||
log.Fatalln("Missing value for @forward_tag_indels parameter")
|
||||
case 1:
|
||||
indels, err := strconv.Atoi(values[0])
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid value %s for @forward_tag_indels parameter", values[0])
|
||||
}
|
||||
library.SetForwardTagIndels(indels)
|
||||
default:
|
||||
log.Fatalln("Invalid value for @forward_tag_indels parameter")
|
||||
}
|
||||
},
|
||||
"reverse_tag_indels": func(library *obingslibrary.NGSLibrary, values ...string) {
|
||||
switch len(values) {
|
||||
case 0:
|
||||
log.Fatalln("Missing value for @reverse_tag_indels parameter")
|
||||
case 1:
|
||||
indels, err := strconv.Atoi(values[0])
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Invalid value %s for @reverse_tag_indels parameter", values[0])
|
||||
}
|
||||
library.SetReverseTagIndels(indels)
|
||||
default:
|
||||
log.Fatalln("Invalid value for @reverse_tag_indels parameter")
|
||||
}
|
||||
},
|
||||
"indels": func(library *obingslibrary.NGSLibrary, values ...string) {
|
||||
switch len(values) {
|
||||
case 0:
|
||||
|
@ -27,6 +27,8 @@ type Marker struct {
|
||||
Reverse_matching string
|
||||
Forward_tag_delimiter byte
|
||||
Reverse_tag_delimiter byte
|
||||
Forward_tag_indels int
|
||||
Reverse_tag_indels int
|
||||
samples map[TagPair]*PCR
|
||||
}
|
||||
|
||||
@ -335,6 +337,19 @@ func (marker *Marker) SetTagDelimiter(delim byte) {
|
||||
marker.SetReverseTagDelimiter(delim)
|
||||
}
|
||||
|
||||
func (marker *Marker) SetForwardTagIndels(indels int) {
|
||||
marker.Forward_tag_indels = indels
|
||||
}
|
||||
|
||||
func (marker *Marker) SetReverseTagIndels(indels int) {
|
||||
marker.Reverse_tag_indels = indels
|
||||
}
|
||||
|
||||
func (marker *Marker) SetTagIndels(indels int) {
|
||||
marker.SetForwardTagIndels(indels)
|
||||
marker.SetReverseTagIndels(indels)
|
||||
}
|
||||
|
||||
func (marker *Marker) SetForwardAllowedMismatches(allowed_mismatches int) {
|
||||
marker.Forward_error = allowed_mismatches
|
||||
}
|
||||
|
@ -110,6 +110,63 @@ func lookForTag(seq string, delimiter byte) string {
|
||||
return seq[begin:end]
|
||||
}
|
||||
|
||||
func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int) string {
|
||||
// log.Info("lookForRescueTag")
|
||||
|
||||
i := len(seq) - 1
|
||||
|
||||
// Skip the border part not corresponding to the tag delimiter
|
||||
for i >= 0 && seq[i] != delimiter {
|
||||
i--
|
||||
}
|
||||
|
||||
delimlen := 0
|
||||
for i >= 0 && seq[i] == delimiter {
|
||||
i--
|
||||
delimlen++
|
||||
}
|
||||
|
||||
if obiutils.Abs(delimlen-border) > indel {
|
||||
return ""
|
||||
}
|
||||
|
||||
// log.Infof("delimlen: %d", delimlen)
|
||||
|
||||
end := i + 1
|
||||
|
||||
i -= taglength - indel
|
||||
|
||||
for i >= 0 && seq[i] != delimiter {
|
||||
i--
|
||||
}
|
||||
|
||||
delimlen = 0
|
||||
for i >= 0 && seq[i] == delimiter {
|
||||
i--
|
||||
delimlen++
|
||||
}
|
||||
|
||||
if obiutils.Abs(delimlen-border) > indel {
|
||||
return ""
|
||||
}
|
||||
|
||||
delimlen = min(delimlen, border)
|
||||
|
||||
// log.Infof("delimlen: %d", delimlen)
|
||||
|
||||
begin := i + delimlen + 1
|
||||
|
||||
if i < 0 || obiutils.Abs(taglength-end+begin) > indel {
|
||||
return ""
|
||||
}
|
||||
|
||||
// log.Infof("begin: %d, end: %d", begin, end)
|
||||
// log.Infof("seq: %s", seq)
|
||||
// log.Infof("seq[begin:end]: %s", seq[begin:end])
|
||||
|
||||
return seq[begin:end]
|
||||
}
|
||||
|
||||
func (marker *Marker) beginDelimitedTagExtractor(
|
||||
sequence *obiseq.BioSequence,
|
||||
begin int,
|
||||
@ -131,6 +188,33 @@ func (marker *Marker) beginDelimitedTagExtractor(
|
||||
return lookForTag(sequence.String()[fb:begin], delimiter)
|
||||
}
|
||||
|
||||
func (marker *Marker) beginRescueTagExtractor(
|
||||
sequence *obiseq.BioSequence,
|
||||
begin int,
|
||||
forward bool) string {
|
||||
|
||||
delimiter := marker.Forward_tag_delimiter
|
||||
border := marker.Forward_spacer
|
||||
taglength := marker.Forward_tag_length
|
||||
delta := marker.Forward_tag_indels
|
||||
|
||||
if !forward {
|
||||
taglength = marker.Reverse_tag_length
|
||||
border = marker.Reverse_spacer
|
||||
delimiter = marker.Reverse_tag_delimiter
|
||||
delta = marker.Reverse_tag_indels
|
||||
}
|
||||
|
||||
frglength := border + taglength
|
||||
|
||||
fb := begin - frglength*2
|
||||
if fb < 0 {
|
||||
fb = 0
|
||||
}
|
||||
|
||||
return lookForRescueTag(sequence.String()[fb:begin], delimiter, taglength, border, delta)
|
||||
}
|
||||
|
||||
func (marker *Marker) beginFixedTagExtractor(
|
||||
sequence *obiseq.BioSequence,
|
||||
begin int,
|
||||
@ -183,6 +267,43 @@ func (marker *Marker) endDelimitedTagExtractor(
|
||||
return lookForTag(tag_seq.ReverseComplement(true).String(), delimiter)
|
||||
}
|
||||
|
||||
func (marker *Marker) endRescueTagExtractor(
|
||||
sequence *obiseq.BioSequence,
|
||||
end int,
|
||||
forward bool) string {
|
||||
|
||||
delimiter := marker.Reverse_tag_delimiter
|
||||
border := marker.Reverse_spacer
|
||||
taglength := marker.Reverse_tag_length
|
||||
delta := marker.Reverse_tag_indels
|
||||
|
||||
if !forward {
|
||||
taglength = marker.Forward_tag_length
|
||||
border = marker.Forward_spacer
|
||||
delimiter = marker.Forward_tag_delimiter
|
||||
delta = marker.Forward_tag_indels
|
||||
}
|
||||
|
||||
frglength := border + taglength
|
||||
|
||||
fb := end + frglength*2
|
||||
|
||||
if fb > sequence.Len() {
|
||||
fb = sequence.Len()
|
||||
}
|
||||
|
||||
if end >= fb {
|
||||
return ""
|
||||
}
|
||||
|
||||
tag_seq, err := sequence.Subsequence(end, fb, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Cannot extract sequence tag : %v", err)
|
||||
}
|
||||
|
||||
return lookForRescueTag(tag_seq.ReverseComplement(true).String(), delimiter, taglength, border, delta)
|
||||
}
|
||||
func (marker *Marker) endFixedTagExtractor(
|
||||
sequence *obiseq.BioSequence,
|
||||
end int,
|
||||
@ -218,13 +339,23 @@ func (marker *Marker) beginTagExtractor(
|
||||
if marker.Forward_tag_delimiter == 0 {
|
||||
return marker.beginFixedTagExtractor(sequence, begin, forward)
|
||||
} else {
|
||||
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
|
||||
if marker.Forward_tag_indels == 0 {
|
||||
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
|
||||
} else {
|
||||
// log.Warn("Rescue tag for forward primers")
|
||||
return marker.beginRescueTagExtractor(sequence, begin, forward)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if marker.Reverse_tag_delimiter == 0 {
|
||||
return marker.beginFixedTagExtractor(sequence, begin, forward)
|
||||
} else {
|
||||
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
|
||||
if marker.Reverse_tag_indels == 0 {
|
||||
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
|
||||
} else {
|
||||
// log.Warn("Rescue tag for reverse/complement primers")
|
||||
return marker.beginRescueTagExtractor(sequence, begin, forward)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -237,13 +368,23 @@ func (marker *Marker) endTagExtractor(
|
||||
if marker.Reverse_tag_delimiter == 0 {
|
||||
return marker.endFixedTagExtractor(sequence, end, forward)
|
||||
} else {
|
||||
return marker.endDelimitedTagExtractor(sequence, end, forward)
|
||||
if marker.Reverse_tag_indels == 0 {
|
||||
return marker.endDelimitedTagExtractor(sequence, end, forward)
|
||||
} else {
|
||||
// log.Warn("Rescue tag for reverse primers")
|
||||
return marker.endRescueTagExtractor(sequence, end, forward)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if marker.Forward_tag_delimiter == 0 {
|
||||
return marker.endFixedTagExtractor(sequence, end, forward)
|
||||
} else {
|
||||
return marker.endDelimitedTagExtractor(sequence, end, forward)
|
||||
if marker.Forward_tag_indels == 0 {
|
||||
return marker.endDelimitedTagExtractor(sequence, end, forward)
|
||||
} else {
|
||||
// log.Warn("Rescue tag for forward/complement primers")
|
||||
return marker.endRescueTagExtractor(sequence, end, forward)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -264,6 +405,10 @@ func (library *NGSLibrary) TagExtractor(
|
||||
forward_tag := marker.beginTagExtractor(sequence, begin, forward)
|
||||
reverse_tag := marker.endTagExtractor(sequence, end, forward)
|
||||
|
||||
if !forward {
|
||||
forward_tag, reverse_tag = reverse_tag, forward_tag
|
||||
}
|
||||
|
||||
if forward_tag != "" {
|
||||
annotations["obimultiplex_forward_tag"] = forward_tag
|
||||
}
|
||||
@ -347,14 +492,12 @@ func (library *NGSLibrary) SampleIdentifier(
|
||||
case "hamming":
|
||||
forward, fdistance = marker.ClosestForwardTag(tags.Forward, Hamming)
|
||||
annotations["obimultiplex_forward_matching"] = "hamming"
|
||||
annotations["obimultiplex_forward_tag_dist"] = fdistance
|
||||
annotations["obimultiplex_forward_proposed_tag"] = forward
|
||||
case "indel":
|
||||
forward, fdistance = marker.ClosestForwardTag(tags.Forward, Levenshtein)
|
||||
annotations["obimultiplex_forward_matching"] = "indel"
|
||||
annotations["obimultiplex_forward_tag_dist"] = fdistance
|
||||
annotations["obimultiplex_forward_proposed_tag"] = forward
|
||||
}
|
||||
annotations["obimultiplex_forward_tag_dist"] = fdistance
|
||||
annotations["obimultiplex_forward_proposed_tag"] = forward
|
||||
}
|
||||
|
||||
if tags.Reverse != "" {
|
||||
@ -366,14 +509,12 @@ func (library *NGSLibrary) SampleIdentifier(
|
||||
case "hamming":
|
||||
reverse, rdistance = marker.ClosestReverseTag(tags.Reverse, Hamming)
|
||||
annotations["obimultiplex_reverse_matching"] = "hamming"
|
||||
annotations["obimultiplex_reverse_tag_dist"] = rdistance
|
||||
annotations["obimultiplex_reverse_proposed_tag"] = reverse
|
||||
case "indel":
|
||||
reverse, rdistance = marker.ClosestReverseTag(tags.Reverse, Levenshtein)
|
||||
annotations["obimultiplex_reverse_matching"] = "indel"
|
||||
annotations["obimultiplex_reverse_tag_dist"] = rdistance
|
||||
annotations["obimultiplex_reverse_proposed_tag"] = reverse
|
||||
}
|
||||
annotations["obimultiplex_reverse_tag_dist"] = rdistance
|
||||
annotations["obimultiplex_reverse_proposed_tag"] = reverse
|
||||
}
|
||||
|
||||
proposed := TagPair{forward, reverse}
|
||||
@ -492,18 +633,22 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
|
||||
annotations["obimultiplex_reverse_primer"] = primerseqs[from.Marker].Reverse
|
||||
|
||||
if from.Forward {
|
||||
// With have a barcode in the orientation from the forward primer to the reverse
|
||||
|
||||
// Try to extract the forward primer match
|
||||
if from.Begin < 0 || from.End > sequence.Len() {
|
||||
barcode_error = true
|
||||
annotations["obimultiplex_error"] = "Cannot extract forward match"
|
||||
annotations["obimultiplex_error"] = "Cannot extract forward primer match"
|
||||
} else {
|
||||
annotations["obimultiplex_forward_match"] = sequence.String()[from.Begin:from.End]
|
||||
}
|
||||
|
||||
// Try to extract the reverse primer match
|
||||
sseq, err := sequence.Subsequence(match.Begin, match.End, false)
|
||||
|
||||
if err != nil {
|
||||
barcode_error = true
|
||||
annotations["obimultiplex_error"] = "Cannot extract reverse match"
|
||||
annotations["obimultiplex_error"] = "Cannot extract reverse primer match"
|
||||
} else {
|
||||
annotations["obimultiplex_reverse_match"] = sseq.ReverseComplement(true).String()
|
||||
}
|
||||
@ -511,18 +656,22 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
|
||||
annotations["obimultiplex_forward_error"] = from.Mismatches
|
||||
annotations["obimultiplex_reverse_error"] = match.Mismatches
|
||||
} else {
|
||||
// With have a barcode in the orientation from the reverse primer to the forward
|
||||
|
||||
// Try to extract the reverse primer match
|
||||
if from.Begin < 0 || from.End > sequence.Len() {
|
||||
barcode_error = true
|
||||
annotations["obimultiplex_error"] = "Cannot extract reverse match"
|
||||
annotations["obimultiplex_error"] = "Cannot extract reverse primer match"
|
||||
} else {
|
||||
annotations["obimultiplex_reverse_match"] = sequence.String()[from.Begin:from.End]
|
||||
}
|
||||
|
||||
// Try to extract the forward primer match
|
||||
sseq, err := sequence.Subsequence(match.Begin, match.End, false)
|
||||
|
||||
if err != nil {
|
||||
barcode_error = true
|
||||
annotations["obimultiplex_error"] = "Cannot extract forward match"
|
||||
annotations["obimultiplex_error"] = "Cannot extract forward primer match"
|
||||
} else {
|
||||
annotations["obimultiplex_forward_match"] = sseq.ReverseComplement(true).String()
|
||||
}
|
||||
@ -531,6 +680,7 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
|
||||
annotations["obimultiplex_forward_error"] = match.Mismatches
|
||||
}
|
||||
|
||||
// if we were able to extract the primer matches we can extract the barcode
|
||||
if !barcode_error {
|
||||
tags := library.TagExtractor(sequence, annotations, primerseqs[from.Marker], from.Begin, match.End, from.Forward)
|
||||
|
||||
@ -540,6 +690,8 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
|
||||
return nil, fmt.Errorf("%s [%s] : Cannot extract barcode %d : %v", sequence.Id(), sequence.Source(), q, err)
|
||||
}
|
||||
|
||||
annotations["obimultiplex_direction"] = map[bool]string{true: "forward", false: "reverse"}[from.Forward]
|
||||
|
||||
if !match.Forward {
|
||||
barcode = barcode.ReverseComplement(true)
|
||||
}
|
||||
|
@ -57,6 +57,8 @@ func (library *NGSLibrary) GetMarker(forward, reverse string) (*Marker, bool) {
|
||||
Reverse_matching: "strict",
|
||||
Forward_allows_indels: false,
|
||||
Reverse_allows_indels: false,
|
||||
Forward_tag_indels: 0,
|
||||
Reverse_tag_indels: 0,
|
||||
samples: make(map[TagPair]*PCR, 1000),
|
||||
}
|
||||
|
||||
@ -122,6 +124,40 @@ func (library *NGSLibrary) SetTagSpacerFor(primer string, spacer int) {
|
||||
}
|
||||
}
|
||||
|
||||
func (library *NGSLibrary) SetForwardTagIndels(indels int) {
|
||||
for _, marker := range library.Markers {
|
||||
marker.SetForwardTagIndels(indels)
|
||||
}
|
||||
}
|
||||
|
||||
func (library *NGSLibrary) SetReverseTagIndels(indels int) {
|
||||
for _, marker := range library.Markers {
|
||||
marker.SetReverseTagIndels(indels)
|
||||
}
|
||||
}
|
||||
|
||||
func (library *NGSLibrary) SetTagIndels(indels int) {
|
||||
library.SetForwardTagIndels(indels)
|
||||
library.SetReverseTagIndels(indels)
|
||||
}
|
||||
|
||||
func (library *NGSLibrary) SetTagIndelsFor(primer string, indels int) {
|
||||
primer = strings.ToLower(primer)
|
||||
primers, ok := library.Primers[primer]
|
||||
|
||||
if ok {
|
||||
marker, ok := library.Markers[primers]
|
||||
|
||||
if ok {
|
||||
if primer == primers.Forward {
|
||||
marker.SetForwardTagIndels(indels)
|
||||
} else {
|
||||
marker.SetReverseTagIndels(indels)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (library *NGSLibrary) SetForwardTagDelimiter(delim byte) {
|
||||
for _, marker := range library.Markers {
|
||||
marker.SetForwardTagDelimiter(delim)
|
||||
|
@ -7,7 +7,7 @@ import (
|
||||
// TODO: The version number is extracted from git. This induces that the version
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// commited
|
||||
var _Commit = "5611fb1"
|
||||
var _Commit = "efad98d"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
10
pkg/obiutils/abs.go
Normal file
10
pkg/obiutils/abs.go
Normal file
@ -0,0 +1,10 @@
|
||||
package obiutils
|
||||
|
||||
import "golang.org/x/exp/constraints"
|
||||
|
||||
func Abs[T constraints.Signed](x T) T {
|
||||
if x < 0 {
|
||||
return -x
|
||||
}
|
||||
return x
|
||||
}
|
Reference in New Issue
Block a user