mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
debug of obimultiplex
Former-commit-id: 1cf276840feb9d6135d96bd1bf63627d3085ae6e
This commit is contained in:
@ -52,10 +52,12 @@ func _samenuc(a, b byte) bool {
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - The score of the LCS.
|
// - The score of the LCS.
|
||||||
// - The length of the LCS.
|
// - The length of the LCS.
|
||||||
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int) {
|
func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int, int) {
|
||||||
|
|
||||||
lA := len(bA)
|
lA := len(bA)
|
||||||
lB := len(bB)
|
lB := len(bB)
|
||||||
|
end := 0
|
||||||
|
pend := 0
|
||||||
|
|
||||||
// Ensure that A is the longest
|
// Ensure that A is the longest
|
||||||
if lA < lB {
|
if lA < lB {
|
||||||
@ -75,7 +77,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
|||||||
|
|
||||||
// The difference of length is larger the maximum allowed errors
|
// The difference of length is larger the maximum allowed errors
|
||||||
if delta > maxError {
|
if delta > maxError {
|
||||||
return -1, -1
|
return -1, -1, -1
|
||||||
}
|
}
|
||||||
|
|
||||||
// // BEGINNING OF DEBUG CODE //
|
// // BEGINNING OF DEBUG CODE //
|
||||||
@ -121,7 +123,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
|||||||
}
|
}
|
||||||
previous[extra+even-1] = encodeValues(0, 1, false) // Initialise cell 1,0
|
previous[extra+even-1] = encodeValues(0, 1, false) // Initialise cell 1,0
|
||||||
|
|
||||||
N := lB + ((delta) >> 1)
|
N := lB + (delta >> 1)
|
||||||
|
|
||||||
// log.Debugln("N = ", N, " delta = ", delta, " extra = ", extra, " maxError = ", maxError)
|
// log.Debugln("N = ", N, " delta = ", delta, " extra = ", extra, " maxError = ", maxError)
|
||||||
|
|
||||||
@ -147,6 +149,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
|||||||
switch {
|
switch {
|
||||||
|
|
||||||
case i == 0:
|
case i == 0:
|
||||||
|
// We are setting the gaps of the first row
|
||||||
Sup = _notavail
|
Sup = _notavail
|
||||||
Sdiag = _notavail
|
Sdiag = _notavail
|
||||||
if endgapfree {
|
if endgapfree {
|
||||||
@ -155,10 +158,12 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
|||||||
Sleft = encodeValues(0, j, false)
|
Sleft = encodeValues(0, j, false)
|
||||||
}
|
}
|
||||||
case j == 0:
|
case j == 0:
|
||||||
|
// We are setting the gaps of the first column
|
||||||
Sup = encodeValues(0, i, false)
|
Sup = encodeValues(0, i, false)
|
||||||
Sdiag = _notavail
|
Sdiag = _notavail
|
||||||
Sleft = _notavail
|
Sleft = _notavail
|
||||||
default:
|
default:
|
||||||
|
// We are in the middle of the matrix
|
||||||
Sdiag = _incpath(previous[x])
|
Sdiag = _incpath(previous[x])
|
||||||
if _samenuc(bA[j-1], bB[i-1]) {
|
if _samenuc(bA[j-1], bB[i-1]) {
|
||||||
Sdiag = _incscore(Sdiag)
|
Sdiag = _incscore(Sdiag)
|
||||||
@ -187,6 +192,15 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
|||||||
score = Sup
|
score = Sup
|
||||||
default:
|
default:
|
||||||
score = Sleft
|
score = Sleft
|
||||||
|
|
||||||
|
if endgapfree && i == lB {
|
||||||
|
_, l, o := decodeValues(Sleft)
|
||||||
|
|
||||||
|
if l > pend && o == false {
|
||||||
|
pend = l
|
||||||
|
end = j
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// I supose the bug was here
|
// I supose the bug was here
|
||||||
@ -271,6 +285,14 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
|||||||
score = Sup
|
score = Sup
|
||||||
default:
|
default:
|
||||||
score = Sleft
|
score = Sleft
|
||||||
|
if endgapfree && i == lB {
|
||||||
|
_, l, o := decodeValues(Sleft)
|
||||||
|
|
||||||
|
if l > pend && o == false {
|
||||||
|
pend = l
|
||||||
|
end = j
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// I supose the bug was here
|
// I supose the bug was here
|
||||||
@ -331,10 +353,10 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
|||||||
// // end OF DEBUG CODE //
|
// // end OF DEBUG CODE //
|
||||||
|
|
||||||
if o {
|
if o {
|
||||||
return -1, -1
|
return -1, -1, -1
|
||||||
}
|
}
|
||||||
|
|
||||||
return s, l
|
return s, l, end
|
||||||
}
|
}
|
||||||
|
|
||||||
// FastLCSEGFScore calculates the score of the longest common subsequence between two bio sequences in end-gap-free mode.
|
// FastLCSEGFScore calculates the score of the longest common subsequence between two bio sequences in end-gap-free mode.
|
||||||
@ -356,7 +378,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[
|
|||||||
// Returns:
|
// Returns:
|
||||||
// - The score of the longest common subsequence.
|
// - The score of the longest common subsequence.
|
||||||
// - The length of the shortest alignment corresponding to the LCS.
|
// - The length of the shortest alignment corresponding to the LCS.
|
||||||
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int, int) {
|
||||||
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, true, buffer)
|
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, true, buffer)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -380,5 +402,7 @@ func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uin
|
|||||||
// - The score of the longest common subsequence.
|
// - The score of the longest common subsequence.
|
||||||
// - The length of the shortest alignment corresponding to the LCS.
|
// - The length of the shortest alignment corresponding to the LCS.
|
||||||
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) {
|
||||||
return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
|
score, alilen, _ := FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer)
|
||||||
|
|
||||||
|
return score, alilen
|
||||||
}
|
}
|
||||||
|
104
pkg/obialign/locatepattern.go
Normal file
104
pkg/obialign/locatepattern.go
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
package obialign
|
||||||
|
|
||||||
|
func buffIndex(i, j, width int) int {
|
||||||
|
return (i+1)*width + (j + 1)
|
||||||
|
}
|
||||||
|
func LocatePattern(pattern, sequence []byte) (int, int, int) {
|
||||||
|
width := len(pattern) + 1
|
||||||
|
buffsize := (len(pattern) + 1) * (len(sequence) + 1)
|
||||||
|
buffer := make([]int, buffsize)
|
||||||
|
path := make([]int, buffsize)
|
||||||
|
|
||||||
|
for j := 0; j < len(pattern); j++ {
|
||||||
|
idx := buffIndex(-1, j, width)
|
||||||
|
buffer[idx] = -j - 1
|
||||||
|
path[idx] = -1
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := -1; i < len(sequence); i++ {
|
||||||
|
idx := buffIndex(i, -1, width)
|
||||||
|
buffer[idx] = 0
|
||||||
|
path[idx] = +1
|
||||||
|
}
|
||||||
|
|
||||||
|
path[0] = 0
|
||||||
|
jmax := len(pattern) - 1
|
||||||
|
for i := 0; i < len(sequence); i++ {
|
||||||
|
for j := 0; j < jmax; j++ {
|
||||||
|
match := -1
|
||||||
|
if _samenuc(pattern[j], sequence[i]) {
|
||||||
|
match = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
idx := buffIndex(i, j, width)
|
||||||
|
|
||||||
|
diag := buffer[buffIndex(i-1, j-1, width)] + match
|
||||||
|
left := buffer[buffIndex(i, j-1, width)] - 1
|
||||||
|
up := buffer[buffIndex(i-1, j, width)] - 1
|
||||||
|
|
||||||
|
score := max(diag, up, left)
|
||||||
|
|
||||||
|
buffer[idx] = score
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case score == left:
|
||||||
|
path[idx] = -1
|
||||||
|
case score == diag:
|
||||||
|
path[idx] = 0
|
||||||
|
case score == up:
|
||||||
|
path[idx] = +1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < len(sequence); i++ {
|
||||||
|
idx := buffIndex(i, jmax, width)
|
||||||
|
|
||||||
|
match := -1
|
||||||
|
if _samenuc(pattern[jmax], sequence[i]) {
|
||||||
|
match = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
diag := buffer[buffIndex(i-1, jmax-1, width)] + match
|
||||||
|
left := buffer[buffIndex(i, jmax-1, width)] - 1
|
||||||
|
up := buffer[buffIndex(i-1, jmax, width)]
|
||||||
|
|
||||||
|
score := max(diag, up, left)
|
||||||
|
buffer[idx] = score
|
||||||
|
switch {
|
||||||
|
case score == left:
|
||||||
|
path[idx] = -1
|
||||||
|
case score == diag:
|
||||||
|
path[idx] = 0
|
||||||
|
case score == up:
|
||||||
|
path[idx] = +1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
i := len(sequence) - 1
|
||||||
|
j := jmax
|
||||||
|
end := -1
|
||||||
|
lali := 0
|
||||||
|
for i > -1 && j > 0 {
|
||||||
|
lali++
|
||||||
|
switch path[buffIndex(i, j, width)] {
|
||||||
|
case 0:
|
||||||
|
j--
|
||||||
|
if end == -1 {
|
||||||
|
end = i
|
||||||
|
lali = 1
|
||||||
|
}
|
||||||
|
i--
|
||||||
|
case 1:
|
||||||
|
i--
|
||||||
|
case -1:
|
||||||
|
j--
|
||||||
|
if end == -1 {
|
||||||
|
end = i
|
||||||
|
lali = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)]
|
||||||
|
}
|
@ -317,9 +317,6 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int
|
|||||||
func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) {
|
func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) {
|
||||||
res := pattern.FindAllIndex(sequence, begin, length)
|
res := pattern.FindAllIndex(sequence, begin, length)
|
||||||
|
|
||||||
sbuffer := [(int(C.MAX_PAT_LEN) + int(C.MAX_PAT_ERR) + 1) * (int(C.MAX_PAT_LEN) + 1)]uint64{}
|
|
||||||
buffer := sbuffer[:]
|
|
||||||
|
|
||||||
if len(res) == 0 {
|
if len(res) == 0 {
|
||||||
matched = false
|
matched = false
|
||||||
return
|
return
|
||||||
@ -358,18 +355,51 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
|
|||||||
best[0], nerr, int(pattern.pointer.pointer.patlen),
|
best[0], nerr, int(pattern.pointer.pointer.patlen),
|
||||||
sequence.Len(), start, end)
|
sequence.Len(), start, end)
|
||||||
|
|
||||||
score, lali := obialign.FastLCSEGFScoreByte(
|
from, to, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
|
||||||
frg,
|
|
||||||
(*cpattern)[0:int(pattern.pointer.pointer.patlen)],
|
|
||||||
nerr, true, &buffer)
|
|
||||||
|
|
||||||
nerr = lali - score
|
// olderr := m[2]
|
||||||
start = best[0] + int(pattern.pointer.pointer.patlen) - lali
|
|
||||||
end = start + lali
|
nerr = score
|
||||||
log.Debugln("results", score, lali, start, nerr)
|
start = start + from
|
||||||
|
end = start + to
|
||||||
|
log.Debugln("results", score, start, nerr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
||||||
|
res := pattern.FindAllIndex(sequence, begin, length)
|
||||||
|
filtered := make([][3]int, 0, len(res))
|
||||||
|
|
||||||
|
best := [3]int{0, 0, 10000}
|
||||||
|
for _, m := range res {
|
||||||
|
// log.Warnf("Current : Begin : %d End : %d Err : %d", m[0], m[1], m[2])
|
||||||
|
// log.Warnf("Best : Begin : %d End : %d Err : %d", best[0], best[1], best[2])
|
||||||
|
if (m[0] - m[2]) < best[1]+best[2] {
|
||||||
|
// match are overlapping
|
||||||
|
// log.Warnln("overlap")
|
||||||
|
if m[2] < best[2] {
|
||||||
|
// log.Warnln("better")
|
||||||
|
best = m
|
||||||
|
}
|
||||||
|
} else if best[2] < 10000 {
|
||||||
|
// no overlap
|
||||||
|
// log.Warnln("no overlap")
|
||||||
|
filtered = append(filtered, best)
|
||||||
|
best = m
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if best[2] < 10000 {
|
||||||
|
filtered = append(filtered, best)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(filtered) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return filtered
|
||||||
|
}
|
||||||
|
|
||||||
// tagaacaggctcctctag
|
// tagaacaggctcctctag
|
||||||
// func AllocatedApaSequences() int {
|
// func AllocatedApaSequences() int {
|
||||||
// return int(_AllocatedApaSequences)
|
// return int(_AllocatedApaSequences)
|
||||||
@ -392,18 +422,15 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (
|
|||||||
// the match. Following the GO convention the end position is not included in the
|
// the match. Following the GO convention the end position is not included in the
|
||||||
// match. The third value indicates the number of error detected for this occurrence.
|
// match. The third value indicates the number of error detected for this occurrence.
|
||||||
func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) {
|
||||||
res := pattern.FindAllIndex(sequence, begin, length)
|
res := pattern.FilterBestMatch(sequence, begin, length)
|
||||||
|
|
||||||
sbuffer := [(int(C.MAX_PAT_LEN) + int(C.MAX_PAT_ERR) + 1) * (int(C.MAX_PAT_LEN) + 1)]uint64{}
|
|
||||||
buffer := sbuffer[:]
|
|
||||||
|
|
||||||
for _, m := range res {
|
for _, m := range res {
|
||||||
// Recompute the start and end position of the match
|
// Recompute the start and end position of the match
|
||||||
// when the pattern allows for indels
|
// when the pattern allows for indels
|
||||||
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
|
if m[2] > 0 && pattern.pointer.pointer.hasIndel {
|
||||||
start := m[0] - m[2]
|
start := m[0] - m[2]
|
||||||
end := m[0] + int(pattern.pointer.pointer.patlen) + m[2]
|
|
||||||
start = max(start, 0)
|
start = max(start, 0)
|
||||||
|
end := start + int(pattern.pointer.pointer.patlen) + 2*m[2]
|
||||||
end = min(end, sequence.Len())
|
end = min(end, sequence.Len())
|
||||||
// 1 << 30 = 1,073,741,824 = 1Gb
|
// 1 << 30 = 1,073,741,824 = 1Gb
|
||||||
// It's a virtual array mapping the sequence to the pattern
|
// It's a virtual array mapping the sequence to the pattern
|
||||||
@ -412,20 +439,18 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int)
|
|||||||
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
|
cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat))
|
||||||
frg := sequence.pointer.reference.Sequence()[start:end]
|
frg := sequence.pointer.reference.Sequence()[start:end]
|
||||||
|
|
||||||
score, lali := obialign.FastLCSEGFScoreByte(
|
begin, end, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg)
|
||||||
frg,
|
|
||||||
(*cpattern)[0:int(pattern.pointer.pointer.patlen)],
|
|
||||||
m[2], true, &buffer)
|
|
||||||
|
|
||||||
// log.Debugf("seq[%d] : %s %d, %d", i, sequence.pointer.reference.Id(), score, lali)
|
// olderr := m[2]
|
||||||
|
m[2] = score
|
||||||
m[2] = lali - score
|
m[0] = start + begin
|
||||||
m[0] = m[0] + int(pattern.pointer.pointer.patlen) - lali
|
m[1] = start + end
|
||||||
m[1] = m[0] + lali
|
// log.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score,
|
||||||
|
// frg, (*cpattern)[0:int(pattern.pointer.pointer.patlen)], sequence.pointer.reference.Sequence()[m[0]:m[1]])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debugf("All matches : %v", res)
|
// log.Debugf("All matches : %v", res)
|
||||||
|
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
@ -400,6 +400,58 @@ var library_parameter = map[string]func(library *obingslibrary.NGSLibrary, value
|
|||||||
log.Fatalln("Invalid value for @reverse_primer_error parameter")
|
log.Fatalln("Invalid value for @reverse_primer_error parameter")
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"tag_indels": func(library *obingslibrary.NGSLibrary, values ...string) {
|
||||||
|
switch len(values) {
|
||||||
|
case 0:
|
||||||
|
log.Fatalln("Missing value for @tag_indels parameter")
|
||||||
|
case 1:
|
||||||
|
indels, err := strconv.Atoi(values[0])
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Invalid value %s for @tag_indels parameter", values[0])
|
||||||
|
}
|
||||||
|
library.SetTagIndels(indels)
|
||||||
|
case 2:
|
||||||
|
indels, err := strconv.Atoi(values[1])
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Invalid value %s for @tag_indels parameter", values[1])
|
||||||
|
}
|
||||||
|
|
||||||
|
library.SetTagIndelsFor(values[0], indels)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
"forward_tag_indels": func(library *obingslibrary.NGSLibrary, values ...string) {
|
||||||
|
switch len(values) {
|
||||||
|
case 0:
|
||||||
|
log.Fatalln("Missing value for @forward_tag_indels parameter")
|
||||||
|
case 1:
|
||||||
|
indels, err := strconv.Atoi(values[0])
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Invalid value %s for @forward_tag_indels parameter", values[0])
|
||||||
|
}
|
||||||
|
library.SetForwardTagIndels(indels)
|
||||||
|
default:
|
||||||
|
log.Fatalln("Invalid value for @forward_tag_indels parameter")
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"reverse_tag_indels": func(library *obingslibrary.NGSLibrary, values ...string) {
|
||||||
|
switch len(values) {
|
||||||
|
case 0:
|
||||||
|
log.Fatalln("Missing value for @reverse_tag_indels parameter")
|
||||||
|
case 1:
|
||||||
|
indels, err := strconv.Atoi(values[0])
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Invalid value %s for @reverse_tag_indels parameter", values[0])
|
||||||
|
}
|
||||||
|
library.SetReverseTagIndels(indels)
|
||||||
|
default:
|
||||||
|
log.Fatalln("Invalid value for @reverse_tag_indels parameter")
|
||||||
|
}
|
||||||
|
},
|
||||||
"indels": func(library *obingslibrary.NGSLibrary, values ...string) {
|
"indels": func(library *obingslibrary.NGSLibrary, values ...string) {
|
||||||
switch len(values) {
|
switch len(values) {
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -27,6 +27,8 @@ type Marker struct {
|
|||||||
Reverse_matching string
|
Reverse_matching string
|
||||||
Forward_tag_delimiter byte
|
Forward_tag_delimiter byte
|
||||||
Reverse_tag_delimiter byte
|
Reverse_tag_delimiter byte
|
||||||
|
Forward_tag_indels int
|
||||||
|
Reverse_tag_indels int
|
||||||
samples map[TagPair]*PCR
|
samples map[TagPair]*PCR
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -335,6 +337,19 @@ func (marker *Marker) SetTagDelimiter(delim byte) {
|
|||||||
marker.SetReverseTagDelimiter(delim)
|
marker.SetReverseTagDelimiter(delim)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (marker *Marker) SetForwardTagIndels(indels int) {
|
||||||
|
marker.Forward_tag_indels = indels
|
||||||
|
}
|
||||||
|
|
||||||
|
func (marker *Marker) SetReverseTagIndels(indels int) {
|
||||||
|
marker.Reverse_tag_indels = indels
|
||||||
|
}
|
||||||
|
|
||||||
|
func (marker *Marker) SetTagIndels(indels int) {
|
||||||
|
marker.SetForwardTagIndels(indels)
|
||||||
|
marker.SetReverseTagIndels(indels)
|
||||||
|
}
|
||||||
|
|
||||||
func (marker *Marker) SetForwardAllowedMismatches(allowed_mismatches int) {
|
func (marker *Marker) SetForwardAllowedMismatches(allowed_mismatches int) {
|
||||||
marker.Forward_error = allowed_mismatches
|
marker.Forward_error = allowed_mismatches
|
||||||
}
|
}
|
||||||
|
@ -110,6 +110,63 @@ func lookForTag(seq string, delimiter byte) string {
|
|||||||
return seq[begin:end]
|
return seq[begin:end]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int) string {
|
||||||
|
// log.Info("lookForRescueTag")
|
||||||
|
|
||||||
|
i := len(seq) - 1
|
||||||
|
|
||||||
|
// Skip the border part not corresponding to the tag delimiter
|
||||||
|
for i >= 0 && seq[i] != delimiter {
|
||||||
|
i--
|
||||||
|
}
|
||||||
|
|
||||||
|
delimlen := 0
|
||||||
|
for i >= 0 && seq[i] == delimiter {
|
||||||
|
i--
|
||||||
|
delimlen++
|
||||||
|
}
|
||||||
|
|
||||||
|
if obiutils.Abs(delimlen-border) > indel {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// log.Infof("delimlen: %d", delimlen)
|
||||||
|
|
||||||
|
end := i + 1
|
||||||
|
|
||||||
|
i -= taglength - indel
|
||||||
|
|
||||||
|
for i >= 0 && seq[i] != delimiter {
|
||||||
|
i--
|
||||||
|
}
|
||||||
|
|
||||||
|
delimlen = 0
|
||||||
|
for i >= 0 && seq[i] == delimiter {
|
||||||
|
i--
|
||||||
|
delimlen++
|
||||||
|
}
|
||||||
|
|
||||||
|
if obiutils.Abs(delimlen-border) > indel {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
delimlen = min(delimlen, border)
|
||||||
|
|
||||||
|
// log.Infof("delimlen: %d", delimlen)
|
||||||
|
|
||||||
|
begin := i + delimlen + 1
|
||||||
|
|
||||||
|
if i < 0 || obiutils.Abs(taglength-end+begin) > indel {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// log.Infof("begin: %d, end: %d", begin, end)
|
||||||
|
// log.Infof("seq: %s", seq)
|
||||||
|
// log.Infof("seq[begin:end]: %s", seq[begin:end])
|
||||||
|
|
||||||
|
return seq[begin:end]
|
||||||
|
}
|
||||||
|
|
||||||
func (marker *Marker) beginDelimitedTagExtractor(
|
func (marker *Marker) beginDelimitedTagExtractor(
|
||||||
sequence *obiseq.BioSequence,
|
sequence *obiseq.BioSequence,
|
||||||
begin int,
|
begin int,
|
||||||
@ -131,6 +188,33 @@ func (marker *Marker) beginDelimitedTagExtractor(
|
|||||||
return lookForTag(sequence.String()[fb:begin], delimiter)
|
return lookForTag(sequence.String()[fb:begin], delimiter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (marker *Marker) beginRescueTagExtractor(
|
||||||
|
sequence *obiseq.BioSequence,
|
||||||
|
begin int,
|
||||||
|
forward bool) string {
|
||||||
|
|
||||||
|
delimiter := marker.Forward_tag_delimiter
|
||||||
|
border := marker.Forward_spacer
|
||||||
|
taglength := marker.Forward_tag_length
|
||||||
|
delta := marker.Forward_tag_indels
|
||||||
|
|
||||||
|
if !forward {
|
||||||
|
taglength = marker.Reverse_tag_length
|
||||||
|
border = marker.Reverse_spacer
|
||||||
|
delimiter = marker.Reverse_tag_delimiter
|
||||||
|
delta = marker.Reverse_tag_indels
|
||||||
|
}
|
||||||
|
|
||||||
|
frglength := border + taglength
|
||||||
|
|
||||||
|
fb := begin - frglength*2
|
||||||
|
if fb < 0 {
|
||||||
|
fb = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
return lookForRescueTag(sequence.String()[fb:begin], delimiter, taglength, border, delta)
|
||||||
|
}
|
||||||
|
|
||||||
func (marker *Marker) beginFixedTagExtractor(
|
func (marker *Marker) beginFixedTagExtractor(
|
||||||
sequence *obiseq.BioSequence,
|
sequence *obiseq.BioSequence,
|
||||||
begin int,
|
begin int,
|
||||||
@ -183,6 +267,43 @@ func (marker *Marker) endDelimitedTagExtractor(
|
|||||||
return lookForTag(tag_seq.ReverseComplement(true).String(), delimiter)
|
return lookForTag(tag_seq.ReverseComplement(true).String(), delimiter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (marker *Marker) endRescueTagExtractor(
|
||||||
|
sequence *obiseq.BioSequence,
|
||||||
|
end int,
|
||||||
|
forward bool) string {
|
||||||
|
|
||||||
|
delimiter := marker.Reverse_tag_delimiter
|
||||||
|
border := marker.Reverse_spacer
|
||||||
|
taglength := marker.Reverse_tag_length
|
||||||
|
delta := marker.Reverse_tag_indels
|
||||||
|
|
||||||
|
if !forward {
|
||||||
|
taglength = marker.Forward_tag_length
|
||||||
|
border = marker.Forward_spacer
|
||||||
|
delimiter = marker.Forward_tag_delimiter
|
||||||
|
delta = marker.Forward_tag_indels
|
||||||
|
}
|
||||||
|
|
||||||
|
frglength := border + taglength
|
||||||
|
|
||||||
|
fb := end + frglength*2
|
||||||
|
|
||||||
|
if fb > sequence.Len() {
|
||||||
|
fb = sequence.Len()
|
||||||
|
}
|
||||||
|
|
||||||
|
if end >= fb {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
tag_seq, err := sequence.Subsequence(end, fb, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Cannot extract sequence tag : %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return lookForRescueTag(tag_seq.ReverseComplement(true).String(), delimiter, taglength, border, delta)
|
||||||
|
}
|
||||||
func (marker *Marker) endFixedTagExtractor(
|
func (marker *Marker) endFixedTagExtractor(
|
||||||
sequence *obiseq.BioSequence,
|
sequence *obiseq.BioSequence,
|
||||||
end int,
|
end int,
|
||||||
@ -218,13 +339,23 @@ func (marker *Marker) beginTagExtractor(
|
|||||||
if marker.Forward_tag_delimiter == 0 {
|
if marker.Forward_tag_delimiter == 0 {
|
||||||
return marker.beginFixedTagExtractor(sequence, begin, forward)
|
return marker.beginFixedTagExtractor(sequence, begin, forward)
|
||||||
} else {
|
} else {
|
||||||
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
|
if marker.Forward_tag_indels == 0 {
|
||||||
|
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
|
||||||
|
} else {
|
||||||
|
// log.Warn("Rescue tag for forward primers")
|
||||||
|
return marker.beginRescueTagExtractor(sequence, begin, forward)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if marker.Reverse_tag_delimiter == 0 {
|
if marker.Reverse_tag_delimiter == 0 {
|
||||||
return marker.beginFixedTagExtractor(sequence, begin, forward)
|
return marker.beginFixedTagExtractor(sequence, begin, forward)
|
||||||
} else {
|
} else {
|
||||||
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
|
if marker.Reverse_tag_indels == 0 {
|
||||||
|
return marker.beginDelimitedTagExtractor(sequence, begin, forward)
|
||||||
|
} else {
|
||||||
|
// log.Warn("Rescue tag for reverse/complement primers")
|
||||||
|
return marker.beginRescueTagExtractor(sequence, begin, forward)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -237,13 +368,23 @@ func (marker *Marker) endTagExtractor(
|
|||||||
if marker.Reverse_tag_delimiter == 0 {
|
if marker.Reverse_tag_delimiter == 0 {
|
||||||
return marker.endFixedTagExtractor(sequence, end, forward)
|
return marker.endFixedTagExtractor(sequence, end, forward)
|
||||||
} else {
|
} else {
|
||||||
return marker.endDelimitedTagExtractor(sequence, end, forward)
|
if marker.Reverse_tag_indels == 0 {
|
||||||
|
return marker.endDelimitedTagExtractor(sequence, end, forward)
|
||||||
|
} else {
|
||||||
|
// log.Warn("Rescue tag for reverse primers")
|
||||||
|
return marker.endRescueTagExtractor(sequence, end, forward)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if marker.Forward_tag_delimiter == 0 {
|
if marker.Forward_tag_delimiter == 0 {
|
||||||
return marker.endFixedTagExtractor(sequence, end, forward)
|
return marker.endFixedTagExtractor(sequence, end, forward)
|
||||||
} else {
|
} else {
|
||||||
return marker.endDelimitedTagExtractor(sequence, end, forward)
|
if marker.Forward_tag_indels == 0 {
|
||||||
|
return marker.endDelimitedTagExtractor(sequence, end, forward)
|
||||||
|
} else {
|
||||||
|
// log.Warn("Rescue tag for forward/complement primers")
|
||||||
|
return marker.endRescueTagExtractor(sequence, end, forward)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -264,6 +405,10 @@ func (library *NGSLibrary) TagExtractor(
|
|||||||
forward_tag := marker.beginTagExtractor(sequence, begin, forward)
|
forward_tag := marker.beginTagExtractor(sequence, begin, forward)
|
||||||
reverse_tag := marker.endTagExtractor(sequence, end, forward)
|
reverse_tag := marker.endTagExtractor(sequence, end, forward)
|
||||||
|
|
||||||
|
if !forward {
|
||||||
|
forward_tag, reverse_tag = reverse_tag, forward_tag
|
||||||
|
}
|
||||||
|
|
||||||
if forward_tag != "" {
|
if forward_tag != "" {
|
||||||
annotations["obimultiplex_forward_tag"] = forward_tag
|
annotations["obimultiplex_forward_tag"] = forward_tag
|
||||||
}
|
}
|
||||||
@ -347,14 +492,12 @@ func (library *NGSLibrary) SampleIdentifier(
|
|||||||
case "hamming":
|
case "hamming":
|
||||||
forward, fdistance = marker.ClosestForwardTag(tags.Forward, Hamming)
|
forward, fdistance = marker.ClosestForwardTag(tags.Forward, Hamming)
|
||||||
annotations["obimultiplex_forward_matching"] = "hamming"
|
annotations["obimultiplex_forward_matching"] = "hamming"
|
||||||
annotations["obimultiplex_forward_tag_dist"] = fdistance
|
|
||||||
annotations["obimultiplex_forward_proposed_tag"] = forward
|
|
||||||
case "indel":
|
case "indel":
|
||||||
forward, fdistance = marker.ClosestForwardTag(tags.Forward, Levenshtein)
|
forward, fdistance = marker.ClosestForwardTag(tags.Forward, Levenshtein)
|
||||||
annotations["obimultiplex_forward_matching"] = "indel"
|
annotations["obimultiplex_forward_matching"] = "indel"
|
||||||
annotations["obimultiplex_forward_tag_dist"] = fdistance
|
|
||||||
annotations["obimultiplex_forward_proposed_tag"] = forward
|
|
||||||
}
|
}
|
||||||
|
annotations["obimultiplex_forward_tag_dist"] = fdistance
|
||||||
|
annotations["obimultiplex_forward_proposed_tag"] = forward
|
||||||
}
|
}
|
||||||
|
|
||||||
if tags.Reverse != "" {
|
if tags.Reverse != "" {
|
||||||
@ -366,14 +509,12 @@ func (library *NGSLibrary) SampleIdentifier(
|
|||||||
case "hamming":
|
case "hamming":
|
||||||
reverse, rdistance = marker.ClosestReverseTag(tags.Reverse, Hamming)
|
reverse, rdistance = marker.ClosestReverseTag(tags.Reverse, Hamming)
|
||||||
annotations["obimultiplex_reverse_matching"] = "hamming"
|
annotations["obimultiplex_reverse_matching"] = "hamming"
|
||||||
annotations["obimultiplex_reverse_tag_dist"] = rdistance
|
|
||||||
annotations["obimultiplex_reverse_proposed_tag"] = reverse
|
|
||||||
case "indel":
|
case "indel":
|
||||||
reverse, rdistance = marker.ClosestReverseTag(tags.Reverse, Levenshtein)
|
reverse, rdistance = marker.ClosestReverseTag(tags.Reverse, Levenshtein)
|
||||||
annotations["obimultiplex_reverse_matching"] = "indel"
|
annotations["obimultiplex_reverse_matching"] = "indel"
|
||||||
annotations["obimultiplex_reverse_tag_dist"] = rdistance
|
|
||||||
annotations["obimultiplex_reverse_proposed_tag"] = reverse
|
|
||||||
}
|
}
|
||||||
|
annotations["obimultiplex_reverse_tag_dist"] = rdistance
|
||||||
|
annotations["obimultiplex_reverse_proposed_tag"] = reverse
|
||||||
}
|
}
|
||||||
|
|
||||||
proposed := TagPair{forward, reverse}
|
proposed := TagPair{forward, reverse}
|
||||||
@ -492,18 +633,22 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
|
|||||||
annotations["obimultiplex_reverse_primer"] = primerseqs[from.Marker].Reverse
|
annotations["obimultiplex_reverse_primer"] = primerseqs[from.Marker].Reverse
|
||||||
|
|
||||||
if from.Forward {
|
if from.Forward {
|
||||||
|
// With have a barcode in the orientation from the forward primer to the reverse
|
||||||
|
|
||||||
|
// Try to extract the forward primer match
|
||||||
if from.Begin < 0 || from.End > sequence.Len() {
|
if from.Begin < 0 || from.End > sequence.Len() {
|
||||||
barcode_error = true
|
barcode_error = true
|
||||||
annotations["obimultiplex_error"] = "Cannot extract forward match"
|
annotations["obimultiplex_error"] = "Cannot extract forward primer match"
|
||||||
} else {
|
} else {
|
||||||
annotations["obimultiplex_forward_match"] = sequence.String()[from.Begin:from.End]
|
annotations["obimultiplex_forward_match"] = sequence.String()[from.Begin:from.End]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try to extract the reverse primer match
|
||||||
sseq, err := sequence.Subsequence(match.Begin, match.End, false)
|
sseq, err := sequence.Subsequence(match.Begin, match.End, false)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
barcode_error = true
|
barcode_error = true
|
||||||
annotations["obimultiplex_error"] = "Cannot extract reverse match"
|
annotations["obimultiplex_error"] = "Cannot extract reverse primer match"
|
||||||
} else {
|
} else {
|
||||||
annotations["obimultiplex_reverse_match"] = sseq.ReverseComplement(true).String()
|
annotations["obimultiplex_reverse_match"] = sseq.ReverseComplement(true).String()
|
||||||
}
|
}
|
||||||
@ -511,18 +656,22 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
|
|||||||
annotations["obimultiplex_forward_error"] = from.Mismatches
|
annotations["obimultiplex_forward_error"] = from.Mismatches
|
||||||
annotations["obimultiplex_reverse_error"] = match.Mismatches
|
annotations["obimultiplex_reverse_error"] = match.Mismatches
|
||||||
} else {
|
} else {
|
||||||
|
// With have a barcode in the orientation from the reverse primer to the forward
|
||||||
|
|
||||||
|
// Try to extract the reverse primer match
|
||||||
if from.Begin < 0 || from.End > sequence.Len() {
|
if from.Begin < 0 || from.End > sequence.Len() {
|
||||||
barcode_error = true
|
barcode_error = true
|
||||||
annotations["obimultiplex_error"] = "Cannot extract reverse match"
|
annotations["obimultiplex_error"] = "Cannot extract reverse primer match"
|
||||||
} else {
|
} else {
|
||||||
annotations["obimultiplex_reverse_match"] = sequence.String()[from.Begin:from.End]
|
annotations["obimultiplex_reverse_match"] = sequence.String()[from.Begin:from.End]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try to extract the forward primer match
|
||||||
sseq, err := sequence.Subsequence(match.Begin, match.End, false)
|
sseq, err := sequence.Subsequence(match.Begin, match.End, false)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
barcode_error = true
|
barcode_error = true
|
||||||
annotations["obimultiplex_error"] = "Cannot extract forward match"
|
annotations["obimultiplex_error"] = "Cannot extract forward primer match"
|
||||||
} else {
|
} else {
|
||||||
annotations["obimultiplex_forward_match"] = sseq.ReverseComplement(true).String()
|
annotations["obimultiplex_forward_match"] = sseq.ReverseComplement(true).String()
|
||||||
}
|
}
|
||||||
@ -531,6 +680,7 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
|
|||||||
annotations["obimultiplex_forward_error"] = match.Mismatches
|
annotations["obimultiplex_forward_error"] = match.Mismatches
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if we were able to extract the primer matches we can extract the barcode
|
||||||
if !barcode_error {
|
if !barcode_error {
|
||||||
tags := library.TagExtractor(sequence, annotations, primerseqs[from.Marker], from.Begin, match.End, from.Forward)
|
tags := library.TagExtractor(sequence, annotations, primerseqs[from.Marker], from.Begin, match.End, from.Forward)
|
||||||
|
|
||||||
@ -540,6 +690,8 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob
|
|||||||
return nil, fmt.Errorf("%s [%s] : Cannot extract barcode %d : %v", sequence.Id(), sequence.Source(), q, err)
|
return nil, fmt.Errorf("%s [%s] : Cannot extract barcode %d : %v", sequence.Id(), sequence.Source(), q, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
annotations["obimultiplex_direction"] = map[bool]string{true: "forward", false: "reverse"}[from.Forward]
|
||||||
|
|
||||||
if !match.Forward {
|
if !match.Forward {
|
||||||
barcode = barcode.ReverseComplement(true)
|
barcode = barcode.ReverseComplement(true)
|
||||||
}
|
}
|
||||||
|
@ -57,6 +57,8 @@ func (library *NGSLibrary) GetMarker(forward, reverse string) (*Marker, bool) {
|
|||||||
Reverse_matching: "strict",
|
Reverse_matching: "strict",
|
||||||
Forward_allows_indels: false,
|
Forward_allows_indels: false,
|
||||||
Reverse_allows_indels: false,
|
Reverse_allows_indels: false,
|
||||||
|
Forward_tag_indels: 0,
|
||||||
|
Reverse_tag_indels: 0,
|
||||||
samples: make(map[TagPair]*PCR, 1000),
|
samples: make(map[TagPair]*PCR, 1000),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -122,6 +124,40 @@ func (library *NGSLibrary) SetTagSpacerFor(primer string, spacer int) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (library *NGSLibrary) SetForwardTagIndels(indels int) {
|
||||||
|
for _, marker := range library.Markers {
|
||||||
|
marker.SetForwardTagIndels(indels)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (library *NGSLibrary) SetReverseTagIndels(indels int) {
|
||||||
|
for _, marker := range library.Markers {
|
||||||
|
marker.SetReverseTagIndels(indels)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (library *NGSLibrary) SetTagIndels(indels int) {
|
||||||
|
library.SetForwardTagIndels(indels)
|
||||||
|
library.SetReverseTagIndels(indels)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (library *NGSLibrary) SetTagIndelsFor(primer string, indels int) {
|
||||||
|
primer = strings.ToLower(primer)
|
||||||
|
primers, ok := library.Primers[primer]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
marker, ok := library.Markers[primers]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
if primer == primers.Forward {
|
||||||
|
marker.SetForwardTagIndels(indels)
|
||||||
|
} else {
|
||||||
|
marker.SetReverseTagIndels(indels)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (library *NGSLibrary) SetForwardTagDelimiter(delim byte) {
|
func (library *NGSLibrary) SetForwardTagDelimiter(delim byte) {
|
||||||
for _, marker := range library.Markers {
|
for _, marker := range library.Markers {
|
||||||
marker.SetForwardTagDelimiter(delim)
|
marker.SetForwardTagDelimiter(delim)
|
||||||
|
@ -7,7 +7,7 @@ import (
|
|||||||
// TODO: The version number is extracted from git. This induces that the version
|
// TODO: The version number is extracted from git. This induces that the version
|
||||||
// corresponds to the last commit, and not the one when the file will be
|
// corresponds to the last commit, and not the one when the file will be
|
||||||
// commited
|
// commited
|
||||||
var _Commit = "5611fb1"
|
var _Commit = "efad98d"
|
||||||
var _Version = "Release 4.2.0"
|
var _Version = "Release 4.2.0"
|
||||||
|
|
||||||
// Version returns the version of the obitools package.
|
// Version returns the version of the obitools package.
|
||||||
|
10
pkg/obiutils/abs.go
Normal file
10
pkg/obiutils/abs.go
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
package obiutils
|
||||||
|
|
||||||
|
import "golang.org/x/exp/constraints"
|
||||||
|
|
||||||
|
func Abs[T constraints.Signed](x T) T {
|
||||||
|
if x < 0 {
|
||||||
|
return -x
|
||||||
|
}
|
||||||
|
return x
|
||||||
|
}
|
Reference in New Issue
Block a user