diff --git a/pkg/obialign/fastlcsegf.go b/pkg/obialign/fastlcsegf.go index c80414d..d59ac46 100644 --- a/pkg/obialign/fastlcsegf.go +++ b/pkg/obialign/fastlcsegf.go @@ -52,10 +52,12 @@ func _samenuc(a, b byte) bool { // Returns: // - The score of the LCS. // - The length of the LCS. -func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int) { +func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[]uint64) (int, int, int) { lA := len(bA) lB := len(bB) + end := 0 + pend := 0 // Ensure that A is the longest if lA < lB { @@ -75,7 +77,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ // The difference of length is larger the maximum allowed errors if delta > maxError { - return -1, -1 + return -1, -1, -1 } // // BEGINNING OF DEBUG CODE // @@ -121,7 +123,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ } previous[extra+even-1] = encodeValues(0, 1, false) // Initialise cell 1,0 - N := lB + ((delta) >> 1) + N := lB + (delta >> 1) // log.Debugln("N = ", N, " delta = ", delta, " extra = ", extra, " maxError = ", maxError) @@ -147,6 +149,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ switch { case i == 0: + // We are setting the gaps of the first row Sup = _notavail Sdiag = _notavail if endgapfree { @@ -155,10 +158,12 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ Sleft = encodeValues(0, j, false) } case j == 0: + // We are setting the gaps of the first column Sup = encodeValues(0, i, false) Sdiag = _notavail Sleft = _notavail default: + // We are in the middle of the matrix Sdiag = _incpath(previous[x]) if _samenuc(bA[j-1], bB[i-1]) { Sdiag = _incscore(Sdiag) @@ -187,6 +192,15 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ score = Sup default: score = Sleft + + if endgapfree && i == lB { + _, l, o := decodeValues(Sleft) + + if l > pend && o == false { + pend = l + end = j + } + } } // I supose the bug was here @@ -271,6 +285,14 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ score = Sup default: score = Sleft + if endgapfree && i == lB { + _, l, o := decodeValues(Sleft) + + if l > pend && o == false { + pend = l + end = j + } + } } // I supose the bug was here @@ -331,10 +353,10 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ // // end OF DEBUG CODE // if o { - return -1, -1 + return -1, -1, -1 } - return s, l + return s, l, end } // FastLCSEGFScore calculates the score of the longest common subsequence between two bio sequences in end-gap-free mode. @@ -356,7 +378,7 @@ func FastLCSEGFScoreByte(bA, bB []byte, maxError int, endgapfree bool, buffer *[ // Returns: // - The score of the longest common subsequence. // - The length of the shortest alignment corresponding to the LCS. -func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) { +func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int, int) { return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, true, buffer) } @@ -380,5 +402,7 @@ func FastLCSEGFScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uin // - The score of the longest common subsequence. // - The length of the shortest alignment corresponding to the LCS. func FastLCSScore(seqA, seqB *obiseq.BioSequence, maxError int, buffer *[]uint64) (int, int) { - return FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer) + score, alilen, _ := FastLCSEGFScoreByte(seqA.Sequence(), seqB.Sequence(), maxError, false, buffer) + + return score, alilen } diff --git a/pkg/obialign/locatepattern.go b/pkg/obialign/locatepattern.go new file mode 100644 index 0000000..f188605 --- /dev/null +++ b/pkg/obialign/locatepattern.go @@ -0,0 +1,104 @@ +package obialign + +func buffIndex(i, j, width int) int { + return (i+1)*width + (j + 1) +} +func LocatePattern(pattern, sequence []byte) (int, int, int) { + width := len(pattern) + 1 + buffsize := (len(pattern) + 1) * (len(sequence) + 1) + buffer := make([]int, buffsize) + path := make([]int, buffsize) + + for j := 0; j < len(pattern); j++ { + idx := buffIndex(-1, j, width) + buffer[idx] = -j - 1 + path[idx] = -1 + } + + for i := -1; i < len(sequence); i++ { + idx := buffIndex(i, -1, width) + buffer[idx] = 0 + path[idx] = +1 + } + + path[0] = 0 + jmax := len(pattern) - 1 + for i := 0; i < len(sequence); i++ { + for j := 0; j < jmax; j++ { + match := -1 + if _samenuc(pattern[j], sequence[i]) { + match = 0 + } + + idx := buffIndex(i, j, width) + + diag := buffer[buffIndex(i-1, j-1, width)] + match + left := buffer[buffIndex(i, j-1, width)] - 1 + up := buffer[buffIndex(i-1, j, width)] - 1 + + score := max(diag, up, left) + + buffer[idx] = score + + switch { + case score == left: + path[idx] = -1 + case score == diag: + path[idx] = 0 + case score == up: + path[idx] = +1 + } + } + } + + for i := 0; i < len(sequence); i++ { + idx := buffIndex(i, jmax, width) + + match := -1 + if _samenuc(pattern[jmax], sequence[i]) { + match = 0 + } + + diag := buffer[buffIndex(i-1, jmax-1, width)] + match + left := buffer[buffIndex(i, jmax-1, width)] - 1 + up := buffer[buffIndex(i-1, jmax, width)] + + score := max(diag, up, left) + buffer[idx] = score + switch { + case score == left: + path[idx] = -1 + case score == diag: + path[idx] = 0 + case score == up: + path[idx] = +1 + } + } + + i := len(sequence) - 1 + j := jmax + end := -1 + lali := 0 + for i > -1 && j > 0 { + lali++ + switch path[buffIndex(i, j, width)] { + case 0: + j-- + if end == -1 { + end = i + lali = 1 + } + i-- + case 1: + i-- + case -1: + j-- + if end == -1 { + end = i + lali = 1 + } + } + + } + return i, end + 1, -buffer[buffIndex(len(sequence)-1, len(pattern)-1, width)] +} diff --git a/pkg/obiapat/pattern.go b/pkg/obiapat/pattern.go index fd5a050..3cea666 100644 --- a/pkg/obiapat/pattern.go +++ b/pkg/obiapat/pattern.go @@ -317,9 +317,6 @@ func (pattern ApatPattern) FindAllIndex(sequence ApatSequence, begin, length int func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) (start int, end int, nerr int, matched bool) { res := pattern.FindAllIndex(sequence, begin, length) - sbuffer := [(int(C.MAX_PAT_LEN) + int(C.MAX_PAT_ERR) + 1) * (int(C.MAX_PAT_LEN) + 1)]uint64{} - buffer := sbuffer[:] - if len(res) == 0 { matched = false return @@ -358,18 +355,51 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) ( best[0], nerr, int(pattern.pointer.pointer.patlen), sequence.Len(), start, end) - score, lali := obialign.FastLCSEGFScoreByte( - frg, - (*cpattern)[0:int(pattern.pointer.pointer.patlen)], - nerr, true, &buffer) + from, to, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg) - nerr = lali - score - start = best[0] + int(pattern.pointer.pointer.patlen) - lali - end = start + lali - log.Debugln("results", score, lali, start, nerr) + // olderr := m[2] + + nerr = score + start = start + from + end = start + to + log.Debugln("results", score, start, nerr) return } +func (pattern ApatPattern) FilterBestMatch(sequence ApatSequence, begin, length int) (loc [][3]int) { + res := pattern.FindAllIndex(sequence, begin, length) + filtered := make([][3]int, 0, len(res)) + + best := [3]int{0, 0, 10000} + for _, m := range res { + // log.Warnf("Current : Begin : %d End : %d Err : %d", m[0], m[1], m[2]) + // log.Warnf("Best : Begin : %d End : %d Err : %d", best[0], best[1], best[2]) + if (m[0] - m[2]) < best[1]+best[2] { + // match are overlapping + // log.Warnln("overlap") + if m[2] < best[2] { + // log.Warnln("better") + best = m + } + } else if best[2] < 10000 { + // no overlap + // log.Warnln("no overlap") + filtered = append(filtered, best) + best = m + } + } + + if best[2] < 10000 { + filtered = append(filtered, best) + } + + if len(filtered) == 0 { + return nil + } + + return filtered +} + // tagaacaggctcctctag // func AllocatedApaSequences() int { // return int(_AllocatedApaSequences) @@ -392,18 +422,15 @@ func (pattern ApatPattern) BestMatch(sequence ApatSequence, begin, length int) ( // the match. Following the GO convention the end position is not included in the // match. The third value indicates the number of error detected for this occurrence. func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) (loc [][3]int) { - res := pattern.FindAllIndex(sequence, begin, length) - - sbuffer := [(int(C.MAX_PAT_LEN) + int(C.MAX_PAT_ERR) + 1) * (int(C.MAX_PAT_LEN) + 1)]uint64{} - buffer := sbuffer[:] + res := pattern.FilterBestMatch(sequence, begin, length) for _, m := range res { // Recompute the start and end position of the match // when the pattern allows for indels if m[2] > 0 && pattern.pointer.pointer.hasIndel { start := m[0] - m[2] - end := m[0] + int(pattern.pointer.pointer.patlen) + m[2] start = max(start, 0) + end := start + int(pattern.pointer.pointer.patlen) + 2*m[2] end = min(end, sequence.Len()) // 1 << 30 = 1,073,741,824 = 1Gb // It's a virtual array mapping the sequence to the pattern @@ -412,20 +439,18 @@ func (pattern ApatPattern) AllMatches(sequence ApatSequence, begin, length int) cpattern := (*[1 << 30]byte)(unsafe.Pointer(pattern.pointer.pointer.cpat)) frg := sequence.pointer.reference.Sequence()[start:end] - score, lali := obialign.FastLCSEGFScoreByte( - frg, - (*cpattern)[0:int(pattern.pointer.pointer.patlen)], - m[2], true, &buffer) + begin, end, score := obialign.LocatePattern((*cpattern)[0:int(pattern.pointer.pointer.patlen)], frg) - // log.Debugf("seq[%d] : %s %d, %d", i, sequence.pointer.reference.Id(), score, lali) - - m[2] = lali - score - m[0] = m[0] + int(pattern.pointer.pointer.patlen) - lali - m[1] = m[0] + lali + // olderr := m[2] + m[2] = score + m[0] = start + begin + m[1] = start + end + // log.Warnf("seq[%d@%d:%d] %d: %s %d - %s:%s:%s", i, m[0], m[1], olderr, sequence.pointer.reference.Id(), score, + // frg, (*cpattern)[0:int(pattern.pointer.pointer.patlen)], sequence.pointer.reference.Sequence()[m[0]:m[1]]) } } - log.Debugf("All matches : %v", res) + // log.Debugf("All matches : %v", res) return res } diff --git a/pkg/obiformats/ngsfilter_read.go b/pkg/obiformats/ngsfilter_read.go index c305df0..7ed1e7a 100644 --- a/pkg/obiformats/ngsfilter_read.go +++ b/pkg/obiformats/ngsfilter_read.go @@ -400,6 +400,58 @@ var library_parameter = map[string]func(library *obingslibrary.NGSLibrary, value log.Fatalln("Invalid value for @reverse_primer_error parameter") } }, + "tag_indels": func(library *obingslibrary.NGSLibrary, values ...string) { + switch len(values) { + case 0: + log.Fatalln("Missing value for @tag_indels parameter") + case 1: + indels, err := strconv.Atoi(values[0]) + + if err != nil { + log.Fatalf("Invalid value %s for @tag_indels parameter", values[0]) + } + library.SetTagIndels(indels) + case 2: + indels, err := strconv.Atoi(values[1]) + + if err != nil { + log.Fatalf("Invalid value %s for @tag_indels parameter", values[1]) + } + + library.SetTagIndelsFor(values[0], indels) + } + }, + + "forward_tag_indels": func(library *obingslibrary.NGSLibrary, values ...string) { + switch len(values) { + case 0: + log.Fatalln("Missing value for @forward_tag_indels parameter") + case 1: + indels, err := strconv.Atoi(values[0]) + + if err != nil { + log.Fatalf("Invalid value %s for @forward_tag_indels parameter", values[0]) + } + library.SetForwardTagIndels(indels) + default: + log.Fatalln("Invalid value for @forward_tag_indels parameter") + } + }, + "reverse_tag_indels": func(library *obingslibrary.NGSLibrary, values ...string) { + switch len(values) { + case 0: + log.Fatalln("Missing value for @reverse_tag_indels parameter") + case 1: + indels, err := strconv.Atoi(values[0]) + + if err != nil { + log.Fatalf("Invalid value %s for @reverse_tag_indels parameter", values[0]) + } + library.SetReverseTagIndels(indels) + default: + log.Fatalln("Invalid value for @reverse_tag_indels parameter") + } + }, "indels": func(library *obingslibrary.NGSLibrary, values ...string) { switch len(values) { case 0: diff --git a/pkg/obingslibrary/marker.go b/pkg/obingslibrary/marker.go index 307a288..b5ea341 100644 --- a/pkg/obingslibrary/marker.go +++ b/pkg/obingslibrary/marker.go @@ -27,6 +27,8 @@ type Marker struct { Reverse_matching string Forward_tag_delimiter byte Reverse_tag_delimiter byte + Forward_tag_indels int + Reverse_tag_indels int samples map[TagPair]*PCR } @@ -335,6 +337,19 @@ func (marker *Marker) SetTagDelimiter(delim byte) { marker.SetReverseTagDelimiter(delim) } +func (marker *Marker) SetForwardTagIndels(indels int) { + marker.Forward_tag_indels = indels +} + +func (marker *Marker) SetReverseTagIndels(indels int) { + marker.Reverse_tag_indels = indels +} + +func (marker *Marker) SetTagIndels(indels int) { + marker.SetForwardTagIndels(indels) + marker.SetReverseTagIndels(indels) +} + func (marker *Marker) SetForwardAllowedMismatches(allowed_mismatches int) { marker.Forward_error = allowed_mismatches } diff --git a/pkg/obingslibrary/multimatch.go b/pkg/obingslibrary/multimatch.go index 7991290..092550a 100644 --- a/pkg/obingslibrary/multimatch.go +++ b/pkg/obingslibrary/multimatch.go @@ -110,6 +110,63 @@ func lookForTag(seq string, delimiter byte) string { return seq[begin:end] } +func lookForRescueTag(seq string, delimiter byte, taglength, border, indel int) string { + // log.Info("lookForRescueTag") + + i := len(seq) - 1 + + // Skip the border part not corresponding to the tag delimiter + for i >= 0 && seq[i] != delimiter { + i-- + } + + delimlen := 0 + for i >= 0 && seq[i] == delimiter { + i-- + delimlen++ + } + + if obiutils.Abs(delimlen-border) > indel { + return "" + } + + // log.Infof("delimlen: %d", delimlen) + + end := i + 1 + + i -= taglength - indel + + for i >= 0 && seq[i] != delimiter { + i-- + } + + delimlen = 0 + for i >= 0 && seq[i] == delimiter { + i-- + delimlen++ + } + + if obiutils.Abs(delimlen-border) > indel { + return "" + } + + delimlen = min(delimlen, border) + + // log.Infof("delimlen: %d", delimlen) + + begin := i + delimlen + 1 + + if i < 0 || obiutils.Abs(taglength-end+begin) > indel { + return "" + } + + // log.Infof("begin: %d, end: %d", begin, end) + // log.Infof("seq: %s", seq) + // log.Infof("seq[begin:end]: %s", seq[begin:end]) + + return seq[begin:end] +} + func (marker *Marker) beginDelimitedTagExtractor( sequence *obiseq.BioSequence, begin int, @@ -131,6 +188,33 @@ func (marker *Marker) beginDelimitedTagExtractor( return lookForTag(sequence.String()[fb:begin], delimiter) } +func (marker *Marker) beginRescueTagExtractor( + sequence *obiseq.BioSequence, + begin int, + forward bool) string { + + delimiter := marker.Forward_tag_delimiter + border := marker.Forward_spacer + taglength := marker.Forward_tag_length + delta := marker.Forward_tag_indels + + if !forward { + taglength = marker.Reverse_tag_length + border = marker.Reverse_spacer + delimiter = marker.Reverse_tag_delimiter + delta = marker.Reverse_tag_indels + } + + frglength := border + taglength + + fb := begin - frglength*2 + if fb < 0 { + fb = 0 + } + + return lookForRescueTag(sequence.String()[fb:begin], delimiter, taglength, border, delta) +} + func (marker *Marker) beginFixedTagExtractor( sequence *obiseq.BioSequence, begin int, @@ -183,6 +267,43 @@ func (marker *Marker) endDelimitedTagExtractor( return lookForTag(tag_seq.ReverseComplement(true).String(), delimiter) } +func (marker *Marker) endRescueTagExtractor( + sequence *obiseq.BioSequence, + end int, + forward bool) string { + + delimiter := marker.Reverse_tag_delimiter + border := marker.Reverse_spacer + taglength := marker.Reverse_tag_length + delta := marker.Reverse_tag_indels + + if !forward { + taglength = marker.Forward_tag_length + border = marker.Forward_spacer + delimiter = marker.Forward_tag_delimiter + delta = marker.Forward_tag_indels + } + + frglength := border + taglength + + fb := end + frglength*2 + + if fb > sequence.Len() { + fb = sequence.Len() + } + + if end >= fb { + return "" + } + + tag_seq, err := sequence.Subsequence(end, fb, false) + + if err != nil { + log.Fatalf("Cannot extract sequence tag : %v", err) + } + + return lookForRescueTag(tag_seq.ReverseComplement(true).String(), delimiter, taglength, border, delta) +} func (marker *Marker) endFixedTagExtractor( sequence *obiseq.BioSequence, end int, @@ -218,13 +339,23 @@ func (marker *Marker) beginTagExtractor( if marker.Forward_tag_delimiter == 0 { return marker.beginFixedTagExtractor(sequence, begin, forward) } else { - return marker.beginDelimitedTagExtractor(sequence, begin, forward) + if marker.Forward_tag_indels == 0 { + return marker.beginDelimitedTagExtractor(sequence, begin, forward) + } else { + // log.Warn("Rescue tag for forward primers") + return marker.beginRescueTagExtractor(sequence, begin, forward) + } } } else { if marker.Reverse_tag_delimiter == 0 { return marker.beginFixedTagExtractor(sequence, begin, forward) } else { - return marker.beginDelimitedTagExtractor(sequence, begin, forward) + if marker.Reverse_tag_indels == 0 { + return marker.beginDelimitedTagExtractor(sequence, begin, forward) + } else { + // log.Warn("Rescue tag for reverse/complement primers") + return marker.beginRescueTagExtractor(sequence, begin, forward) + } } } } @@ -237,13 +368,23 @@ func (marker *Marker) endTagExtractor( if marker.Reverse_tag_delimiter == 0 { return marker.endFixedTagExtractor(sequence, end, forward) } else { - return marker.endDelimitedTagExtractor(sequence, end, forward) + if marker.Reverse_tag_indels == 0 { + return marker.endDelimitedTagExtractor(sequence, end, forward) + } else { + // log.Warn("Rescue tag for reverse primers") + return marker.endRescueTagExtractor(sequence, end, forward) + } } } else { if marker.Forward_tag_delimiter == 0 { return marker.endFixedTagExtractor(sequence, end, forward) } else { - return marker.endDelimitedTagExtractor(sequence, end, forward) + if marker.Forward_tag_indels == 0 { + return marker.endDelimitedTagExtractor(sequence, end, forward) + } else { + // log.Warn("Rescue tag for forward/complement primers") + return marker.endRescueTagExtractor(sequence, end, forward) + } } } } @@ -264,6 +405,10 @@ func (library *NGSLibrary) TagExtractor( forward_tag := marker.beginTagExtractor(sequence, begin, forward) reverse_tag := marker.endTagExtractor(sequence, end, forward) + if !forward { + forward_tag, reverse_tag = reverse_tag, forward_tag + } + if forward_tag != "" { annotations["obimultiplex_forward_tag"] = forward_tag } @@ -347,14 +492,12 @@ func (library *NGSLibrary) SampleIdentifier( case "hamming": forward, fdistance = marker.ClosestForwardTag(tags.Forward, Hamming) annotations["obimultiplex_forward_matching"] = "hamming" - annotations["obimultiplex_forward_tag_dist"] = fdistance - annotations["obimultiplex_forward_proposed_tag"] = forward case "indel": forward, fdistance = marker.ClosestForwardTag(tags.Forward, Levenshtein) annotations["obimultiplex_forward_matching"] = "indel" - annotations["obimultiplex_forward_tag_dist"] = fdistance - annotations["obimultiplex_forward_proposed_tag"] = forward } + annotations["obimultiplex_forward_tag_dist"] = fdistance + annotations["obimultiplex_forward_proposed_tag"] = forward } if tags.Reverse != "" { @@ -366,14 +509,12 @@ func (library *NGSLibrary) SampleIdentifier( case "hamming": reverse, rdistance = marker.ClosestReverseTag(tags.Reverse, Hamming) annotations["obimultiplex_reverse_matching"] = "hamming" - annotations["obimultiplex_reverse_tag_dist"] = rdistance - annotations["obimultiplex_reverse_proposed_tag"] = reverse case "indel": reverse, rdistance = marker.ClosestReverseTag(tags.Reverse, Levenshtein) annotations["obimultiplex_reverse_matching"] = "indel" - annotations["obimultiplex_reverse_tag_dist"] = rdistance - annotations["obimultiplex_reverse_proposed_tag"] = reverse } + annotations["obimultiplex_reverse_tag_dist"] = rdistance + annotations["obimultiplex_reverse_proposed_tag"] = reverse } proposed := TagPair{forward, reverse} @@ -492,18 +633,22 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob annotations["obimultiplex_reverse_primer"] = primerseqs[from.Marker].Reverse if from.Forward { + // With have a barcode in the orientation from the forward primer to the reverse + + // Try to extract the forward primer match if from.Begin < 0 || from.End > sequence.Len() { barcode_error = true - annotations["obimultiplex_error"] = "Cannot extract forward match" + annotations["obimultiplex_error"] = "Cannot extract forward primer match" } else { annotations["obimultiplex_forward_match"] = sequence.String()[from.Begin:from.End] } + // Try to extract the reverse primer match sseq, err := sequence.Subsequence(match.Begin, match.End, false) if err != nil { barcode_error = true - annotations["obimultiplex_error"] = "Cannot extract reverse match" + annotations["obimultiplex_error"] = "Cannot extract reverse primer match" } else { annotations["obimultiplex_reverse_match"] = sseq.ReverseComplement(true).String() } @@ -511,18 +656,22 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob annotations["obimultiplex_forward_error"] = from.Mismatches annotations["obimultiplex_reverse_error"] = match.Mismatches } else { + // With have a barcode in the orientation from the reverse primer to the forward + + // Try to extract the reverse primer match if from.Begin < 0 || from.End > sequence.Len() { barcode_error = true - annotations["obimultiplex_error"] = "Cannot extract reverse match" + annotations["obimultiplex_error"] = "Cannot extract reverse primer match" } else { annotations["obimultiplex_reverse_match"] = sequence.String()[from.Begin:from.End] } + // Try to extract the forward primer match sseq, err := sequence.Subsequence(match.Begin, match.End, false) if err != nil { barcode_error = true - annotations["obimultiplex_error"] = "Cannot extract forward match" + annotations["obimultiplex_error"] = "Cannot extract forward primer match" } else { annotations["obimultiplex_forward_match"] = sseq.ReverseComplement(true).String() } @@ -531,6 +680,7 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob annotations["obimultiplex_forward_error"] = match.Mismatches } + // if we were able to extract the primer matches we can extract the barcode if !barcode_error { tags := library.TagExtractor(sequence, annotations, primerseqs[from.Marker], from.Begin, match.End, from.Forward) @@ -540,6 +690,8 @@ func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (ob return nil, fmt.Errorf("%s [%s] : Cannot extract barcode %d : %v", sequence.Id(), sequence.Source(), q, err) } + annotations["obimultiplex_direction"] = map[bool]string{true: "forward", false: "reverse"}[from.Forward] + if !match.Forward { barcode = barcode.ReverseComplement(true) } diff --git a/pkg/obingslibrary/ngslibrary.go b/pkg/obingslibrary/ngslibrary.go index 9f30de0..4c8e7ab 100644 --- a/pkg/obingslibrary/ngslibrary.go +++ b/pkg/obingslibrary/ngslibrary.go @@ -57,6 +57,8 @@ func (library *NGSLibrary) GetMarker(forward, reverse string) (*Marker, bool) { Reverse_matching: "strict", Forward_allows_indels: false, Reverse_allows_indels: false, + Forward_tag_indels: 0, + Reverse_tag_indels: 0, samples: make(map[TagPair]*PCR, 1000), } @@ -122,6 +124,40 @@ func (library *NGSLibrary) SetTagSpacerFor(primer string, spacer int) { } } +func (library *NGSLibrary) SetForwardTagIndels(indels int) { + for _, marker := range library.Markers { + marker.SetForwardTagIndels(indels) + } +} + +func (library *NGSLibrary) SetReverseTagIndels(indels int) { + for _, marker := range library.Markers { + marker.SetReverseTagIndels(indels) + } +} + +func (library *NGSLibrary) SetTagIndels(indels int) { + library.SetForwardTagIndels(indels) + library.SetReverseTagIndels(indels) +} + +func (library *NGSLibrary) SetTagIndelsFor(primer string, indels int) { + primer = strings.ToLower(primer) + primers, ok := library.Primers[primer] + + if ok { + marker, ok := library.Markers[primers] + + if ok { + if primer == primers.Forward { + marker.SetForwardTagIndels(indels) + } else { + marker.SetReverseTagIndels(indels) + } + } + } +} + func (library *NGSLibrary) SetForwardTagDelimiter(delim byte) { for _, marker := range library.Markers { marker.SetForwardTagDelimiter(delim) diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index 4358cbd..fc4adbc 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -7,7 +7,7 @@ import ( // TODO: The version number is extracted from git. This induces that the version // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "5611fb1" +var _Commit = "efad98d" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. diff --git a/pkg/obiutils/abs.go b/pkg/obiutils/abs.go new file mode 100644 index 0000000..2d9b726 --- /dev/null +++ b/pkg/obiutils/abs.go @@ -0,0 +1,10 @@ +package obiutils + +import "golang.org/x/exp/constraints" + +func Abs[T constraints.Signed](x T) T { + if x < 0 { + return -x + } + return x +}