Plenty of small bugs

Former-commit-id: 42c7fab7d65906c80ab4cd32da6867ff21842ea8
This commit is contained in:
Eric Coissac
2024-06-04 16:49:12 +02:00
parent e843d2aa5c
commit 65f5109957
15 changed files with 894 additions and 264 deletions

281
pkg/obingslibrary/marker.go Normal file
View File

@ -0,0 +1,281 @@
package obingslibrary
import (
"fmt"
"strings"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
)
type Marker struct {
forward obiapat.ApatPattern
cforward obiapat.ApatPattern
reverse obiapat.ApatPattern
creverse obiapat.ApatPattern
Forward_tag_length int
Reverse_tag_length int
Forward_spacer int
Reverse_spacer int
Forward_tag_delimiter byte
Reverse_tag_delimiter byte
samples map[TagPair]*PCR
}
func (marker *Marker) Compile(forward, reverse string, maxError int, allowsIndel bool) error {
var err error
marker.CheckTagLength()
marker.forward, err = obiapat.MakeApatPattern(forward, maxError, allowsIndel)
if err != nil {
return err
}
marker.reverse, err = obiapat.MakeApatPattern(reverse, maxError, allowsIndel)
if err != nil {
return err
}
marker.cforward, err = marker.forward.ReverseComplement()
if err != nil {
return err
}
marker.creverse, err = marker.reverse.ReverseComplement()
if err != nil {
return err
}
return nil
}
// Match finds the best matching demultiplex for a given sequence.
//
// Parameters:
//
// marker - a pointer to a Marker struct that contains the forward and reverse primers.
// sequence - a pointer to a BioSequence struct that represents the input sequence.
//
// Returns:
//
// A pointer to a DemultiplexMatch struct that contains the best matching demultiplex.
func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
aseq, _ := obiapat.MakeApatSequence(sequence, false)
start, end, nerr, matched := marker.forward.BestMatch(aseq, marker.Forward_tag_length, -1)
if matched {
if start < 0 {
start = 0
}
if end > sequence.Len() {
end = sequence.Len()
}
sseq := sequence.String()
direct := sseq[start:end]
tagstart := max(start-marker.Forward_tag_length, 0)
ftag := strings.ToLower(sseq[tagstart:start])
m := DemultiplexMatch{
ForwardMatch: direct,
ForwardTag: ftag,
BarcodeStart: end,
ForwardMismatches: nerr,
IsDirect: true,
Error: nil,
}
start, end, nerr, matched = marker.creverse.BestMatch(aseq, start, -1)
if matched {
// extracting primer matches
reverse, _ := sequence.Subsequence(start, end, false)
defer reverse.Recycle()
reverse = reverse.ReverseComplement(true)
endtag := min(end+marker.Reverse_tag_length, sequence.Len())
rtag, err := sequence.Subsequence(end, endtag, false)
defer rtag.Recycle()
srtag := ""
if err != nil {
rtag = nil
} else {
rtag.ReverseComplement(true)
srtag = strings.ToLower(rtag.String())
}
m.ReverseMatch = strings.ToLower(reverse.String())
m.ReverseMismatches = nerr
m.BarcodeEnd = start
m.ReverseTag = srtag
sample, ok := marker.samples[TagPair{ftag, srtag}]
if ok {
m.Pcr = sample
}
return &m
}
m.Error = fmt.Errorf("cannot locates reverse priming site")
return &m
}
//
// At this point the forward primer didn't match.
// We try now with the reverse primer
//
start, end, nerr, matched = marker.reverse.BestMatch(aseq, marker.Reverse_tag_length, -1)
if matched {
if start < 0 {
start = 0
}
if end > sequence.Len() {
end = sequence.Len()
}
sseq := sequence.String()
reverse := strings.ToLower(sseq[start:end])
tagstart := max(start-marker.Reverse_tag_length, 0)
rtag := strings.ToLower(sseq[tagstart:start])
m := DemultiplexMatch{
ReverseMatch: reverse,
ReverseTag: rtag,
BarcodeStart: end,
ReverseMismatches: nerr,
IsDirect: false,
Error: nil,
}
start, end, nerr, matched = marker.cforward.BestMatch(aseq, end, -1)
if matched {
direct, _ := sequence.Subsequence(start, end, false)
defer direct.Recycle()
direct = direct.ReverseComplement(true)
endtag := min(end+marker.Forward_tag_length, sequence.Len())
ftag, err := sequence.Subsequence(end, endtag, false)
defer ftag.Recycle()
sftag := ""
if err != nil {
ftag = nil
} else {
ftag = ftag.ReverseComplement(true)
sftag = strings.ToLower(ftag.String())
}
m.ForwardMatch = direct.String()
m.ForwardTag = sftag
m.ForwardMismatches = nerr
m.BarcodeEnd = start
sample, ok := marker.samples[TagPair{sftag, rtag}]
if ok {
m.Pcr = sample
}
return &m
}
m.Error = fmt.Errorf("cannot locates forward priming site")
return &m
}
return nil
}
func (marker *Marker) GetPCR(forward, reverse string) (*PCR, bool) {
pair := TagPair{forward, reverse}
pcr, ok := marker.samples[pair]
if ok {
return pcr, ok
}
ipcr := PCR{}
marker.samples[pair] = &ipcr
return &ipcr, false
}
func (marker *Marker) CheckTagLength() error {
forward_length := make(map[int]int)
reverse_length := make(map[int]int)
marker.Forward_tag_length = -1
marker.Reverse_tag_length = -1
for tags := range marker.samples {
forward_length[len(tags.Forward)]++
reverse_length[len(tags.Reverse)]++
}
maxfl, _ := obiutils.MaxMap(forward_length)
if len(forward_length) > 1 {
others := make([]int, 0)
for k := range forward_length {
if k != maxfl {
others = append(others, k)
}
}
return fmt.Errorf("forward tag length %d is not the same for all the PCRs : %v\n", maxfl, others)
}
maxrl, _ := obiutils.MaxMap(reverse_length)
if len(reverse_length) > 1 {
others := make([]int, 0)
for k := range reverse_length {
if k != maxrl {
others = append(others, k)
}
}
return fmt.Errorf("reverse tag length %d is not the same for all the PCRs : %v\n", maxrl, others)
}
marker.Forward_tag_length = maxfl
marker.Reverse_tag_length = maxrl
return nil
}
func (marker *Marker) SetForwardTagSpacer(spacer int) {
marker.Forward_spacer = spacer
}
func (marker *Marker) SetReverseTagSpacer(spacer int) {
marker.Reverse_spacer = spacer
}
func (marker *Marker) SetTagSpacer(spacer int) {
marker.SetForwardTagSpacer(spacer)
marker.SetReverseTagSpacer(spacer)
}
func (marker *Marker) SetForwardTagDelimiter(delim byte) {
marker.Forward_tag_delimiter = delim
}
func (marker *Marker) SetReverseTagDelimiter(delim byte) {
marker.Reverse_tag_delimiter = delim
}
func (marker *Marker) SetTagDelimiter(delim byte) {
marker.SetForwardTagDelimiter(delim)
marker.SetReverseTagDelimiter(delim)
}

View File

@ -7,7 +7,6 @@ import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
)
@ -28,7 +27,7 @@ type DemultiplexMatch struct {
}
func (library *NGSLibrary) Compile(maxError int, allowsIndel bool) error {
for primers, marker := range *library {
for primers, marker := range library.Markers {
err := marker.Compile(primers.Forward,
primers.Reverse,
maxError, allowsIndel)
@ -40,7 +39,7 @@ func (library *NGSLibrary) Compile(maxError int, allowsIndel bool) error {
}
func (library *NGSLibrary) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
for primers, marker := range *library {
for primers, marker := range library.Markers {
m := marker.Match(sequence)
if m != nil {
m.ForwardPrimer = strings.ToLower(primers.Forward)
@ -57,203 +56,6 @@ func (library *NGSLibrary) ExtractBarcode(sequence *obiseq.BioSequence, inplace
return match.ExtractBarcode(sequence, inplace)
}
func (marker *Marker) Compile(forward, reverse string, maxError int, allowsIndel bool) error {
var err error
marker.forward, err = obiapat.MakeApatPattern(forward, maxError, allowsIndel)
if err != nil {
return err
}
marker.reverse, err = obiapat.MakeApatPattern(reverse, maxError, allowsIndel)
if err != nil {
return err
}
marker.cforward, err = marker.forward.ReverseComplement()
if err != nil {
return err
}
marker.creverse, err = marker.reverse.ReverseComplement()
if err != nil {
return err
}
marker.taglength = 0
for tags := range marker.samples {
lf := len(tags.Forward)
lr := len(tags.Reverse)
l := lf
if lf == 0 {
l = lr
}
if lr != 0 && l != lr {
return fmt.Errorf("forward tag (%s) and reverse tag (%s) do not have the same length",
tags.Forward, tags.Reverse)
}
if marker.taglength != 0 && l != marker.taglength {
return fmt.Errorf("tag pair (%s,%s) is not compatible with a tag length of %d",
tags.Forward, tags.Reverse, marker.taglength)
} else {
marker.taglength = l
}
}
return nil
}
// Match finds the best matching demultiplex for a given sequence.
//
// Parameters:
//
// marker - a pointer to a Marker struct that contains the forward and reverse primers.
// sequence - a pointer to a BioSequence struct that represents the input sequence.
//
// Returns:
//
// A pointer to a DemultiplexMatch struct that contains the best matching demultiplex.
func (marker *Marker) Match(sequence *obiseq.BioSequence) *DemultiplexMatch {
aseq, _ := obiapat.MakeApatSequence(sequence, false)
start, end, nerr, matched := marker.forward.BestMatch(aseq, marker.taglength, -1)
if matched {
if start < 0 {
start = 0
}
if end > sequence.Len() {
end = sequence.Len()
}
sseq := sequence.String()
direct := sseq[start:end]
tagstart := max(start-marker.taglength, 0)
ftag := strings.ToLower(sseq[tagstart:start])
m := DemultiplexMatch{
ForwardMatch: direct,
ForwardTag: ftag,
BarcodeStart: end,
ForwardMismatches: nerr,
IsDirect: true,
Error: nil,
}
start, end, nerr, matched = marker.creverse.BestMatch(aseq, start, -1)
if matched {
// extracting primer matches
reverse, _ := sequence.Subsequence(start, end, false)
defer reverse.Recycle()
reverse = reverse.ReverseComplement(true)
endtag := min(end+marker.taglength, sequence.Len())
rtag, err := sequence.Subsequence(end, endtag, false)
defer rtag.Recycle()
srtag := ""
if err != nil {
rtag = nil
} else {
rtag.ReverseComplement(true)
srtag = strings.ToLower(rtag.String())
}
m.ReverseMatch = strings.ToLower(reverse.String())
m.ReverseMismatches = nerr
m.BarcodeEnd = start
m.ReverseTag = srtag
sample, ok := marker.samples[TagPair{ftag, srtag}]
if ok {
m.Pcr = sample
}
return &m
}
m.Error = fmt.Errorf("cannot locates reverse priming site")
return &m
}
//
// At this point the forward primer didn't match.
// We try now with the reverse primer
//
start, end, nerr, matched = marker.reverse.BestMatch(aseq, marker.taglength, -1)
if matched {
if start < 0 {
start = 0
}
if end > sequence.Len() {
end = sequence.Len()
}
sseq := sequence.String()
reverse := strings.ToLower(sseq[start:end])
tagstart := max(start-marker.taglength, 0)
rtag := strings.ToLower(sseq[tagstart:start])
m := DemultiplexMatch{
ReverseMatch: reverse,
ReverseTag: rtag,
BarcodeStart: end,
ReverseMismatches: nerr,
IsDirect: false,
Error: nil,
}
start, end, nerr, matched = marker.cforward.BestMatch(aseq, end, -1)
if matched {
direct, _ := sequence.Subsequence(start, end, false)
defer direct.Recycle()
direct = direct.ReverseComplement(true)
endtag := min(end+marker.taglength, sequence.Len())
ftag, err := sequence.Subsequence(end, endtag, false)
defer ftag.Recycle()
sftag := ""
if err != nil {
ftag = nil
} else {
ftag = ftag.ReverseComplement(true)
sftag = strings.ToLower(ftag.String())
}
m.ForwardMatch = direct.String()
m.ForwardTag = sftag
m.ForwardMismatches = nerr
m.BarcodeEnd = start
sample, ok := marker.samples[TagPair{sftag, rtag}]
if ok {
m.Pcr = sample
}
return &m
}
m.Error = fmt.Errorf("cannot locates forward priming site")
return &m
}
return nil
}
// ExtractBarcode extracts the barcode from the given biosequence.
//
// Parameters:

View File

@ -0,0 +1,186 @@
package obingslibrary
import (
"sort"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
)
type PrimerMatch struct {
Begin int
End int
Mismatches int
Marker int
Forward bool
}
type TagMatcher func(
sequence *obiseq.BioSequence,
begin, end int, forward bool) (TagPair, error)
// func (library *NGSLibrary) MakeTagMatcherFixedLength() TagMatcher {
// return func(sequence *obiseq.BioSequence, begin, end int, forward bool) (TagPair, error) {
// fb := 0
// fe := 0
// if forward {
// fb = begin - library.Forward_spacer - library.Forward_tag_length
// } else {
// fb = begin - library.Reverse_spacer - library.Reverse_tag_length
// }
// if fb < 0 {
// return TagPair{}, fmt.Errorf("begin too small")
// }
// if forward {
// fe = end + library.Reverse_tag_length + library.Reverse_spacer
// } else {
// fe = end + library.Forward_tag_length + library.Forward_spacer
// }
// if fe > len(sequence.String()) {
// return TagPair{}, fmt.Errorf("end too large")
// }
// ftag := sequence.String()[fb:begin]
// rtag, err := sequence.Subsequence(end, fe, true)
// if err != nil {
// return TagPair{}, fmt.Errorf("error in subsequence : %v", err)
// }
// return TagPair{Forward: ftag, Reverse: rtag.String()}, nil
// }
// }
func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
i := 1
markers := make([]*Marker, len(library.Markers)+1)
primerseqs := make([]PrimerPair, len(library.Markers)+1)
matches := make([]PrimerMatch, 0, len(library.Markers)+1)
aseq, err := obiapat.MakeApatSequence(sequence, false)
if err != nil {
log.Fatalf("error in building apat sequence : %v\n", err)
}
for primers, marker := range library.Markers {
markers[i] = marker
primerseqs[i] = primers
locs := marker.forward.AllMatches(aseq, 0, -1)
if len(locs) > 0 {
for _, loc := range locs {
matches = append(matches, PrimerMatch{
Begin: loc[0],
End: loc[1],
Mismatches: loc[2],
Marker: i,
Forward: true,
})
}
locs = marker.creverse.AllMatches(aseq, locs[0][0]+1, -1)
if len(locs) > 0 {
for _, loc := range locs {
matches = append(matches, PrimerMatch{
Begin: loc[0],
End: loc[1],
Mismatches: loc[2],
Marker: -i,
Forward: true,
})
}
}
}
locs = marker.reverse.AllMatches(aseq, 0, -1)
if len(locs) > 0 {
for _, loc := range locs {
matches = append(matches, PrimerMatch{
Begin: loc[0],
End: loc[1],
Mismatches: loc[2],
Marker: i,
Forward: false,
})
}
locs = marker.cforward.AllMatches(aseq, locs[0][0]+1, -1)
if len(locs) > 0 {
for _, loc := range locs {
matches = append(matches, PrimerMatch{
Begin: loc[0],
End: loc[1],
Mismatches: loc[2],
Marker: -i,
Forward: false,
})
}
}
}
i++
}
if len(matches) > 0 {
sort.Slice(matches, func(i, j int) bool {
return matches[i].Begin < matches[j].Begin
})
state := 0
var from PrimerMatch
q := 0
for _, match := range matches {
switch state {
case 0:
if match.Marker > 0 {
from = match
state = 1
}
case 1:
if match.Marker == -from.Marker && match.Forward == from.Forward {
q++
log.Infof("%d -- %s [%s:%s] %s : %d -> %d mismatches : %d:%d",
q,
sequence.Id(),
primerseqs[from.Marker].Forward,
primerseqs[from.Marker].Reverse,
map[bool]string{true: "forward", false: "reverse"}[from.Forward],
from.End,
match.Begin-1,
from.Mismatches,
match.Mismatches,
)
state = 0
} else if match.Marker > 0 {
log.Warnf("Marker mismatch : %d %d", match.Marker, from.Marker)
from = match
} else {
log.Warnf("Marker mismatch : %d %d", match.Marker, from.Marker)
state = 0
}
}
}
}
return nil, nil
}
func (library *NGSLibrary) ExtractMultiBarcodeSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
opt := MakeOptions(options)
library.Compile(opt.AllowedMismatch(), opt.AllowsIndel())
worker := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
return library.ExtractMultiBarcode(sequence)
}
return obiseq.SeqToSliceWorker(worker, true)
}

View File

@ -1,7 +1,8 @@
package obingslibrary
import (
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
"fmt"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
)
@ -22,44 +23,112 @@ type PCR struct {
Annotations obiseq.Annotation
}
type Marker struct {
forward obiapat.ApatPattern
cforward obiapat.ApatPattern
reverse obiapat.ApatPattern
creverse obiapat.ApatPattern
taglength int
samples map[TagPair]*PCR
type NGSLibrary struct {
Matching string
Primers map[string]PrimerPair
Markers map[PrimerPair]*Marker
}
type NGSLibrary map[PrimerPair]*Marker
func MakeNGSLibrary() NGSLibrary {
return make(NGSLibrary, 10)
return NGSLibrary{
Primers: make(map[string]PrimerPair, 10),
Markers: make(map[PrimerPair]*Marker, 10),
}
}
func (library *NGSLibrary) GetMarker(forward, reverse string) (*Marker, bool) {
pair := PrimerPair{forward, reverse}
marker, ok := (*library)[pair]
marker, ok := (library.Markers)[pair]
if ok {
return marker, true
}
m := Marker{samples: make(map[TagPair]*PCR, 1000)}
(*library)[pair] = &m
m := Marker{
Forward_tag_length: 0,
Reverse_tag_length: 0,
Forward_spacer: 0,
Reverse_spacer: 0,
Forward_tag_delimiter: 0,
Reverse_tag_delimiter: 0,
samples: make(map[TagPair]*PCR, 1000),
}
(library.Markers)[pair] = &m
return &m, false
}
func (marker *Marker) GetPCR(forward, reverse string) (*PCR, bool) {
pair := TagPair{forward, reverse}
pcr, ok := marker.samples[pair]
func (library *NGSLibrary) SetForwardTagSpacer(spacer int) {
for _, marker := range library.Markers {
marker.SetForwardTagSpacer(spacer)
}
}
func (library *NGSLibrary) SetReverseTagSpacer(spacer int) {
for _, marker := range library.Markers {
marker.SetReverseTagSpacer(spacer)
}
}
func (library *NGSLibrary) SetTagSpacer(spacer int) {
library.SetForwardTagSpacer(spacer)
library.SetReverseTagSpacer(spacer)
}
func (library *NGSLibrary) SetTagSpacerFor(primer string, spacer int) {
primers, ok := library.Primers[primer]
if ok {
return pcr, ok
marker, ok := library.Markers[primers]
if ok {
if primer == primers.Forward {
marker.SetForwardTagSpacer(spacer)
} else {
marker.SetReverseTagSpacer(spacer)
}
}
}
ipcr := PCR{}
marker.samples[pair] = &ipcr
return &ipcr, false
}
func (library *NGSLibrary) SetForwardTagDelimiter(delim byte) {
for _, marker := range library.Markers {
marker.SetForwardTagDelimiter(delim)
}
}
func (library *NGSLibrary) SetReverseTagDelimiter(delim byte) {
for _, marker := range library.Markers {
marker.SetReverseTagDelimiter(delim)
}
}
func (library *NGSLibrary) SetTagDelimiter(delim byte) {
library.SetForwardTagDelimiter(delim)
library.SetReverseTagDelimiter(delim)
}
func (library *NGSLibrary) CheckTagLength() {
for _, marker := range library.Markers {
marker.CheckTagLength()
}
}
func (library *NGSLibrary) CheckPrimerUnicity() error {
for primers := range library.Markers {
if _, ok := library.Primers[primers.Forward]; ok {
return fmt.Errorf("forward primer %s is used more than once", primers.Forward)
}
if _, ok := library.Primers[primers.Reverse]; ok {
return fmt.Errorf("reverse primer %s is used more than once", primers.Reverse)
}
if primers.Forward == primers.Reverse {
return fmt.Errorf("forward and reverse primers are the same : %s", primers.Forward)
}
library.Primers[primers.Forward] = primers
library.Primers[primers.Reverse] = primers
}
return nil
}

View File

@ -139,7 +139,7 @@ func MakeOptions(setters []WithOption) Options {
return opt
}
func _ExtractBarcodeSlice(ngslibrary NGSLibrary,
func _ExtractBarcodeSlice(ngslibrary *NGSLibrary,
sequences obiseq.BioSequenceSlice,
options Options) obiseq.BioSequenceSlice {
newSlice := make(obiseq.BioSequenceSlice, 0, len(sequences))
@ -154,7 +154,7 @@ func _ExtractBarcodeSlice(ngslibrary NGSLibrary,
return newSlice
}
func ExtractBarcodeSlice(ngslibrary NGSLibrary,
func ExtractBarcodeSlice(ngslibrary *NGSLibrary,
sequences obiseq.BioSequenceSlice,
options ...WithOption) obiseq.BioSequenceSlice {
@ -165,7 +165,7 @@ func ExtractBarcodeSlice(ngslibrary NGSLibrary,
return _ExtractBarcodeSlice(ngslibrary, sequences, opt)
}
func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
func ExtractBarcodeSliceWorker(ngslibrary *NGSLibrary,
options ...WithOption) obiseq.SeqSliceWorker {
opt := MakeOptions(options)