mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Plenty of small bugs
Former-commit-id: 42c7fab7d65906c80ab4cd32da6867ff21842ea8
This commit is contained in:
186
pkg/obingslibrary/multimatch.go
Normal file
186
pkg/obingslibrary/multimatch.go
Normal file
@@ -0,0 +1,186 @@
|
||||
package obingslibrary
|
||||
|
||||
import (
	"fmt"
	"sort"

	log "github.com/sirupsen/logrus"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat"
	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
)
|
||||
|
||||
// PrimerMatch records one location where a primer pattern matched a
// sequence, together with the marker it belongs to.
type PrimerMatch struct {
	// Begin and End are the first and second values of the location
	// reported by the pattern matcher for this hit.
	Begin, End int
	// Mismatches is the third value of the reported location.
	Mismatches int
	// Marker is the 1-based index given to the marker while scanning the
	// library. It is positive for the first primer of a pair (forward or
	// reverse) and negated for a reverse-complemented partner primer
	// (creverse or cforward).
	Marker int
	// Forward is true when the hit comes from the forward/creverse search
	// and false when it comes from the reverse/cforward search.
	Forward bool
}
|
||||
|
||||
type TagMatcher func(
|
||||
sequence *obiseq.BioSequence,
|
||||
begin, end int, forward bool) (TagPair, error)
|
||||
|
||||
// func (library *NGSLibrary) MakeTagMatcherFixedLength() TagMatcher {
// 	return func(sequence *obiseq.BioSequence, begin, end int, forward bool) (TagPair, error) {
// 		fb := 0
// 		fe := 0
// 		if forward {
// 			fb = begin - library.Forward_spacer - library.Forward_tag_length
// 		} else {
// 			fb = begin - library.Reverse_spacer - library.Reverse_tag_length
// 		}

// 		if fb < 0 {
// 			return TagPair{}, fmt.Errorf("begin too small")
// 		}
// 		if forward {
// 			fe = end + library.Reverse_tag_length + library.Reverse_spacer
// 		} else {
// 			fe = end + library.Forward_tag_length + library.Forward_spacer
// 		}

// 		if fe > len(sequence.String()) {
// 			return TagPair{}, fmt.Errorf("end too large")
// 		}

// 		ftag := sequence.String()[fb:begin]
// 		rtag, err := sequence.Subsequence(end, fe, true)

// 		if err != nil {
// 			return TagPair{}, fmt.Errorf("error in subsequence : %v", err)
// 		}

// 		return TagPair{Forward: ftag, Reverse: rtag.String()}, nil
// 	}

// }
|
||||
|
||||
func (library *NGSLibrary) ExtractMultiBarcode(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
i := 1
|
||||
markers := make([]*Marker, len(library.Markers)+1)
|
||||
primerseqs := make([]PrimerPair, len(library.Markers)+1)
|
||||
matches := make([]PrimerMatch, 0, len(library.Markers)+1)
|
||||
aseq, err := obiapat.MakeApatSequence(sequence, false)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("error in building apat sequence : %v\n", err)
|
||||
}
|
||||
|
||||
for primers, marker := range library.Markers {
|
||||
markers[i] = marker
|
||||
primerseqs[i] = primers
|
||||
locs := marker.forward.AllMatches(aseq, 0, -1)
|
||||
if len(locs) > 0 {
|
||||
for _, loc := range locs {
|
||||
matches = append(matches, PrimerMatch{
|
||||
Begin: loc[0],
|
||||
End: loc[1],
|
||||
Mismatches: loc[2],
|
||||
Marker: i,
|
||||
Forward: true,
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
locs = marker.creverse.AllMatches(aseq, locs[0][0]+1, -1)
|
||||
|
||||
if len(locs) > 0 {
|
||||
for _, loc := range locs {
|
||||
matches = append(matches, PrimerMatch{
|
||||
Begin: loc[0],
|
||||
End: loc[1],
|
||||
Mismatches: loc[2],
|
||||
Marker: -i,
|
||||
Forward: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
locs = marker.reverse.AllMatches(aseq, 0, -1)
|
||||
if len(locs) > 0 {
|
||||
for _, loc := range locs {
|
||||
matches = append(matches, PrimerMatch{
|
||||
Begin: loc[0],
|
||||
End: loc[1],
|
||||
Mismatches: loc[2],
|
||||
Marker: i,
|
||||
Forward: false,
|
||||
})
|
||||
}
|
||||
|
||||
locs = marker.cforward.AllMatches(aseq, locs[0][0]+1, -1)
|
||||
|
||||
if len(locs) > 0 {
|
||||
for _, loc := range locs {
|
||||
matches = append(matches, PrimerMatch{
|
||||
Begin: loc[0],
|
||||
End: loc[1],
|
||||
Mismatches: loc[2],
|
||||
Marker: -i,
|
||||
Forward: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
i++
|
||||
}
|
||||
|
||||
if len(matches) > 0 {
|
||||
sort.Slice(matches, func(i, j int) bool {
|
||||
return matches[i].Begin < matches[j].Begin
|
||||
})
|
||||
|
||||
state := 0
|
||||
var from PrimerMatch
|
||||
q := 0
|
||||
for _, match := range matches {
|
||||
|
||||
switch state {
|
||||
case 0:
|
||||
if match.Marker > 0 {
|
||||
from = match
|
||||
state = 1
|
||||
}
|
||||
|
||||
case 1:
|
||||
if match.Marker == -from.Marker && match.Forward == from.Forward {
|
||||
q++
|
||||
log.Infof("%d -- %s [%s:%s] %s : %d -> %d mismatches : %d:%d",
|
||||
q,
|
||||
sequence.Id(),
|
||||
primerseqs[from.Marker].Forward,
|
||||
primerseqs[from.Marker].Reverse,
|
||||
map[bool]string{true: "forward", false: "reverse"}[from.Forward],
|
||||
from.End,
|
||||
match.Begin-1,
|
||||
from.Mismatches,
|
||||
match.Mismatches,
|
||||
)
|
||||
state = 0
|
||||
} else if match.Marker > 0 {
|
||||
log.Warnf("Marker mismatch : %d %d", match.Marker, from.Marker)
|
||||
from = match
|
||||
} else {
|
||||
log.Warnf("Marker mismatch : %d %d", match.Marker, from.Marker)
|
||||
state = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (library *NGSLibrary) ExtractMultiBarcodeSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
|
||||
opt := MakeOptions(options)
|
||||
|
||||
library.Compile(opt.AllowedMismatch(), opt.AllowsIndel())
|
||||
|
||||
worker := func(sequence *obiseq.BioSequence) (obiseq.BioSequenceSlice, error) {
|
||||
return library.ExtractMultiBarcode(sequence)
|
||||
}
|
||||
|
||||
return obiseq.SeqToSliceWorker(worker, true)
|
||||
}
|
||||
Reference in New Issue
Block a user