mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Adds the command obimultiplex
This commit is contained in:
@ -5,7 +5,7 @@ import (
|
|||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obipcr"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obimultiplex"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
@ -24,11 +24,11 @@ func main() {
|
|||||||
// trace.Start(ftrace)
|
// trace.Start(ftrace)
|
||||||
// defer trace.Stop()
|
// defer trace.Stop()
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(obipcr.OptionSet)
|
optionParser := obioptions.GenerateOptionParser(obimultiplex.OptionSet)
|
||||||
|
|
||||||
_, args, _ := optionParser(os.Args)
|
_, args, _ := optionParser(os.Args)
|
||||||
|
|
||||||
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
|
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
|
||||||
amplicons, _ := obipcr.PCR(sequences)
|
amplicons, _ := obimultiplex.IExtractBarcodeBatches(sequences)
|
||||||
obiconvert.WriteBioSequencesBatch(amplicons, true)
|
obiconvert.WriteBioSequencesBatch(amplicons, true)
|
||||||
}
|
}
|
||||||
|
@ -39,7 +39,7 @@ func main() {
|
|||||||
obipairing.MinOverlap(),
|
obipairing.MinOverlap(),
|
||||||
obipairing.MinIdentity(),
|
obipairing.MinIdentity(),
|
||||||
obipairing.WithStats(),
|
obipairing.WithStats(),
|
||||||
obioptions.ParallelWorkers(),
|
obioptions.CLIParallelWorkers(),
|
||||||
)
|
)
|
||||||
obiconvert.WriteBioSequencesBatch(paired, true)
|
obiconvert.WriteBioSequencesBatch(paired, true)
|
||||||
}
|
}
|
||||||
|
@ -52,7 +52,18 @@ func main() {
|
|||||||
|
|
||||||
file, _ := os.Open("sample/wolf_diet_ngsfilter.txt")
|
file, _ := os.Open("sample/wolf_diet_ngsfilter.txt")
|
||||||
xxx, _ := obiformats.ReadNGSFilter(file)
|
xxx, _ := obiformats.ReadNGSFilter(file)
|
||||||
|
xxx.Compile(2)
|
||||||
|
fmt.Printf("%v\n==================\n", xxx)
|
||||||
|
|
||||||
fmt.Println(xxx)
|
for pp, m := range xxx {
|
||||||
|
fmt.Printf("%v %v\n", pp, *m)
|
||||||
|
}
|
||||||
|
|
||||||
|
seqfile, _ := obiformats.ReadFastSeqFromFile("xxxx.fastq")
|
||||||
|
|
||||||
|
for seqfile.Next() {
|
||||||
|
seq := seqfile.Get()
|
||||||
|
barcode, _ := xxx.ExtractBarcode(seq, true)
|
||||||
|
fmt.Println(obiformats.FormatFasta(barcode, obiformats.FormatFastSeqOBIHeader))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,29 +6,10 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PrimerPair struct {
|
|
||||||
Forward string
|
|
||||||
Reverse string
|
|
||||||
}
|
|
||||||
|
|
||||||
type TagPair struct {
|
|
||||||
Forward string
|
|
||||||
Reverse string
|
|
||||||
}
|
|
||||||
|
|
||||||
type PCR struct {
|
|
||||||
Experiment string
|
|
||||||
Sample string
|
|
||||||
Partial bool
|
|
||||||
Annotations obiseq.Annotation
|
|
||||||
}
|
|
||||||
|
|
||||||
type PCRs map[TagPair]PCR
|
|
||||||
type NGSFilter map[PrimerPair]PCRs
|
|
||||||
|
|
||||||
func _readLines(reader io.Reader) []string {
|
func _readLines(reader io.Reader) []string {
|
||||||
r := bufio.NewReader(reader)
|
r := bufio.NewReader(reader)
|
||||||
bytes := []byte{}
|
bytes := []byte{}
|
||||||
@ -53,12 +34,15 @@ func _readLines(reader io.Reader) []string {
|
|||||||
return lines
|
return lines
|
||||||
}
|
}
|
||||||
|
|
||||||
func _parseMainNGSFilterTags(text string) TagPair {
|
func _parseMainNGSFilterTags(text string) obingslibrary.TagPair {
|
||||||
|
|
||||||
tags := strings.Split(text, ":")
|
tags := strings.Split(text, ":")
|
||||||
|
|
||||||
if len(tags) == 1 {
|
if len(tags) == 1 {
|
||||||
return TagPair{tags[0], tags[0]}
|
return obingslibrary.TagPair{
|
||||||
|
Forward: tags[0],
|
||||||
|
Reverse: tags[0],
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if tags[0] == "-" {
|
if tags[0] == "-" {
|
||||||
@ -69,28 +53,34 @@ func _parseMainNGSFilterTags(text string) TagPair {
|
|||||||
tags[1] = ""
|
tags[1] = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
return TagPair{tags[0], tags[1]}
|
return obingslibrary.TagPair{
|
||||||
|
Forward: tags[0],
|
||||||
|
Reverse: tags[1],
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func _parseMainNGSFilter(text string) (PrimerPair, TagPair, string, string, bool) {
|
func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool) {
|
||||||
fields := strings.Fields(text)
|
fields := strings.Fields(text)
|
||||||
|
|
||||||
tags := _parseMainNGSFilterTags(fields[2])
|
tags := _parseMainNGSFilterTags(fields[2])
|
||||||
partial := fields[5] == "T" || fields[5] == "t"
|
partial := fields[5] == "T" || fields[5] == "t"
|
||||||
|
|
||||||
return PrimerPair{fields[3], fields[4]},
|
return obingslibrary.PrimerPair{
|
||||||
|
Forward: fields[3],
|
||||||
|
Reverse: fields[4],
|
||||||
|
},
|
||||||
tags,
|
tags,
|
||||||
fields[0],
|
fields[0],
|
||||||
fields[1],
|
fields[1],
|
||||||
partial
|
partial
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadNGSFilter(reader io.Reader) (NGSFilter, error) {
|
func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) {
|
||||||
ngsfilter := make(NGSFilter, 10)
|
ngsfilter := obingslibrary.MakeNGSLibrary()
|
||||||
|
|
||||||
lines := _readLines(reader)
|
lines := _readLines(reader)
|
||||||
|
|
||||||
for _, line := range lines {
|
for i, line := range lines {
|
||||||
line = strings.TrimSpace(line)
|
line = strings.TrimSpace(line)
|
||||||
|
|
||||||
if strings.HasPrefix(line, "#") || len(line) == 0 {
|
if strings.HasPrefix(line, "#") || len(line) == 0 {
|
||||||
@ -100,33 +90,25 @@ func ReadNGSFilter(reader io.Reader) (NGSFilter, error) {
|
|||||||
split := strings.SplitN(line, "@", 2)
|
split := strings.SplitN(line, "@", 2)
|
||||||
|
|
||||||
primers, tags, experiment, sample, partial := _parseMainNGSFilter(split[0])
|
primers, tags, experiment, sample, partial := _parseMainNGSFilter(split[0])
|
||||||
newPCR := PCR{
|
|
||||||
Experiment: experiment,
|
|
||||||
Sample: sample,
|
|
||||||
Partial: partial,
|
|
||||||
Annotations: nil,
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(split) > 1 && len(split[1]) > 0 {
|
marker, _ := ngsfilter.GetMarker(primers.Forward, primers.Reverse)
|
||||||
newPCR.Annotations = obiseq.GetAnnotation()
|
pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse)
|
||||||
ParseOBIFeatures(split[1], newPCR.Annotations)
|
|
||||||
}
|
|
||||||
|
|
||||||
samples, ok := ngsfilter[primers]
|
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
pcr, ok := samples[tags]
|
return ngsfilter,
|
||||||
|
fmt.Errorf("line %d : tag pair (%s,%s) used more than once with marker (%s,%s)",
|
||||||
if ok {
|
i, tags.Forward, tags.Reverse, primers.Forward, primers.Reverse)
|
||||||
return nil, fmt.Errorf("pair of tags %v used for samples %s in %s and %s in %s",
|
|
||||||
tags, sample, experiment, pcr.Sample, pcr.Experiment)
|
|
||||||
}
|
|
||||||
|
|
||||||
samples[tags] = newPCR
|
|
||||||
} else {
|
|
||||||
ngsfilter[primers] = make(PCRs, 1000)
|
|
||||||
ngsfilter[primers][tags] = newPCR
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pcr.Experiment = experiment
|
||||||
|
pcr.Sample = sample
|
||||||
|
pcr.Partial = partial
|
||||||
|
|
||||||
|
if len(split) > 1 && len(split[1]) > 0 {
|
||||||
|
pcr.Annotations = make(obiseq.Annotation)
|
||||||
|
ParseOBIFeatures(split[1], pcr.Annotations)
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ngsfilter, nil
|
return ngsfilter, nil
|
||||||
|
284
pkg/obingslibrary/match.go
Normal file
284
pkg/obingslibrary/match.go
Normal file
@ -0,0 +1,284 @@
|
|||||||
|
package obingslibrary
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
)
|
||||||
|
|
||||||
|
type DemultiplexMatch struct {
|
||||||
|
ForwardMatch string
|
||||||
|
ReverseMatch string
|
||||||
|
ForwardTag string
|
||||||
|
ReverseTag string
|
||||||
|
BarcodeStart int
|
||||||
|
BarcodeEnd int
|
||||||
|
ForwardMismatches int
|
||||||
|
ReverseMismatches int
|
||||||
|
IsDirect bool
|
||||||
|
Pcr *PCR
|
||||||
|
ForwardPrimer string
|
||||||
|
ReversePrimer string
|
||||||
|
Error error
|
||||||
|
}
|
||||||
|
|
||||||
|
func (library *NGSLibrary) Compile(maxError int) error {
|
||||||
|
for primers, marker := range *library {
|
||||||
|
err := marker.Compile(primers.Forward,
|
||||||
|
primers.Reverse,
|
||||||
|
maxError)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (library *NGSLibrary) Match(sequence obiseq.BioSequence) *DemultiplexMatch {
|
||||||
|
for primers, marker := range *library {
|
||||||
|
m := marker.Match(sequence)
|
||||||
|
if m != nil {
|
||||||
|
m.ForwardPrimer = strings.ToLower(primers.Forward)
|
||||||
|
m.ReversePrimer = strings.ToLower(primers.Reverse)
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (library *NGSLibrary) ExtractBarcode(sequence obiseq.BioSequence, inplace bool) (obiseq.BioSequence, error) {
|
||||||
|
match := library.Match(sequence)
|
||||||
|
return match.ExtractBarcode(sequence, inplace)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (marker *Marker) Compile(forward, reverse string, maxError int) error {
|
||||||
|
var err error
|
||||||
|
marker.forward, err = obiapat.MakeApatPattern(forward,
|
||||||
|
maxError)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
marker.reverse, err = obiapat.MakeApatPattern(reverse,
|
||||||
|
maxError)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
marker.cforward, err = marker.forward.ReverseComplement()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
marker.creverse, err = marker.reverse.ReverseComplement()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
marker.taglength = 0
|
||||||
|
for tags := range marker.samples {
|
||||||
|
lf := len(tags.Forward)
|
||||||
|
lr := len(tags.Reverse)
|
||||||
|
|
||||||
|
l := lf
|
||||||
|
if lf == 0 {
|
||||||
|
l = lr
|
||||||
|
}
|
||||||
|
|
||||||
|
if lr != 0 && l != lr {
|
||||||
|
return fmt.Errorf("forward tag (%s) and reverse tag (%s) do not have the same length",
|
||||||
|
tags.Forward, tags.Reverse)
|
||||||
|
}
|
||||||
|
|
||||||
|
if marker.taglength != 0 && l != marker.taglength {
|
||||||
|
return fmt.Errorf("tag pair (%s,%s) is not compatible with a tag length of %d",
|
||||||
|
tags.Forward, tags.Reverse, marker.taglength)
|
||||||
|
} else {
|
||||||
|
marker.taglength = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (marker *Marker) Match(sequence obiseq.BioSequence) *DemultiplexMatch {
|
||||||
|
aseq, _ := obiapat.MakeApatSequence(sequence, false)
|
||||||
|
match := marker.forward.FindAllIndex(aseq, marker.taglength)
|
||||||
|
|
||||||
|
if len(match) > 0 {
|
||||||
|
sseq := sequence.String()
|
||||||
|
direct := sseq[match[0][0]:match[0][1]]
|
||||||
|
ftag := sseq[(match[0][0] - marker.taglength):match[0][0]]
|
||||||
|
|
||||||
|
m := DemultiplexMatch{
|
||||||
|
ForwardMatch: direct,
|
||||||
|
ForwardTag: ftag,
|
||||||
|
BarcodeStart: match[0][1],
|
||||||
|
ForwardMismatches: match[0][2],
|
||||||
|
IsDirect: true,
|
||||||
|
Error: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
rmatch := marker.creverse.FindAllIndex(aseq, match[0][1])
|
||||||
|
|
||||||
|
if len(rmatch) > 0 {
|
||||||
|
|
||||||
|
// extracting primer matches
|
||||||
|
reverse, _ := sequence.Subsequence(rmatch[0][0], rmatch[0][1], false)
|
||||||
|
defer reverse.Recycle()
|
||||||
|
reverse = reverse.ReverseComplement(true)
|
||||||
|
rtag, err := sequence.Subsequence(rmatch[0][1], rmatch[0][1]+marker.taglength, false)
|
||||||
|
defer rtag.Recycle()
|
||||||
|
srtag := ""
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
rtag = obiseq.NilBioSequence
|
||||||
|
} else {
|
||||||
|
rtag.ReverseComplement(true)
|
||||||
|
srtag = strings.ToLower(rtag.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
m.ReverseMatch = strings.ToLower(reverse.String())
|
||||||
|
m.ReverseMismatches = rmatch[0][2]
|
||||||
|
m.BarcodeEnd = rmatch[0][0]
|
||||||
|
m.ReverseTag = srtag
|
||||||
|
|
||||||
|
sample, ok := marker.samples[TagPair{ftag, srtag}]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
m.Pcr = sample
|
||||||
|
}
|
||||||
|
|
||||||
|
return &m
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
err := fmt.Errorf("cannot locates reverse priming site")
|
||||||
|
m.Error = err
|
||||||
|
|
||||||
|
return &m
|
||||||
|
}
|
||||||
|
|
||||||
|
match = marker.reverse.FindAllIndex(aseq, marker.taglength)
|
||||||
|
|
||||||
|
if len(match) > 0 {
|
||||||
|
sseq := sequence.String()
|
||||||
|
|
||||||
|
reverse := strings.ToLower(sseq[match[0][0]:match[0][1]])
|
||||||
|
rtag := strings.ToLower(sseq[(match[0][0] - marker.taglength):match[0][0]])
|
||||||
|
|
||||||
|
m := DemultiplexMatch{
|
||||||
|
ReverseMatch: reverse,
|
||||||
|
ReverseTag: rtag,
|
||||||
|
BarcodeStart: match[0][1],
|
||||||
|
ReverseMismatches: match[0][2],
|
||||||
|
IsDirect: false,
|
||||||
|
Error: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
rmatch := marker.cforward.FindAllIndex(aseq, match[0][1])
|
||||||
|
|
||||||
|
if len(rmatch) > 0 {
|
||||||
|
|
||||||
|
direct, _ := sequence.Subsequence(rmatch[0][0], rmatch[0][1], false)
|
||||||
|
defer direct.Recycle()
|
||||||
|
direct = direct.ReverseComplement(true)
|
||||||
|
|
||||||
|
ftag, err := sequence.Subsequence(rmatch[0][1], rmatch[0][1]+marker.taglength, false)
|
||||||
|
defer ftag.Recycle()
|
||||||
|
sftag := ""
|
||||||
|
if err != nil {
|
||||||
|
ftag = obiseq.NilBioSequence
|
||||||
|
|
||||||
|
} else {
|
||||||
|
ftag = ftag.ReverseComplement(true)
|
||||||
|
sftag = ftag.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
m.ForwardMatch = direct.String()
|
||||||
|
m.ForwardTag = sftag
|
||||||
|
m.ReverseMismatches = rmatch[0][2]
|
||||||
|
m.BarcodeEnd = rmatch[0][0]
|
||||||
|
|
||||||
|
sample, ok := marker.samples[TagPair{sftag, rtag}]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
m.Pcr = sample
|
||||||
|
}
|
||||||
|
|
||||||
|
return &m
|
||||||
|
}
|
||||||
|
|
||||||
|
err := fmt.Errorf("cannot locates forward priming site")
|
||||||
|
m.Error = err
|
||||||
|
return &m
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (match *DemultiplexMatch) ExtractBarcode(sequence obiseq.BioSequence, inplace bool) (obiseq.BioSequence, error) {
|
||||||
|
if !inplace {
|
||||||
|
sequence = sequence.Copy()
|
||||||
|
}
|
||||||
|
|
||||||
|
if match == nil {
|
||||||
|
annot := sequence.Annotations()
|
||||||
|
annot["demultiplex_error"] = "cannot match any primer pair"
|
||||||
|
return sequence, errors.New("cannot match any primer pair")
|
||||||
|
}
|
||||||
|
|
||||||
|
if match.ForwardMatch != "" && match.ReverseMatch != "" {
|
||||||
|
var err error
|
||||||
|
sequence, err = sequence.Subsequence(match.BarcodeStart, match.BarcodeEnd, false)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("cannot extract sub sequence %d..%d %v", match.BarcodeStart, match.BarcodeEnd, *match)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !match.IsDirect {
|
||||||
|
sequence.ReverseComplement(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
annot := sequence.Annotations()
|
||||||
|
if annot == nil {
|
||||||
|
log.Fatalf("nil annot %v", sequence)
|
||||||
|
}
|
||||||
|
annot["forward_primer"] = match.ForwardPrimer
|
||||||
|
annot["reverse_primer"] = match.ReversePrimer
|
||||||
|
|
||||||
|
if match.IsDirect {
|
||||||
|
annot["direction"] = "direct"
|
||||||
|
} else {
|
||||||
|
annot["direction"] = "reverse"
|
||||||
|
}
|
||||||
|
|
||||||
|
if match.ForwardMatch != "" {
|
||||||
|
annot["forward_match"] = match.ForwardMatch
|
||||||
|
annot["forward_mismatches"] = match.ForwardMismatches
|
||||||
|
annot["forward_tag"] = match.ForwardTag
|
||||||
|
}
|
||||||
|
|
||||||
|
if match.ReverseMatch != "" {
|
||||||
|
annot["reverse_match"] = match.ReverseMatch
|
||||||
|
annot["reverse_mismatches"] = match.ReverseMismatches
|
||||||
|
annot["reverse_tag"] = match.ReverseTag
|
||||||
|
}
|
||||||
|
|
||||||
|
if match.Error != nil {
|
||||||
|
annot["demultiplex_error"] = fmt.Sprintf("%v", match.Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
if match.Pcr != nil {
|
||||||
|
annot["sample"] = match.Pcr.Sample
|
||||||
|
annot["experiment"] = match.Pcr.Experiment
|
||||||
|
for k, val := range match.Pcr.Annotations {
|
||||||
|
annot[k] = val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sequence, match.Error
|
||||||
|
}
|
65
pkg/obingslibrary/ngslibrary.go
Normal file
65
pkg/obingslibrary/ngslibrary.go
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
package obingslibrary
|
||||||
|
|
||||||
|
import (
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PrimerPair is the key identifying a marker in an NGSLibrary: the
// forward and the reverse primer, as given to GetMarker.
type PrimerPair struct {
	Forward string // forward primer
	Reverse string // reverse primer
}
|
||||||
|
|
||||||
|
// TagPair is the key identifying a PCR inside a marker: the tag expected
// on the forward side and the tag expected on the reverse side.
type TagPair struct {
	Forward string // tag on the forward primer side
	Reverse string // tag on the reverse primer side
}
|
||||||
|
|
||||||
|
type PCR struct {
|
||||||
|
Experiment string
|
||||||
|
Sample string
|
||||||
|
Partial bool
|
||||||
|
Annotations obiseq.Annotation
|
||||||
|
}
|
||||||
|
|
||||||
|
type Marker struct {
|
||||||
|
forward obiapat.ApatPattern
|
||||||
|
cforward obiapat.ApatPattern
|
||||||
|
reverse obiapat.ApatPattern
|
||||||
|
creverse obiapat.ApatPattern
|
||||||
|
taglength int
|
||||||
|
samples map[TagPair]*PCR
|
||||||
|
}
|
||||||
|
type NGSLibrary map[PrimerPair]*Marker
|
||||||
|
|
||||||
|
func MakeNGSLibrary() NGSLibrary {
|
||||||
|
return make(NGSLibrary, 10)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (library *NGSLibrary) GetMarker(forward, reverse string) (*Marker, bool) {
|
||||||
|
pair := PrimerPair{forward, reverse}
|
||||||
|
marker, ok := (*library)[pair]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
return marker, true
|
||||||
|
}
|
||||||
|
|
||||||
|
m := Marker{samples: make(map[TagPair]*PCR, 1000)}
|
||||||
|
(*library)[pair] = &m
|
||||||
|
|
||||||
|
return &m, false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (marker *Marker) GetPCR(forward, reverse string) (*PCR, bool) {
|
||||||
|
pair := TagPair{forward, reverse}
|
||||||
|
pcr, ok := marker.samples[pair]
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
return pcr, ok
|
||||||
|
}
|
||||||
|
|
||||||
|
ipcr := PCR{}
|
||||||
|
marker.samples[pair] = &ipcr
|
||||||
|
|
||||||
|
return &ipcr, false
|
||||||
|
}
|
182
pkg/obingslibrary/worker.go
Normal file
182
pkg/obingslibrary/worker.go
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
package obingslibrary
|
||||||
|
|
||||||
|
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
|
||||||
|
// _Options holds the option values themselves. It is always reached
// through the pointer kept in Options, so that setters receiving an
// Options by value still modify the shared values.
type _Options struct {
	discardErrors   bool
	unidentified    string
	allowedMismatch int
	withProgressBar bool
	parallelWorkers int
	batchSize       int
	bufferSize      int
}

// Options stores the set of options used by the barcode extraction
// functions of this package.
type Options struct {
	pointer *_Options
}

// WithOption is the standard type for functions
// declaring options.
type WithOption func(Options)

// OptionDiscardErrors sets the discardErrors flag.
func OptionDiscardErrors(yes bool) WithOption {
	return func(opt Options) {
		opt.pointer.discardErrors = yes
	}
}

// OptionUnidentified sets the file name stored as the unidentified
// option.
func OptionUnidentified(filename string) WithOption {
	return func(opt Options) {
		opt.pointer.unidentified = filename
	}
}

// OptionWithProgressBar sets the withProgressBar flag.
func OptionWithProgressBar(yes bool) WithOption {
	return func(opt Options) {
		opt.pointer.withProgressBar = yes
	}
}

// OptionAllowedMismatches sets the value returned by AllowedMismatch.
func OptionAllowedMismatches(count int) WithOption {
	return func(opt Options) {
		opt.pointer.allowedMismatch = count
	}
}

// OptionBufferSize sets the requested channel
// buffer size.
func OptionBufferSize(size int) WithOption {
	return func(opt Options) {
		opt.pointer.bufferSize = size
	}
}

// OptionParallelWorkers sets how many
// jobs will be run in parallel.
func OptionParallelWorkers(nworkers int) WithOption {
	return func(opt Options) {
		opt.pointer.parallelWorkers = nworkers
	}
}

// OptionBatchSize sets the requested sequence
// batch size.
func OptionBatchSize(size int) WithOption {
	return func(opt Options) {
		opt.pointer.batchSize = size
	}
}

// DiscardErrors reports true when no unidentified file name is set or
// when the discardErrors flag is set.
func (options Options) DiscardErrors() bool {
	if options.pointer.unidentified == "" {
		return true
	}

	return options.pointer.discardErrors
}

// Unidentified returns the file name stored by OptionUnidentified.
func (options Options) Unidentified() string {
	return options.pointer.unidentified
}

// AllowedMismatch returns the value stored by OptionAllowedMismatches.
func (options Options) AllowedMismatch() int {
	return options.pointer.allowedMismatch
}

// WithProgressBar returns the flag stored by OptionWithProgressBar.
func (options Options) WithProgressBar() bool {
	return options.pointer.withProgressBar
}

// BufferSize returns the size of the channel
// buffer specified by the options.
func (options Options) BufferSize() int {
	return options.pointer.bufferSize
}

// BatchSize returns the size of the
// sequence batch specified by the options.
func (options Options) BatchSize() int {
	return options.pointer.batchSize
}

// ParallelWorkers returns how many
// jobs will be run in parallel.
func (options Options) ParallelWorkers() int {
	return options.pointer.parallelWorkers
}

// MakeOptions builds a new option set holding the default values, then
// applies the setters to it in order.
//
// Defaults: errors discarded, no unidentified file, 0 allowed mismatch,
// no progress bar, 4 parallel workers, batches of 1000 sequences and a
// buffer size of 100.
func MakeOptions(setters []WithOption) Options {
	opt := Options{
		pointer: &_Options{
			discardErrors:   true,
			unidentified:    "",
			allowedMismatch: 0,
			withProgressBar: false,
			parallelWorkers: 4,
			batchSize:       1000,
			bufferSize:      100,
		},
	}

	for _, apply := range setters {
		apply(opt)
	}

	return opt
}
|
||||||
|
|
||||||
|
func _ExtractBarcodeSlice(ngslibrary NGSLibrary,
|
||||||
|
sequences obiseq.BioSequenceSlice,
|
||||||
|
options Options) obiseq.BioSequenceSlice {
|
||||||
|
newSlice := make(obiseq.BioSequenceSlice,0,len(sequences))
|
||||||
|
|
||||||
|
for _, seq := range sequences {
|
||||||
|
s, err := ngslibrary.ExtractBarcode(seq,true)
|
||||||
|
if err==nil || ! options.pointer.discardErrors {
|
||||||
|
newSlice = append(newSlice, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return newSlice
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExtractBarcodeSlice(ngslibrary NGSLibrary,
|
||||||
|
sequences obiseq.BioSequenceSlice,
|
||||||
|
options ...WithOption) obiseq.BioSequenceSlice {
|
||||||
|
|
||||||
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
|
ngslibrary.Compile(opt.AllowedMismatch())
|
||||||
|
|
||||||
|
return _ExtractBarcodeSlice(ngslibrary, sequences, opt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
|
||||||
|
options ...WithOption) obiseq.SeqSliceWorker {
|
||||||
|
|
||||||
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
|
ngslibrary.Compile(opt.AllowedMismatch())
|
||||||
|
|
||||||
|
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||||
|
return _ExtractBarcodeSlice(ngslibrary, sequences, opt)
|
||||||
|
}
|
||||||
|
|
||||||
|
return worker
|
||||||
|
}
|
||||||
|
|
@ -41,23 +41,23 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Predicate indicating if the debug mode is activated.
|
// Predicate indicating if the debug mode is activated.
|
||||||
func IsDebugMode() bool {
|
func CLIIsDebugMode() bool {
|
||||||
return _Debug
|
return _Debug
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParallelWorkers returns the number of parallel workers requested by
|
// CLIParallelWorkers returns the number of parallel workers requested by
|
||||||
// the command line option --workers|-w.
|
// the command line option --workers|-w.
|
||||||
func ParallelWorkers() int {
|
func CLIParallelWorkers() int {
|
||||||
return _ParallelWorkers
|
return _ParallelWorkers
|
||||||
}
|
}
|
||||||
|
|
||||||
// BufferSize returns the expeted channel buffer size for obitools
|
// CLIBufferSize returns the expeted channel buffer size for obitools
|
||||||
func BufferSize() int {
|
func CLIBufferSize() int {
|
||||||
return _BufferSize
|
return _BufferSize
|
||||||
}
|
}
|
||||||
|
|
||||||
// BatchSize returns the expeted size of the sequence batches
|
// CLIBatchSize returns the expeted size of the sequence batches
|
||||||
func BatchSize() int {
|
func CLIBatchSize() int {
|
||||||
return _BatchSize
|
return _BatchSize
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,11 +61,13 @@ func (sequence *BioSequence) Recycle() {
|
|||||||
|
|
||||||
pseq := sequence.sequence
|
pseq := sequence.sequence
|
||||||
|
|
||||||
RecycleSlice(pseq.sequence)
|
if pseq != nil {
|
||||||
RecycleSlice(pseq.feature)
|
RecycleSlice(pseq.sequence)
|
||||||
RecycleSlice(pseq.feature)
|
RecycleSlice(pseq.feature)
|
||||||
|
RecycleSlice(pseq.feature)
|
||||||
|
|
||||||
RecycleAnnotation(pseq.annotations)
|
RecycleAnnotation(pseq.annotations)
|
||||||
|
}
|
||||||
|
|
||||||
sequence.sequence = nil
|
sequence.sequence = nil
|
||||||
}
|
}
|
||||||
@ -132,9 +134,14 @@ func (s BioSequence) HasAnnotation() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) Annotations() Annotation {
|
func (s BioSequence) Annotations() Annotation {
|
||||||
|
if s.sequence == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
if s.sequence.annotations == nil {
|
if s.sequence.annotations == nil {
|
||||||
s.sequence.annotations = GetAnnotation()
|
s.sequence.annotations = GetAnnotation()
|
||||||
}
|
}
|
||||||
|
|
||||||
return s.sequence.annotations
|
return s.sequence.annotations
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,10 +36,12 @@ var BioSequenceAnnotationPool = sync.Pool{
|
|||||||
}
|
}
|
||||||
|
|
||||||
func RecycleAnnotation(a Annotation) {
|
func RecycleAnnotation(a Annotation) {
|
||||||
for k := range a {
|
if a != nil {
|
||||||
delete(a, k)
|
for k := range a {
|
||||||
|
delete(a, k)
|
||||||
|
}
|
||||||
|
BioSequenceAnnotationPool.Put(&(a))
|
||||||
}
|
}
|
||||||
BioSequenceAnnotationPool.Put(&(a))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetAnnotation(values ...Annotation) Annotation {
|
func GetAnnotation(values ...Annotation) Annotation {
|
||||||
|
@ -81,14 +81,14 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
|
|||||||
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader))
|
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader))
|
||||||
}
|
}
|
||||||
|
|
||||||
nworkers := obioptions.ParallelWorkers() / 4
|
nworkers := obioptions.CLIParallelWorkers() / 4
|
||||||
if nworkers < 2 {
|
if nworkers < 2 {
|
||||||
nworkers = 2
|
nworkers = 2
|
||||||
}
|
}
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBufferSize(obioptions.BufferSize()))
|
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.BatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(InputQualityShift()))
|
opts = append(opts, obiformats.OptionsQualityShift(InputQualityShift()))
|
||||||
|
|
||||||
|
@ -24,14 +24,14 @@ func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error
|
|||||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||||
}
|
}
|
||||||
|
|
||||||
nworkers := obioptions.ParallelWorkers() / 4
|
nworkers := obioptions.CLIParallelWorkers() / 4
|
||||||
if nworkers < 2 {
|
if nworkers < 2 {
|
||||||
nworkers = 2
|
nworkers = 2
|
||||||
}
|
}
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBufferSize(obioptions.BufferSize()))
|
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.BatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
|
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
|
||||||
|
|
||||||
@ -84,14 +84,14 @@ func WriteBioSequencesBatch(iterator obiseq.IBioSequenceBatch,
|
|||||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||||
}
|
}
|
||||||
|
|
||||||
nworkers := obioptions.ParallelWorkers() / 4
|
nworkers := obioptions.CLIParallelWorkers() / 4
|
||||||
if nworkers < 2 {
|
if nworkers < 2 {
|
||||||
nworkers = 2
|
nworkers = 2
|
||||||
}
|
}
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
opts = append(opts, obiformats.OptionsBufferSize(obioptions.BufferSize()))
|
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.BatchSize()))
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
|
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
|
||||||
|
|
||||||
|
@ -1 +1,77 @@
|
|||||||
package obimultiplex
|
package obimultiplex
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
|
"github.com/DavidGamba/go-getoptions"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Values of the obimultiplex command line options; the initial values
// below are the defaults handed to the option parser.
var (
	_NGSFilterFile    = ""    // --tag-list
	_UnidentifiedFile = ""    // --unidentified
	_AllowedMismatch  = 2     // --allowed-mismatches
	_ConservedError   = false // --keep-errors
)
|
||||||
|
|
||||||
|
// PCROptionSet defines every options related to a simulated PCR.
|
||||||
|
//
|
||||||
|
// The function adds to a CLI every options proposed to the user
|
||||||
|
// to tune the parametters of the PCR simulation algorithm.
|
||||||
|
//
|
||||||
|
// Parameters
|
||||||
|
//
|
||||||
|
// - option : is a pointer to a getoptions.GetOpt instance normaly
|
||||||
|
// produced by the
|
||||||
|
func MultiplexOptionSet(options *getoptions.GetOpt) {
|
||||||
|
options.StringVar(&_NGSFilterFile, "tag-list", _NGSFilterFile,
|
||||||
|
options.Alias("t"),
|
||||||
|
options.Required("You must provide a tag list file following the NGSFilter format"),
|
||||||
|
options.Description("File name of the NGSFilter file describing PCRs."))
|
||||||
|
|
||||||
|
options.BoolVar(&_ConservedError, "keep-errors", _ConservedError,
|
||||||
|
options.Description("Prints symbol counts."))
|
||||||
|
|
||||||
|
options.StringVar(&_UnidentifiedFile, "unidentified", _UnidentifiedFile,
|
||||||
|
options.Alias("u"),
|
||||||
|
options.Description("Filename used to store the sequences unassigned to any sample."))
|
||||||
|
|
||||||
|
options.IntVar(&_AllowedMismatch, "allowed-mismatches", _AllowedMismatch,
|
||||||
|
options.Alias("e"),
|
||||||
|
options.Description("Used to specify the number of errors allowed for matching primers."))
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
|
obiconvert.OptionSet(options)
|
||||||
|
MultiplexOptionSet(options)
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIAllowedMismatch() int {
|
||||||
|
return _AllowedMismatch
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIUnidentifiedFileName() string {
|
||||||
|
return _UnidentifiedFile
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLIConservedErrors() bool {
|
||||||
|
return _UnidentifiedFile != "" || _ConservedError
|
||||||
|
}
|
||||||
|
|
||||||
|
func CLINGSFIlter() (obingslibrary.NGSLibrary, error) {
|
||||||
|
file, err := os.Open(_NGSFilterFile)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("open file error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ngsfiler, err := obiformats.ReadNGSFilter(file)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("NGSfilter reading file error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ngsfiler, nil
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user