From 8dbda68746fd1f9ea75bc8bc40fcc10b31ac93d9 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Tue, 1 Feb 2022 17:31:28 +0100 Subject: [PATCH] Adds the command obimultiplex --- cmd/obitools/obimultiplex/main.go | 6 +- cmd/obitools/obipairing/main.go | 2 +- cmd/test/main.go | 13 +- pkg/obiformats/ngsfilter_read.go | 84 +++--- pkg/obingslibrary/match.go | 284 +++++++++++++++++++++ pkg/obingslibrary/ngslibrary.go | 65 +++++ pkg/obingslibrary/worker.go | 182 +++++++++++++ pkg/obioptions/options.go | 14 +- pkg/obiseq/biosequence.go | 15 +- pkg/obiseq/pool.go | 8 +- pkg/obitools/obiconvert/sequence_reader.go | 6 +- pkg/obitools/obiconvert/sequence_writer.go | 12 +- pkg/obitools/obimultiplex/options.go | 76 ++++++ 13 files changed, 688 insertions(+), 79 deletions(-) create mode 100644 pkg/obingslibrary/match.go create mode 100644 pkg/obingslibrary/ngslibrary.go create mode 100644 pkg/obingslibrary/worker.go diff --git a/cmd/obitools/obimultiplex/main.go b/cmd/obitools/obimultiplex/main.go index 0cb8432..aa72cab 100644 --- a/cmd/obitools/obimultiplex/main.go +++ b/cmd/obitools/obimultiplex/main.go @@ -5,7 +5,7 @@ import ( "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" - "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obipcr" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obimultiplex" ) func main() { @@ -24,11 +24,11 @@ func main() { // trace.Start(ftrace) // defer trace.Stop() - optionParser := obioptions.GenerateOptionParser(obipcr.OptionSet) + optionParser := obioptions.GenerateOptionParser(obimultiplex.OptionSet) _, args, _ := optionParser(os.Args) sequences, _ := obiconvert.ReadBioSequencesBatch(args...) - amplicons, _ := obipcr.PCR(sequences) + amplicons, _ := obimultiplex.IExtractBarcodeBatches(sequences) obiconvert.WriteBioSequencesBatch(amplicons, true) } diff --git a/cmd/obitools/obipairing/main.go b/cmd/obitools/obipairing/main.go index ab28c6b..90eea2d 100644 --- a/cmd/obitools/obipairing/main.go +++ b/cmd/obitools/obipairing/main.go @@ -39,7 +39,7 @@ func main() { obipairing.MinOverlap(), obipairing.MinIdentity(), obipairing.WithStats(), - obioptions.ParallelWorkers(), + obioptions.CLIParallelWorkers(), ) obiconvert.WriteBioSequencesBatch(paired, true) } diff --git a/cmd/test/main.go b/cmd/test/main.go index 393bcc6..b328442 100644 --- a/cmd/test/main.go +++ b/cmd/test/main.go @@ -52,7 +52,18 @@ func main() { file, _ := os.Open("sample/wolf_diet_ngsfilter.txt") xxx, _ := obiformats.ReadNGSFilter(file) + xxx.Compile(2) + fmt.Printf("%v\n==================\n", xxx) - fmt.Println(xxx) + for pp, m := range xxx { + fmt.Printf("%v %v\n", pp, *m) + } + seqfile, _ := obiformats.ReadFastSeqFromFile("xxxx.fastq") + + for seqfile.Next() { + seq := seqfile.Get() + barcode, _ := xxx.ExtractBarcode(seq, true) + fmt.Println(obiformats.FormatFasta(barcode, obiformats.FormatFastSeqOBIHeader)) + } } diff --git a/pkg/obiformats/ngsfilter_read.go b/pkg/obiformats/ngsfilter_read.go index c7053f3..2ace74d 100644 --- a/pkg/obiformats/ngsfilter_read.go +++ b/pkg/obiformats/ngsfilter_read.go @@ -6,29 +6,10 @@ import ( "io" "strings" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" ) -type PrimerPair struct { - Forward string - Reverse string -} - -type TagPair struct { - Forward string - Reverse string -} - -type PCR struct { - Experiment string - Sample string - Partial bool - Annotations obiseq.Annotation -} - -type PCRs map[TagPair]PCR -type NGSFilter map[PrimerPair]PCRs - func _readLines(reader io.Reader) []string { r := bufio.NewReader(reader) bytes := []byte{} @@ -53,12 +34,15 @@ func _readLines(reader io.Reader) []string { return lines } -func _parseMainNGSFilterTags(text string) TagPair { +func _parseMainNGSFilterTags(text string) obingslibrary.TagPair { tags := strings.Split(text, ":") if len(tags) == 1 { - return TagPair{tags[0], tags[0]} + return obingslibrary.TagPair{ + Forward: tags[0], + Reverse: tags[0], + } } if tags[0] == "-" { @@ -69,28 +53,34 @@ func _parseMainNGSFilterTags(text string) TagPair { tags[1] = "" } - return TagPair{tags[0], tags[1]} + return obingslibrary.TagPair{ + Forward: tags[0], + Reverse: tags[1], + } } -func _parseMainNGSFilter(text string) (PrimerPair, TagPair, string, string, bool) { +func _parseMainNGSFilter(text string) (obingslibrary.PrimerPair, obingslibrary.TagPair, string, string, bool) { fields := strings.Fields(text) tags := _parseMainNGSFilterTags(fields[2]) partial := fields[5] == "T" || fields[5] == "t" - return PrimerPair{fields[3], fields[4]}, + return obingslibrary.PrimerPair{ + Forward: fields[3], + Reverse: fields[4], + }, tags, fields[0], fields[1], partial } -func ReadNGSFilter(reader io.Reader) (NGSFilter, error) { - ngsfilter := make(NGSFilter, 10) +func ReadNGSFilter(reader io.Reader) (obingslibrary.NGSLibrary, error) { + ngsfilter := obingslibrary.MakeNGSLibrary() lines := _readLines(reader) - for _, line := range lines { + for i, line := range lines { line = strings.TrimSpace(line) if strings.HasPrefix(line, "#") || len(line) == 0 { @@ -100,33 +90,25 @@ func ReadNGSFilter(reader io.Reader) (NGSFilter, error) { split := strings.SplitN(line, "@", 2) primers, tags, experiment, sample, partial := _parseMainNGSFilter(split[0]) - newPCR := PCR{ - Experiment: experiment, - Sample: sample, - Partial: partial, - Annotations: nil, - } - if len(split) > 1 && len(split[1]) > 0 { - newPCR.Annotations = obiseq.GetAnnotation() - ParseOBIFeatures(split[1], newPCR.Annotations) - } - - samples, ok := ngsfilter[primers] + marker, _ := ngsfilter.GetMarker(primers.Forward, primers.Reverse) + pcr, ok := marker.GetPCR(tags.Forward, tags.Reverse) if ok { - pcr, ok := samples[tags] - - if ok { - return nil, fmt.Errorf("pair of tags %v used for samples %s in %s and %s in %s", - tags, sample, experiment, pcr.Sample, pcr.Experiment) - } - - samples[tags] = newPCR - } else { - ngsfilter[primers] = make(PCRs, 1000) - ngsfilter[primers][tags] = newPCR + return ngsfilter, + fmt.Errorf("line %d : tag pair (%s,%s) used more than once with marker (%s,%s)", + i, tags.Forward, tags.Reverse, primers.Forward, primers.Reverse) } + + pcr.Experiment = experiment + pcr.Sample = sample + pcr.Partial = partial + + if len(split) > 1 && len(split[1]) > 0 { + pcr.Annotations = make(obiseq.Annotation) + ParseOBIFeatures(split[1], pcr.Annotations) + } + } return ngsfilter, nil diff --git a/pkg/obingslibrary/match.go b/pkg/obingslibrary/match.go new file mode 100644 index 0000000..ed2ba42 --- /dev/null +++ b/pkg/obingslibrary/match.go @@ -0,0 +1,284 @@ +package obingslibrary + +import ( + "errors" + "fmt" + "log" + "strings" + + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" +) + +type DemultiplexMatch struct { + ForwardMatch string + ReverseMatch string + ForwardTag string + ReverseTag string + BarcodeStart int + BarcodeEnd int + ForwardMismatches int + ReverseMismatches int + IsDirect bool + Pcr *PCR + ForwardPrimer string + ReversePrimer string + Error error +} + +func (library *NGSLibrary) Compile(maxError int) error { + for primers, marker := range *library { + err := marker.Compile(primers.Forward, + primers.Reverse, + maxError) + if err != nil { + return err + } + } + return nil +} + +func (library *NGSLibrary) Match(sequence obiseq.BioSequence) *DemultiplexMatch { + for primers, marker := range *library { + m := marker.Match(sequence) + if m != nil { + m.ForwardPrimer = strings.ToLower(primers.Forward) + m.ReversePrimer = strings.ToLower(primers.Reverse) + return m + } + } + return nil +} + +func (library *NGSLibrary) ExtractBarcode(sequence obiseq.BioSequence, inplace bool) (obiseq.BioSequence, error) { + match := library.Match(sequence) + return match.ExtractBarcode(sequence, inplace) +} + +func (marker *Marker) Compile(forward, reverse string, maxError int) error { + var err error + marker.forward, err = obiapat.MakeApatPattern(forward, + maxError) + if err != nil { + return err + } + marker.reverse, err = obiapat.MakeApatPattern(reverse, + maxError) + if err != nil { + return err + } + + marker.cforward, err = marker.forward.ReverseComplement() + if err != nil { + return err + } + marker.creverse, err = marker.reverse.ReverseComplement() + if err != nil { + return err + } + + marker.taglength = 0 + for tags := range marker.samples { + lf := len(tags.Forward) + lr := len(tags.Reverse) + + l := lf + if lf == 0 { + l = lr + } + + if lr != 0 && l != lr { + return fmt.Errorf("forward tag (%s) and reverse tag (%s) do not have the same length", + tags.Forward, tags.Reverse) + } + + if marker.taglength != 0 && l != marker.taglength { + return fmt.Errorf("tag pair (%s,%s) is not compatible with a tag length of %d", + tags.Forward, tags.Reverse, marker.taglength) + } else { + marker.taglength = l + } + } + + return nil +} + +func (marker *Marker) Match(sequence obiseq.BioSequence) *DemultiplexMatch { + aseq, _ := obiapat.MakeApatSequence(sequence, false) + match := marker.forward.FindAllIndex(aseq, marker.taglength) + + if len(match) > 0 { + sseq := sequence.String() + direct := sseq[match[0][0]:match[0][1]] + ftag := sseq[(match[0][0] - marker.taglength):match[0][0]] + + m := DemultiplexMatch{ + ForwardMatch: direct, + ForwardTag: ftag, + BarcodeStart: match[0][1], + ForwardMismatches: match[0][2], + IsDirect: true, + Error: nil, + } + + rmatch := marker.creverse.FindAllIndex(aseq, match[0][1]) + + if len(rmatch) > 0 { + + // extracting primer matches + reverse, _ := sequence.Subsequence(rmatch[0][0], rmatch[0][1], false) + defer reverse.Recycle() + reverse = reverse.ReverseComplement(true) + rtag, err := sequence.Subsequence(rmatch[0][1], rmatch[0][1]+marker.taglength, false) + defer rtag.Recycle() + srtag := "" + + if err != nil { + rtag = obiseq.NilBioSequence + } else { + rtag.ReverseComplement(true) + srtag = strings.ToLower(rtag.String()) + } + + m.ReverseMatch = strings.ToLower(reverse.String()) + m.ReverseMismatches = rmatch[0][2] + m.BarcodeEnd = rmatch[0][0] + m.ReverseTag = srtag + + sample, ok := marker.samples[TagPair{ftag, srtag}] + + if ok { + m.Pcr = sample + } + + return &m + + } + + err := fmt.Errorf("cannot locates reverse priming site") + m.Error = err + + return &m + } + + match = marker.reverse.FindAllIndex(aseq, marker.taglength) + + if len(match) > 0 { + sseq := sequence.String() + + reverse := strings.ToLower(sseq[match[0][0]:match[0][1]]) + rtag := strings.ToLower(sseq[(match[0][0] - marker.taglength):match[0][0]]) + + m := DemultiplexMatch{ + ReverseMatch: reverse, + ReverseTag: rtag, + BarcodeStart: match[0][1], + ReverseMismatches: match[0][2], + IsDirect: false, + Error: nil, + } + + rmatch := marker.cforward.FindAllIndex(aseq, match[0][1]) + + if len(rmatch) > 0 { + + direct, _ := sequence.Subsequence(rmatch[0][0], rmatch[0][1], false) + defer direct.Recycle() + direct = direct.ReverseComplement(true) + + ftag, err := sequence.Subsequence(rmatch[0][1], rmatch[0][1]+marker.taglength, false) + defer ftag.Recycle() + sftag := "" + if err != nil { + ftag = obiseq.NilBioSequence + + } else { + ftag = ftag.ReverseComplement(true) + sftag = ftag.String() + } + + m.ForwardMatch = direct.String() + m.ForwardTag = sftag + m.ReverseMismatches = rmatch[0][2] + m.BarcodeEnd = rmatch[0][0] + + sample, ok := marker.samples[TagPair{sftag, rtag}] + + if ok { + m.Pcr = sample + } + + return &m + } + + err := fmt.Errorf("cannot locates forward priming site") + m.Error = err + return &m + } + + return nil +} + +func (match *DemultiplexMatch) ExtractBarcode(sequence obiseq.BioSequence, inplace bool) (obiseq.BioSequence, error) { + if !inplace { + sequence = sequence.Copy() + } + + if match == nil { + annot := sequence.Annotations() + annot["demultiplex_error"] = "cannot match any primer pair" + return sequence, errors.New("cannot match any primer pair") + } + + if match.ForwardMatch != "" && match.ReverseMatch != "" { + var err error + sequence, err = sequence.Subsequence(match.BarcodeStart, match.BarcodeEnd, false) + + if err != nil { + log.Fatalf("cannot extract sub sequence %d..%d %v", match.BarcodeStart, match.BarcodeEnd, *match) + } + } + + if !match.IsDirect { + sequence.ReverseComplement(true) + } + + annot := sequence.Annotations() + if annot == nil { + log.Fatalf("nil annot %v", sequence) + } + annot["forward_primer"] = match.ForwardPrimer + annot["reverse_primer"] = match.ReversePrimer + + if match.IsDirect { + annot["direction"] = "direct" + } else { + annot["direction"] = "reverse" + } + + if match.ForwardMatch != "" { + annot["forward_match"] = match.ForwardMatch + annot["forward_mismatches"] = match.ForwardMismatches + annot["forward_tag"] = match.ForwardTag + } + + if match.ReverseMatch != "" { + annot["reverse_match"] = match.ReverseMatch + annot["reverse_mismatches"] = match.ReverseMismatches + annot["reverse_tag"] = match.ReverseTag + } + + if match.Error != nil { + annot["demultiplex_error"] = fmt.Sprintf("%v", match.Error) + } + + if match.Pcr != nil { + annot["sample"] = match.Pcr.Sample + annot["experiment"] = match.Pcr.Experiment + for k, val := range match.Pcr.Annotations { + annot[k] = val + } + } + + return sequence, match.Error +} diff --git a/pkg/obingslibrary/ngslibrary.go b/pkg/obingslibrary/ngslibrary.go new file mode 100644 index 0000000..8cb5785 --- /dev/null +++ b/pkg/obingslibrary/ngslibrary.go @@ -0,0 +1,65 @@ +package obingslibrary + +import ( + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" +) + +type PrimerPair struct { + Forward string + Reverse string +} + +type TagPair struct { + Forward string + Reverse string +} + +type PCR struct { + Experiment string + Sample string + Partial bool + Annotations obiseq.Annotation +} + +type Marker struct { + forward obiapat.ApatPattern + cforward obiapat.ApatPattern + reverse obiapat.ApatPattern + creverse obiapat.ApatPattern + taglength int + samples map[TagPair]*PCR +} +type NGSLibrary map[PrimerPair]*Marker + +func MakeNGSLibrary() NGSLibrary { + return make(NGSLibrary, 10) +} + +func (library *NGSLibrary) GetMarker(forward, reverse string) (*Marker, bool) { + pair := PrimerPair{forward, reverse} + marker, ok := (*library)[pair] + + if ok { + return marker, true + } + + m := Marker{samples: make(map[TagPair]*PCR, 1000)} + (*library)[pair] = &m + + return &m, false +} + +func (marker *Marker) GetPCR(forward, reverse string) (*PCR, bool) { + pair := TagPair{forward, reverse} + pcr, ok := marker.samples[pair] + + if ok { + return pcr, ok + } + + ipcr := PCR{} + marker.samples[pair] = &ipcr + + return &ipcr, false +} diff --git a/pkg/obingslibrary/worker.go b/pkg/obingslibrary/worker.go new file mode 100644 index 0000000..76c3568 --- /dev/null +++ b/pkg/obingslibrary/worker.go @@ -0,0 +1,182 @@ +package obingslibrary + +import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" + +type _Options struct { + discardErrors bool + unidentified string + allowedMismatch int + withProgressBar bool + parallelWorkers int + batchSize int + bufferSize int +} + +// Options stores a set of option usable by the +// PCR simulation algotithm. +type Options struct { + pointer *_Options +} + +// WithOption is the standard type for function +// declaring options. +type WithOption func(Options) + +func OptionDiscardErrors(yes bool) WithOption { + f := WithOption(func(opt Options) { + opt.pointer.discardErrors = yes + }) + + return f +} + +func OptionUnidentified(filename string) WithOption { + f := WithOption(func(opt Options) { + opt.pointer.unidentified = filename + }) + + return f +} + +func OptionWithProgressBar(yes bool) WithOption { + f := WithOption(func(opt Options) { + opt.pointer.withProgressBar = yes + }) + + return f +} + +func OptionAllowedMismatches(count int) WithOption { + f := WithOption(func(opt Options) { + opt.pointer.allowedMismatch = count + }) + + return f +} + +// OptionBufferSize sets the requested channel +// buffer size. +func OptionBufferSize(size int) WithOption { + f := WithOption(func(opt Options) { + opt.pointer.bufferSize = size + }) + + return f +} + +// OptionParallelWorkers sets how many search +// jobs will be run in parallel. +func OptionParallelWorkers(nworkers int) WithOption { + f := WithOption(func(opt Options) { + opt.pointer.parallelWorkers = nworkers + }) + + return f +} + +// OptionBatchSize sets the requested sequence +// batch size. +func OptionBatchSize(size int) WithOption { + f := WithOption(func(opt Options) { + opt.pointer.batchSize = size + }) + + return f +} + +func (options Options) DiscardErrors() bool { + return options.pointer.unidentified == "" || options.pointer.discardErrors +} + +func (options Options) Unidentified() string { + return options.pointer.unidentified +} + +func (options Options) AllowedMismatch() int { + return options.pointer.allowedMismatch +} + +func (options Options) WithProgressBar() bool { + return options.pointer.withProgressBar +} + +// BufferSize returns the size of the channel +// buffer specified by the options +func (options Options) BufferSize() int { + return options.pointer.bufferSize +} + +// BatchSize returns the size of the +// sequence batch used by the PCR algorithm +func (options Options) BatchSize() int { + return options.pointer.batchSize +} + +// ParallelWorkers returns how many search +// jobs will be run in parallel. +func (options Options) ParallelWorkers() int { + return options.pointer.parallelWorkers +} + +// MakeOptions buils a new default option set for +// the PCR simulation algoithm. +func MakeOptions(setters []WithOption) Options { + o := _Options{ + discardErrors: true, + unidentified: "", + allowedMismatch: 0, + withProgressBar: false, + parallelWorkers: 4, + batchSize: 1000, + bufferSize: 100, + } + + opt := Options{&o} + + for _, set := range setters { + set(opt) + } + + return opt +} + +func _ExtractBarcodeSlice(ngslibrary NGSLibrary, + sequences obiseq.BioSequenceSlice, + options Options) obiseq.BioSequenceSlice { + newSlice := make(obiseq.BioSequenceSlice,0,len(sequences)) + + for _, seq := range sequences { + s, err := ngslibrary.ExtractBarcode(seq,true) + if err==nil || ! options.pointer.discardErrors { + newSlice = append(newSlice, s) + } + } + + return newSlice +} + +func ExtractBarcodeSlice(ngslibrary NGSLibrary, + sequences obiseq.BioSequenceSlice, + options ...WithOption) obiseq.BioSequenceSlice { + + opt := MakeOptions(options) + + ngslibrary.Compile(opt.AllowedMismatch()) + + return _ExtractBarcodeSlice(ngslibrary, sequences, opt) +} + +func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary, + options ...WithOption) obiseq.SeqSliceWorker { + + opt := MakeOptions(options) + + ngslibrary.Compile(opt.AllowedMismatch()) + + worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { + return _ExtractBarcodeSlice(ngslibrary, sequences, opt) + } + + return worker +} + diff --git a/pkg/obioptions/options.go b/pkg/obioptions/options.go index 95e5438..685ed58 100644 --- a/pkg/obioptions/options.go +++ b/pkg/obioptions/options.go @@ -41,23 +41,23 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser } // Predicate indicating if the debug mode is activated. -func IsDebugMode() bool { +func CLIIsDebugMode() bool { return _Debug } -// ParallelWorkers returns the number of parallel workers requested by +// CLIParallelWorkers returns the number of parallel workers requested by // the command line option --workers|-w. -func ParallelWorkers() int { +func CLIParallelWorkers() int { return _ParallelWorkers } -// BufferSize returns the expeted channel buffer size for obitools -func BufferSize() int { +// CLIBufferSize returns the expeted channel buffer size for obitools +func CLIBufferSize() int { return _BufferSize } -// BatchSize returns the expeted size of the sequence batches -func BatchSize() int { +// CLIBatchSize returns the expeted size of the sequence batches +func CLIBatchSize() int { return _BatchSize } diff --git a/pkg/obiseq/biosequence.go b/pkg/obiseq/biosequence.go index dc0a8bd..c3b8b54 100644 --- a/pkg/obiseq/biosequence.go +++ b/pkg/obiseq/biosequence.go @@ -61,11 +61,13 @@ func (sequence *BioSequence) Recycle() { pseq := sequence.sequence - RecycleSlice(pseq.sequence) - RecycleSlice(pseq.feature) - RecycleSlice(pseq.feature) + if pseq != nil { + RecycleSlice(pseq.sequence) + RecycleSlice(pseq.feature) + RecycleSlice(pseq.feature) - RecycleAnnotation(pseq.annotations) + RecycleAnnotation(pseq.annotations) + } sequence.sequence = nil } @@ -132,9 +134,14 @@ func (s BioSequence) HasAnnotation() bool { } func (s BioSequence) Annotations() Annotation { + if s.sequence == nil { + return nil + } + if s.sequence.annotations == nil { s.sequence.annotations = GetAnnotation() } + return s.sequence.annotations } diff --git a/pkg/obiseq/pool.go b/pkg/obiseq/pool.go index 3bef790..c822099 100644 --- a/pkg/obiseq/pool.go +++ b/pkg/obiseq/pool.go @@ -36,10 +36,12 @@ var BioSequenceAnnotationPool = sync.Pool{ } func RecycleAnnotation(a Annotation) { - for k := range a { - delete(a, k) + if a != nil { + for k := range a { + delete(a, k) + } + BioSequenceAnnotationPool.Put(&(a)) } - BioSequenceAnnotationPool.Put(&(a)) } func GetAnnotation(values ...Annotation) Annotation { diff --git a/pkg/obitools/obiconvert/sequence_reader.go b/pkg/obitools/obiconvert/sequence_reader.go index f922c39..b8b47c6 100644 --- a/pkg/obitools/obiconvert/sequence_reader.go +++ b/pkg/obitools/obiconvert/sequence_reader.go @@ -81,14 +81,14 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader)) } - nworkers := obioptions.ParallelWorkers() / 4 + nworkers := obioptions.CLIParallelWorkers() / 4 if nworkers < 2 { nworkers = 2 } opts = append(opts, obiformats.OptionsParallelWorkers(nworkers)) - opts = append(opts, obiformats.OptionsBufferSize(obioptions.BufferSize())) - opts = append(opts, obiformats.OptionsBatchSize(obioptions.BatchSize())) + opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize())) + opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize())) opts = append(opts, obiformats.OptionsQualityShift(InputQualityShift())) diff --git a/pkg/obitools/obiconvert/sequence_writer.go b/pkg/obitools/obiconvert/sequence_writer.go index ccc6612..f32c4eb 100644 --- a/pkg/obitools/obiconvert/sequence_writer.go +++ b/pkg/obitools/obiconvert/sequence_writer.go @@ -24,14 +24,14 @@ func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader)) } - nworkers := obioptions.ParallelWorkers() / 4 + nworkers := obioptions.CLIParallelWorkers() / 4 if nworkers < 2 { nworkers = 2 } opts = append(opts, obiformats.OptionsParallelWorkers(nworkers)) - opts = append(opts, obiformats.OptionsBufferSize(obioptions.BufferSize())) - opts = append(opts, obiformats.OptionsBatchSize(obioptions.BatchSize())) + opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize())) + opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize())) opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift())) @@ -84,14 +84,14 @@ func WriteBioSequencesBatch(iterator obiseq.IBioSequenceBatch, opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader)) } - nworkers := obioptions.ParallelWorkers() / 4 + nworkers := obioptions.CLIParallelWorkers() / 4 if nworkers < 2 { nworkers = 2 } opts = append(opts, obiformats.OptionsParallelWorkers(nworkers)) - opts = append(opts, obiformats.OptionsBufferSize(obioptions.BufferSize())) - opts = append(opts, obiformats.OptionsBatchSize(obioptions.BatchSize())) + opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize())) + opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize())) opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift())) diff --git a/pkg/obitools/obimultiplex/options.go b/pkg/obitools/obimultiplex/options.go index 19db7ca..37ffd80 100644 --- a/pkg/obitools/obimultiplex/options.go +++ b/pkg/obitools/obimultiplex/options.go @@ -1 +1,77 @@ package obimultiplex + +import ( + "fmt" + "os" + + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" + "github.com/DavidGamba/go-getoptions" +) + +var _NGSFilterFile = "" +var _UnidentifiedFile = "" +var _AllowedMismatch = int(2) +var _ConservedError = false + +// PCROptionSet defines every options related to a simulated PCR. +// +// The function adds to a CLI every options proposed to the user +// to tune the parametters of the PCR simulation algorithm. +// +// Parameters +// +// - option : is a pointer to a getoptions.GetOpt instance normaly +// produced by the +func MultiplexOptionSet(options *getoptions.GetOpt) { + options.StringVar(&_NGSFilterFile, "tag-list", _NGSFilterFile, + options.Alias("t"), + options.Required("You must provide a tag list file following the NGSFilter format"), + options.Description("File name of the NGSFilter file describing PCRs.")) + + options.BoolVar(&_ConservedError, "keep-errors", _ConservedError, + options.Description("Prints symbol counts.")) + + options.StringVar(&_UnidentifiedFile, "unidentified", _UnidentifiedFile, + options.Alias("u"), + options.Description("Filename used to store the sequences unassigned to any sample.")) + + options.IntVar(&_AllowedMismatch, "allowed-mismatches", _AllowedMismatch, + options.Alias("e"), + options.Description("Used to specify the number of errors allowed for matching primers.")) + +} + +func OptionSet(options *getoptions.GetOpt) { + obiconvert.OptionSet(options) + MultiplexOptionSet(options) +} + +func CLIAllowedMismatch() int { + return _AllowedMismatch +} + +func CLIUnidentifiedFileName() string { + return _UnidentifiedFile +} + +func CLIConservedErrors() bool { + return _UnidentifiedFile != "" || _ConservedError +} + +func CLINGSFIlter() (obingslibrary.NGSLibrary, error) { + file, err := os.Open(_NGSFilterFile) + + if err != nil { + return nil, fmt.Errorf("open file error: %v", err) + } + + ngsfiler, err := obiformats.ReadNGSFilter(file) + + if err != nil { + return nil, fmt.Errorf("NGSfilter reading file error: %v", err) + } + + return ngsfiler, nil +}