mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 08:10:45 +00:00
obimultiplex2 is now obimultiplex
Former-commit-id: ee6f5e15a3f1729dfc2806d039c842c9c3bdd343
This commit is contained in:
@ -1,56 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimultiplex2"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
// f, err := os.Create("cpu.pprof")
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
// pprof.StartCPUProfile(f)
|
||||
// defer pprof.StopCPUProfile()
|
||||
|
||||
// ftrace, err := os.Create("cpu.trace")
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
// trace.Start(ftrace)
|
||||
// defer trace.Stop()
|
||||
|
||||
optionParser := obioptions.GenerateOptionParser(obimultiplex2.OptionSet)
|
||||
|
||||
_, args := optionParser(os.Args)
|
||||
|
||||
if obimultiplex2.CLIAskConfigTemplate() {
|
||||
fmt.Print(obimultiplex2.CLIConfigTemplate())
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if !obimultiplex2.CLIHasNGSFilterFile() {
|
||||
log.Error("You must provide a tag list file following the NGSFilter format")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
sequences, err := obiconvert.CLIReadBioSequences(args...)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Cannot open file (%v)", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
amplicons, _ := obimultiplex2.IExtractBarcode(sequences)
|
||||
obiconvert.CLIWriteBioSequences(amplicons, true)
|
||||
amplicons.Wait()
|
||||
obiiter.WaitForLastPipe()
|
||||
|
||||
}
|
109
pkg/obitools/obimultiplex2/options.go → doc/book/wolf_data/wolf_diet_ngsfilter.csv
Normal file → Executable file
109
pkg/obitools/obimultiplex2/options.go → doc/book/wolf_data/wolf_diet_ngsfilter.csv
Normal file → Executable file
@ -1,105 +1,4 @@
|
||||
package obimultiplex2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var _NGSFilterFile = ""
|
||||
var _askTemplate = false
|
||||
var _UnidentifiedFile = ""
|
||||
var _AllowedMismatch = -1
|
||||
var _AllowsIndel = false
|
||||
var _ConservedError = false
|
||||
|
||||
// PCROptionSet defines every options related to a simulated PCR.
|
||||
//
|
||||
// The function adds to a CLI every options proposed to the user
|
||||
// to tune the parametters of the PCR simulation algorithm.
|
||||
//
|
||||
// # Parameters
|
||||
//
|
||||
// - option : is a pointer to a getoptions.GetOpt instance normaly
|
||||
// produced by the
|
||||
func MultiplexOptionSet(options *getoptions.GetOpt) {
|
||||
options.StringVar(&_NGSFilterFile, "tag-list", _NGSFilterFile,
|
||||
options.Alias("t"),
|
||||
options.Description("File name of the NGSFilter file describing PCRs."))
|
||||
|
||||
options.BoolVar(&_ConservedError, "keep-errors", _ConservedError,
|
||||
options.Description("Prints symbol counts."))
|
||||
|
||||
options.BoolVar(&_AllowsIndel, "with-indels", _AllowsIndel,
|
||||
options.Description("Allows for indels during the primers matching."))
|
||||
|
||||
options.StringVar(&_UnidentifiedFile, "unidentified", _UnidentifiedFile,
|
||||
options.Alias("u"),
|
||||
options.Description("Filename used to store the sequences unassigned to any sample."))
|
||||
|
||||
options.IntVar(&_AllowedMismatch, "allowed-mismatches", _AllowedMismatch,
|
||||
options.Alias("e"),
|
||||
options.Description("Used to specify the number of errors allowed for matching primers."))
|
||||
|
||||
options.BoolVar(&_askTemplate, "template", _askTemplate,
|
||||
options.Description("Print on the standard output an example of CSV configuration file."),
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OptionSet(options)
|
||||
MultiplexOptionSet(options)
|
||||
}
|
||||
|
||||
func CLIAllowedMismatch() int {
|
||||
return _AllowedMismatch
|
||||
}
|
||||
|
||||
func CLIAllowsIndel() bool {
|
||||
return _AllowsIndel
|
||||
}
|
||||
func CLIUnidentifiedFileName() string {
|
||||
return _UnidentifiedFile
|
||||
}
|
||||
|
||||
func CLIConservedErrors() bool {
|
||||
return _UnidentifiedFile != "" || _ConservedError
|
||||
}
|
||||
|
||||
func CLIHasNGSFilterFile() bool {
|
||||
return _NGSFilterFile != ""
|
||||
}
|
||||
|
||||
func CLINGSFIlter() (*obingslibrary.NGSLibrary, error) {
|
||||
file, err := os.Open(_NGSFilterFile)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open file error: %v", err)
|
||||
}
|
||||
|
||||
log.Infof("Reading NGSFilter file: %s", _NGSFilterFile)
|
||||
ngsfiler, err := obiformats.ReadNGSFilter(file)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("NGSfilter reading file error: %v", err)
|
||||
}
|
||||
|
||||
return ngsfiler, nil
|
||||
}
|
||||
|
||||
func CLIAskConfigTemplate() bool {
|
||||
return _askTemplate
|
||||
}
|
||||
|
||||
func CLIConfigTemplate() string {
|
||||
return `###
|
||||
###
|
||||
### Example of NGSFilter CSV configuration file
|
||||
###
|
||||
#
|
||||
@ -192,11 +91,11 @@ func CLIConfigTemplate() string {
|
||||
# The sample tag must be unique in the library for a given pair of primers
|
||||
# + They can be a simple DNA word as here. This means that the same tag is used
|
||||
# for both primers.
|
||||
# + It can be two DNA words separated by a colon. For example, aagtag:gaagtag.
|
||||
# + It can be two DNA words separated by a colon. For example, `aagtag:gaagtag`.
|
||||
# This means that the first tag is used for the forward primer and the second for the
|
||||
# reverse primers. "aagtag" is the same as "aagtag:aagtag".
|
||||
# + In the two word syntax, if a primer forward or reverse is not tagged, its tag
|
||||
# is replaced by a hyphen '-', for example 'aagtag:-' or '-:aagtag'.
|
||||
# is replaced by a hyphen `-`, for example `aagtag:-` or `-:aagtag`.
|
||||
# For a given primer all the tags must have the same length.
|
||||
# - forward_primer: the forward primer sequence
|
||||
# - reverse_primer: the reverse primer sequence
|
||||
@ -206,5 +105,3 @@ wolf_diet,13a_F730603,aattaac,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
wolf_diet,15a_F730814,gaagtag,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
wolf_diet,26a_F040644,gaatatc,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
wolf_diet,29a_F260619,gcctcct,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
`
|
||||
}
|
Can't render this file because it has a wrong number of fields in line 4.
|
@ -7,7 +7,7 @@ import (
|
||||
// TODO: The version number is extracted from git. This induces that the version
|
||||
// corresponds to the last commit, and not the one when the file will be
|
||||
// committed
|
||||
var _Commit = "a396240"
|
||||
var _Commit = "748a235"
|
||||
var _Version = "Release 4.2.0"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
|
@ -28,19 +28,20 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
worker := obingslibrary.ExtractBarcodeSliceWorker(ngsfilter, opts...)
|
||||
worker := ngsfilter.ExtractMultiBarcodeSliceWorker(opts...)
|
||||
|
||||
newIter := iterator.MakeISliceWorker(worker, false)
|
||||
out := newIter
|
||||
|
||||
if !CLIConservedErrors() {
|
||||
log.Println("Discards unassigned sequences")
|
||||
newIter = newIter.Rebatch(obioptions.CLIBatchSize())
|
||||
log.Infoln("Discards unassigned sequences")
|
||||
out = out.FilterOn(obiseq.HasAttribute("demultiplex_error").Not(), obioptions.CLIBatchSize())
|
||||
}
|
||||
|
||||
var unidentified obiiter.IBioSequence
|
||||
if CLIUnidentifiedFileName() != "" {
|
||||
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
|
||||
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),
|
||||
unidentified, out = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),
|
||||
obioptions.CLIBatchSize())
|
||||
|
||||
go func() {
|
||||
@ -56,5 +57,5 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error
|
||||
}
|
||||
log.Printf("Sequence demultiplexing using %d workers\n", obioptions.CLIParallelWorkers())
|
||||
|
||||
return newIter, nil
|
||||
return out, nil
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ import (
|
||||
var _NGSFilterFile = ""
|
||||
var _askTemplate = false
|
||||
var _UnidentifiedFile = ""
|
||||
var _AllowedMismatch = int(2)
|
||||
var _AllowedMismatch = -1
|
||||
var _AllowsIndel = false
|
||||
var _ConservedError = false
|
||||
|
||||
@ -99,10 +99,112 @@ func CLIAskConfigTemplate() bool {
|
||||
}
|
||||
|
||||
func CLIConfigTemplate() string {
|
||||
return `experiment,sample,sample_tag,forward_primer,reverse_primer
|
||||
return `###
|
||||
### Example of NGSFilter CSV configuration file
|
||||
###
|
||||
#
|
||||
# The CSV file can contain comments starting with the # character
|
||||
# and empty lines.
|
||||
# At the top of the file a set of lines of three or four columns and having
|
||||
# the first column containing @param can be used to define parameters
|
||||
# for the obimultiplex tool. The structure of these lines is :
|
||||
#
|
||||
# @param,parameter_name,parameter_value
|
||||
# @param,parameter_name,parameter_value1,parameter_value2
|
||||
#
|
||||
# The following lines describes the PCR multiplexed in the sequencing library.
|
||||
# The first line describes the columns of the CSV file and the following lines
|
||||
# describe the PCR multiplexed.
|
||||
#
|
||||
# Five columns are expected :
|
||||
#
|
||||
# - experiment: the experiment name
|
||||
# - sample: the sample (pcr) name
|
||||
# - sample_tag: the tag identifying the sample
|
||||
# - forward_primer: the forward primer sequence
|
||||
# - reverse_primer: the reverse primer sequence
|
||||
#
|
||||
# Supplementary columns are allowed. Their names and content will be used to
|
||||
# annotate the sequence corresponding to the sample, as the key=value; located
|
||||
# after the @ sign did in the original ngsfilter file format.
|
||||
#
|
||||
###
|
||||
### Description of the parameters
|
||||
###
|
||||
#
|
||||
# The forward_spacer and the reverse_spacer allow to specify the number of
|
||||
# nucleotide separating the 5' end of the forward or reverse primer respectively
|
||||
# to the 3' end of the tag. The default value is 0.
|
||||
#
|
||||
# The param spacer allows for specify this value for both forward and reverse
|
||||
# simultaneously. The spacer parameter can also, when used wirh two arguments,
|
||||
# allow to specify the # the spacer value for a specific primer:
|
||||
#
|
||||
# @param,spacer,CAGCTGCTATGTCGATGCTGACT,2
|
||||
#
|
||||
@param,forward_spacer,0
|
||||
@param,reverse_spacer,0
|
||||
#
|
||||
# A new method for designing indel proof tag is to not use one of the four
|
||||
# nucleotides in their sequence and to flank the tag with this fourth nucleotide.
|
||||
# That nucleotide is the tag delimiter. Similarly, to the spacer value,
|
||||
# three ways to specify the tag delimiter exist:
|
||||
# - the forward_tag_delimiter and reverse_tag_delimiter
|
||||
# - the tag_delimiter in its two forms with one and two arguments
|
||||
#
|
||||
@param,forward_tag_delimiter,0
|
||||
@param,reverse_tag_delimiter,0
|
||||
#
|
||||
# Three algorithms are available to math a pair of tags with a sample.
|
||||
# It is specified using the @matching parameter. The three possible
|
||||
# values are strict, hamming, and indel. The default value is strict.
|
||||
# As for previous parameters, forward_matching and reverse_matching can
|
||||
# be used to specify the matching value for each primer. And spacer
|
||||
# can be used with two arguments to specify the matching value for
|
||||
# a specific primer.
|
||||
#
|
||||
@param,matching,strict
|
||||
#
|
||||
# The primer_mismatches parameter allows to specify the number of errors allowed
|
||||
# when matching the primer. The default value is 2. The same declination of
|
||||
# the parameters forward_primer_mismatches and reverse_primer_mismatches exist.
|
||||
#
|
||||
@param,primer_mismatches,2
|
||||
#
|
||||
# The @indel parameter allows to specify if indel are allowed during the matching
|
||||
# of the primers to the sequence. The default value is false. forward_indel and
|
||||
# reverse_indel can be used to specify the value for each primer.
|
||||
#
|
||||
@param,indels,false
|
||||
#
|
||||
###
|
||||
### Description of the PCR multiplexed
|
||||
###
|
||||
#
|
||||
# Below is an example for the minimal description of the PCRs multiplexed in the
|
||||
# sequencing library.
|
||||
#
|
||||
# The first line is the column names and must exist.
|
||||
# Five columns are expected :
|
||||
# - experiment: the experiment name, that allows for grouping samples
|
||||
# - sample: the sample (pcr) name
|
||||
# - sample_tag: the tag identifying the sample
|
||||
# The sample tag must be unique in the library for a given pair of primers
|
||||
# + They can be a simple DNA word as here. This means that the same tag is used
|
||||
# for both primers.
|
||||
# + It can be two DNA words separated by a colon. For example, aagtag:gaagtag.
|
||||
# This means that the first tag is used for the forward primer and the second for the
|
||||
# reverse primers. "aagtag" is the same as "aagtag:aagtag".
|
||||
# + In the two word syntax, if a primer forward or reverse is not tagged, its tag
|
||||
# is replaced by a hyphen '-', for example 'aagtag:-' or '-:aagtag'.
|
||||
# For a given primer all the tags must have the same length.
|
||||
# - forward_primer: the forward primer sequence
|
||||
# - reverse_primer: the reverse primer sequence
|
||||
#
|
||||
experiment,sample,sample_tag,forward_primer,reverse_primer
|
||||
wolf_diet,13a_F730603,aattaac,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
wolf_diet,15a_F730814,gaagtag:gaatatc,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
wolf_diet,26a_F040644,gaatatc:-,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
wolf_diet,29a_F260619,-:-,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
wolf_diet,15a_F730814,gaagtag,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
wolf_diet,26a_F040644,gaatatc,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
wolf_diet,29a_F260619,gcctcct,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG
|
||||
`
|
||||
}
|
||||
|
@ -1,61 +0,0 @@
|
||||
package obimultiplex2
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
|
||||
)
|
||||
|
||||
func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
|
||||
|
||||
opts := make([]obingslibrary.WithOption, 0, 10)
|
||||
|
||||
opts = append(opts,
|
||||
obingslibrary.OptionAllowedMismatches(CLIAllowedMismatch()),
|
||||
obingslibrary.OptionAllowedIndel(CLIAllowsIndel()),
|
||||
obingslibrary.OptionUnidentified(CLIUnidentifiedFileName()),
|
||||
obingslibrary.OptionDiscardErrors(!CLIConservedErrors()),
|
||||
obingslibrary.OptionParallelWorkers(obioptions.CLIParallelWorkers()),
|
||||
obingslibrary.OptionBatchSize(obioptions.CLIBatchSize()),
|
||||
)
|
||||
|
||||
ngsfilter, err := CLINGSFIlter()
|
||||
if err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
|
||||
worker := ngsfilter.ExtractMultiBarcodeSliceWorker(opts...)
|
||||
|
||||
newIter := iterator.MakeISliceWorker(worker, false)
|
||||
out := newIter
|
||||
|
||||
if !CLIConservedErrors() {
|
||||
log.Infoln("Discards unassigned sequences")
|
||||
out = out.FilterOn(obiseq.HasAttribute("demultiplex_error").Not(), obioptions.CLIBatchSize())
|
||||
}
|
||||
|
||||
var unidentified obiiter.IBioSequence
|
||||
if CLIUnidentifiedFileName() != "" {
|
||||
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
|
||||
unidentified, out = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),
|
||||
obioptions.CLIBatchSize())
|
||||
|
||||
go func() {
|
||||
_, err := obiconvert.CLIWriteBioSequences(unidentified,
|
||||
true,
|
||||
CLIUnidentifiedFileName())
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("%v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
}
|
||||
log.Printf("Sequence demultiplexing using %d workers\n", obioptions.CLIParallelWorkers())
|
||||
|
||||
return out, nil
|
||||
}
|
Reference in New Issue
Block a user