Plenty of small bugs

Former-commit-id: 42c7fab7d65906c80ab4cd32da6867ff21842ea8
This commit is contained in:
Eric Coissac
2024-06-04 16:49:12 +02:00
parent e843d2aa5c
commit 65f5109957
15 changed files with 894 additions and 264 deletions

View File

@@ -77,7 +77,7 @@ func CLIHasNGSFilterFile() bool {
return _NGSFilterFile != ""
}
func CLINGSFIlter() (obingslibrary.NGSLibrary, error) {
func CLINGSFIlter() (*obingslibrary.NGSLibrary, error) {
file, err := os.Open(_NGSFilterFile)
if err != nil {

View File

@@ -0,0 +1,60 @@
package obimultiplex2
import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
)
// IExtractBarcode wires the demultiplexing stage onto a sequence iterator.
//
// The NGSFilter library is loaded with CLINGSFIlter and turned into a slice
// worker configured from the command-line options (mismatches, indels,
// unidentified file name, error discarding, parallel workers, batch size).
// The worker is then applied to every batch produced by iterator.
//
// Post-processing depends on the options:
//   - when errors are not conserved, sequences carrying a
//     "demultiplex_error" attribute are filtered out of the result;
//   - when an unidentified file name is set, sequences carrying that
//     attribute are diverted to this file, written by a background
//     goroutine, and the remaining ones form the result.
//
// Any failure to load the NGSFilter file or to open the unidentified writer
// terminates the program through log.Fatalf; the returned error is always
// nil.
func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
	keepErrors := CLIConservedErrors()
	unidentifiedFile := CLIUnidentifiedFileName()

	opts := []obingslibrary.WithOption{
		obingslibrary.OptionAllowedMismatches(CLIAllowedMismatch()),
		obingslibrary.OptionAllowedIndel(CLIAllowsIndel()),
		obingslibrary.OptionUnidentified(unidentifiedFile),
		obingslibrary.OptionDiscardErrors(!keepErrors),
		obingslibrary.OptionParallelWorkers(obioptions.CLIParallelWorkers()),
		obingslibrary.OptionBatchSize(obioptions.CLIBatchSize()),
	}

	library, err := CLINGSFIlter()
	if err != nil {
		log.Fatalf("%v", err)
	}

	result := iterator.MakeISliceWorker(
		library.ExtractMultiBarcodeSliceWorker(opts...),
		false,
	)

	if !keepErrors {
		log.Infoln("Discards unassigned sequences")
		assigned := obiseq.HasAttribute("demultiplex_error").Not()
		result = result.FilterOn(assigned, obioptions.CLIBatchSize())
	}

	if unidentifiedFile != "" {
		log.Printf("Unassigned sequences saved in file: %s\n", unidentifiedFile)

		rejected, kept := result.DivideOn(
			obiseq.HasAttribute("demultiplex_error"),
			obioptions.CLIBatchSize(),
		)
		result = kept

		// The rejected stream is drained concurrently by its own writer.
		go func() {
			if _, err := obiconvert.CLIWriteBioSequences(rejected, true, unidentifiedFile); err != nil {
				log.Fatalf("%v", err)
			}
		}()
	}

	log.Printf("Sequence demultiplexing using %d workers\n", obioptions.CLIParallelWorkers())

	return result, nil
}

View File

@@ -0,0 +1,108 @@
package obimultiplex2
import (
"fmt"
"os"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert"
"github.com/DavidGamba/go-getoptions"
log "github.com/sirupsen/logrus"
)
// Storage for the demultiplexing command-line options. Each variable holds
// the default value of its option until the command line is parsed.
var (
	_NGSFilterFile    = ""    // --tag-list / -t
	_askTemplate      = false // --template
	_UnidentifiedFile = ""    // --unidentified / -u
	_AllowedMismatch  = 2     // --allowed-mismatches / -e
	_AllowsIndel      = false // --with-indels
	_ConservedError   = false // --keep-errors
)
// MultiplexOptionSet declares the command-line options specific to
// sequence demultiplexing.
//
// Every option is bound to a package-level variable whose current value is
// used as the option default; the parsed values are exposed through the
// CLI* accessor functions of this package.
//
// # Parameters
//
//   - options: the getoptions.GetOpt instance on which the options are
//     declared.
func MultiplexOptionSet(options *getoptions.GetOpt) {
	options.StringVar(&_NGSFilterFile, "tag-list", _NGSFilterFile,
		options.Alias("t"),
		options.Description("File name of the NGSFilter file describing PCRs."))

	// Without this flag the sequences tagged with a demultiplexing error
	// are filtered out of the output.
	options.BoolVar(&_ConservedError, "keep-errors", _ConservedError,
		options.Description("Keeps in the output the sequences that cannot be assigned to any sample."))

	options.BoolVar(&_AllowsIndel, "with-indels", _AllowsIndel,
		options.Description("Allows for indels during the primers matching."))

	options.StringVar(&_UnidentifiedFile, "unidentified", _UnidentifiedFile,
		options.Alias("u"),
		options.Description("Filename used to store the sequences unassigned to any sample."))

	options.IntVar(&_AllowedMismatch, "allowed-mismatches", _AllowedMismatch,
		options.Alias("e"),
		options.Description("Used to specify the number of errors allowed for matching primers."))

	options.BoolVar(&_askTemplate, "template", _askTemplate,
		options.Description("Print on the standard output an example of CSV configuration file."),
	)
}
// OptionSet declares the complete option set of the command on options:
// first the options registered by obiconvert.OptionSet, then the
// demultiplexing options registered by MultiplexOptionSet.
func OptionSet(options *getoptions.GetOpt) {
obiconvert.OptionSet(options)
MultiplexOptionSet(options)
}
// CLIAllowedMismatch returns the value of the --allowed-mismatches (-e)
// option: the number of errors allowed when matching primers (2 by default).
func CLIAllowedMismatch() int {
return _AllowedMismatch
}
// CLIAllowsIndel reports whether the --with-indels option was set, i.e.
// whether indels are allowed during primer matching (false by default).
func CLIAllowsIndel() bool {
return _AllowsIndel
}
// CLIUnidentifiedFileName returns the value of the --unidentified (-u)
// option: the name of the file receiving the sequences not assigned to any
// sample. The empty string means that no such file was requested.
func CLIUnidentifiedFileName() string {
return _UnidentifiedFile
}
// CLIConservedErrors reports whether sequences with a demultiplexing error
// must be kept rather than discarded.
//
// Requesting an unidentified file implies keeping them; otherwise the
// answer is the value of the --keep-errors option.
func CLIConservedErrors() bool {
	if _UnidentifiedFile != "" {
		return true
	}

	return _ConservedError
}
// CLIHasNGSFilterFile reports whether an NGSFilter file name was provided
// through the --tag-list (-t) option.
func CLIHasNGSFilterFile() bool {
	return len(_NGSFilterFile) > 0
}
// CLINGSFIlter opens the NGSFilter file named by the --tag-list option and
// parses it with obiformats.ReadNGSFilter.
//
// It returns the parsed library, or a nil library and an error wrapping the
// underlying failure when the file cannot be opened or parsed.
func CLINGSFIlter() (*obingslibrary.NGSLibrary, error) {
	file, err := os.Open(_NGSFilterFile)
	if err != nil {
		return nil, fmt.Errorf("open file error: %w", err)
	}
	// Release the descriptor once parsing is over; the handle was never
	// closed before.
	// NOTE(review): assumes ReadNGSFilter fully consumes the reader before
	// returning — confirm against obiformats.
	defer file.Close()

	log.Infof("Reading NGSFilter file: %s", _NGSFilterFile)

	ngsfilter, err := obiformats.ReadNGSFilter(file)
	if err != nil {
		return nil, fmt.Errorf("NGSfilter reading file error: %w", err)
	}

	return ngsfilter, nil
}
// CLIAskConfigTemplate reports whether the --template option was set, i.e.
// whether an example CSV configuration file was requested.
func CLIAskConfigTemplate() bool {
return _askTemplate
}
// CLIConfigTemplate returns an example CSV configuration: a header line
// followed by four sample rows, each terminated by a newline.
//
// The sample_tag column of the example rows shows a single tag, a
// forward:reverse pair, and "-" placeholders.
func CLIConfigTemplate() string {
	const header = "experiment,sample,sample_tag,forward_primer,reverse_primer\n"

	const samples = "wolf_diet,13a_F730603,aattaac,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG\n" +
		"wolf_diet,15a_F730814,gaagtag:gaatatc,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG\n" +
		"wolf_diet,26a_F040644,gaatatc:-,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG\n" +
		"wolf_diet,29a_F260619,-:-,TTAGATACCCCACTATGC,TAGAACAGGCTCCTCTAG\n"

	return header + samples
}

View File

@@ -33,12 +33,6 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
newIter := obiiter.MakeIBioSequence()
newIter.MarkAsPaired()
newIter.Add(nworkers)
go func() {
newIter.WaitAndClose()
log.Printf("End of the sequence PCR Taging")
}()
f := func(iterator obiiter.IBioSequence, wid int) {
arena := obialign.MakePEAlignArena(150, 150)
@@ -128,16 +122,22 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence,
log.Printf("Start of the sequence Pairing using %d workers\n", nworkers)
newIter.Add(nworkers)
for i := 1; i < nworkers; i++ {
go f(iterator.Split(), i)
}
go f(iterator, 0)
go func() {
newIter.WaitAndClose()
log.Printf("End of the sequence PCR Taging")
}()
iout := newIter
if !obimultiplex.CLIConservedErrors() {
log.Println("Discards unassigned sequences")
iout = iout.Rebatch(obioptions.CLIBatchSize())
iout = iout.FilterOn(obiseq.HasAttribute("demultiplex_error").Not(), obioptions.CLIBatchSize())
}
var unidentified obiiter.IBioSequence