Adds a first version of a new obidistribute command

This commit is contained in:
2022-02-14 00:01:01 +01:00
parent 1544bafde1
commit eb32620bb3
13 changed files with 567 additions and 36 deletions

View File

@@ -0,0 +1,57 @@
package obidistribute
import (
"log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
)
// DistributeSequence dispatches the batches of the input iterator to several
// output files.
//
// The writer options (header format, number of workers, buffer size, batch
// size and quality shift) and the output format are read from the command
// line accessors of obiconvert and obioptions. The sequences are split with
// the classifier returned by CLISequenceClassifier, and each resulting
// stream is handed to obiformats.WriterDispatcher together with the file
// name pattern returned by CLIFileNamePattern.
func DistributeSequence(sequences obiseq.IBioSequenceBatch) {
	// Header style: OBI only when explicitly requested, JSON for "json"
	// and for any other value.
	var headerOption obiformats.WithOption
	if obiconvert.OutputFastHeaderFormat() == "obi" {
		log.Println("On output use OBI headers")
		headerOption = obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqOBIHeader)
	} else {
		log.Println("On output use JSON headers")
		headerOption = obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader)
	}

	// Writers get a quarter of the configured workers, but never fewer
	// than two.
	writerWorkers := obioptions.CLIParallelWorkers() / 4
	if writerWorkers < 2 {
		writerWorkers = 2
	}

	writerOptions := []obiformats.WithOption{
		headerOption,
		obiformats.OptionsParallelWorkers(writerWorkers),
		obiformats.OptionsBufferSize(obioptions.CLIBufferSize()),
		obiformats.OptionsBatchSize(obioptions.CLIBatchSize()),
		obiformats.OptionsQualityShift(obiconvert.OutputQualityShift()),
	}

	// Generic writer unless a specific output format is requested.
	var batchWriter obiformats.SequenceBatchWriterToFile = obiformats.WriteSequencesBatchToFile
	switch obiconvert.OutputFormat() {
	case "fastq":
		batchWriter = obiformats.WriteFastqBatchToFile
	case "fasta":
		batchWriter = obiformats.WriteFastaBatchToFile
	}

	classifier := CLISequenceClassifier()
	dispatcher := sequences.Distribute(classifier, obioptions.CLIBatchSize())

	obiformats.WriterDispatcher(CLIFileNamePattern(), dispatcher, batchWriter, writerOptions...)
}

View File

@@ -0,0 +1,58 @@
package obidistribute
import (
"fmt"
"log"
"strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"github.com/DavidGamba/go-getoptions"
)
// Package-level storage for the obidistribute command line options.
var (
	// _FilenamePattern holds the value of the --pattern option.
	_FilenamePattern = ""
	// _SequenceClassifierTag holds the value of the --classifier option.
	_SequenceClassifierTag = ""
	// _BatchCount holds the value of the --batch option.
	_BatchCount = 0
)
// DistributeOptionSet registers the options specific to obidistribute on
// the given option parser:
//
//   - --pattern / -p (required): file name pattern, stored in _FilenamePattern
//   - --classifier / -c: annotation tag name, stored in _SequenceClassifierTag
//   - --batch / -n: number of batches, stored in _BatchCount
//
// The help texts previously were copies of an unrelated option description;
// they now describe what each option is used for in this package.
func DistributeOptionSet(options *getoptions.GetOpt) {
	options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern,
		options.Alias("p"),
		options.Required("You must provide a pattern for the file names"),
		options.Description("The pattern used to build the names of the output files. It must contain exactly one %s placeholder, which is replaced by the class of the sequences written to the file."))

	options.StringVar(&_SequenceClassifierTag, "classifier", _SequenceClassifierTag,
		options.Alias("c"),
		options.Description("The name of the annotation tag used to classify the sequences. Takes precedence over --batch when both are given."))

	// Use the variable's own initial value as default, like the two
	// options above, instead of a separate literal.
	options.IntVar(&_BatchCount, "batch", _BatchCount,
		options.Alias("n"),
		options.Description("The number of batches among which the sequences are distributed in rotation. Used only when --classifier is not given."))
}
// OptionSet registers every option group used by the obidistribute command
// on the given parser: the obiconvert input options, the obiconvert output
// options, then the obidistribute specific options (DistributeOptionSet).
func OptionSet(options *getoptions.GetOpt) {
obiconvert.InputOptionSet(options)
obiconvert.OutputOptionSet(options)
DistributeOptionSet(options)
}
// CLISequenceClassifier builds the sequence classifier selected on the
// command line.
//
// A non-empty --classifier tag wins and yields an annotation classifier;
// otherwise a positive --batch count yields a rotate classifier. When
// neither option is usable the program is terminated through log.Fatal.
func CLISequenceClassifier() obiseq.SequenceClassifier {
	if _SequenceClassifierTag != "" {
		return obiseq.AnnotationClassifier(_SequenceClassifierTag)
	}

	if _BatchCount > 0 {
		return obiseq.RotateClassifier(_BatchCount)
	}

	log.Fatal("one of the options --classifier or --batch must be specified")

	// Not reached: log.Fatal exits the process. Required by the compiler.
	return nil
}
// CLIFileNamePattern returns the file name pattern given with --pattern
// after checking that it is a valid fmt template for exactly one string
// argument.
//
// The pattern is rendered once with a probe value. Whenever the verbs and
// the arguments do not match, fmt embeds an error marker starting with "%!"
// in its output: "%!(EXTRA ...)" when the pattern has no placeholder,
// "%!s(MISSING)" when it has more than one, "%!d(string=...)" when the verb
// does not accept a string. Any such marker makes the function panic
// through log.Panicf. The previous check only caught the wrong-verb case,
// so a pattern without placeholder was accepted.
//
// Limitation: a pattern producing a literal "%!" (written "%%!") is
// rejected as well.
func CLIFileNamePattern() string {
	const probe = "_xxx_"

	rendered := fmt.Sprintf(_FilenamePattern, probe)
	if strings.Contains(rendered, "%!") {
		log.Panicf("pattern %s is not correct : %s", _FilenamePattern, rendered)
	}

	return _FilenamePattern
}

View File

@@ -18,10 +18,12 @@ func PairingOptionSet(options *getoptions.GetOpt) {
options.StringSliceVar(&_ForwardFiles, "forward-reads",
1, 1000,
options.Alias("F"),
options.Required("You must provide at least one forward file"),
options.Description("The file names containing the forward reads"))
options.StringSliceVar(&_ReverseFiles, "reverse-reads",
1, 1000,
options.Alias("R"),
options.Required("You must provide at least one reverse file"),
options.Description("The file names containing the reverse reads"))
options.IntVar(&_Delta, "delta", _Delta,
options.Alias("D"),