mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
Refactoring code to remove the buffer size options, and some other changes...
Former-commit-id: 10b57cc1a27446ade3c444217341e9651e89cdce
This commit is contained in:
63
pkg/obitools/obicleandb/obicleandb.go
Normal file
63
pkg/obitools/obicleandb/obicleandb.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package obicleandb
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
|
||||
)
|
||||
|
||||
func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
var rankPredicate obiseq.SequencePredicate
|
||||
|
||||
options := make([]obichunk.WithOption, 0, 30)
|
||||
|
||||
// Make sequence dereplication with a constraint on the taxid.
|
||||
// To be merged, both sequences must have the same taxid.
|
||||
|
||||
options = append(options,
|
||||
obichunk.OptionBatchCount(100),
|
||||
obichunk.OptionSortOnMemory(),
|
||||
obichunk.OptionSubCategory("taxid"),
|
||||
obichunk.OptionsParallelWorkers(
|
||||
obioptions.CLIParallelWorkers()),
|
||||
obichunk.OptionsBatchSize(
|
||||
obioptions.CLIBatchSize()),
|
||||
obichunk.OptionNAValue("NA"),
|
||||
)
|
||||
|
||||
unique, err := obichunk.IUniqueSequence(itertator, options...)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
taxonomy := obigrep.CLILoadSelectedTaxonomy()
|
||||
|
||||
if len(obigrep.CLIRequiredRanks()) > 0 {
|
||||
rankPredicate = obigrep.CLIHasRankDefinedPredicate()
|
||||
} else {
|
||||
rankPredicate = taxonomy.HasRequiredRank("species").And(taxonomy.HasRequiredRank("genus")).And(taxonomy.HasRequiredRank("family"))
|
||||
}
|
||||
|
||||
goodTaxa := taxonomy.IsAValidTaxon(CLIUpdateTaxids()).And(rankPredicate)
|
||||
|
||||
usable := unique.FilterOn(goodTaxa,
|
||||
obioptions.CLIBatchSize(),
|
||||
obioptions.CLIParallelWorkers())
|
||||
|
||||
annotated := usable.MakeIWorker(taxonomy.MakeSetSpeciesWorker(),
|
||||
obioptions.CLIParallelWorkers(),
|
||||
).MakeIWorker(taxonomy.MakeSetGenusWorker(),
|
||||
obioptions.CLIParallelWorkers(),
|
||||
).MakeIWorker(taxonomy.MakeSetFamilyWorker(),
|
||||
obioptions.CLIParallelWorkers(),
|
||||
)
|
||||
|
||||
// annotated.MakeIConditionalWorker(obiseq.IsMoreAbundantOrEqualTo(3),1000)
|
||||
|
||||
return annotated
|
||||
}
|
||||
@@ -60,6 +60,21 @@ func InputOptionSet(options *getoptions.GetOpt) {
|
||||
|
||||
}
|
||||
|
||||
// OutputModeOptionSet registers on the provided option parser the options
// that do not depend on the output format:
//
//   - --no-progressbar, bound to __no_progress_bar__
//   - --compress (alias -Z), bound to __compressed__
//   - --out (alias -o), bound to __output_file_name__, whose current value
//     is used as the default
func OutputModeOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
||||
options.Description("Disable the progress bar printing"))
|
||||
|
||||
options.BoolVar(&__compressed__, "compress", false,
|
||||
options.Alias("Z"),
|
||||
options.Description("Output is compressed"))
|
||||
|
||||
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
||||
options.Alias("o"),
|
||||
options.ArgName("FILENAME"),
|
||||
options.Description("Filename used for saving the output"),
|
||||
)
|
||||
}
|
||||
|
||||
func OutputOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&__output_in_fasta__, "fasta-output", false,
|
||||
options.Description("Read data following the ecoPCR output format."))
|
||||
@@ -73,19 +88,7 @@ func OutputOptionSet(options *getoptions.GetOpt) {
|
||||
options.Alias("O"),
|
||||
options.Description("output FASTA/FASTQ title line annotations follow OBI format."))
|
||||
|
||||
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
||||
options.Description("Disable the progress bar printing"))
|
||||
|
||||
options.BoolVar(&__compressed__, "compress", false,
|
||||
options.Alias("Z"),
|
||||
options.Description("Output is compressed"))
|
||||
|
||||
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
||||
options.Alias("o"),
|
||||
options.ArgName("FILENAME"),
|
||||
options.Description("Filename used for saving the output"),
|
||||
)
|
||||
|
||||
OutputModeOptionSet(options)
|
||||
}
|
||||
|
||||
func PairedFilesOptionSet(options *getoptions.GetOpt) {
|
||||
@@ -197,4 +200,4 @@ func CLIHasPairedFile() bool {
|
||||
}
|
||||
// CLIPairedFileName returns the current value of the package-level
// __paired_file_name__ variable.
func CLIPairedFileName() string {
|
||||
return __paired_file_name__
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,6 +48,10 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||
strings.HasSuffix(path, "fasta.gz") ||
|
||||
strings.HasSuffix(path, "fastq") ||
|
||||
strings.HasSuffix(path, "fastq.gz") ||
|
||||
strings.HasSuffix(path, "seq") ||
|
||||
strings.HasSuffix(path, "seq.gz") ||
|
||||
strings.HasSuffix(path, "gb") ||
|
||||
strings.HasSuffix(path, "gb.gz") ||
|
||||
strings.HasSuffix(path, "dat") ||
|
||||
strings.HasSuffix(path, "dat.gz") ||
|
||||
strings.HasSuffix(path, "ecopcr") ||
|
||||
@@ -82,13 +86,12 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
||||
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader))
|
||||
}
|
||||
|
||||
nworkers := obioptions.CLIParallelWorkers() // / 4
|
||||
nworkers := obioptions.CLIParallelWorkers()
|
||||
if nworkers < 2 {
|
||||
nworkers = 2
|
||||
}
|
||||
|
||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
|
||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||
|
||||
opts = append(opts, obiformats.OptionsQualityShift(CLIInputQualityShift()))
|
||||
|
||||
@@ -60,7 +60,6 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
||||
}
|
||||
|
||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
|
||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||
|
||||
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))
|
||||
|
||||
61
pkg/obitools/obicsv/obicsv.go
Normal file
61
pkg/obitools/obicsv/obicsv.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package obicsv
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
)
|
||||
|
||||
func CLIWriteCSV(iterator obiiter.IBioSequence,
|
||||
terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
|
||||
|
||||
if obiconvert.CLIProgressBar() {
|
||||
iterator = iterator.Speed()
|
||||
}
|
||||
|
||||
var newIter obiiter.IBioSequence
|
||||
|
||||
opts := make([]obiformats.WithOption, 0, 10)
|
||||
|
||||
nworkers := obioptions.CLIParallelWorkers() / 4
|
||||
if nworkers < 2 {
|
||||
nworkers = 2
|
||||
}
|
||||
|
||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
||||
|
||||
opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()))
|
||||
opts = append(opts, obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
||||
|
||||
opts = append(opts, obiformats.CSVId(CLIPrintId()),
|
||||
obiformats.CSVCount(CLIPrintCount()),
|
||||
obiformats.CSVTaxon(CLIPrintTaxon()),
|
||||
obiformats.CSVDefinition(CLIPrintDefinition()),
|
||||
obiformats.CSVKeys(CLIToBeKeptAttributes()),
|
||||
)
|
||||
|
||||
var err error
|
||||
|
||||
if len(filenames) == 0 {
|
||||
newIter, err = obiformats.WriteCSVToStdout(iterator, opts...)
|
||||
} else {
|
||||
newIter, err = obiformats.WriteCSVToFile(iterator, filenames[0], opts...)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Write file error: %v", err)
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
if terminalAction {
|
||||
newIter.Recycle()
|
||||
return obiiter.NilIBioSequence, nil
|
||||
}
|
||||
|
||||
return newIter, nil
|
||||
|
||||
}
|
||||
126
pkg/obitools/obicsv/options.go
Normal file
126
pkg/obitools/obicsv/options.go
Normal file
@@ -0,0 +1,126 @@
|
||||
package obicsv
|
||||
|
||||
import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
"github.com/DavidGamba/go-getoptions"
|
||||
)
|
||||
|
||||
// Package-level state holding the values of the CSV command line options.
// Each variable is bound to its option in CSVOptionSet and its initial value
// is the option default.

// _outputIds is bound to the --ids option.
var _outputIds = true
|
||||
// _outputCount is bound to the --count option.
var _outputCount = false
|
||||
// _outputTaxon is bound to the --taxon option.
var _outputTaxon = false
|
||||
// _outputSequence is bound to the --sequence option.
var _outputSequence = true
|
||||
// _outputQuality is bound to the --quality option.
var _outputQuality = true
|
||||
// _outputDefinition is bound to the --definition option.
var _outputDefinition = false
|
||||
// _obipairing is bound to the --obipairing option.
var _obipairing = false
|
||||
// _autoColumns is bound to the --auto option.
var _autoColumns = false
|
||||
// _keepOnly collects the keys given through the --keep option.
var _keepOnly = make([]string, 0)
|
||||
// _naValue is bound to the --na-value option.
var _naValue = "NA"
|
||||
|
||||
// _softAttributes maps a program name to the attribute keys added to the
// kept attributes when the corresponding option (--obipairing) is set.
var _softAttributes = map[string][]string{
|
||||
"obipairing": {"mode", "seq_a_single", "seq_b_single",
|
||||
"ali_dir", "score", "score_norm",
|
||||
"seq_ab_match", "pairing_mismatches",
|
||||
},
|
||||
}
|
||||
|
||||
func CSVOptionSet(options *getoptions.GetOpt) {
|
||||
options.BoolVar(&_outputIds, "ids", _outputIds,
|
||||
options.Alias("i"),
|
||||
options.Description("Prints sequence ids in the ouput."))
|
||||
|
||||
options.BoolVar(&_outputSequence, "sequence", _outputSequence,
|
||||
options.Alias("s"),
|
||||
options.Description("Prints sequence itself in the output."))
|
||||
|
||||
options.BoolVar(&_outputQuality, "quality", _outputQuality,
|
||||
options.Alias("q"),
|
||||
options.Description("Prints sequence quality in the output."))
|
||||
|
||||
options.BoolVar(&_outputDefinition, "definition", _outputDefinition,
|
||||
options.Alias("d"),
|
||||
options.Description("Prints sequence definition in the output."))
|
||||
|
||||
options.BoolVar(&_autoColumns, "auto", _autoColumns,
|
||||
options.Description("Based on the first sequences, propose a list of attibutes to print"))
|
||||
|
||||
options.BoolVar(&_outputCount, "count", _outputCount,
|
||||
options.Description("Prints the count attribute in the output"))
|
||||
|
||||
options.BoolVar(&_outputTaxon, "taxon", _outputTaxon,
|
||||
options.Description("Prints the NCBI taxid and its related scientific name"))
|
||||
|
||||
options.BoolVar(&_obipairing, "obipairing", _obipairing,
|
||||
options.Description("Prints the attributes added by obipairing"))
|
||||
|
||||
options.StringSliceVar(&_keepOnly, "keep", 1, 1,
|
||||
options.Alias("k"),
|
||||
options.ArgName("KEY"),
|
||||
options.Description("Keeps only attribute with key <KEY>. Several -k options can be combined."))
|
||||
|
||||
options.StringVar(&_naValue, "na-value", _naValue,
|
||||
options.ArgName("NAVALUE"),
|
||||
options.Description("A string representing non available values in the CSV file."))
|
||||
}
|
||||
|
||||
// OptionSet registers all the options of the package on the provided option
// parser: first the ones declared by obiconvert.OutputModeOptionSet, then the
// CSV specific ones declared by CSVOptionSet.
func OptionSet(options *getoptions.GetOpt) {
|
||||
obiconvert.OutputModeOptionSet(options)
|
||||
CSVOptionSet(options)
|
||||
}
|
||||
|
||||
// CLIPrintId returns the value of the --ids option.
func CLIPrintId() bool {
|
||||
return _outputIds
|
||||
}
|
||||
|
||||
// CLIPrintSequence returns the value of the --sequence option.
func CLIPrintSequence() bool {
|
||||
return _outputSequence
|
||||
}
|
||||
|
||||
// CLIPrintCount returns the value of the --count option. The underlying flag
// is also switched on by CLIToBeKeptAttributes when "count" is among the
// kept keys, so the result may differ before and after that call.
func CLIPrintCount() bool {
|
||||
return _outputCount
|
||||
}
|
||||
// CLIPrintTaxon returns the value of the --taxon option. The underlying flag
// is also switched on by CLIToBeKeptAttributes when "taxid" or
// "scientific_name" is among the kept keys, so the result may differ before
// and after that call.
func CLIPrintTaxon() bool {
|
||||
return _outputTaxon
|
||||
}
|
||||
// CLIPrintQuality returns the value of the --quality option.
func CLIPrintQuality() bool {
|
||||
return _outputQuality
|
||||
}
|
||||
|
||||
// CLIPrintDefinition returns the value of the --definition option.
func CLIPrintDefinition() bool {
|
||||
return _outputDefinition
|
||||
}
|
||||
|
||||
// CLIAutoColumns returns the value of the --auto option.
func CLIAutoColumns() bool {
|
||||
return _autoColumns
|
||||
}
|
||||
|
||||
// CLIHasToBeKeptAttributes reports whether the list of kept attribute keys
// (_keepOnly) currently holds at least one key. It reads the list as it is
// at the time of the call; CLIToBeKeptAttributes may add keys to it or
// remove keys from it.
func CLIHasToBeKeptAttributes() bool {
|
||||
return len(_keepOnly) > 0
|
||||
}
|
||||
|
||||
func CLIToBeKeptAttributes() []string {
|
||||
if _obipairing {
|
||||
_keepOnly = append(_keepOnly, _softAttributes["obipairing"]...)
|
||||
}
|
||||
|
||||
if i := goutils.LookFor(_keepOnly, "count"); i >= 0 {
|
||||
_keepOnly = goutils.RemoveIndex(_keepOnly, i)
|
||||
_outputCount = true
|
||||
}
|
||||
|
||||
if i := goutils.LookFor(_keepOnly, "taxid"); i >= 0 {
|
||||
_keepOnly = goutils.RemoveIndex(_keepOnly, i)
|
||||
_outputTaxon = true
|
||||
}
|
||||
|
||||
if i := goutils.LookFor(_keepOnly, "scientific_name"); i >= 0 {
|
||||
_keepOnly = goutils.RemoveIndex(_keepOnly, i)
|
||||
_outputTaxon = true
|
||||
}
|
||||
|
||||
return _keepOnly
|
||||
}
|
||||
|
||||
// CLINAValue returns the value of the --na-value option ("NA" by default).
func CLINAValue() string {
|
||||
return _naValue
|
||||
}
|
||||
@@ -31,7 +31,6 @@ func DistributeSequence(sequences obiiter.IBioSequence) {
|
||||
}
|
||||
|
||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
||||
obiformats.OptionsBufferSize(obioptions.CLIBufferSize()),
|
||||
obiformats.OptionsBatchSize(obioptions.CLIBatchSize()),
|
||||
obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()),
|
||||
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
||||
|
||||
@@ -39,7 +39,6 @@ func CLIFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
newIter = iterator.FilterOn(predicate,
|
||||
obioptions.CLIBatchSize(),
|
||||
obioptions.CLIParallelWorkers(),
|
||||
obioptions.CLIBufferSize(),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -20,7 +20,6 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error
|
||||
obingslibrary.OptionDiscardErrors(!CLIConservedErrors()),
|
||||
obingslibrary.OptionParallelWorkers(obioptions.CLIParallelWorkers()),
|
||||
obingslibrary.OptionBatchSize(obioptions.CLIBatchSize()),
|
||||
obingslibrary.OptionBufferSize(obioptions.CLIBufferSize()),
|
||||
)
|
||||
|
||||
ngsfilter, err := CLINGSFIlter()
|
||||
|
||||
@@ -211,17 +211,13 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
|
||||
}
|
||||
|
||||
nworkers := obioptions.CLIMaxCPU() * 3 / 2
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
nworkers = sizes[0]
|
||||
}
|
||||
|
||||
if len(sizes) > 1 {
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
||||
newIter := obiiter.MakeIBioSequence()
|
||||
|
||||
newIter.Add(nworkers)
|
||||
|
||||
|
||||
@@ -51,8 +51,6 @@ func Unique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
options = append(options,
|
||||
obichunk.OptionsParallelWorkers(
|
||||
obioptions.CLIParallelWorkers()),
|
||||
obichunk.OptionsBufferSize(
|
||||
obioptions.CLIBufferSize()),
|
||||
obichunk.OptionsBatchSize(
|
||||
obioptions.CLIBatchSize()),
|
||||
obichunk.OptionNAValue(CLINAValue()),
|
||||
|
||||
Reference in New Issue
Block a user