diff --git a/cmd/obitools/obicleandb/main.go b/cmd/obitools/obicleandb/main.go index 938e04d..c817fef 100644 --- a/cmd/obitools/obicleandb/main.go +++ b/cmd/obitools/obicleandb/main.go @@ -3,6 +3,7 @@ package main import ( "os" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicleandb" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -11,7 +12,7 @@ import ( ) func main() { - obioptions.SetBatchSize(10) + obidefault.SetBatchSize(10) optionParser := obioptions.GenerateOptionParser(obicleandb.OptionSet) diff --git a/cmd/obitools/obiconvert/main.go b/cmd/obitools/obiconvert/main.go index 84bb4bc..93cc9ff 100644 --- a/cmd/obitools/obiconvert/main.go +++ b/cmd/obitools/obiconvert/main.go @@ -3,6 +3,7 @@ package main import ( "os" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -10,8 +11,8 @@ import ( ) func main() { - obioptions.SetStrictReadWorker(2) - obioptions.SetStrictWriteWorker(2) + obidefault.SetStrictReadWorker(2) + obidefault.SetStrictWriteWorker(2) optionParser := obioptions.GenerateOptionParser(obiconvert.OptionSet) diff --git a/cmd/obitools/obicount/main.go b/cmd/obitools/obicount/main.go index 2a6838f..1e1e1e7 100644 --- a/cmd/obitools/obicount/main.go +++ b/cmd/obitools/obicount/main.go @@ -4,6 +4,7 @@ import ( "fmt" "os" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicount" @@ -33,7 +34,7 @@ func main() { _, args := optionParser(os.Args) - obioptions.SetStrictReadWorker(min(4, obioptions.CLIParallelWorkers())) + 
obidefault.SetStrictReadWorker(min(4, obidefault.ParallelWorkers())) fs, err := obiconvert.CLIReadBioSequences(args...) obiconvert.OpenSequenceDataErrorMessage(args, err) diff --git a/cmd/obitools/obidemerge/main.go b/cmd/obitools/obidemerge/main.go index 054c1e0..699887a 100644 --- a/cmd/obitools/obidemerge/main.go +++ b/cmd/obitools/obidemerge/main.go @@ -3,6 +3,7 @@ package main import ( "os" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obidemerge" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -11,8 +12,8 @@ import ( ) func main() { - obioptions.SetStrictReadWorker(2) - obioptions.SetStrictWriteWorker(2) + obidefault.SetStrictReadWorker(2) + obidefault.SetStrictWriteWorker(2) optionParser := obioptions.GenerateOptionParser(obidemerge.OptionSet) diff --git a/cmd/obitools/obijoin/main.go b/cmd/obitools/obijoin/main.go index 69af504..cfe99c3 100644 --- a/cmd/obitools/obijoin/main.go +++ b/cmd/obitools/obijoin/main.go @@ -3,6 +3,7 @@ package main import ( "os" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obijoin" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -11,8 +12,8 @@ import ( ) func main() { - obioptions.SetStrictReadWorker(2) - obioptions.SetStrictWriteWorker(2) + obidefault.SetStrictReadWorker(2) + obidefault.SetStrictWriteWorker(2) optionParser := obioptions.GenerateOptionParser(obijoin.OptionSet) diff --git a/cmd/obitools/obipairing/main.go b/cmd/obitools/obipairing/main.go index 1e1c3bf..7370f8b 100644 --- a/cmd/obitools/obipairing/main.go +++ b/cmd/obitools/obipairing/main.go @@ -5,6 +5,7 @@ import ( log "github.com/sirupsen/logrus" + 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing" @@ -33,8 +34,8 @@ func main() { optionParser(os.Args) - obioptions.SetStrictReadWorker(2) - obioptions.SetStrictWriteWorker(2) + obidefault.SetStrictReadWorker(2) + obidefault.SetStrictWriteWorker(2) pairs, err := obipairing.CLIPairedSequence() if err != nil { @@ -51,7 +52,7 @@ func main() { obipairing.CLIFastMode(), obipairing.CLIFastRelativeScore(), obipairing.CLIWithStats(), - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ) obiconvert.CLIWriteBioSequences(paired, true) diff --git a/cmd/obitools/obipcr/main.go b/cmd/obitools/obipcr/main.go index f439fcd..15d863b 100644 --- a/cmd/obitools/obipcr/main.go +++ b/cmd/obitools/obipcr/main.go @@ -3,6 +3,7 @@ package main import ( "os" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipcr" @@ -23,10 +24,10 @@ func main() { // trace.Start(ftrace) // defer trace.Stop() - obioptions.SetWorkerPerCore(2) - obioptions.SetReadWorkerPerCore(0.5) - obioptions.SetParallelFilesRead(obioptions.CLIParallelWorkers() / 4) - obioptions.SetBatchSize(10) + obidefault.SetWorkerPerCore(2) + obidefault.SetReadWorkerPerCore(0.5) + obidefault.SetParallelFilesRead(obidefault.ParallelWorkers() / 4) + obidefault.SetBatchSize(10) optionParser := obioptions.GenerateOptionParser(obipcr.OptionSet) diff --git a/cmd/obitools/obitag/main.go b/cmd/obitools/obitag/main.go index 3b76ee2..0380429 100644 --- a/cmd/obitools/obitag/main.go +++ b/cmd/obitools/obitag/main.go @@ -6,6 +6,7 @@ import ( log 
"github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" @@ -33,10 +34,10 @@ func main() { // trace.Start(ftrace) // defer trace.Stop() - obioptions.SetWorkerPerCore(2) - obioptions.SetStrictReadWorker(1) - obioptions.SetStrictWriteWorker(1) - obioptions.SetBatchSize(10) + obidefault.SetWorkerPerCore(2) + obidefault.SetStrictReadWorker(1) + obidefault.SetStrictWriteWorker(1) + obidefault.SetBatchSize(10) optionParser := obioptions.GenerateOptionParser(obitag.OptionSet) diff --git a/cmd/obitools/obitagpcr/main.go b/cmd/obitools/obitagpcr/main.go index bc2e096..64a88e2 100644 --- a/cmd/obitools/obitagpcr/main.go +++ b/cmd/obitools/obitagpcr/main.go @@ -5,6 +5,7 @@ import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obipairing" @@ -30,7 +31,7 @@ func main() { // trace.Start(ftrace) // defer trace.Stop() - obioptions.SetWorkerPerCore(1) + obidefault.SetWorkerPerCore(1) optionParser := obioptions.GenerateOptionParser(obitagpcr.OptionSet) diff --git a/cmd/obitools/obiuniq/main.go b/cmd/obitools/obiuniq/main.go index 9b18c46..8e9926d 100644 --- a/cmd/obitools/obiuniq/main.go +++ b/cmd/obitools/obiuniq/main.go @@ -31,7 +31,7 @@ func main() { // trace.Start(ftrace) // defer trace.Stop() - obioptions.SetBatchSize(10) + obidefault.SetBatchSize(10) obidefault.SetReadQualities(false) optionParser := obioptions.GenerateOptionParser(obiuniq.OptionSet) diff --git a/cmd/test/main.go b/cmd/test/main.go index 2e07616..8a66f0c 100644 --- a/cmd/test/main.go +++ 
b/cmd/test/main.go @@ -3,13 +3,13 @@ package main import ( "os" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitaxformat" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) func main() { - obitaxformat.DetectTaxonomyFormat(os.Args[1]) + obitax.DetectTaxonomyFormat(os.Args[1]) println(obiutils.RemoveAllExt("toto/tutu/test.txt")) println(obiutils.Basename("toto/tutu/test.txt")) diff --git a/pkg/obiapat/pcr.go b/pkg/obiapat/pcr.go index 989fa2e..d57a984 100644 --- a/pkg/obiapat/pcr.go +++ b/pkg/obiapat/pcr.go @@ -3,7 +3,7 @@ package obiapat import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) @@ -104,7 +104,7 @@ func MakeOptions(setters []WithOption) Options { extension: -1, fullExtension: false, circular: false, - parallelWorkers: obioptions.CLIParallelWorkers(), + parallelWorkers: obidefault.ParallelWorkers(), batchSize: 100, forward: NilApatPattern, cfwd: NilApatPattern, diff --git a/pkg/obichunk/options.go b/pkg/obichunk/options.go index 7eb0114..f54909b 100644 --- a/pkg/obichunk/options.go +++ b/pkg/obichunk/options.go @@ -1,7 +1,7 @@ package obichunk import ( - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -29,8 +29,8 @@ func MakeOptions(setters []WithOption) Options { navalue: "NA", cacheOnDisk: false, batchCount: 100, - batchSize: obioptions.CLIBatchSize(), - parallelWorkers: obioptions.CLIParallelWorkers(), + batchSize: obidefault.BatchSize(), + parallelWorkers: obidefault.ParallelWorkers(), noSingleton: false, } diff --git 
a/pkg/obichunk/subchunks.go b/pkg/obichunk/subchunks.go index 4a798f6..2b41809 100644 --- a/pkg/obichunk/subchunks.go +++ b/pkg/obichunk/subchunks.go @@ -6,8 +6,8 @@ import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -62,7 +62,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequence, nworkers int) (obiiter.IBioSequence, error) { if nworkers <= 0 { - nworkers = obioptions.CLIParallelWorkers() + nworkers = obidefault.ParallelWorkers() } newIter := obiiter.MakeIBioSequence() diff --git a/pkg/obidefault/batch.go b/pkg/obidefault/batch.go new file mode 100644 index 0000000..5a128fa --- /dev/null +++ b/pkg/obidefault/batch.go @@ -0,0 +1,26 @@ +package obidefault + +var _BatchSize = 2000 + +// SetBatchSize sets the size of the sequence batches. +// +// n - an integer representing the size of the sequence batches. +func SetBatchSize(n int) { + _BatchSize = n +} + +// CLIBatchSize returns the expected size of the sequence batches. +// +// In Obitools, the sequences are processed in parallel by batches. +// The number of sequence in each batch is determined by the command line option +// --batch-size and the environment variable OBIBATCHSIZE. +// +// No parameters. +// Returns an integer value. 
+func BatchSize() int {
+	return _BatchSize
+}
+
+func BatchSizePtr() *int {
+	return &_BatchSize
+}
diff --git a/pkg/obidefault/compressed.go b/pkg/obidefault/compressed.go
new file mode 100644
index 0000000..76c398f
--- /dev/null
+++ b/pkg/obidefault/compressed.go
@@ -0,0 +1,15 @@
+package obidefault
+
+var __compressed__ = false
+
+func CompressOutput() bool {
+	return __compressed__
+}
+
+func SetCompressOutput(b bool) {
+	__compressed__ = b
+}
+
+func CompressedPtr() *bool {
+	return &__compressed__
+}
diff --git a/pkg/obidefault/taxonomy.go b/pkg/obidefault/taxonomy.go
new file mode 100644
index 0000000..eac3ec6
--- /dev/null
+++ b/pkg/obidefault/taxonomy.go
@@ -0,0 +1,32 @@
+package obidefault
+
+var __taxonomy__ = ""
+var __alternative_name__ = false
+
+func SelectedTaxonomy() string {
+	return __taxonomy__
+}
+
+func HasSelectedTaxonomy() bool {
+	return __taxonomy__ != ""
+}
+
+func AreAlternativeNamesSelected() bool {
+	return __alternative_name__
+}
+
+func SelectedTaxonomyPtr() *string {
+	return &__taxonomy__
+}
+
+func AlternativeNamesSelectedPtr() *bool {
+	return &__alternative_name__
+}
+
+func SetSelectedTaxonomy(taxonomy string) {
+	__taxonomy__ = taxonomy
+}
+
+func SetAlternativeNamesSelected(alt bool) {
+	__alternative_name__ = alt
+}
diff --git a/pkg/obidefault/workers.go b/pkg/obidefault/workers.go
new file mode 100644
index 0000000..8dc8bff
--- /dev/null
+++ b/pkg/obidefault/workers.go
@@ -0,0 +1,170 @@
+package obidefault
+
+import "runtime"
+
+var _MaxAllowedCPU = runtime.NumCPU()
+var _WorkerPerCore = 1.0
+
+var _ReadWorkerPerCore = 0.25
+var _WriteWorkerPerCore = 0.25
+
+var _StrictReadWorker = 0
+var _StrictWriteWorker = 0
+
+var _ParallelFilesRead = 0
+
+// ParallelWorkers returns the number of parallel workers used for
+// computing the result.
+//
+// The number of parallel workers is determined by the command line option
+// --max-cpu|-m and the environment variable OBIMAXCPU.
This number is
+// multiplied by the variable _WorkerPerCore.
+//
+// No parameters.
+// Returns an integer representing the number of parallel workers.
+func ParallelWorkers() int {
+	return int(float64(MaxCPU()) * float64(WorkerPerCore()))
+}
+
+// MaxCPU returns the maximum number of CPU cores allowed.
+//
+// The maximum number of CPU cores is determined by the command line option
+// --max-cpu|-m and the environment variable OBIMAXCPU.
+//
+// MaxCPUPtr below returns the address of the same package variable.
+// Returns an integer representing the maximum number of CPU cores allowed.
+func MaxCPU() int {
+	return _MaxAllowedCPU
+}
+
+func MaxCPUPtr() *int {
+	return &_MaxAllowedCPU
+}
+
+// WorkerPerCore returns the number of workers per CPU core.
+//
+// No parameters.
+// Returns a float64 representing the number of workers per CPU core.
+func WorkerPerCore() float64 {
+	return _WorkerPerCore
+}
+
+// SetWorkerPerCore sets the number of workers per CPU core.
+//
+// It takes a float64 parameter representing the number of workers
+// per CPU core and does not return any value.
+func SetWorkerPerCore(n float64) {
+	_WorkerPerCore = n
+}
+
+// SetMaxCPU sets the maximum number of CPU cores allowed.
+//
+// n - an integer representing the new maximum number of CPU cores.
+func SetMaxCPU(n int) {
+	_MaxAllowedCPU = n
+}
+
+// SetStrictReadWorker sets the number of workers for reading files.
+//
+// The number of worker dedicated to reading files is determined
+// as the number of allowed CPU cores multiplied by number of read workers per core.
+// Setting the number of read workers using this function allows decoupling the number
+// of read workers from the number of CPU cores.
+//
+// n - the number of workers; SetStrictWriteWorker below is the write-side setter.
+func SetStrictReadWorker(n int) {
+	_StrictReadWorker = n
+}
+
+func SetStrictWriteWorker(n int) {
+	_StrictWriteWorker = n
+}
+
+// SetReadWorkerPerCore sets the number of worker per CPU
+// core for reading files.
+//
+// n - a float64 number of read workers per CPU core.
+func SetReadWorkerPerCore(n float64) {
+	_ReadWorkerPerCore = n
+}
+
+func SetWriteWorkerPerCore(n float64) {
+	_WriteWorkerPerCore = n
+}
+
+// StrictReadWorker returns the fixed number of workers for reading files.
+//
+// A value of 0 means that no fixed number has been set.
+// StrictWriteWorker below is the same accessor for write workers.
+func StrictReadWorker() int {
+	return _StrictReadWorker
+}
+
+func StrictWriteWorker() int {
+	return _StrictWriteWorker
+}
+
+// ReadParallelWorkers returns the number of parallel workers used for
+// reading files.
+//
+// The number of parallel workers is determined by the command line option
+// --max-cpu|-m and the environment variable OBIMAXCPU. This number is
+// multiplied by the variable _ReadWorkerPerCore.
+//
+// A non-zero StrictReadWorker() replaces that product; a product of 0 is
+// raised to 1. WriteParallelWorkers below applies the same rule to writers.
+func ReadParallelWorkers() int {
+	if StrictReadWorker() == 0 {
+		n := int(float64(MaxCPU()) * ReadWorkerPerCore())
+		if n == 0 {
+			n = 1
+		}
+		return n
+	} else {
+		return StrictReadWorker()
+	}
+}
+
+func WriteParallelWorkers() int {
+	if StrictWriteWorker() == 0 {
+		n := int(float64(MaxCPU()) * WriteWorkerPerCore())
+		if n == 0 {
+			n = 1
+		}
+		return n
+	} else {
+		return StrictWriteWorker()
+	}
+}
+
+// ReadWorkerPerCore returns the number of worker per CPU core for
+// reading files.
+//
+// WriteWorkerPerCore below returns the matching value for writing files.
+// Returns a float64 representing the number of worker per CPU core.
+func ReadWorkerPerCore() float64 {
+	return _ReadWorkerPerCore
+}
+
+func WriteWorkerPerCore() float64 {
+	return _WriteWorkerPerCore
+}
+
+// ParallelFilesRead returns the number of files to be read in parallel.
+//
+// When no explicit value has been set (the stored value is 0) it
+// returns ReadParallelWorkers() instead.
+func ParallelFilesRead() int {
+	if _ParallelFilesRead == 0 {
+		return ReadParallelWorkers()
+	} else {
+		return _ParallelFilesRead
+	}
+}
+
+// SetParallelFilesRead sets the number of files to be read in parallel.
+// +// n - an integer representing the number of files to be set. +func SetParallelFilesRead(n int) { + _ParallelFilesRead = n +} diff --git a/pkg/obiformats/options.go b/pkg/obiformats/options.go index f478b2f..2068b75 100644 --- a/pkg/obiformats/options.go +++ b/pkg/obiformats/options.go @@ -1,7 +1,7 @@ package obiformats import ( - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -49,8 +49,8 @@ func MakeOptions(setters []WithOption) Options { seqBatchFormater: nil, with_progress_bar: false, buffer_size: 2, - parallel_workers: obioptions.CLIReadParallelWorkers(), - batch_size: obioptions.CLIBatchSize(), + parallel_workers: obidefault.ReadParallelWorkers(), + batch_size: obidefault.BatchSize(), total_seq_size: 1024 * 1024 * 100, // 100 MB by default no_order: false, full_file_batch: false, diff --git a/pkg/obiiter/batchiterator.go b/pkg/obiiter/batchiterator.go index 95fc044..76b6ab5 100644 --- a/pkg/obiiter/batchiterator.go +++ b/pkg/obiiter/batchiterator.go @@ -10,7 +10,7 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" "github.com/tevino/abool/v2" @@ -591,7 +591,7 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate, // A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences. 
func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate, size int, sizes ...int) IBioSequence { - nworkers := obioptions.CLIReadParallelWorkers() + nworkers := obidefault.ReadParallelWorkers() if len(sizes) > 0 { nworkers = sizes[0] @@ -643,7 +643,7 @@ func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate, func (iterator IBioSequence) FilterAnd(predicate obiseq.SequencePredicate, size int, sizes ...int) IBioSequence { - nworkers := obioptions.CLIReadParallelWorkers() + nworkers := obidefault.ReadParallelWorkers() if len(sizes) > 0 { nworkers = sizes[0] diff --git a/pkg/obitools/obicsv/iter.go b/pkg/obiiter/csv.go similarity index 99% rename from pkg/obitools/obicsv/iter.go rename to pkg/obiiter/csv.go index 5d994d4..9a1e8d2 100644 --- a/pkg/obitools/obicsv/iter.go +++ b/pkg/obiiter/csv.go @@ -1,4 +1,4 @@ -package obicsv +package obiiter import ( "fmt" diff --git a/pkg/obiiter/distribute.go b/pkg/obiiter/distribute.go index 9015b90..381f3bd 100644 --- a/pkg/obiiter/distribute.go +++ b/pkg/obiiter/distribute.go @@ -4,7 +4,7 @@ import ( "fmt" "sync" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -74,7 +74,7 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier { // It ensures that the outputs are closed and cleaned up once // processing is complete. 
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute { - batchsize := obioptions.CLIBatchSize() + batchsize := obidefault.BatchSize() outputs := make(map[int]IBioSequence, 100) slices := make(map[int]*obiseq.BioSequenceSlice, 100) diff --git a/pkg/obiiter/paired.go b/pkg/obiiter/paired.go index 5fc2d12..5655379 100644 --- a/pkg/obiiter/paired.go +++ b/pkg/obiiter/paired.go @@ -1,7 +1,7 @@ package obiiter import ( - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" log "github.com/sirupsen/logrus" ) @@ -41,8 +41,8 @@ func (iter IBioSequence) PairTo(p IBioSequence) IBioSequence { newIter := MakeIBioSequence() - iter = iter.SortBatches().Rebatch(obioptions.CLIBatchSize()) - p = p.SortBatches().Rebatch(obioptions.CLIBatchSize()) + iter = iter.SortBatches().Rebatch(obidefault.BatchSize()) + p = p.SortBatches().Rebatch(obidefault.BatchSize()) newIter.Add(1) diff --git a/pkg/obiiter/workers.go b/pkg/obiiter/workers.go index b403584..6320737 100644 --- a/pkg/obiiter/workers.go +++ b/pkg/obiiter/workers.go @@ -3,7 +3,7 @@ package obiiter import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -18,7 +18,7 @@ import ( func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, breakOnError bool, sizes ...int) IBioSequence { - nworkers := obioptions.CLIParallelWorkers() + nworkers := obidefault.ParallelWorkers() if len(sizes) > 0 { nworkers = sizes[0] @@ -34,13 +34,13 @@ func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, // Parameters: // - predicate: A function that takes a sequence and returns a boolean value indicating whether the sequence satisfies a certain condition. 
// - worker: A function that takes a sequence and returns a modified version of the sequence. -// - sizes: Optional. One or more integers representing the number of workers to be used for parallel processing. If not provided, the number of workers will be determined by the obioptions.CLIReadParallelWorkers() function. +// - sizes: Optional. One or more integers representing the number of workers to be used for parallel processing. If not provided, the number of workers will be determined by the obidefault.ReadParallelWorkers() function. // // Return: // - newIter: A new IBioSequence iterator with the modified sequences. func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate, worker obiseq.SeqWorker, breakOnError bool, sizes ...int) IBioSequence { - nworkers := obioptions.CLIReadParallelWorkers() + nworkers := obidefault.ReadParallelWorkers() if len(sizes) > 0 { nworkers = sizes[0] @@ -63,7 +63,7 @@ func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePre // // The function returns a new IBioSequence containing the modified slices. 
func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, breakOnError bool, sizes ...int) IBioSequence { - nworkers := obioptions.CLIParallelWorkers() + nworkers := obidefault.ParallelWorkers() if len(sizes) > 0 { nworkers = sizes[0] diff --git a/pkg/obilua/lua.go b/pkg/obilua/lua.go index 20ff121..f39d1fa 100644 --- a/pkg/obilua/lua.go +++ b/pkg/obilua/lua.go @@ -6,8 +6,8 @@ import ( "os" "reflect" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" log "github.com/sirupsen/logrus" lua "github.com/yuin/gopher-lua" @@ -154,7 +154,7 @@ func LuaProcessor(iterator obiiter.IBioSequence, name, program string, breakOnEr newIter := obiiter.MakeIBioSequence() if nworkers <= 0 { - nworkers = obioptions.CLIParallelWorkers() + nworkers = obidefault.ParallelWorkers() } newIter.Add(nworkers) diff --git a/pkg/obingslibrary/worker.go b/pkg/obingslibrary/worker.go index 39a9993..2cfe391 100644 --- a/pkg/obingslibrary/worker.go +++ b/pkg/obingslibrary/worker.go @@ -1,7 +1,7 @@ package obingslibrary import ( - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -126,8 +126,8 @@ func MakeOptions(setters []WithOption) Options { allowedMismatch: 0, allowsIndel: false, withProgressBar: false, - parallelWorkers: obioptions.CLIParallelWorkers(), - batchSize: obioptions.CLIBatchSize(), + parallelWorkers: obidefault.ParallelWorkers(), + batchSize: obidefault.BatchSize(), } opt := Options{&o} diff --git a/pkg/obioptions/options.go b/pkg/obioptions/options.go index 674b86c..50d8824 100644 --- a/pkg/obioptions/options.go +++ b/pkg/obioptions/options.go @@ -1,14 +1,11 @@ package obioptions import ( - "errors" 
"fmt" "os" "runtime" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitaxformat" log "github.com/sirupsen/logrus" "github.com/DavidGamba/go-getoptions" @@ -18,21 +15,11 @@ import ( ) var _Debug = false -var _WorkerPerCore = 1.0 -var _ReadWorkerPerCore = 0.25 -var _WriteWorkerPerCore = 0.25 -var _StrictReadWorker = 0 -var _StrictWriteWorker = 0 -var _ParallelFilesRead = 0 -var _MaxAllowedCPU = runtime.NumCPU() var _BatchSize = 2000 var _Pprof = false var _PprofMudex = 10 var _PprofGoroutine = 6060 -var __taxonomy__ = "" -var __alternative_name__ = false - type ArgumentParser func([]string) (*getoptions.GetOpt, []string) func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser { @@ -56,7 +43,7 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser // options.Alias("w"), // options.Description("Number of parallele threads computing the result")) - options.IntVar(&_MaxAllowedCPU, "max-cpu", _MaxAllowedCPU, + options.IntVar(obidefault.MaxCPUPtr(), "max-cpu", obidefault.MaxCPU(), options.GetEnv("OBIMAXCPU"), options.Description("Number of parallele threads computing the result")) @@ -71,7 +58,7 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser options.GetEnv("OBIPPROFGOROUTINE"), options.Description("Enable profiling of goroutine blocking profile.")) - options.IntVar(&_BatchSize, "batch-size", _BatchSize, + options.IntVar(obidefault.BatchSizePtr(), "batch-size", obidefault.BatchSize(), options.GetEnv("OBIBATCHSIZE"), options.Description("Number of sequence per batch for paralelle processing")) @@ -79,6 +66,10 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser options.GetEnv("OBISOLEXA"), options.Description("Decodes quality string according to the Solexa specification.")) + 
options.BoolVar(obidefault.CompressedPtr(), "compressed", obidefault.CompressOutput(), + options.Alias("Z"), + options.Description("Compress all the result using gzip")) + for _, o := range optionset { o(options) } @@ -129,14 +120,6 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser log.Info(" go tool pprof -http=127.0.0.1:8080 'http://localhost:6060/debug/pprof/block'") } - if options.Called("taxonomy") { - taxonomy, err := obitaxformat.LoadTaxonomy(CLISelectedTaxonomy(), - !CLIAreAlternativeNamesSelected()) - if err != nil { - log.Fatalf("Loading taxonomy error: %v", err) - } - taxonomy.SetAsDefault() - } // Handle user errors if err != nil { fmt.Fprintf(os.Stderr, "ERROR: %s\n\n", err) @@ -145,30 +128,30 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser } // Setup the maximum number of CPU usable by the program - if _MaxAllowedCPU == 1 { + if obidefault.MaxCPU() == 1 { log.Warn("Limitating the Maximum number of CPU to 1 is not recommanded") log.Warn("The number of CPU requested has been set to 2") - SetMaxCPU(2) + obidefault.SetMaxCPU(2) } if options.Called("force-one-cpu") { log.Warn("Limitating the Maximum number of CPU to 1 is not recommanded") log.Warn("The number of CPU has been forced to 1") log.Warn("This can lead to unexpected behavior") - SetMaxCPU(1) + obidefault.SetMaxCPU(1) } - runtime.GOMAXPROCS(_MaxAllowedCPU) + runtime.GOMAXPROCS(obidefault.MaxCPU()) if options.Called("max-cpu") || options.Called("force-one-cpu") { - log.Printf("CPU number limited to %d", _MaxAllowedCPU) + log.Printf("CPU number limited to %d", obidefault.MaxCPU()) } if options.Called("no-singleton") { log.Printf("No singleton option set") } - log.Printf("Number of workers set %d", CLIParallelWorkers()) + log.Printf("Number of workers set %d", obidefault.ParallelWorkers()) // if options.Called("workers") { @@ -184,17 +167,17 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser func 
LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternatiive bool) { if required { - options.StringVar(&__taxonomy__, "taxonomy", "", + options.StringVar(obidefault.SelectedTaxonomyPtr(), "taxonomy", obidefault.SelectedTaxonomy(), options.Alias("t"), options.Required(), options.Description("Path to the taxonomy database.")) } else { - options.StringVar(&__taxonomy__, "taxonomy", "", + options.StringVar(obidefault.SelectedTaxonomyPtr(), "taxonomy", obidefault.SelectedTaxonomy(), options.Alias("t"), options.Description("Path to the taxonomy database.")) } if alternatiive { - options.BoolVar(&__alternative_name__, "alternative-names", false, + options.BoolVar(obidefault.AlternativeNamesSelectedPtr(), "alternative-names", obidefault.AreAlternativeNamesSelected(), options.Alias("a"), options.Description("Enable the search on all alternative names and not only scientific names.")) } @@ -212,84 +195,6 @@ func CLIIsDebugMode() bool { return _Debug } -// CLIParallelWorkers returns the number of parallel workers used for -// computing the result. -// -// The number of parallel workers is determined by the command line option -// --max-cpu|-m and the environment variable OBIMAXCPU. This number is -// multiplied by the variable _WorkerPerCore. -// -// No parameters. -// Returns an integer representing the number of parallel workers. -func CLIParallelWorkers() int { - return int(float64(CLIMaxCPU()) * float64(WorkerPerCore())) -} - -// CLIReadParallelWorkers returns the number of parallel workers used for -// reading files. -// -// The number of parallel workers is determined by the command line option -// --max-cpu|-m and the environment variable OBIMAXCPU. This number is -// multiplied by the variable _ReadWorkerPerCore. -// -// No parameters. -// Returns an integer representing the number of parallel workers. 
-func CLIReadParallelWorkers() int { - if StrictReadWorker() == 0 { - n := int(float64(CLIMaxCPU()) * ReadWorkerPerCore()) - if n == 0 { - n = 1 - } - return n - } else { - return StrictReadWorker() - } -} - -// CLIWriteParallelWorkers returns the number of parallel workers used for -// writing files. -// -// The number of parallel workers is determined by the command line option -// --max-cpu|-m and the environment variable OBIMAXCPU. This number is -// multiplied by the variable _WriteWorkerPerCore. -// -// No parameters. -// Returns an integer representing the number of parallel workers. -func CLIWriteParallelWorkers() int { - if StrictWriteWorker() == 0 { - n := int(float64(CLIMaxCPU()) * WriteWorkerPerCore()) - if n == 0 { - n = 1 - } - return n - } else { - return StrictWriteWorker() - } -} - -// CLIMaxCPU returns the maximum number of CPU cores allowed. -// -// The maximum number of CPU cores is determined by the command line option -// --max-cpu|-m and the environment variable OBIMAXCPU. -// -// No parameters. -// Returns an integer representing the maximum number of CPU cores allowed. -func CLIMaxCPU() int { - return _MaxAllowedCPU -} - -// CLIBatchSize returns the expected size of the sequence batches. -// -// In Obitools, the sequences are processed in parallel by batches. -// The number of sequence in each batch is determined by the command line option -// --batch-size and the environment variable OBIBATCHSIZE. -// -// No parameters. -// Returns an integer value. -func CLIBatchSize() int { - return _BatchSize -} - // SetDebugOn sets the debug mode on. func SetDebugOn() { _Debug = true @@ -299,148 +204,3 @@ func SetDebugOn() { func SetDebugOff() { _Debug = false } - -// SetWorkerPerCore sets the number of workers per CPU core. -// -// It takes a float64 parameter representing the number of workers -// per CPU core and does not return any value. 
-func SetWorkerPerCore(n float64) { - _WorkerPerCore = n -} - -// SetReadWorkerPerCore sets the number of worker per CPU -// core for reading files. -// -// n float64 -func SetReadWorkerPerCore(n float64) { - _ReadWorkerPerCore = n -} - -// WorkerPerCore returns the number of workers per CPU core. -// -// No parameters. -// Returns a float64 representing the number of workers per CPU core. -func WorkerPerCore() float64 { - return _WorkerPerCore -} - -// ReadWorkerPerCore returns the number of worker per CPU core for -// computing the result. -// -// No parameters. -// Returns a float64 representing the number of worker per CPU core. -func ReadWorkerPerCore() float64 { - return _ReadWorkerPerCore -} - -// WriteWorkerPerCore returns the number of worker per CPU core for -// computing the result. -// -// No parameters. -// Returns a float64 representing the number of worker per CPU core. -func WriteWorkerPerCore() float64 { - return _WriteWorkerPerCore -} - -// SetBatchSize sets the size of the sequence batches. -// -// n - an integer representing the size of the sequence batches. -func SetBatchSize(n int) { - _BatchSize = n -} - -// SetMaxCPU sets the maximum number of CPU cores allowed. -// -// n - an integer representing the new maximum number of CPU cores. -func SetMaxCPU(n int) { - _MaxAllowedCPU = n -} - -// SetReadWorker sets the number of workers for reading files. -// -// The number of worker dedicated to reading files is determined -// as the number of allowed CPU cores multiplied by number of read workers per core. -// Setting the number of read workers using this function allows to decouple the number -// of read workers from the number of CPU cores. -// -// n - an integer representing the number of workers to be set. -func SetStrictReadWorker(n int) { - _StrictReadWorker = n -} - -// ReadWorker returns the number of workers for reading files. -// -// No parameters. -// Returns an integer representing the number of workers. 
-func StrictReadWorker() int { - return _StrictReadWorker -} - -// SetWriteWorker sets the number of workers for writing files. -// -// The number of worker dedicated to writing files is determined -// as the number of allowed CPU cores multiplied by number of write workers per core. -// Setting the number of write workers using this function allows to decouple the number -// of write workers from the number of CPU cores. -// -// n - an integer representing the number of workers to be set. -func SetStrictWriteWorker(n int) { - _StrictWriteWorker = n -} - -// WriteWorker returns the number of workers for writing files. -// -// No parameters. -// Returns an integer representing the number of workers. -func StrictWriteWorker() int { - return _StrictWriteWorker -} - -// ParallelFilesRead returns the number of files to be read in parallel. -// -// No parameters. -// Returns an integer representing the number of files to be read. -func ParallelFilesRead() int { - if _ParallelFilesRead == 0 { - return CLIReadParallelWorkers() - } else { - return _ParallelFilesRead - } -} - -// SetParallelFilesRead sets the number of files to be read in parallel. -// -// n - an integer representing the number of files to be set. 
-func SetParallelFilesRead(n int) { - _ParallelFilesRead = n -} - -func CLISelectedTaxonomy() string { - return __taxonomy__ -} - -func CLIHasSelectedTaxonomy() bool { - return __taxonomy__ != "" -} - -func CLIAreAlternativeNamesSelected() bool { - return __alternative_name__ -} - -func CLILoadSelectedTaxonomy() (*obitax.Taxonomy, error) { - if obitax.IsDefaultTaxonomyDefined() { - return obitax.DefaultTaxonomy(), nil - } - - if CLISelectedTaxonomy() != "" { - taxonomy, err := obitaxformat.LoadTaxonomy(CLISelectedTaxonomy(), - !CLIAreAlternativeNamesSelected()) - if err != nil { - return nil, err - } - taxonomy.SetAsDefault() - return taxonomy, nil - } - - return nil, errors.New("no taxonomy selected using option -t|--taxonomy") -} diff --git a/pkg/obioptions/version.go b/pkg/obioptions/version.go index ecd3e0b..4c06358 100644 --- a/pkg/obioptions/version.go +++ b/pkg/obioptions/version.go @@ -8,7 +8,7 @@ import ( // corresponds to the last commit, and not the one when the file will be // commited -var _Commit = "9acb4a8" +var _Commit = "0a567f6" var _Version = "Release 4.2.0" // Version returns the version of the obitools package. 
diff --git a/pkg/obitaxformat/csvtaxdump/read.go b/pkg/obitax/csvtaxdump_read.go similarity index 89% rename from pkg/obitaxformat/csvtaxdump/read.go rename to pkg/obitax/csvtaxdump_read.go index 51c36d0..adb70d6 100644 --- a/pkg/obitaxformat/csvtaxdump/read.go +++ b/pkg/obitax/csvtaxdump_read.go @@ -1,16 +1,15 @@ -package csvtaxdump +package obitax import ( "encoding/csv" "errors" "strings" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" log "github.com/sirupsen/logrus" ) -func LoadCSVTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) { +func LoadCSVTaxonomy(path string, onlysn bool) (*Taxonomy, error) { file, err := obiutils.Ropen(path) @@ -71,7 +70,7 @@ func LoadCSVTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) { name := obiutils.RemoveAllExt(path) short := obiutils.Basename(path) - taxonomy := obitax.NewTaxonomy(name, short, obiutils.AsciiAlphaNumSet) + taxonomy := NewTaxonomy(name, short, obiutils.AsciiAlphaNumSet) line, err := csvfile.Read() diff --git a/pkg/obitaxformat/csvtaxdump/write.go b/pkg/obitax/csvtaxdump_write.go similarity index 50% rename from pkg/obitaxformat/csvtaxdump/write.go rename to pkg/obitax/csvtaxdump_write.go index 82c9b1b..a6f14b1 100644 --- a/pkg/obitaxformat/csvtaxdump/write.go +++ b/pkg/obitax/csvtaxdump_write.go @@ -1,12 +1,11 @@ -package csvtaxdump +package obitax // import ( // "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" -// "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv" // ) // func WriteTaxonomyCSV(iterator obiiter.IBioSequence, -// terminalAction bool, filenames ...string) *obicsv.ICSVRecord { +// terminalAction bool, filenames ...string) *obiiter.ICSVRecord { // return nil // } diff --git a/pkg/obitax/default_taxonomy.go b/pkg/obitax/default_taxonomy.go index 9a10ac1..df7d886 100644 --- a/pkg/obitax/default_taxonomy.go +++ b/pkg/obitax/default_taxonomy.go @@ -1,6 +1,9 @@ 
package obitax -import log "github.com/sirupsen/logrus" +import ( + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" + log "github.com/sirupsen/logrus" +) var __defaut_taxonomy__ *Taxonomy @@ -26,5 +29,20 @@ func IsDefaultTaxonomyDefined() bool { } func DefaultTaxonomy() *Taxonomy { + var err error + if __defaut_taxonomy__ == nil { + if obidefault.HasSelectedTaxonomy() { + __defaut_taxonomy__, err = LoadTaxonomy( + obidefault.SelectedTaxonomy(), + !obidefault.AreAlternativeNamesSelected(), + ) + + if err != nil { + log.Fatalf("Cannot load default taxonomy: %v", err) + + } + } + } + return __defaut_taxonomy__ } diff --git a/pkg/obitaxformat/ncbitaxdump/read.go b/pkg/obitax/ncbitaxdump_read.go similarity index 92% rename from pkg/obitaxformat/ncbitaxdump/read.go rename to pkg/obitax/ncbitaxdump_read.go index 3ba0bea..f359ea4 100644 --- a/pkg/obitaxformat/ncbitaxdump/read.go +++ b/pkg/obitax/ncbitaxdump_read.go @@ -1,4 +1,4 @@ -package ncbitaxdump +package obitax import ( "bufio" @@ -11,7 +11,6 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) @@ -28,7 +27,7 @@ import ( // The function reads each record from the input, trims whitespace from the taxid, parent, and rank, // and adds the taxon to the taxonomy. If an error occurs while adding a taxon, the function logs // a fatal error and terminates the program. -func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) { +func loadNodeTable(reader io.Reader, taxonomy *Taxonomy) { file := csv.NewReader(reader) file.Comma = '|' file.Comment = '#' @@ -66,7 +65,7 @@ func loadNodeTable(reader io.Reader, taxonomy *obitax.Taxonomy) { // The number of taxon names successfully loaded into the taxonomy. If a line is too long, -1 is returned. 
// The function processes each line, trims whitespace from the taxid, name, and class name, and sets // the name in the taxonomy if the conditions are met. -func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int { +func loadNameTable(reader io.Reader, taxonomy *Taxonomy, onlysn bool) int { // file := csv.NewReader(reader) // file.Comma = '|' // file.Comment = '#' @@ -112,7 +111,7 @@ func loadNameTable(reader io.Reader, taxonomy *obitax.Taxonomy, onlysn bool) int // // The number of alias mappings successfully loaded into the taxonomy. The function processes // each record, trims whitespace from the old and new taxid, and adds the alias to the taxonomy. -func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int { +func loadMergedTable(reader io.Reader, taxonomy *Taxonomy) int { file := csv.NewReader(reader) file.Comma = '|' file.Comment = '#' @@ -143,9 +142,9 @@ func loadMergedTable(reader io.Reader, taxonomy *obitax.Taxonomy) int { // Returns: // - A pointer to the obitax.Taxonomy object containing the loaded taxonomy data, or an error // if any of the files cannot be opened or read. 
-func LoadNCBITaxDump(directory string, onlysn bool) (*obitax.Taxonomy, error) { +func LoadNCBITaxDump(directory string, onlysn bool) (*Taxonomy, error) { - taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet) + taxonomy := NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet) // // Load the Taxonomy nodes diff --git a/pkg/obitaxformat/ncbitaxdump/readtar.go b/pkg/obitax/ncbitaxdump_readtar.go similarity index 91% rename from pkg/obitaxformat/ncbitaxdump/readtar.go rename to pkg/obitax/ncbitaxdump_readtar.go index 92d112b..a5862eb 100644 --- a/pkg/obitaxformat/ncbitaxdump/readtar.go +++ b/pkg/obitax/ncbitaxdump_readtar.go @@ -1,11 +1,10 @@ -package ncbitaxdump +package obitax import ( "archive/tar" "bufio" "fmt" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" log "github.com/sirupsen/logrus" @@ -63,9 +62,9 @@ func IsNCBITarTaxDump(path string) bool { return citations && division && gencode && names && delnodes && gc && merged && nodes } -func LoadNCBITarTaxDump(path string, onlysn bool) (*obitax.Taxonomy, error) { +func LoadNCBITarTaxDump(path string, onlysn bool) (*Taxonomy, error) { - taxonomy := obitax.NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet) + taxonomy := NewTaxonomy("NCBI Taxonomy", "taxon", obiutils.AsciiDigitSet) // // Load the Taxonomy nodes diff --git a/pkg/obitaxformat/taxonomy_read.go b/pkg/obitax/taxonomy_read.go similarity index 69% rename from pkg/obitaxformat/taxonomy_read.go rename to pkg/obitax/taxonomy_read.go index 4a2a102..2bd46c7 100644 --- a/pkg/obitaxformat/taxonomy_read.go +++ b/pkg/obitax/taxonomy_read.go @@ -1,26 +1,23 @@ -package obitaxformat +package obitax import ( "fmt" "os" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitaxformat/csvtaxdump" - 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitaxformat/ncbitaxdump" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" "github.com/gabriel-vasile/mimetype" log "github.com/sirupsen/logrus" ) -type TaxonomyLoader func(path string, onlysn bool) (*obitax.Taxonomy, error) +type TaxonomyLoader func(path string, onlysn bool) (*Taxonomy, error) func DetectTaxonomyTarFormat(path string) (TaxonomyLoader, error) { switch { - case ncbitaxdump.IsNCBITarTaxDump(path): + case IsNCBITarTaxDump(path): log.Infof("NCBI Taxdump Tar Archive detected: %s", path) - return ncbitaxdump.LoadNCBITarTaxDump, nil + return LoadNCBITarTaxDump, nil } return nil, fmt.Errorf("unknown taxonomy format: %s", path) @@ -44,7 +41,7 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) { if fileInfo.IsDir() { // For the moment, we only support NCBI Taxdump directory format log.Infof("NCBI Taxdump detected: %s", path) - return ncbitaxdump.LoadNCBITaxDump, nil + return LoadNCBITaxDump, nil } else { file, err := obiutils.Ropen(path) @@ -63,7 +60,7 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) { switch mimetype.String() { case "text/csv": - return csvtaxdump.LoadCSVTaxonomy, nil + return LoadCSVTaxonomy, nil case "application/x-tar": return DetectTaxonomyTarFormat(path) } @@ -74,7 +71,7 @@ func DetectTaxonomyFormat(path string) (TaxonomyLoader, error) { return nil, nil } -func LoadTaxonomy(path string, onlysn bool) (*obitax.Taxonomy, error) { +func LoadTaxonomy(path string, onlysn bool) (*Taxonomy, error) { loader, err := DetectTaxonomyFormat(path) if err != nil { diff --git a/pkg/obitools/obiannotate/obiannotate.go b/pkg/obitools/obiannotate/obiannotate.go index 5a7fc84..1d65125 100644 --- a/pkg/obitools/obiannotate/obiannotate.go +++ b/pkg/obitools/obiannotate/obiannotate.go @@ -7,8 +7,8 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obicorazick" + 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obigrep" @@ -345,7 +345,7 @@ func CLIAnnotationPipeline() obiiter.Pipeable { worker := CLIAnnotationWorker() annotator := obiseq.SeqToSliceConditionalWorker(predicate, worker, false) - f := obiiter.SliceWorkerPipe(annotator, false, obioptions.CLIParallelWorkers()) + f := obiiter.SliceWorkerPipe(annotator, false, obidefault.ParallelWorkers()) return f } diff --git a/pkg/obitools/obiclean/obiclean.go b/pkg/obitools/obiclean/obiclean.go index 98530ea..aa9641e 100644 --- a/pkg/obitools/obiclean/obiclean.go +++ b/pkg/obitools/obiclean/obiclean.go @@ -4,8 +4,8 @@ import ( "fmt" "os" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -298,7 +298,7 @@ func CLIOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence { BuildSeqGraph(samples, DistStepMax(), - obioptions.CLIParallelWorkers()) + obidefault.ParallelWorkers()) if RatioMax() < 1.0 { bar := (*progressbar.ProgressBar)(nil) diff --git a/pkg/obitools/obicleandb/obicleandb.go b/pkg/obitools/obicleandb/obicleandb.go index 6a5b1e5..027361a 100644 --- a/pkg/obitools/obicleandb/obicleandb.go +++ b/pkg/obitools/obicleandb/obicleandb.go @@ -7,8 +7,8 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obichunk" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" @@ -234,9 +234,9 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence { obichunk.OptionSortOnMemory(), obichunk.OptionSubCategory("taxid"), obichunk.OptionsParallelWorkers( - obioptions.CLIParallelWorkers()), + obidefault.ParallelWorkers()), obichunk.OptionsBatchSize( - obioptions.CLIBatchSize()), + obidefault.BatchSize()), obichunk.OptionNAValue("NA"), ) @@ -261,22 +261,22 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence { goodTaxa := obiseq.IsAValidTaxon(taxonomy, CLIUpdateTaxids()).And(rankPredicate) usable := unique.FilterOn(goodTaxa, - obioptions.CLIBatchSize(), - obioptions.CLIParallelWorkers()) + obidefault.BatchSize(), + obidefault.ParallelWorkers()) annotated := usable.MakeIWorker(obiseq.MakeSetSpeciesWorker(taxonomy), false, - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ).MakeIWorker(obiseq.MakeSetGenusWorker(taxonomy), false, - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ).MakeIWorker(obiseq.MakeSetFamilyWorker(taxonomy), false, - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ) // .MakeIWorker(SequenceTrust, // false, - // obioptions.CLIParallelWorkers(), + // obidefault.ParallelWorkers(), // ) source, references := annotated.Load() @@ -284,7 +284,7 @@ func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence { mannwithney := MakeSequenceFamilyGenusWorker(references) partof := obiiter.IBatchOver(source, references, - obioptions.CLIBatchSize()) + obidefault.BatchSize()) // genera_iterator, err 
:= obichunk.ISequenceChunk( // annotated, diff --git a/pkg/obitools/obiconsensus/obiconsensus.go b/pkg/obitools/obiconsensus/obiconsensus.go index aab05ba..046c663 100644 --- a/pkg/obitools/obiconsensus/obiconsensus.go +++ b/pkg/obitools/obiconsensus/obiconsensus.go @@ -8,11 +8,11 @@ import ( "sync" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obigraph" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obisuffix" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiannotate" @@ -472,7 +472,7 @@ func CLIOBIMinion(itertator obiiter.IBioSequence) obiiter.IBioSequence { CLISampleAttribute(), seqs, CLIDistStepMax(), - obioptions.CLIParallelWorkers()) + obidefault.ParallelWorkers()) if bar != nil { bar.Add(1) } diff --git a/pkg/obitools/obiconvert/sequence_reader.go b/pkg/obitools/obiconvert/sequence_reader.go index 5cec695..d38b4c5 100644 --- a/pkg/obitools/obiconvert/sequence_reader.go +++ b/pkg/obitools/obiconvert/sequence_reader.go @@ -12,7 +12,6 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" ) func ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) { @@ -102,13 +101,13 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) { opts = append(opts, 
obiformats.OptionsReadQualities(obidefault.ReadQualities())) - nworkers := obioptions.CLIReadParallelWorkers() + nworkers := obidefault.ReadParallelWorkers() if nworkers < 2 { nworkers = 2 } opts = append(opts, obiformats.OptionsParallelWorkers(nworkers)) - opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize())) + opts = append(opts, obiformats.OptionsBatchSize(obidefault.BatchSize())) opts = append(opts, obiformats.OptionsFullFileBatch(FullFileBatch())) @@ -159,7 +158,7 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) { nreader := 1 if CLINoInputOrder() { - nreader = obioptions.ParallelFilesRead() + nreader = obidefault.ParallelFilesRead() } iterator = obiformats.ReadSequencesBatchFromFiles( diff --git a/pkg/obitools/obiconvert/sequence_writer.go b/pkg/obitools/obiconvert/sequence_writer.go index 172dbc5..8c12a7f 100644 --- a/pkg/obitools/obiconvert/sequence_writer.go +++ b/pkg/obitools/obiconvert/sequence_writer.go @@ -8,9 +8,9 @@ import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" ) func BuildPairedFileNames(filename string) (string, string) { @@ -53,10 +53,10 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence, opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader)) } - nworkers := obioptions.CLIWriteParallelWorkers() + nworkers := obidefault.WriteParallelWorkers() opts = append(opts, obiformats.OptionsParallelWorkers(nworkers)) - opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize())) + opts = append(opts, obiformats.OptionsBatchSize(obidefault.BatchSize())) opts = append(opts, obiformats.OptionsCompressed(CLICompressed())) diff --git a/pkg/obitools/obicsv/csvoption.go 
b/pkg/obitools/obicsv/csvoption.go index 7adf3a5..2440654 100644 --- a/pkg/obitools/obicsv/csvoption.go +++ b/pkg/obitools/obicsv/csvoption.go @@ -1,7 +1,8 @@ package obicsv import ( - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" ) // __options__ holds configuration options for processing. @@ -53,8 +54,8 @@ func MakeOptions(setters []WithOption) Options { with_progress_bar: false, filename: "-", buffer_size: 2, - parallel_workers: obioptions.CLIReadParallelWorkers(), - batch_size: obioptions.CLIBatchSize(), + parallel_workers: obidefault.ReadParallelWorkers(), + batch_size: obidefault.BatchSize(), no_order: false, full_file_batch: false, closefile: false, @@ -69,7 +70,7 @@ func MakeOptions(setters []WithOption) Options { csv_quality: false, csv_separator: ",", csv_navalue: "NA", - csv_keys: make(CSVHeader, 0), + csv_keys: make(obiiter.CSVHeader, 0), csv_auto: false, source: "unknown", } diff --git a/pkg/obitools/obicsv/obicsv.go b/pkg/obitools/obicsv/obicsv.go index 1d2bd31..f7b174e 100644 --- a/pkg/obitools/obicsv/obicsv.go +++ b/pkg/obitools/obicsv/obicsv.go @@ -3,13 +3,13 @@ package obicsv import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" ) func CLIWriteSequenceCSV(iterator obiiter.IBioSequence, - terminalAction bool, filenames ...string) *ICSVRecord { + terminalAction bool, filenames ...string) *obiiter.ICSVRecord { if obiconvert.CLIProgressBar() { iterator = iterator.Speed("Writing CSV") @@ -17,14 +17,14 @@ func CLIWriteSequenceCSV(iterator obiiter.IBioSequence, opts := make([]WithOption, 0, 10) - nworkers := 
obioptions.CLIParallelWorkers() / 4 + nworkers := obidefault.ParallelWorkers() / 4 if nworkers < 2 { nworkers = 2 } opts = append(opts, OptionsParallelWorkers(nworkers)) - opts = append(opts, OptionsBatchSize(obioptions.CLIBatchSize())) - opts = append(opts, OptionsCompressed(obiconvert.CLICompressed())) + opts = append(opts, OptionsBatchSize(obidefault.BatchSize())) + opts = append(opts, OptionsCompressed(obidefault.CompressOutput())) opts = append(opts, CSVId(CLIPrintId()), CSVCount(CLIPrintCount()), @@ -42,12 +42,12 @@ func CLIWriteSequenceCSV(iterator obiiter.IBioSequence, } -func CLICSVWriter(iterator *ICSVRecord, +func CLICSVWriter(iterator *obiiter.ICSVRecord, terminalAction bool, - options ...WithOption) *ICSVRecord { + options ...WithOption) *obiiter.ICSVRecord { var err error - var newIter *ICSVRecord + var newIter *obiiter.ICSVRecord if obiconvert.CLIOutPutFileName() != "-" { options = append(options, OptionFileName(obiconvert.CLIOutPutFileName())) diff --git a/pkg/obitools/obicsv/sequence.go b/pkg/obitools/obicsv/sequence.go index fc57a1f..9d0f6e9 100644 --- a/pkg/obitools/obicsv/sequence.go +++ b/pkg/obitools/obicsv/sequence.go @@ -8,9 +8,9 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" ) -func CSVSequenceHeader(opt Options) CSVHeader { +func CSVSequenceHeader(opt Options) obiiter.CSVHeader { keys := opt.CSVKeys() - record := make(CSVHeader, 0, len(keys)+4) + record := make(obiiter.CSVHeader, 0, len(keys)+4) if opt.CSVId() { record.AppendField("id") @@ -45,12 +45,12 @@ func CSVSequenceHeader(opt Options) CSVHeader { return record } -func CSVBatchFromSequences(batch obiiter.BioSequenceBatch, opt Options) CSVRecordBatch { +func CSVBatchFromSequences(batch obiiter.BioSequenceBatch, opt Options) obiiter.CSVRecordBatch { keys := opt.CSVKeys() - csvslice := make([]CSVRecord, batch.Len()) + csvslice := make([]obiiter.CSVRecord, batch.Len()) for i, sequence := range batch.Slice() { - record := make(CSVRecord) + record := 
make(obiiter.CSVRecord) if opt.CSVId() { record["id"] = sequence.Id() @@ -108,10 +108,10 @@ func CSVBatchFromSequences(batch obiiter.BioSequenceBatch, opt Options) CSVRecor csvslice[i] = record } - return MakeCSVRecordBatch(batch.Source(), batch.Order(), csvslice) + return obiiter.MakeCSVRecordBatch(batch.Source(), batch.Order(), csvslice) } -func NewCSVSequenceIterator(iter obiiter.IBioSequence, options ...WithOption) *ICSVRecord { +func NewCSVSequenceIterator(iter obiiter.IBioSequence, options ...WithOption) *obiiter.ICSVRecord { opt := MakeOptions(options) @@ -128,7 +128,7 @@ func NewCSVSequenceIterator(iter obiiter.IBioSequence, options ...WithOption) *I } } - newIter := NewICSVRecord() + newIter := obiiter.NewICSVRecord() newIter.SetHeader(CSVSequenceHeader(opt)) nwriters := opt.ParallelWorkers() diff --git a/pkg/obitools/obicsv/writer.go b/pkg/obitools/obicsv/writer.go index 941f99a..9d435a3 100644 --- a/pkg/obitools/obicsv/writer.go +++ b/pkg/obitools/obicsv/writer.go @@ -7,12 +7,13 @@ import ( "os" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" log "github.com/sirupsen/logrus" ) -func FormatCVSBatch(batch CSVRecordBatch, header CSVHeader, navalue string) *bytes.Buffer { +func FormatCVSBatch(batch obiiter.CSVRecordBatch, header obiiter.CSVHeader, navalue string) *bytes.Buffer { buff := new(bytes.Buffer) csv := csv.NewWriter(buff) @@ -44,14 +45,14 @@ func FormatCVSBatch(batch CSVRecordBatch, header CSVHeader, navalue string) *byt return buff } -func WriteCSV(iterator *ICSVRecord, +func WriteCSV(iterator *obiiter.ICSVRecord, file io.WriteCloser, - options ...WithOption) (*ICSVRecord, error) { + options ...WithOption) (*obiiter.ICSVRecord, error) { opt := MakeOptions(options) file, _ = obiutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile()) - newIter := NewICSVRecord() + newIter := 
obiiter.NewICSVRecord() nwriters := opt.ParallelWorkers() @@ -65,7 +66,7 @@ func WriteCSV(iterator *ICSVRecord, log.Debugf("Writing CSV file done") }() - ff := func(iterator *ICSVRecord) { + ff := func(iterator *obiiter.ICSVRecord) { for iterator.Next() { batch := iterator.Get() @@ -108,8 +109,8 @@ func WriteCSV(iterator *ICSVRecord, // os.Stdout as the output file, and the options slice. // // The function returns the same bio sequence iterator and an error if any occurred. -func WriteCSVToStdout(iterator *ICSVRecord, - options ...WithOption) (*ICSVRecord, error) { +func WriteCSVToStdout(iterator *obiiter.ICSVRecord, + options ...WithOption) (*obiiter.ICSVRecord, error) { // options = append(options, OptionDontCloseFile()) options = append(options, OptionCloseFile()) return WriteCSV(iterator, os.Stdout, options...) @@ -126,9 +127,9 @@ func WriteCSVToStdout(iterator *ICSVRecord, // Returns: // - obiiter.IBioSequence: The updated biosequence iterator. // - error: Any error that occurred during the writing process. 
-func WriteCSVToFile(iterator *ICSVRecord, +func WriteCSVToFile(iterator *obiiter.ICSVRecord, filename string, - options ...WithOption) (*ICSVRecord, error) { + options ...WithOption) (*obiiter.ICSVRecord, error) { opt := MakeOptions(options) flags := os.O_WRONLY | os.O_CREATE diff --git a/pkg/obitools/obidemerge/demerge.go b/pkg/obitools/obidemerge/demerge.go index b3ca77b..a0bfc17 100644 --- a/pkg/obitools/obidemerge/demerge.go +++ b/pkg/obitools/obidemerge/demerge.go @@ -1,8 +1,8 @@ package obidemerge import ( + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -34,5 +34,5 @@ func MakeDemergeWorker(key string) obiseq.SeqWorker { func CLIDemergeSequences(iterator obiiter.IBioSequence) obiiter.IBioSequence { worker := MakeDemergeWorker(CLIDemergeSlot()) - return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0) + return iterator.MakeIWorker(worker, false, obidefault.ParallelWorkers(), 0) } diff --git a/pkg/obitools/obidistribute/distribute.go b/pkg/obitools/obidistribute/distribute.go index 42b9405..4757a28 100644 --- a/pkg/obitools/obidistribute/distribute.go +++ b/pkg/obitools/obidistribute/distribute.go @@ -3,9 +3,9 @@ package obidistribute import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" ) @@ -25,13 +25,13 @@ func CLIDistributeSequence(sequences obiiter.IBioSequence) { opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader)) } - nworkers := 
obioptions.CLIParallelWorkers() / 4 + nworkers := obidefault.ParallelWorkers() / 4 if nworkers < 2 { nworkers = 2 } opts = append(opts, obiformats.OptionsParallelWorkers(nworkers), - obiformats.OptionsBatchSize(obioptions.CLIBatchSize()), + obiformats.OptionsBatchSize(obidefault.BatchSize()), obiformats.OptionsAppendFile(CLIAppendSequences()), obiformats.OptionsCompressed(obiconvert.CLICompressed())) @@ -47,7 +47,7 @@ func CLIDistributeSequence(sequences obiiter.IBioSequence) { } dispatcher := sequences.Distribute(CLISequenceClassifier(), - obioptions.CLIBatchSize()) + obidefault.BatchSize()) obiformats.WriterDispatcher(CLIFileNamePattern(), dispatcher, formater, opts..., diff --git a/pkg/obitools/obifind/iterator.go b/pkg/obitools/obifind/iterator.go index a97a570..660c0e1 100644 --- a/pkg/obitools/obifind/iterator.go +++ b/pkg/obitools/obifind/iterator.go @@ -3,9 +3,9 @@ package obifind import ( "slices" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv" ) type __options__ struct { @@ -38,7 +38,7 @@ type WithOption func(Options) // - An Options instance with the specified settings. 
func MakeOptions(setters []WithOption) Options { o := __options__{ - batch_size: obioptions.CLIBatchSize(), // Number of items to process in a batch + batch_size: obidefault.BatchSize(), // Number of items to process in a batch with_pattern: true, with_parent: false, with_path: false, @@ -192,12 +192,12 @@ func OptionsWithMetadata(values ...string) WithOption { return f } -func NewCSVTaxaIterator(iterator *obitax.ITaxon, options ...WithOption) *obicsv.ICSVRecord { +func NewCSVTaxaIterator(iterator *obitax.ITaxon, options ...WithOption) *obiiter.ICSVRecord { opt := MakeOptions(options) metakeys := make([]string, 0) - newIter := obicsv.NewICSVRecord() + newIter := obiiter.NewICSVRecord() newIter.Add(1) @@ -240,11 +240,11 @@ func NewCSVTaxaIterator(iterator *obitax.ITaxon, options ...WithOption) *obicsv. go func() { o := 0 - data := make([]obicsv.CSVRecord, 0, batch_size) + data := make([]obiiter.CSVRecord, 0, batch_size) for iterator.Next() { taxon := iterator.Get() - record := make(obicsv.CSVRecord) + record := make(obiiter.CSVRecord) if opt.WithPattern() { record["query"] = taxon.MetadataAsString("query") @@ -282,15 +282,15 @@ func NewCSVTaxaIterator(iterator *obitax.ITaxon, options ...WithOption) *obicsv. 
data = append(data, record) if len(data) >= batch_size { - newIter.Push(obicsv.MakeCSVRecordBatch(opt.Source(), o, data)) - data = make([]obicsv.CSVRecord, 0, batch_size) + newIter.Push(obiiter.MakeCSVRecordBatch(opt.Source(), o, data)) + data = make([]obiiter.CSVRecord, 0, batch_size) o++ } } if len(data) > 0 { - newIter.Push(obicsv.MakeCSVRecordBatch(opt.Source(), o, data)) + newIter.Push(obiiter.MakeCSVRecordBatch(opt.Source(), o, data)) } newIter.Done() diff --git a/pkg/obitools/obifind/obifind.go b/pkg/obitools/obifind/obifind.go index f1df393..68877c8 100644 --- a/pkg/obitools/obifind/obifind.go +++ b/pkg/obitools/obifind/obifind.go @@ -1,7 +1,8 @@ package obifind import ( - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obicsv" @@ -39,7 +40,7 @@ func CLIFilterRankRestriction(iterator *obitax.ITaxon) *obitax.ITaxon { return iterator } -func CLICSVTaxaIterator(iterator *obitax.ITaxon) *obicsv.ICSVRecord { +func CLICSVTaxaIterator(iterator *obitax.ITaxon) *obiiter.ICSVRecord { if iterator == nil { return nil } @@ -53,12 +54,12 @@ func CLICSVTaxaIterator(iterator *obitax.ITaxon) *obicsv.ICSVRecord { OptionsWithScientificName(CLIWithScientificName()), OptionsWithPath(CLIWithPath()), OptionsRawTaxid(CLIRawTaxid()), - OptionsSource(obioptions.CLISelectedTaxonomy()), + OptionsSource(obidefault.SelectedTaxonomy()), ) return NewCSVTaxaIterator(iterator, options...) 
} -func CLICSVTaxaWriter(iterator *obitax.ITaxon, terminalAction bool) *obicsv.ICSVRecord { +func CLICSVTaxaWriter(iterator *obitax.ITaxon, terminalAction bool) *obiiter.ICSVRecord { return obicsv.CLICSVWriter(CLICSVTaxaIterator(iterator), terminalAction) } diff --git a/pkg/obitools/obigrep/grep.go b/pkg/obitools/obigrep/grep.go index 453d40e..bdac04f 100644 --- a/pkg/obitools/obigrep/grep.go +++ b/pkg/obitools/obigrep/grep.go @@ -3,8 +3,8 @@ package obigrep import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" ) @@ -23,7 +23,7 @@ func CLIFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence { log.Printf("Discarded sequences saved in file: %s\n", CLIDiscardedFileName()) newIter, discarded = iterator.DivideOn(predicate, - obioptions.CLIBatchSize()) + obidefault.BatchSize()) go func() { _, err := obiconvert.CLIWriteBioSequences(discarded, @@ -37,8 +37,8 @@ func CLIFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence { } else { newIter = iterator.FilterOn(predicate, - obioptions.CLIBatchSize(), - obioptions.CLIParallelWorkers(), + obidefault.BatchSize(), + obidefault.ParallelWorkers(), ) } } else { diff --git a/pkg/obitools/obijoin/join.go b/pkg/obitools/obijoin/join.go index a34bdff..492e281 100644 --- a/pkg/obitools/obijoin/join.go +++ b/pkg/obitools/obijoin/join.go @@ -1,9 +1,9 @@ package obijoin import ( + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -137,7 +137,7 @@ func CLIJoinSequences(iterator obiiter.IBioSequence) obiiter.IBioSequence { worker := MakeJoinWorker(keys.Left, index, CLIUpdateId(), CLIUpdateSequence(), CLIUpdateQuality()) - iterator = iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers()) + iterator = iterator.MakeIWorker(worker, false, obidefault.ParallelWorkers()) return iterator } diff --git a/pkg/obitools/obikmersim/obikmersim.go b/pkg/obitools/obikmersim/obikmersim.go index e96862d..ec9437b 100644 --- a/pkg/obitools/obikmersim/obikmersim.go +++ b/pkg/obitools/obikmersim/obikmersim.go @@ -4,10 +4,10 @@ import ( "math" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obifp" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -147,7 +147,7 @@ func CLILookForSharedKmers(iterator obiiter.IBioSequence) obiiter.IBioSequence { source, references := CLIReference() if iterator == obiiter.NilIBioSequence { - iterator = obiiter.IBatchOver(source, references, obioptions.CLIBatchSize()) + iterator = obiiter.IBatchOver(source, references, obidefault.BatchSize()) } if CLISelf() { @@ -163,7 +163,7 @@ func CLILookForSharedKmers(iterator obiiter.IBioSequence) obiiter.IBioSequence { CLIMaxKmerOccurs()) worker := MakeCountMatchWorker(kmerMatch, CLIMinSharedKmers()) - newIter = iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers()) + newIter = iterator.MakeIWorker(worker, false, obidefault.ParallelWorkers()) return newIter.FilterEmpty() } @@ -174,7 +174,7 @@ func CLIAlignSequences(iterator obiiter.IBioSequence) obiiter.IBioSequence { source, references := 
CLIReference() if iterator == obiiter.NilIBioSequence { - iterator = obiiter.IBatchOver(source, references, obioptions.CLIBatchSize()) + iterator = obiiter.IBatchOver(source, references, obidefault.BatchSize()) } if CLISelf() { @@ -188,7 +188,7 @@ func CLIAlignSequences(iterator obiiter.IBioSequence) obiiter.IBioSequence { CLISparseMode(), CLIMaxKmerOccurs()) worker := MakeKmerAlignWorker(kmerMatch, CLIMinSharedKmers(), CLIGap(), CLIScale(), CLIDelta(), CLIFastRelativeScore(), 0.8, true) - newIter = iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers()) + newIter = iterator.MakeIWorker(worker, false, obidefault.ParallelWorkers()) return newIter.FilterEmpty() } diff --git a/pkg/obitools/obikmersim/options.go b/pkg/obitools/obikmersim/options.go index eaef9da..cd40e1e 100644 --- a/pkg/obitools/obikmersim/options.go +++ b/pkg/obitools/obikmersim/options.go @@ -1,8 +1,8 @@ package obikmersim import ( + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "github.com/DavidGamba/go-getoptions" @@ -105,7 +105,7 @@ func CLIReference() (string, obiseq.BioSequenceSlice) { nreader := 1 if obiconvert.CLINoInputOrder() { - nreader = obioptions.StrictReadWorker() + nreader = obidefault.StrictReadWorker() } source, references := obiformats.ReadSequencesBatchFromFiles( diff --git a/pkg/obitools/obilandmark/obilandmark.go b/pkg/obitools/obilandmark/obilandmark.go index 7f68f86..c49b400 100644 --- a/pkg/obitools/obilandmark/obilandmark.go +++ b/pkg/obitools/obilandmark/obilandmark.go @@ -6,8 +6,8 @@ import ( "sync" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" @@ -30,7 +30,7 @@ import ( // Returns: // - seqworld: A matrix of float64 values representing the mapped coordinates. func MapOnLandmarkSequences(library obiseq.BioSequenceSlice, landmark_idx []int, sizes ...int) obiutils.Matrix[float64] { - nworkers := obioptions.CLIParallelWorkers() + nworkers := obidefault.ParallelWorkers() if len(sizes) > 0 { nworkers = sizes[0] @@ -154,7 +154,7 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque } } - if obioptions.CLIHasSelectedTaxonomy() { + if obidefault.HasSelectedTaxonomy() { taxo := obitax.DefaultTaxonomy() if taxo == nil { log.Fatal("No taxonomy available") @@ -191,6 +191,6 @@ func CLISelectLandmarkSequences(iterator obiiter.IBioSequence) obiiter.IBioSeque } } - return obiiter.IBatchOver(source, library, obioptions.CLIBatchSize()) + return obiiter.IBatchOver(source, library, obidefault.BatchSize()) } diff --git a/pkg/obitools/obimatrix/obimatrix.go b/pkg/obitools/obimatrix/obimatrix.go index 5456b6b..a7b0b9e 100644 --- a/pkg/obitools/obimatrix/obimatrix.go +++ b/pkg/obitools/obimatrix/obimatrix.go @@ -8,8 +8,8 @@ import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" "golang.org/x/exp/maps" @@ -93,7 +93,7 @@ func (data *MatrixData) Update(s *obiseq.BioSequence, mapkey string) *MatrixData func IMatrix(iterator obiiter.IBioSequence) *MatrixData { - 
nproc := obioptions.CLIParallelWorkers() + nproc := obidefault.ParallelWorkers() waiter := sync.WaitGroup{} mapAttribute := CLIMapAttribute() diff --git a/pkg/obitools/obimicrosat/microsat.go b/pkg/obitools/obimicrosat/microsat.go index f9fd95d..296419b 100644 --- a/pkg/obitools/obimicrosat/microsat.go +++ b/pkg/obitools/obimicrosat/microsat.go @@ -4,8 +4,8 @@ import ( "fmt" "sort" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "github.com/dlclark/regexp2" ) @@ -168,7 +168,7 @@ func CLIAnnotateMicrosat(iterator obiiter.IBioSequence) obiiter.IBioSequence { CLIMinFlankLength(), CLIReoriented()) - newIter = iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers()) + newIter = iterator.MakeIWorker(worker, false, obidefault.ParallelWorkers()) return newIter.FilterEmpty() diff --git a/pkg/obitools/obimultiplex/demultiplex.go b/pkg/obitools/obimultiplex/demultiplex.go index 2858e83..064e4d5 100644 --- a/pkg/obitools/obimultiplex/demultiplex.go +++ b/pkg/obitools/obimultiplex/demultiplex.go @@ -3,9 +3,9 @@ package obimultiplex import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obingslibrary" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" ) @@ -19,8 +19,8 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error obingslibrary.OptionAllowedIndel(CLIAllowsIndel()), obingslibrary.OptionUnidentified(CLIUnidentifiedFileName()), obingslibrary.OptionDiscardErrors(!CLIConservedErrors()), 
- obingslibrary.OptionParallelWorkers(obioptions.CLIParallelWorkers()), - obingslibrary.OptionBatchSize(obioptions.CLIBatchSize()), + obingslibrary.OptionParallelWorkers(obidefault.ParallelWorkers()), + obingslibrary.OptionBatchSize(obidefault.BatchSize()), ) ngsfilter, err := CLINGSFIlter() @@ -35,14 +35,14 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error if !CLIConservedErrors() { log.Infoln("Discards unassigned sequences") - out = out.FilterOn(obiseq.HasAttribute("obimultiplex_error").Not(), obioptions.CLIBatchSize()) + out = out.FilterOn(obiseq.HasAttribute("obimultiplex_error").Not(), obidefault.BatchSize()) } var unidentified obiiter.IBioSequence if CLIUnidentifiedFileName() != "" { log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName()) unidentified, out = newIter.DivideOn(obiseq.HasAttribute("obimultiplex_error"), - obioptions.CLIBatchSize()) + obidefault.BatchSize()) go func() { _, err := obiconvert.CLIWriteBioSequences(unidentified, @@ -55,7 +55,7 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error }() } - log.Printf("Sequence demultiplexing using %d workers\n", obioptions.CLIParallelWorkers()) + log.Printf("Sequence demultiplexing using %d workers\n", obidefault.ParallelWorkers()) return out, nil } diff --git a/pkg/obitools/obipairing/pairing.go b/pkg/obitools/obipairing/pairing.go index a15e3ab..0614f29 100644 --- a/pkg/obitools/obipairing/pairing.go +++ b/pkg/obitools/obipairing/pairing.go @@ -6,8 +6,8 @@ import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) @@ -222,7 +222,7 @@ 
func IAssemblePESequencesBatch(iterator obiiter.IBioSequence, log.Fatalln("Sequence data must be paired") } - nworkers := obioptions.CLIParallelWorkers() + nworkers := obidefault.ParallelWorkers() if len(sizes) > 0 { nworkers = sizes[0] diff --git a/pkg/obitools/obipcr/pcr.go b/pkg/obitools/obipcr/pcr.go index f713b76..90ce0fb 100644 --- a/pkg/obitools/obipcr/pcr.go +++ b/pkg/obitools/obipcr/pcr.go @@ -2,8 +2,8 @@ package obipcr import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" log "github.com/sirupsen/logrus" ) @@ -50,7 +50,7 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) { len(CLIReversePrimer()))+min(len(CLIForwardPrimer()), len(CLIReversePrimer()))/2, 100, - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ) log.Infof("Fragmenting sequence longer than %dbp into chuncks of %dbp", CLIMaxLength()*1000, @@ -59,5 +59,5 @@ func CLIPCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) { iterator = iterator.Pipe(frags) } - return iterator.LimitMemory(0.5).MakeISliceWorker(worker, false, obioptions.CLIParallelWorkers()), nil + return iterator.LimitMemory(0.5).MakeISliceWorker(worker, false, obidefault.ParallelWorkers()), nil } diff --git a/pkg/obitools/obirefidx/famlilyindexing.go b/pkg/obitools/obirefidx/famlilyindexing.go index 91efdf6..47b7e4b 100644 --- a/pkg/obitools/obirefidx/famlilyindexing.go +++ b/pkg/obitools/obirefidx/famlilyindexing.go @@ -8,9 +8,9 @@ import ( "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obichunk" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "github.com/schollz/progressbar/v3" @@ -111,7 +111,7 @@ func MakeIndexingSliceWorker(indexslot, idslot string, waiting.Done() } - nworkers := max(min(obioptions.CLIParallelWorkers(), len(sequences)/10), 1) + nworkers := max(min(obidefault.ParallelWorkers(), len(sequences)/10), 1) waiting.Add(nworkers) @@ -134,9 +134,9 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { nref := len(references) log.Infof("Done. Database contains %d sequences", nref) - taxonomy, error := obioptions.CLILoadSelectedTaxonomy() - if error != nil { - log.Panicln(error) + taxonomy := obitax.DefaultTaxonomy() + if taxonomy == nil { + log.Panicln("No taxonomy available use the --taxonomy option") } log.Infoln("Indexing database kmers...") @@ -155,15 +155,15 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { log.Info("done") partof := obiiter.IBatchOver(source, references, - obioptions.CLIBatchSize()).MakeIWorker(obiseq.MakeSetSpeciesWorker(taxonomy), + obidefault.BatchSize()).MakeIWorker(obiseq.MakeSetSpeciesWorker(taxonomy), false, - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ).MakeIWorker(obiseq.MakeSetGenusWorker(taxonomy), false, - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ).MakeIWorker(obiseq.MakeSetFamilyWorker(taxonomy), false, - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ) family_iterator, err := obichunk.ISequenceChunk( @@ -178,11 +178,11 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { family_iterator.MakeISliceWorker( MakeStartClusterSliceWorker("reffamidx", 0.9), false, - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ).MakeISliceWorker( 
MakeIndexingSliceWorker("reffamidx_in", "reffamidx_id", &refcounts, taxonomy), false, - obioptions.CLIParallelWorkers(), + obidefault.ParallelWorkers(), ).Speed("Family Indexing", nref).Consume() clusters := obiseq.MakeBioSequenceSlice(0) @@ -240,7 +240,7 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { waiting.Done() } - nworkers := obioptions.CLIParallelWorkers() + nworkers := obidefault.ParallelWorkers() waiting.Add(nworkers) for w := 0; w < nworkers; w++ { @@ -250,7 +250,7 @@ func IndexFamilyDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { waiting.Wait() results := obiiter.IBatchOver(source, references, - obioptions.CLIBatchSize()).Speed("Writing db", nref) + obidefault.BatchSize()).Speed("Writing db", nref) return results } diff --git a/pkg/obitools/obirefidx/geomindexing.go b/pkg/obitools/obirefidx/geomindexing.go index 22eb4e6..3feca44 100644 --- a/pkg/obitools/obirefidx/geomindexing.go +++ b/pkg/obitools/obirefidx/geomindexing.go @@ -7,7 +7,7 @@ import ( log "github.com/sirupsen/logrus" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" @@ -32,7 +32,7 @@ func GeomIndexSesquence(seqidx int, iseq_channel := make(chan int) - for k := 0; k < obioptions.CLIParallelWorkers(); k++ { + for k := 0; k < obidefault.ParallelWorkers(); k++ { wg.Add(1) go func() { defer wg.Done() diff --git a/pkg/obitools/obirefidx/obirefidx.go b/pkg/obitools/obirefidx/obirefidx.go index a7e0763..1ecca14 100644 --- a/pkg/obitools/obirefidx/obirefidx.go +++ b/pkg/obitools/obirefidx/obirefidx.go @@ -6,9 +6,9 @@ import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" + 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" @@ -274,7 +274,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { indexed.Done() } - nworkers := obioptions.CLIParallelWorkers() + nworkers := obidefault.ParallelWorkers() indexed.Add(nworkers) go func() { @@ -285,5 +285,5 @@ func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence { go f() } - return indexed.Rebatch(obioptions.CLIBatchSize()) + return indexed.Rebatch(obidefault.BatchSize()) } diff --git a/pkg/obitools/obiscript/obiscript.go b/pkg/obitools/obiscript/obiscript.go index f2e61ce..d4b0920 100644 --- a/pkg/obitools/obiscript/obiscript.go +++ b/pkg/obitools/obiscript/obiscript.go @@ -1,14 +1,14 @@ package obiscript import ( + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obilua" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" ) func CLIScriptPipeline() obiiter.Pipeable { - pipe := obilua.LuaScriptPipe(CLIScriptFilename(), true, obioptions.CLIParallelWorkers()) + pipe := obilua.LuaScriptPipe(CLIScriptFilename(), true, obidefault.ParallelWorkers()) return pipe } diff --git a/pkg/obitools/obisplit/obisplit.go b/pkg/obitools/obisplit/obisplit.go index e9397ec..f8da5fe 100644 --- a/pkg/obitools/obisplit/obisplit.go +++ b/pkg/obitools/obisplit/obisplit.go @@ -7,8 +7,8 @@ import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiapat" + 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" ) @@ -295,7 +295,7 @@ func CLISlitPipeline() obiiter.Pipeable { worker := SplitPatternWorker(CLIConfig()) annotator := obiseq.SeqToSliceWorker(worker, false) - f := obiiter.SliceWorkerPipe(annotator, false, obioptions.CLIParallelWorkers()) + f := obiiter.SliceWorkerPipe(annotator, false, obidefault.ParallelWorkers()) return f } diff --git a/pkg/obitools/obisummary/obisummary.go b/pkg/obitools/obisummary/obisummary.go index 4dd1d5f..6e47afa 100644 --- a/pkg/obitools/obisummary/obisummary.go +++ b/pkg/obitools/obisummary/obisummary.go @@ -3,8 +3,8 @@ package obisummary import ( "sync" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils" ) @@ -154,7 +154,7 @@ func (data *DataSummary) Update(s *obiseq.BioSequence) *DataSummary { func ISummary(iterator obiiter.IBioSequence, summarise []string) map[string]interface{} { - nproc := obioptions.CLIParallelWorkers() + nproc := obidefault.ParallelWorkers() waiter := sync.WaitGroup{} summaries := make([]*DataSummary, nproc) diff --git a/pkg/obitools/obitag/obigeomtag.go b/pkg/obitools/obitag/obigeomtag.go index 21f21aa..c03b803 100644 --- a/pkg/obitools/obitag/obigeomtag.go +++ b/pkg/obitools/obitag/obigeomtag.go @@ -6,8 +6,8 @@ import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obistats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" @@ -200,5 +200,5 @@ func CLIGeomAssignTaxonomy(iterator obiiter.IBioSequence, ) obiiter.IBioSequence { worker := GeomIdentifySeqWorker(&references, taxo) - return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0) + return iterator.MakeIWorker(worker, false, obidefault.ParallelWorkers(), 0) } diff --git a/pkg/obitools/obitag/obitag.go b/pkg/obitools/obitag/obitag.go index 3b704f1..e5b340f 100644 --- a/pkg/obitools/obitag/obitag.go +++ b/pkg/obitools/obitag/obitag.go @@ -8,9 +8,9 @@ import ( "golang.org/x/exp/slices" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obikmer" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obirefidx" @@ -266,5 +266,5 @@ func CLIAssignTaxonomy(iterator obiiter.IBioSequence, worker := IdentifySeqWorker(references, refcounts, taxa, taxo, CLIRunExact()) - return iterator.MakeIWorker(worker, false, obioptions.CLIParallelWorkers(), 0) + return iterator.MakeIWorker(worker, false, obidefault.ParallelWorkers(), 0) } diff --git a/pkg/obitools/obitag/options.go b/pkg/obitools/obitag/options.go index 53c5011..46575dc 100644 --- a/pkg/obitools/obitag/options.go +++ b/pkg/obitools/obitag/options.go @@ -3,6 +3,7 @@ package obitag import ( log "github.com/sirupsen/logrus" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" 
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" @@ -87,15 +88,15 @@ func CLISaveRefetenceDB(db obiseq.BioSequenceSlice) { opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader)) } - nworkers := obioptions.CLIParallelWorkers() / 4 + nworkers := obidefault.ParallelWorkers() / 4 if nworkers < 2 { nworkers = 2 } opts = append(opts, obiformats.OptionsParallelWorkers(nworkers)) - opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize())) + opts = append(opts, obiformats.OptionsBatchSize(obidefault.BatchSize())) - opts = append(opts, obiformats.OptionsCompressed(obiconvert.CLICompressed())) + opts = append(opts, obiformats.OptionsCompressed(obidefault.CompressOutput())) var err error diff --git a/pkg/obitools/obitagpcr/pcrtag.go b/pkg/obitools/obitagpcr/pcrtag.go index 6c2cbbf..6215c2a 100644 --- a/pkg/obitools/obitagpcr/pcrtag.go +++ b/pkg/obitools/obitagpcr/pcrtag.go @@ -4,8 +4,8 @@ import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obialign" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obiconvert" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitools/obimultiplex" @@ -44,7 +44,7 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence, log.Fatalln("Sequence data must be paired") } - nworkers := obioptions.CLIParallelWorkers() + nworkers := obidefault.ParallelWorkers() ngsfilter, err := obimultiplex.CLINGSFIlter() if err != nil { @@ -180,14 +180,14 @@ func IPCRTagPESequencesBatch(iterator obiiter.IBioSequence, if 
!obimultiplex.CLIConservedErrors() { log.Println("Discards unassigned sequences") - iout = iout.FilterOn(obiseq.HasAttribute("obimultiplex_error").Not(), obioptions.CLIBatchSize()) + iout = iout.FilterOn(obiseq.HasAttribute("obimultiplex_error").Not(), obidefault.BatchSize()) } var unidentified obiiter.IBioSequence if obimultiplex.CLIUnidentifiedFileName() != "" { log.Printf("Unassigned sequences saved in file: %s\n", obimultiplex.CLIUnidentifiedFileName()) unidentified, iout = iout.DivideOn(obiseq.HasAttribute("obimultiplex_error"), - obioptions.CLIBatchSize()) + obidefault.BatchSize()) go func() { _, err := obiconvert.CLIWriteBioSequences(unidentified, diff --git a/pkg/obitools/obiuniq/unique.go b/pkg/obitools/obiuniq/unique.go index c651eff..701fc7e 100644 --- a/pkg/obitools/obiuniq/unique.go +++ b/pkg/obitools/obiuniq/unique.go @@ -4,8 +4,8 @@ import ( log "github.com/sirupsen/logrus" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obichunk" + "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obidefault" "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiiter" - "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions" ) func CLIUnique(sequences obiiter.IBioSequence) obiiter.IBioSequence { @@ -50,9 +50,9 @@ func CLIUnique(sequences obiiter.IBioSequence) obiiter.IBioSequence { options = append(options, obichunk.OptionsParallelWorkers( - obioptions.CLIParallelWorkers()), + obidefault.ParallelWorkers()), obichunk.OptionsBatchSize( - obioptions.CLIBatchSize()), + obidefault.BatchSize()), obichunk.OptionNAValue(CLINAValue()), )