// Package obioptions implements the command line option handling shared
// by all the OBITools commands: standard flags (debug, cpu limits,
// batch size, pprof instrumentation, quality encoding) and the NCBI
// taxonomy selection options.
package obioptions

import (
	"errors"
	"fmt"
	"net/http"
	_ "net/http/pprof" // registers the pprof handlers on http.DefaultServeMux
	"os"
	"runtime"

	log "github.com/sirupsen/logrus"

	"github.com/DavidGamba/go-getoptions"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats/ncbitaxdump"
	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"
)

var (
	_Debug             = false
	_WorkerPerCore     = 1.0
	_ReadWorkerPerCore = 0.25

	_WriteWorkerPerCore = 0.25
	_StrictReadWorker   = 0
	_StrictWriteWorker  = 0
	_ParallelFilesRead  = 0

	_MaxAllowedCPU = runtime.NumCPU()
	_BatchSize     = 2000

	_Pprof = false
	// NOTE(review): historical name, "Mudex" is a typo for "Mutex";
	// kept as-is because other files of the package may reference it.
	_PprofMudex     = 10
	_PprofGoroutine = 6060

	// _ForceOneCpu backs the --force-one-cpu flag. It used to share
	// storage with _Pprof (a bug fixed here): --force-one-cpu silently
	// enabled the pprof flag and conversely.
	_ForceOneCpu = false

	// Quality score offsets used to decode/encode FASTQ quality strings.
	// 33 is the Sanger convention; 64 is used by old Solexa files.
	_Quality_Shift_Input  = byte(33)
	_Quality_Shift_Output = byte(33)

	__taxdump__           = ""
	__alternative_name__  = false
)

// ArgumentParser parses a command line and returns the parsed option set
// together with the remaining positional arguments.
type ArgumentParser func([]string) (*getoptions.GetOpt, []string)

// _startPprofServer launches the pprof HTTP server on localhost:6060 and
// logs the command line to follow the given profile endpoint.
//
// The error returned by ListenAndServe is deliberately ignored: when
// several pprof-related flags are given, only the first server binds the
// port and the later attempts fail harmlessly (same behavior as the
// original per-branch startup code).
func _startPprofServer(endpoint string) {
	url := "localhost:6060"
	go http.ListenAndServe(url, nil) // best effort, see above
	log.Infof("Start a pprof server at address %s/debug/pprof", url)
	log.Info("Profile can be followed running concurrently the command :")
	log.Infof("  go tool pprof -http=127.0.0.1:8080 'http://%s/debug/pprof/%s'", url, endpoint)
}

// GenerateOptionParser builds an ArgumentParser combining the standard
// OBITools options with the command-specific option sets passed as
// arguments.
//
// Each element of optionset is a callback that registers extra options
// on the shared getoptions.GetOpt instance.
func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser {
	options := getoptions.New()
	options.SetMode(getoptions.Bundling)
	options.SetUnknownMode(getoptions.Fail)

	options.Bool("help", false, options.Alias("h", "?"))
	options.Bool("version", false,
		options.Description("Prints the version and exits."))

	options.BoolVar(&_Debug, "debug", false,
		options.GetEnv("OBIDEBUG"),
		options.Description("Enable debug mode, by setting log level to debug."))

	options.BoolVar(&_Pprof, "pprof", false,
		options.Description("Enable pprof server. Look at the log for details."))

	options.IntVar(&_MaxAllowedCPU, "max-cpu", _MaxAllowedCPU,
		options.GetEnv("OBIMAXCPU"),
		options.Description("Number of parallel threads computing the result"))

	// BUGFIX: this flag previously used &_Pprof as its backing variable.
	options.BoolVar(&_ForceOneCpu, "force-one-cpu", false,
		options.Description("Force to use only one cpu core for parallel processing"))

	options.IntVar(&_PprofMudex, "pprof-mutex", _PprofMudex,
		options.GetEnv("OBIPPROFMUTEX"),
		options.Description("Enable profiling of mutex lock."))

	options.IntVar(&_PprofGoroutine, "pprof-goroutine", _PprofGoroutine,
		options.GetEnv("OBIPPROFGOROUTINE"),
		options.Description("Enable profiling of goroutine blocking profile."))

	options.IntVar(&_BatchSize, "batch-size", _BatchSize,
		options.GetEnv("OBIBATCHSIZE"),
		options.Description("Number of sequences per batch for parallel processing"))

	options.Bool("solexa", false,
		options.GetEnv("OBISOLEXA"),
		options.Description("Decodes quality string according to the Solexa specification."))

	for _, o := range optionset {
		o(options)
	}

	return func(args []string) (*getoptions.GetOpt, []string) {
		remaining, err := options.Parse(args[1:])

		if options.Called("help") {
			fmt.Fprint(os.Stderr, options.Help())
			os.Exit(1)
		}

		if options.Called("version") {
			fmt.Fprintf(os.Stderr, "OBITools %s\n", VersionString())
			os.Exit(0)
		}

		// Handle user errors before acting on any option value: when the
		// command line is invalid the parse result is meaningless, and we
		// must not start servers or load the taxonomy from it.
		if err != nil {
			fmt.Fprintf(os.Stderr, "ERROR: %s\n\n", err)
			fmt.Fprint(os.Stderr, options.Help(getoptions.HelpSynopsis))
			os.Exit(1)
		}

		log.SetLevel(log.InfoLevel)
		if options.Called("debug") {
			log.SetLevel(log.DebugLevel)
			log.Debugln("Switch to debug level logging")
		}

		if options.Called("pprof") {
			_startPprofServer("profile?seconds=30")
		}

		if options.Called("pprof-mutex") {
			runtime.SetMutexProfileFraction(_PprofMudex)
			_startPprofServer("mutex")
		}

		if options.Called("pprof-goroutine") {
			runtime.SetBlockProfileRate(_PprofGoroutine)
			_startPprofServer("block")
		}

		// Load the taxonomy immediately when the --taxdump option
		// (registered by LoadTaxonomyOptionSet) is present.
		if options.Called("taxdump") {
			taxonomy, err := ncbitaxdump.LoadNCBITaxDump(
				CLISelectedNCBITaxDump(),
				!CLIAreAlternativeNamesSelected(),
			)
			if err != nil {
				log.Fatalf("Loading taxonomy error: %v", err)
			}
			taxonomy.SetAsDefault()
		}

		// Set up the maximum number of CPU cores usable by the program.
		// Running with a single core deadlocks some pipelines, so a
		// request for 1 is bumped to 2 unless explicitly forced.
		if _MaxAllowedCPU == 1 {
			log.Warn("Limiting the maximum number of CPU to 1 is not recommended")
			log.Warn("The number of CPU requested has been set to 2")
			SetMaxCPU(2)
		}

		if options.Called("force-one-cpu") {
			log.Warn("Limiting the maximum number of CPU to 1 is not recommended")
			log.Warn("The number of CPU has been forced to 1")
			log.Warn("This can lead to unexpected behavior")
			SetMaxCPU(1)
		}

		runtime.GOMAXPROCS(_MaxAllowedCPU)

		if options.Called("max-cpu") || options.Called("force-one-cpu") {
			log.Printf("CPU number limited to %d", _MaxAllowedCPU)
		}

		if options.Called("no-singleton") {
			log.Printf("No singleton option set")
		}

		log.Printf("Number of workers set %d", CLIParallelWorkers())

		if options.Called("solexa") {
			SetInputQualityShift(64)
		}

		return options, remaining
	}
}

// LoadTaxonomyOptionSet registers the NCBI taxonomy related options on
// the given option set.
//
// required makes the --taxdump option mandatory; alternative also
// registers the --alternative-names option.
func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternative bool) {
	if required {
		options.StringVar(&__taxdump__, "taxdump", "",
			options.Alias("t"),
			options.Required(),
			options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
	} else {
		options.StringVar(&__taxdump__, "taxdump", "",
			options.Alias("t"),
			options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
	}

	if alternative {
		options.BoolVar(&__alternative_name__, "alternative-names", false,
			options.Alias("a"),
			options.Description("Enable the search on all alternative names and not only scientific names."))
	}
}

// CLIIsDebugMode returns whether the CLI is in debug mode.
//
// The debug mode is activated by the command line option --debug or
// the environment variable OBIDEBUG.
// It can be activated programmatically by the SetDebugOn() function.
func CLIIsDebugMode() bool {
	return _Debug
}

// CLIParallelWorkers returns the number of parallel workers used for
// computing the result.
//
// It is the maximum allowed CPU count (see CLIMaxCPU) multiplied by
// the per-core worker factor (see WorkerPerCore).
func CLIParallelWorkers() int {
	return int(float64(CLIMaxCPU()) * WorkerPerCore())
}

// CLIReadParallelWorkers returns the number of parallel workers used for
// reading files.
//
// When a strict read worker count has been set (SetStrictReadWorker),
// that value is returned; otherwise the count is derived from the CPU
// limit and the read-worker-per-core factor, with a minimum of 1.
func CLIReadParallelWorkers() int {
	if StrictReadWorker() != 0 {
		return StrictReadWorker()
	}

	n := int(float64(CLIMaxCPU()) * ReadWorkerPerCore())
	if n == 0 {
		n = 1
	}
	return n
}

// CLIWriteParallelWorkers returns the number of parallel workers used for
// writing files.
//
// When a strict write worker count has been set (SetStrictWriteWorker),
// that value is returned; otherwise the count is derived from the CPU
// limit and the write-worker-per-core factor, with a minimum of 1.
func CLIWriteParallelWorkers() int {
	if StrictWriteWorker() != 0 {
		return StrictWriteWorker()
	}

	n := int(float64(CLIMaxCPU()) * WriteWorkerPerCore())
	if n == 0 {
		n = 1
	}
	return n
}

// CLIMaxCPU returns the maximum number of CPU cores allowed.
//
// The maximum number of CPU cores is determined by the command line
// option --max-cpu and the environment variable OBIMAXCPU.
func CLIMaxCPU() int {
	return _MaxAllowedCPU
}

// CLIBatchSize returns the expected size of the sequence batches.
//
// In OBITools, sequences are processed in parallel by batches. The
// number of sequences in each batch is determined by the command line
// option --batch-size and the environment variable OBIBATCHSIZE.
func CLIBatchSize() int {
	return _BatchSize
}

// SetDebugOn sets the debug mode on.
func SetDebugOn() {
	_Debug = true
}

// SetDebugOff sets the debug mode off.
func SetDebugOff() {
	_Debug = false
}

// SetWorkerPerCore sets the number of computing workers per CPU core.
func SetWorkerPerCore(n float64) {
	_WorkerPerCore = n
}

// SetReadWorkerPerCore sets the number of file-reading workers per CPU core.
func SetReadWorkerPerCore(n float64) {
	_ReadWorkerPerCore = n
}

// WorkerPerCore returns the number of computing workers per CPU core.
func WorkerPerCore() float64 {
	return _WorkerPerCore
}

// ReadWorkerPerCore returns the number of file-reading workers per CPU core.
func ReadWorkerPerCore() float64 {
	return _ReadWorkerPerCore
}

// WriteWorkerPerCore returns the number of file-writing workers per CPU core.
func WriteWorkerPerCore() float64 {
	return _WriteWorkerPerCore
}

// SetBatchSize sets the size of the sequence batches.
func SetBatchSize(n int) {
	_BatchSize = n
}

// InputQualityShift returns the quality shift value used to decode the
// quality scores in FASTQ files.
//
// It defaults to 33, the correct value for Sanger formatted FASTQ
// files, and is switched to 64 by the --solexa command line option for
// decoding old Solexa formatted FASTQ files. It can also be set
// programmatically with SetInputQualityShift().
func InputQualityShift() byte {
	return _Quality_Shift_Input
}

// OutputQualityShift returns the quality shift value used for FASTQ output.
func OutputQualityShift() byte {
	return _Quality_Shift_Output
}

// SetInputQualityShift sets the quality shift value used to decode FASTQ.
func SetInputQualityShift[T int | byte](n T) {
	_Quality_Shift_Input = byte(n)
}

// SetOutputQualityShift sets the quality shift value used for FASTQ output.
func SetOutputQualityShift[T int | byte](n T) {
	_Quality_Shift_Output = byte(n)
}

// SetMaxCPU sets the maximum number of CPU cores allowed.
func SetMaxCPU(n int) {
	_MaxAllowedCPU = n
}

// SetStrictReadWorker sets a fixed number of workers for reading files.
//
// By default the number of read workers is derived from the allowed CPU
// cores multiplied by the read-worker-per-core factor. Setting a strict
// count decouples it from the number of CPU cores; 0 restores the
// default derivation.
func SetStrictReadWorker(n int) {
	_StrictReadWorker = n
}

// StrictReadWorker returns the fixed number of file-reading workers
// (0 when none has been set).
func StrictReadWorker() int {
	return _StrictReadWorker
}

// SetStrictWriteWorker sets a fixed number of workers for writing files.
//
// By default the number of write workers is derived from the allowed CPU
// cores multiplied by the write-worker-per-core factor. Setting a strict
// count decouples it from the number of CPU cores; 0 restores the
// default derivation.
func SetStrictWriteWorker(n int) {
	_StrictWriteWorker = n
}

// StrictWriteWorker returns the fixed number of file-writing workers
// (0 when none has been set).
func StrictWriteWorker() int {
	return _StrictWriteWorker
}

// ParallelFilesRead returns the number of files to be read in parallel.
//
// When no explicit value has been set (SetParallelFilesRead), it
// defaults to the number of read workers.
func ParallelFilesRead() int {
	if _ParallelFilesRead == 0 {
		return CLIReadParallelWorkers()
	}
	return _ParallelFilesRead
}

// SetParallelFilesRead sets the number of files to be read in parallel.
func SetParallelFilesRead(n int) {
	_ParallelFilesRead = n
}

// CLISelectedNCBITaxDump returns the path of the NCBI taxdump directory
// selected with the --taxdump option ("" when none was given).
func CLISelectedNCBITaxDump() string {
	return __taxdump__
}

// CLIHasSelectedTaxonomy reports whether a taxdump directory was selected.
func CLIHasSelectedTaxonomy() bool {
	return __taxdump__ != ""
}

// CLIAreAlternativeNamesSelected reports whether the search on all
// alternative names (not only scientific names) was enabled with the
// --alternative-names option.
func CLIAreAlternativeNamesSelected() bool {
	return __alternative_name__
}

// CLILoadSelectedTaxonomy returns the default taxonomy, loading it from
// the selected NCBI taxdump directory on first use.
//
// It returns an error when no taxdump directory was selected and no
// default taxonomy is already defined.
func CLILoadSelectedTaxonomy() (*obitax.Taxonomy, error) {
	if obitax.IsDefaultTaxonomyDefined() {
		return obitax.DefaultTaxonomy(), nil
	}

	if CLISelectedNCBITaxDump() != "" {
		taxonomy, err := ncbitaxdump.LoadNCBITaxDump(
			CLISelectedNCBITaxDump(),
			!CLIAreAlternativeNamesSelected(),
		)
		if err != nil {
			return nil, err
		}

		taxonomy.SetAsDefault()
		return taxonomy, nil
	}

	return nil, errors.New("no NCBI taxdump selected using option -t|--taxdump")
}