// File: obitools4/pkg/obioptions/options.go

package obioptions

import (
	"errors"
	"fmt"
	"net/http"
	_ "net/http/pprof"
	"os"
	"runtime"

	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiformats/ncbitaxdump"
	"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obitax"

	"github.com/DavidGamba/go-getoptions"
	log "github.com/sirupsen/logrus"
)

// Package-level defaults, most of which can be overridden from the
// command line or through environment variables.
var _Debug = false
var _WorkerPerCore = 1.0
var _ReadWorkerPerCore = 0.25
var _WriteWorkerPerCore = 0.25
var _StrictReadWorker = 0
var _StrictWriteWorker = 0
var _ParallelFilesRead = 0
var _MaxAllowedCPU = runtime.NumCPU()
var _BatchSize = 2000
var _Pprof = false
var _PprofMudex = 10
var _PprofGoroutine = 6060
var _Quality_Shift_Input = byte(33)
var _Quality_Shift_Output = byte(33)
var __taxdump__ = ""
var __alternative_name__ = false

// ArgumentParser is the type of the function returned by
// GenerateOptionParser. It parses a command line and returns the parsed
// option set together with the remaining positional arguments.
type ArgumentParser func([]string) (*getoptions.GetOpt, []string)

// GenerateOptionParser builds the command line parser shared by all
// OBITools commands, combining the common options defined here with the
// tool-specific option sets passed as arguments.
func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser {
	options := getoptions.New()
	options.SetMode(getoptions.Bundling)
	options.SetUnknownMode(getoptions.Fail)

	options.Bool("help", false, options.Alias("h", "?"))
	options.Bool("version", false,
		options.Description("Prints the version and exits."))
	options.BoolVar(&_Debug, "debug", false,
		options.GetEnv("OBIDEBUG"),
		options.Description("Enable debug mode, by setting log level to debug."))
	options.BoolVar(&_Pprof, "pprof", false,
		options.Description("Enable pprof server. Look at the log for details."))

	// options.IntVar(&_ParallelWorkers, "workers", _ParallelWorkers,
	// 	options.Alias("w"),
	// 	options.Description("Number of parallel threads computing the result"))

	options.IntVar(&_MaxAllowedCPU, "max-cpu", _MaxAllowedCPU,
		options.GetEnv("OBIMAXCPU"),
		options.Description("Number of parallel threads computing the result"))
	options.Bool("force-one-cpu", false,
		options.Description("Force to use only one CPU core for parallel processing"))
	options.IntVar(&_PprofMudex, "pprof-mutex", _PprofMudex,
		options.GetEnv("OBIPPROFMUTEX"),
		options.Description("Enable profiling of mutex locks."))
	options.IntVar(&_PprofGoroutine, "pprof-goroutine", _PprofGoroutine,
		options.GetEnv("OBIPPROFGOROUTINE"),
		options.Description("Enable the goroutine blocking profile."))
	options.IntVar(&_BatchSize, "batch-size", _BatchSize,
		options.GetEnv("OBIBATCHSIZE"),
		options.Description("Number of sequences per batch for parallel processing"))
	options.Bool("solexa", false,
		options.GetEnv("OBISOLEXA"),
		options.Description("Decodes quality strings according to the Solexa specification."))

	for _, o := range optionset {
		o(options)
	}

	return func(args []string) (*getoptions.GetOpt, []string) {
		remaining, err := options.Parse(args[1:])

		if options.Called("help") {
			fmt.Fprint(os.Stderr, options.Help())
			os.Exit(1)
		}

		if options.Called("version") {
			fmt.Fprintf(os.Stderr, "OBITools %s\n", VersionString())
			os.Exit(0)
		}

		log.SetLevel(log.InfoLevel)
		if options.Called("debug") {
			log.SetLevel(log.DebugLevel)
			log.Debugln("Switched to debug level logging")
		}

		if options.Called("pprof") {
			url := "localhost:6060"
			go http.ListenAndServe(url, nil)
			log.Infof("Starting a pprof server at address %s/debug/pprof", url)
			log.Info("The profile can be followed by running concurrently the command:")
			log.Info("  go tool pprof -http=127.0.0.1:8080 'http://localhost:6060/debug/pprof/profile?seconds=30'")
		}

		if options.Called("pprof-mutex") {
			url := "localhost:6060"
			go http.ListenAndServe(url, nil)
			runtime.SetMutexProfileFraction(_PprofMudex)
			log.Infof("Starting a pprof server at address %s/debug/pprof", url)
			log.Info("The profile can be followed by running concurrently the command:")
			log.Info("  go tool pprof -http=127.0.0.1:8080 'http://localhost:6060/debug/pprof/mutex'")
		}

		if options.Called("pprof-goroutine") {
			url := "localhost:6060"
			go http.ListenAndServe(url, nil)
			runtime.SetBlockProfileRate(_PprofGoroutine)
			log.Infof("Starting a pprof server at address %s/debug/pprof", url)
			log.Info("The profile can be followed by running concurrently the command:")
			log.Info("  go tool pprof -http=127.0.0.1:8080 'http://localhost:6060/debug/pprof/block'")
		}

		if options.Called("taxdump") {
			taxonomy, err := ncbitaxdump.LoadNCBITaxDump(CLISelectedNCBITaxDump(),
				!CLIAreAlternativeNamesSelected())
			if err != nil {
				log.Fatalf("Loading taxonomy error: %v", err)
			}
			taxonomy.SetAsDefault()
		}

		// Handle user errors
		if err != nil {
			fmt.Fprintf(os.Stderr, "ERROR: %s\n\n", err)
			fmt.Fprint(os.Stderr, options.Help(getoptions.HelpSynopsis))
			os.Exit(1)
		}

		// Set up the maximum number of CPU cores usable by the program
		if _MaxAllowedCPU == 1 {
			log.Warn("Limiting the maximum number of CPU cores to 1 is not recommended")
			log.Warn("The number of CPU cores requested has been set to 2")
			SetMaxCPU(2)
		}

		if options.Called("force-one-cpu") {
			log.Warn("Limiting the maximum number of CPU cores to 1 is not recommended")
			log.Warn("The number of CPU cores has been forced to 1")
			log.Warn("This can lead to unexpected behavior")
			SetMaxCPU(1)
		}

		runtime.GOMAXPROCS(_MaxAllowedCPU)
		if options.Called("max-cpu") || options.Called("force-one-cpu") {
			log.Printf("CPU number limited to %d", _MaxAllowedCPU)
		}

		if options.Called("no-singleton") {
			log.Printf("No singleton option set")
		}

		log.Printf("Number of workers set to %d", CLIParallelWorkers())

		// if options.Called("workers") {
		// }

		if options.Called("solexa") {
			SetInputQualityShift(64)
		}

		return options, remaining
	}
}
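
// A minimal usage sketch (hypothetical, outside this package): a command
// line tool would typically build its parser from GenerateOptionParser
// plus one or more tool-specific option sets, then call the returned
// ArgumentParser on os.Args. The option set function and the
// "fasta-output" flag below are illustrative only, not part of this package.
//
//	func main() {
//		parser := obioptions.GenerateOptionParser(
//			func(o *getoptions.GetOpt) {
//				o.Bool("fasta-output", false,
//					o.Description("Writes the result in FASTA format."))
//			},
//		)
//		opts, args := parser(os.Args)
//		_ = opts
//		log.Infof("remaining arguments: %v", args)
//	}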

// LoadTaxonomyOptionSet adds the --taxdump|-t option, and optionally the
// --alternative-names|-a option, to an option set. If required is true,
// the --taxdump option must be provided on the command line.
func LoadTaxonomyOptionSet(options *getoptions.GetOpt, required, alternative bool) {
	if required {
		options.StringVar(&__taxdump__, "taxdump", "",
			options.Alias("t"),
			options.Required(),
			options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
	} else {
		options.StringVar(&__taxdump__, "taxdump", "",
			options.Alias("t"),
			options.Description("Points to the directory containing the NCBI Taxonomy database dump."))
	}

	if alternative {
		options.BoolVar(&__alternative_name__, "alternative-names", false,
			options.Alias("a"),
			options.Description("Enable the search on all alternative names and not only scientific names."))
	}
}
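
// A sketch of how LoadTaxonomyOptionSet composes with GenerateOptionParser
// (hypothetical wiring, shown for illustration only):
//
//	parser := obioptions.GenerateOptionParser(
//		func(o *getoptions.GetOpt) {
//			// Taxonomy is mandatory here, alternative names are allowed.
//			obioptions.LoadTaxonomyOptionSet(o, true, true)
//		},
//	)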

// CLIIsDebugMode returns whether the CLI is in debug mode.
//
// The debug mode is activated by the command line option --debug or
// the environment variable OBIDEBUG.
// It can also be activated programmatically with the SetDebugOn() function.
//
// No parameters.
// Returns a boolean indicating if the CLI is in debug mode.
func CLIIsDebugMode() bool {
	return _Debug
}

// CLIParallelWorkers returns the number of parallel workers used for
// computing the result.
//
// The number of parallel workers is derived from the maximum number of
// CPU cores, set by the command line option --max-cpu or the environment
// variable OBIMAXCPU, multiplied by the variable _WorkerPerCore.
//
// No parameters.
// Returns an integer representing the number of parallel workers.
func CLIParallelWorkers() int {
	return int(float64(CLIMaxCPU()) * float64(WorkerPerCore()))
}

// CLIReadParallelWorkers returns the number of parallel workers used for
// reading files.
//
// Unless a strict number of read workers has been set with
// SetStrictReadWorker(), it is derived from the maximum number of CPU
// cores, set by the command line option --max-cpu or the environment
// variable OBIMAXCPU, multiplied by the variable _ReadWorkerPerCore,
// with a minimum of one worker.
//
// No parameters.
// Returns an integer representing the number of parallel workers.
func CLIReadParallelWorkers() int {
	if StrictReadWorker() == 0 {
		n := int(float64(CLIMaxCPU()) * ReadWorkerPerCore())
		if n == 0 {
			n = 1
		}
		return n
	}

	return StrictReadWorker()
}

// CLIWriteParallelWorkers returns the number of parallel workers used for
// writing files.
//
// Unless a strict number of write workers has been set with
// SetStrictWriteWorker(), it is derived from the maximum number of CPU
// cores, set by the command line option --max-cpu or the environment
// variable OBIMAXCPU, multiplied by the variable _WriteWorkerPerCore,
// with a minimum of one worker.
//
// No parameters.
// Returns an integer representing the number of parallel workers.
func CLIWriteParallelWorkers() int {
	if StrictWriteWorker() == 0 {
		n := int(float64(CLIMaxCPU()) * WriteWorkerPerCore())
		if n == 0 {
			n = 1
		}
		return n
	}

	return StrictWriteWorker()
}
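
// For illustration, assuming the defaults declared above (the CPU count
// is hypothetical): with 8 allowed CPU cores, _WorkerPerCore = 1.0 gives
// 8 compute workers, while _ReadWorkerPerCore = 0.25 and
// _WriteWorkerPerCore = 0.25 give int(8 * 0.25) = 2 read workers and
// 2 write workers. With a single allowed core, int(1 * 0.25) truncates
// to 0, and the minimum of one reader and one writer applies.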

// CLIMaxCPU returns the maximum number of CPU cores allowed.
//
// The maximum number of CPU cores is determined by the command line
// option --max-cpu and the environment variable OBIMAXCPU.
//
// No parameters.
// Returns an integer representing the maximum number of CPU cores allowed.
func CLIMaxCPU() int {
	return _MaxAllowedCPU
}

// CLIBatchSize returns the expected size of the sequence batches.
//
// In OBITools, sequences are processed in parallel by batches.
// The number of sequences in each batch is determined by the command line
// option --batch-size and the environment variable OBIBATCHSIZE.
//
// No parameters.
// Returns an integer value.
func CLIBatchSize() int {
	return _BatchSize
}

// SetDebugOn turns the debug mode on.
func SetDebugOn() {
	_Debug = true
}

// SetDebugOff turns the debug mode off.
func SetDebugOff() {
	_Debug = false
}

// SetWorkerPerCore sets the number of workers per CPU core.
//
// It takes a float64 parameter representing the number of workers
// per CPU core and does not return any value.
func SetWorkerPerCore(n float64) {
	_WorkerPerCore = n
}

// SetReadWorkerPerCore sets the number of workers per CPU core
// dedicated to reading files.
//
// n - a float64 representing the number of read workers per CPU core.
func SetReadWorkerPerCore(n float64) {
	_ReadWorkerPerCore = n
}

// WorkerPerCore returns the number of workers per CPU core.
//
// No parameters.
// Returns a float64 representing the number of workers per CPU core.
func WorkerPerCore() float64 {
	return _WorkerPerCore
}

// ReadWorkerPerCore returns the number of workers per CPU core
// dedicated to reading files.
//
// No parameters.
// Returns a float64 representing the number of read workers per CPU core.
func ReadWorkerPerCore() float64 {
	return _ReadWorkerPerCore
}

// WriteWorkerPerCore returns the number of workers per CPU core
// dedicated to writing files.
//
// No parameters.
// Returns a float64 representing the number of write workers per CPU core.
func WriteWorkerPerCore() float64 {
	return _WriteWorkerPerCore
}

// SetBatchSize sets the size of the sequence batches.
//
// n - an integer representing the size of the sequence batches.
func SetBatchSize(n int) {
	_BatchSize = n
}

// InputQualityShift returns the quality shift value used to decode the
// quality scores of FASTQ input.
//
// It can be set programmatically by the SetInputQualityShift() function.
// The quality shift value defaults to 33, which is the correct value for
// Sanger-formatted FASTQ files. It can be changed to 64 with the command
// line option --solexa, for decoding old Solexa-formatted FASTQ files.
//
// No parameters.
// Returns a byte representing the quality shift value for input.
func InputQualityShift() byte {
	return _Quality_Shift_Input
}

// OutputQualityShift returns the quality shift value used for FASTQ output.
//
// No parameters.
// Returns a byte representing the quality shift value for output.
func OutputQualityShift() byte {
	return _Quality_Shift_Output
}

// SetInputQualityShift sets the quality shift value used for decoding FASTQ.
//
// n - an int or byte representing the quality shift value to be set.
func SetInputQualityShift[T int | byte](n T) {
	_Quality_Shift_Input = byte(n)
}

// SetOutputQualityShift sets the quality shift value used for FASTQ output.
//
// n - an int or byte representing the quality shift value to be set.
func SetOutputQualityShift[T int | byte](n T) {
	_Quality_Shift_Output = byte(n)
}
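
// A small decoding sketch (illustrative only, not the decoder used by the
// obiformats readers): a FASTQ quality character is converted to a numeric
// score by subtracting the input quality shift, so 'I' (ASCII 73) decodes
// to 73 - 33 = 40 with the default Sanger shift.
//
//	func decodeQuality(line string) []byte {
//		shift := obioptions.InputQualityShift()
//		scores := make([]byte, len(line))
//		for i := 0; i < len(line); i++ {
//			scores[i] = line[i] - shift
//		}
//		return scores
//	}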

// SetMaxCPU sets the maximum number of CPU cores allowed.
//
// n - an integer representing the new maximum number of CPU cores.
func SetMaxCPU(n int) {
	_MaxAllowedCPU = n
}

// SetStrictReadWorker sets a fixed number of workers for reading files.
//
// By default, the number of workers dedicated to reading files is the
// number of allowed CPU cores multiplied by the number of read workers
// per core. Setting a strict number of read workers with this function
// decouples the number of read workers from the number of CPU cores.
//
// n - an integer representing the number of workers to be set.
func SetStrictReadWorker(n int) {
	_StrictReadWorker = n
}

// StrictReadWorker returns the fixed number of workers for reading files,
// or 0 if no such number has been set.
//
// No parameters.
// Returns an integer representing the number of workers.
func StrictReadWorker() int {
	return _StrictReadWorker
}

// SetStrictWriteWorker sets a fixed number of workers for writing files.
//
// By default, the number of workers dedicated to writing files is the
// number of allowed CPU cores multiplied by the number of write workers
// per core. Setting a strict number of write workers with this function
// decouples the number of write workers from the number of CPU cores.
//
// n - an integer representing the number of workers to be set.
func SetStrictWriteWorker(n int) {
	_StrictWriteWorker = n
}

// StrictWriteWorker returns the fixed number of workers for writing files,
// or 0 if no such number has been set.
//
// No parameters.
// Returns an integer representing the number of workers.
func StrictWriteWorker() int {
	return _StrictWriteWorker
}

// ParallelFilesRead returns the number of files to be read in parallel.
//
// If it has not been set explicitly with SetParallelFilesRead(), it
// defaults to the number of read workers.
//
// No parameters.
// Returns an integer representing the number of files to be read.
func ParallelFilesRead() int {
	if _ParallelFilesRead == 0 {
		return CLIReadParallelWorkers()
	}

	return _ParallelFilesRead
}
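
// A tuning sketch (hypothetical values): a tool reading many small files
// from slow storage might cap the reader count independently of the CPU
// budget, e.g.
//
//	obioptions.SetStrictReadWorker(4)  // exactly 4 read workers
//	obioptions.SetParallelFilesRead(8) // up to 8 files open at once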

// SetParallelFilesRead sets the number of files to be read in parallel.
//
// n - an integer representing the number of files to be set.
func SetParallelFilesRead(n int) {
	_ParallelFilesRead = n
}

// CLISelectedNCBITaxDump returns the directory of the NCBI taxonomy dump
// selected with the --taxdump|-t option, or an empty string if none was given.
func CLISelectedNCBITaxDump() string {
	return __taxdump__
}

// CLIHasSelectedTaxonomy returns true if a taxonomy dump directory has been
// selected on the command line.
func CLIHasSelectedTaxonomy() bool {
	return __taxdump__ != ""
}

// CLIAreAlternativeNamesSelected returns true if the --alternative-names|-a
// option has been set, enabling searches on all alternative names and not
// only scientific names.
func CLIAreAlternativeNamesSelected() bool {
	return __alternative_name__
}

// CLILoadSelectedTaxonomy loads the NCBI taxonomy dump selected on the
// command line and sets it as the default taxonomy. If a default taxonomy
// is already defined, it is returned as-is.
func CLILoadSelectedTaxonomy() (*obitax.Taxonomy, error) {
	if obitax.IsDefaultTaxonomyDefined() {
		return obitax.DefaultTaxonomy(), nil
	}

	if CLISelectedNCBITaxDump() != "" {
		taxonomy, err := ncbitaxdump.LoadNCBITaxDump(CLISelectedNCBITaxDump(),
			!CLIAreAlternativeNamesSelected())
		if err != nil {
			return nil, err
		}

		taxonomy.SetAsDefault()
		return taxonomy, nil
	}

	return nil, errors.New("no NCBI taxdump selected using option -t|--taxdump")
}
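
// A usage sketch (hypothetical, outside this package): a tool that needs
// the taxonomy would typically call CLILoadSelectedTaxonomy after parsing
// its command line and abort on failure.
//
//	taxonomy, err := obioptions.CLILoadSelectedTaxonomy()
//	if err != nil {
//		log.Fatalf("cannot load the taxonomy: %v", err)
//	}
//	_ = taxonomy // the dump is now also registered as the default taxonomy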