mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 05:20:52 +00:00
Implement memory-aware batch sizing with --batch-mem CLI option, enabling adaptive batching based on estimated sequence memory footprint. Key changes: - Added _BatchMem and related getters/setters in pkg/obidefault - Implemented RebatchBySize() in pkg/obiter for memory-constrained batching - Added BioSequence.MemorySize() for conservative memory estimation - Integrated batch-mem option in pkg/obioptions with human-readable size parsing (e.g., 128K, 64M, 1G) - Added obiutils.ParseMemSize/FormatMemSize for unit conversion - Enhanced pool GC in pkg/obiseq/pool.go to trigger explicit GC for large slice discards - Updated sequence_reader.go to apply memory-based rebatching when enabled
57 lines
1.3 KiB
Go
57 lines
1.3 KiB
Go
package obidefault
|
|
|
|
// _BatchSize is the package-level setting for the number of sequences per
// batch. It starts at 2000 and is read and written through SetBatchSize,
// BatchSize and BatchSizePtr.
var _BatchSize = 2000
|
|
|
|
// SetBatchSize sets the size of the sequence batches.
|
|
//
|
|
// n - an integer representing the size of the sequence batches.
|
|
func SetBatchSize(n int) {
|
|
_BatchSize = n
|
|
}
|
|
|
|
// CLIBatchSize returns the expected size of the sequence batches.
|
|
//
|
|
// In Obitools, the sequences are processed in parallel by batches.
|
|
// The number of sequence in each batch is determined by the command line option
|
|
// --batch-size and the environment variable OBIBATCHSIZE.
|
|
//
|
|
// No parameters.
|
|
// Returns an integer value.
|
|
func BatchSize() int {
|
|
return _BatchSize
|
|
}
|
|
|
|
func BatchSizePtr() *int {
|
|
return &_BatchSize
|
|
}
|
|
|
|
// Settings for memory-based batching.
var (
	// _BatchMem is the maximum cumulative memory, in bytes, allowed per
	// batch when memory-based batching is requested. Zero (the default)
	// disables memory-based batching, so batching falls back to being
	// count-based.
	_BatchMem int

	// _BatchMemStr is the raw textual form of the --batch-mem option.
	// It is empty by default.
	_BatchMemStr string
)
|
|
|
|
// SetBatchMem sets the memory budget per batch in bytes.
|
|
func SetBatchMem(n int) {
|
|
_BatchMem = n
|
|
}
|
|
|
|
// BatchMem returns the current memory budget per batch in bytes.
|
|
// A value of 0 means memory-based batching is disabled.
|
|
func BatchMem() int {
|
|
return _BatchMem
|
|
}
|
|
|
|
func BatchMemPtr() *int {
|
|
return &_BatchMem
|
|
}
|
|
|
|
// BatchMemStr returns the raw --batch-mem string value as provided on the CLI.
|
|
func BatchMemStr() string {
|
|
return _BatchMemStr
|
|
}
|
|
|
|
func BatchMemStrPtr() *string {
|
|
return &_BatchMemStr
|
|
}
|