mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 13:30:52 +00:00
Add memory-based batching support
Implement memory-aware batch sizing with the --batch-mem CLI option, enabling adaptive batching based on the estimated memory footprint of the sequences. Key changes:
- Added _BatchMem and related getters/setters in pkg/obidefault
- Implemented RebatchBySize() in pkg/obiter for memory-constrained batching
- Added BioSequence.MemorySize() for conservative memory estimation
- Integrated the batch-mem option in pkg/obioptions with human-readable size parsing (e.g., 128K, 64M, 1G)
- Added obiutils.ParseMemSize/FormatMemSize for unit conversion
- Enhanced pool GC in pkg/obiseq/pool.go to trigger an explicit GC for large slice discards
- Updated sequence_reader.go to apply memory-based rebatching when enabled
This commit is contained in:
@@ -1,13 +1,20 @@
|
||||
package obiseq
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiutils"
|
||||
)
|
||||
|
||||
// _LargeSliceThreshold is the capacity, in bytes, from which a recycled
// byte slice is treated as "large" (100 KiB). RecycleSlice adds the capacity
// of every slice at or above this size to the discarded-bytes counter that
// drives explicit garbage collection; smaller non-pooled slices are simply
// left to the regular GC.
const _LargeSliceThreshold = 100 << 10
|
||||
// _GCBytesBudget is the volume of discarded large slices, in bytes, between
// two explicit garbage collections (256 MiB). RecycleSlice calls runtime.GC
// each time the running total of discarded bytes crosses a multiple of this
// budget.
const _GCBytesBudget int64 = 256 << 20
|
||||
|
||||
// _largeSliceDiscardedBytes is the running total, in bytes of capacity, of
// the large slices dropped by RecycleSlice. It starts at zero and is updated
// atomically.
var _largeSliceDiscardedBytes atomic.Int64
|
||||
|
||||
var _BioSequenceByteSlicePool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
bs := make([]byte, 0, 300)
|
||||
@@ -34,6 +41,13 @@ func RecycleSlice(s *[]byte) {
|
||||
}
|
||||
if cap(*s) <= 1024 {
|
||||
_BioSequenceByteSlicePool.Put(s)
|
||||
} else if cap(*s) >= _LargeSliceThreshold {
|
||||
n := int64(cap(*s))
|
||||
*s = nil
|
||||
prev := _largeSliceDiscardedBytes.Load()
|
||||
if _largeSliceDiscardedBytes.Add(n)/_GCBytesBudget > prev/_GCBytesBudget {
|
||||
runtime.GC()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user