mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 13:30:52 +00:00
Add memory-based batching support
Implement memory-aware batch sizing with --batch-mem CLI option, enabling adaptive batching based on estimated sequence memory footprint. Key changes: - Added _BatchMem and related getters/setters in pkg/obidefault - Implemented RebatchBySize() in pkg/obiter for memory-constrained batching - Added BioSequence.MemorySize() for conservative memory estimation - Integrated batch-mem option in pkg/obioptions with human-readable size parsing (e.g., 128K, 64M, 1G) - Added obiutils.ParseMemSize/FormatMemSize for unit conversion - Enhanced pool GC in pkg/obiseq/pool.go to trigger explicit GC for large slice discards - Updated sequence_reader.go to apply memory-based rebatching when enabled
This commit is contained in:
@@ -273,6 +273,28 @@ func (s *BioSequence) Len() int {
|
||||
return len(s.sequence)
|
||||
}
|
||||
|
||||
// MemorySize returns an estimate of the memory footprint of the BioSequence
|
||||
// in bytes. It accounts for the sequence, quality scores, feature data,
|
||||
// annotations, and fixed struct overhead. The estimate is conservative
|
||||
// (cap rather than len for byte slices) so it is suitable for memory-based
|
||||
// batching decisions.
|
||||
func (s *BioSequence) MemorySize() int {
|
||||
if s == nil {
|
||||
return 0
|
||||
}
|
||||
// fixed struct overhead (strings, pointers, mutex pointer)
|
||||
const overhead = 128
|
||||
n := overhead
|
||||
n += cap(s.sequence)
|
||||
n += cap(s.qualities)
|
||||
n += cap(s.feature)
|
||||
n += len(s.id)
|
||||
n += len(s.source)
|
||||
// rough annotation estimate: each key+value pair ~64 bytes on average
|
||||
n += len(s.annotations) * 64
|
||||
return n
|
||||
}
|
||||
|
||||
// HasQualities checks if the BioSequence has sequence qualitiy scores.
|
||||
//
|
||||
// This function does not have any parameters.
|
||||
|
||||
Reference in New Issue
Block a user