Compare commits

..

1 Commits

Author SHA1 Message Date
Eric Coissac
f519c0ef7f 4.4.27: Static Linux builds, memory-aware batching, and build toolchain upgrades
This release includes significant improvements to resource management, build reliability, and portability.

### Memory-Aware Batching (Backward Compatible)
- Added configurable batch size and memory limits to prevent excessive memory usage during large dataset processing.
- Introduced a new `--batch-mem` command-line option (e.g., `128K`, `64M`, `1G`) to enable adaptive batching based on estimated sequence memory footprint.
- Internal batching logic now flushes a batch when either the size threshold or the memory threshold is exceeded, ensuring predictable behavior.
- Conservative memory estimation and explicit garbage collection after large batch discards improve resource efficiency.

### Linux Build Enhancements
- Enabled static linking for Linux binaries using musl, producing self-contained executables with no external runtime dependencies.
- Refined cross-compilation toolchain to use architecture-specific CGO header paths, improving reliability across target architectures.
- Switched Linux builds to use Docker-based static compilation for consistency and reproducibility.

### Build System & Toolchain Improvements
- Upgraded Go toolchain to 1.26, with updated dependencies including golang.org/x/net v0.38.0.
- Fixed Makefile quoting for LDFLAGS to handle paths containing spaces.
- Enhanced build error handling to display logs before cleanup on failure.
- Improved install script with correct environment variable setup (GOROOT, GOPATH, GOTOOLCHAIN) and added progress indicators for downloads.

Note: All batching changes are backward compatible; the default limits ensure safe processing of large datasets.
2026-03-14 11:43:43 +01:00
7 changed files with 34 additions and 28 deletions

View File

@@ -79,7 +79,7 @@ jobs:
-w /src \
-e VERSION="${VERSION}" \
golang:1.26-alpine \
sh -c "apk add --no-cache gcc musl-dev zlib-dev zlib-static make && \
sh -c "apk add --no-cache gcc musl-dev zlib-dev make && \
make LDFLAGS='-linkmode=external -extldflags=-static' obitools"
mkdir -p artifacts
tar -czf artifacts/obitools4_${VERSION}_${{ matrix.output_name }}.tar.gz -C build .

View File

@@ -57,21 +57,34 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
}
// Distribute organizes the biosequences from the iterator into batches
// based on the provided classifier. It returns an IDistribute instance
// that manages the distribution of the sequences.
// based on the provided classifier and batch sizes. It returns an
// IDistribute instance that manages the distribution of the sequences.
//
// Batches are flushed when either BatchSizeMax() sequences or BatchMem()
// bytes are accumulated per key, mirroring the RebatchBySize strategy.
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) IDistribute {
maxCount := obidefault.BatchSizeMax()
maxBytes := obidefault.BatchMem()
// Parameters:
// - class: A pointer to a BioSequenceClassifier used to classify
// the biosequences during distribution.
// - sizes: Optional batch size; only the first value is used. If
// no sizes are provided, obidefault.BatchSize() is used.
//
// Returns:
// An IDistribute instance that contains the outputs of the
// classified biosequences, a channel for new data notifications,
// and the classifier used for distribution. The method operates
// asynchronously, processing the sequences in separate goroutines.
// It ensures that the outputs are closed and cleaned up once
// processing is complete.
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
batchsize := obidefault.BatchSize()
outputs := make(map[int]IBioSequence, 100)
slices := make(map[int]*obiseq.BioSequenceSlice, 100)
bufBytes := make(map[int]int, 100)
orders := make(map[int]int, 100)
news := make(chan int)
if len(sizes) > 0 {
batchsize = sizes[0]
}
jobDone := sync.WaitGroup{}
lock := sync.Mutex{}
@@ -102,7 +115,6 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) IDi
slice = &s
slices[key] = slice
orders[key] = 0
bufBytes[key] = 0
lock.Lock()
outputs[key] = MakeIBioSequence()
@@ -111,20 +123,14 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) IDi
news <- key
}
sz := s.MemorySize()
countFull := maxCount > 0 && len(*slice) >= maxCount
memFull := maxBytes > 0 && bufBytes[key]+sz > maxBytes && len(*slice) > 0
if countFull || memFull {
*slice = append(*slice, s)
if len(*slice) == batchsize {
outputs[key].Push(MakeBioSequenceBatch(source, orders[key], *slice))
orders[key]++
s := obiseq.MakeBioSequenceSlice()
slices[key] = &s
slice = &s
bufBytes[key] = 0
}
*slice = append(*slice, s)
bufBytes[key] += sz
}
}

View File

@@ -31,8 +31,7 @@ func obiseqslice2Lua(interpreter *lua.LState,
}
func newObiSeqSlice(luaState *lua.LState) int {
capacity := luaState.OptInt(1, 0)
seqslice := obiseq.NewBioSequenceSlice(capacity)
seqslice := obiseq.NewBioSequenceSlice()
luaState.Push(obiseqslice2Lua(luaState, seqslice))
return 1
}

View File

@@ -3,7 +3,7 @@ package obioptions
// Version is automatically updated by the Makefile from version.txt.
// The patch number (third component) is incremented on each push to the repository.
var _Version = "Release 4.4.29"
var _Version = "Release 4.4.27"
// Version returns the version of the obitools package.
//

View File

@@ -104,11 +104,11 @@ func SeqToSliceWorker(worker SeqWorker,
for _, s := range input {
r, err := worker(s)
if err == nil {
if i+len(r) > cap(output) {
output = slices.Grow(output[:i], len(r))
output = output[:cap(output)]
}
for _, rs := range r {
if i == len(output) {
output = slices.Grow(output, cap(output))
output = output[:cap(output)]
}
output[i] = rs
i++
}

View File

@@ -46,7 +46,8 @@ func CLIDistributeSequence(sequences obiiter.IBioSequence) {
formater = obiformats.WriteSequencesToFile
}
dispatcher := sequences.Distribute(CLISequenceClassifier())
dispatcher := sequences.Distribute(CLISequenceClassifier(),
obidefault.BatchSize())
obiformats.WriterDispatcher(CLIFileNamePattern(),
dispatcher, formater, opts...,

View File

@@ -1 +1 @@
4.4.29
4.4.27