Compare commits

..

1 Commits

Author SHA1 Message Date
Eric Coissac
3cd4944fd8 4.4.25: Static Linux Builds, Memory-Aware Batching, and Build Stability
### Static Linux Builds
- Added the `CGO_CFLAGS` environment variable to the release workflow for consistent static linking on Linux, ensuring portable, self-contained executables.
- Updated `go.work.sum` with the new dependency `golang.org/x/net v0.38.0`.
- Removed the obsolete log archive file from the distribution.

### Memory-Aware Batching (Introduced in 4.4.23, now stable)
- Users can now control batching behavior using `--batch-mem` to specify memory limits (e.g., `128K`, `64M`, `1G`), enabling adaptive batching based on sequence data size.
- Batching now respects both byte and record count limits, flushing batches when either threshold is exceeded.
- Conservative memory estimation for sequences (`BioSequence.MemorySize()`) and explicit garbage collection after large batch discards improve resource predictability.
- Default constraints remain non-breaking: a minimum of 1 and a maximum of 2000 records per batch, with a default memory limit of 128 MB.

### Build System Improvements
- Updated Go toolchain to 1.26.1 and bumped key dependencies for security and performance.
- Fixed Makefile quoting for `LDFLAGS` to safely handle paths containing spaces.
- Enhanced build failure handling: error logs are now displayed before cleanup, aiding diagnostics.
- The install script now correctly configures `GOROOT`, `GOPATH`, and `GOTOOLCHAIN`, creates the GOPATH directory, and shows a progress bar during downloads.
2026-03-13 19:24:26 +01:00
8 changed files with 50 additions and 47 deletions

View File

@@ -16,7 +16,7 @@ jobs:
- name: Setup Go - name: Setup Go
uses: actions/setup-go@v5 uses: actions/setup-go@v5
with: with:
go-version: "1.26" go-version: "1.23"
- name: Checkout obitools4 project - name: Checkout obitools4 project
uses: actions/checkout@v4 uses: actions/checkout@v4
- name: Run tests - name: Run tests
@@ -54,7 +54,7 @@ jobs:
- name: Setup Go - name: Setup Go
uses: actions/setup-go@v5 uses: actions/setup-go@v5
with: with:
go-version: "1.26" go-version: "1.23"
- name: Extract version from tag - name: Extract version from tag
id: get_version id: get_version
@@ -62,6 +62,12 @@ jobs:
TAG=${GITHUB_REF#refs/tags/Release_} TAG=${GITHUB_REF#refs/tags/Release_}
echo "version=$TAG" >> $GITHUB_OUTPUT echo "version=$TAG" >> $GITHUB_OUTPUT
- name: Install build tools (Linux)
if: runner.os == 'Linux'
run: |
sudo apt-get update -q
sudo apt-get install -y musl-tools zlib1g-dev
- name: Install build tools (macOS) - name: Install build tools (macOS)
if: runner.os == 'macOS' if: runner.os == 'macOS'
run: | run: |
@@ -69,30 +75,21 @@ jobs:
xcode-select --install 2>/dev/null || true xcode-select --install 2>/dev/null || true
xcode-select -p xcode-select -p
- name: Build binaries (Linux) - name: Build binaries
if: runner.os == 'Linux'
env:
VERSION: ${{ steps.get_version.outputs.version }}
run: |
docker run --rm \
-v "$(pwd):/src" \
-w /src \
-e VERSION="${VERSION}" \
golang:1.26-alpine \
sh -c "apk add --no-cache gcc musl-dev zlib-dev zlib-static make && \
make LDFLAGS='-linkmode=external -extldflags=-static' obitools"
mkdir -p artifacts
tar -czf artifacts/obitools4_${VERSION}_${{ matrix.output_name }}.tar.gz -C build .
- name: Build binaries (macOS)
if: runner.os == 'macOS'
env: env:
GOOS: ${{ matrix.goos }} GOOS: ${{ matrix.goos }}
GOARCH: ${{ matrix.goarch }} GOARCH: ${{ matrix.goarch }}
VERSION: ${{ steps.get_version.outputs.version }} VERSION: ${{ steps.get_version.outputs.version }}
CC: ${{ matrix.goos == 'linux' && 'musl-gcc' || '' }}
CGO_CFLAGS: ${{ matrix.goos == 'linux' && '-I/usr/include' || '' }}
run: | run: |
make obitools if [ "$GOOS" = "linux" ]; then
make LDFLAGS='-linkmode=external -extldflags=-static' obitools
else
make obitools
fi
mkdir -p artifacts mkdir -p artifacts
# Create a single tar.gz with all binaries for this platform
tar -czf artifacts/obitools4_${VERSION}_${{ matrix.output_name }}.tar.gz -C build . tar -czf artifacts/obitools4_${VERSION}_${{ matrix.output_name }}.tar.gz -C build .
- name: Upload artifacts - name: Upload artifacts

Binary file not shown.

View File

@@ -57,21 +57,34 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
} }
// Distribute organizes the biosequences from the iterator into batches // Distribute organizes the biosequences from the iterator into batches
// based on the provided classifier. It returns an IDistribute instance // based on the provided classifier and batch sizes. It returns an
// that manages the distribution of the sequences. // IDistribute instance that manages the distribution of the sequences.
// //
// Batches are flushed when either BatchSizeMax() sequences or BatchMem() // Parameters:
// bytes are accumulated per key, mirroring the RebatchBySize strategy. // - class: A pointer to a BioSequenceClassifier used to classify
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) IDistribute { // the biosequences during distribution.
maxCount := obidefault.BatchSizeMax() // - sizes: Optional integer values specifying the batch size. If
maxBytes := obidefault.BatchMem() // no sizes are provided, a default batch size of 5000 is used.
//
// Returns:
// An IDistribute instance that contains the outputs of the
// classified biosequences, a channel for new data notifications,
// and the classifier used for distribution. The method operates
// asynchronously, processing the sequences in separate goroutines.
// It ensures that the outputs are closed and cleaned up once
// processing is complete.
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
batchsize := obidefault.BatchSize()
outputs := make(map[int]IBioSequence, 100) outputs := make(map[int]IBioSequence, 100)
slices := make(map[int]*obiseq.BioSequenceSlice, 100) slices := make(map[int]*obiseq.BioSequenceSlice, 100)
bufBytes := make(map[int]int, 100)
orders := make(map[int]int, 100) orders := make(map[int]int, 100)
news := make(chan int) news := make(chan int)
if len(sizes) > 0 {
batchsize = sizes[0]
}
jobDone := sync.WaitGroup{} jobDone := sync.WaitGroup{}
lock := sync.Mutex{} lock := sync.Mutex{}
@@ -102,7 +115,6 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) IDi
slice = &s slice = &s
slices[key] = slice slices[key] = slice
orders[key] = 0 orders[key] = 0
bufBytes[key] = 0
lock.Lock() lock.Lock()
outputs[key] = MakeIBioSequence() outputs[key] = MakeIBioSequence()
@@ -111,20 +123,14 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) IDi
news <- key news <- key
} }
sz := s.MemorySize() *slice = append(*slice, s)
countFull := maxCount > 0 && len(*slice) >= maxCount
memFull := maxBytes > 0 && bufBytes[key]+sz > maxBytes && len(*slice) > 0 if len(*slice) == batchsize {
if countFull || memFull {
outputs[key].Push(MakeBioSequenceBatch(source, orders[key], *slice)) outputs[key].Push(MakeBioSequenceBatch(source, orders[key], *slice))
orders[key]++ orders[key]++
s := obiseq.MakeBioSequenceSlice() s := obiseq.MakeBioSequenceSlice()
slices[key] = &s slices[key] = &s
slice = &s
bufBytes[key] = 0
} }
*slice = append(*slice, s)
bufBytes[key] += sz
} }
} }

View File

@@ -31,8 +31,7 @@ func obiseqslice2Lua(interpreter *lua.LState,
} }
func newObiSeqSlice(luaState *lua.LState) int { func newObiSeqSlice(luaState *lua.LState) int {
capacity := luaState.OptInt(1, 0) seqslice := obiseq.NewBioSequenceSlice()
seqslice := obiseq.NewBioSequenceSlice(capacity)
luaState.Push(obiseqslice2Lua(luaState, seqslice)) luaState.Push(obiseqslice2Lua(luaState, seqslice))
return 1 return 1
} }

View File

@@ -3,7 +3,7 @@ package obioptions
// Version is automatically updated by the Makefile from version.txt // Version is automatically updated by the Makefile from version.txt
// The patch number (third digit) is incremented on each push to the repository // The patch number (third digit) is incremented on each push to the repository
var _Version = "Release 4.4.29" var _Version = "Release 4.4.25"
// Version returns the version of the obitools package. // Version returns the version of the obitools package.
// //

View File

@@ -104,11 +104,11 @@ func SeqToSliceWorker(worker SeqWorker,
for _, s := range input { for _, s := range input {
r, err := worker(s) r, err := worker(s)
if err == nil { if err == nil {
if i+len(r) > cap(output) {
output = slices.Grow(output[:i], len(r))
output = output[:cap(output)]
}
for _, rs := range r { for _, rs := range r {
if i == len(output) {
output = slices.Grow(output, cap(output))
output = output[:cap(output)]
}
output[i] = rs output[i] = rs
i++ i++
} }

View File

@@ -46,7 +46,8 @@ func CLIDistributeSequence(sequences obiiter.IBioSequence) {
formater = obiformats.WriteSequencesToFile formater = obiformats.WriteSequencesToFile
} }
dispatcher := sequences.Distribute(CLISequenceClassifier()) dispatcher := sequences.Distribute(CLISequenceClassifier(),
obidefault.BatchSize())
obiformats.WriterDispatcher(CLIFileNamePattern(), obiformats.WriterDispatcher(CLIFileNamePattern(),
dispatcher, formater, opts..., dispatcher, formater, opts...,

View File

@@ -1 +1 @@
4.4.29 4.4.25