mirror of
https://github.com/metabarcoding/obitools4.git
synced 2026-03-25 13:30:52 +00:00
Compare commits
6 Commits
push-wpwqv
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f92f285417 | ||
|
|
a786b58ed3 | ||
|
|
a2b26712b2 | ||
|
|
1599abc9ad | ||
|
|
af213ab446 | ||
|
|
a60184c115 |
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -79,7 +79,7 @@ jobs:
|
||||
-w /src \
|
||||
-e VERSION="${VERSION}" \
|
||||
golang:1.26-alpine \
|
||||
sh -c "apk add --no-cache gcc musl-dev zlib-dev make && \
|
||||
sh -c "apk add --no-cache gcc musl-dev zlib-dev zlib-static make && \
|
||||
make LDFLAGS='-linkmode=external -extldflags=-static' obitools"
|
||||
mkdir -p artifacts
|
||||
tar -czf artifacts/obitools4_${VERSION}_${{ matrix.output_name }}.tar.gz -C build .
|
||||
|
||||
@@ -57,34 +57,21 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
|
||||
}
|
||||
|
||||
// Distribute organizes the biosequences from the iterator into batches
|
||||
// based on the provided classifier and batch sizes. It returns an
|
||||
// IDistribute instance that manages the distribution of the sequences.
|
||||
// based on the provided classifier. It returns an IDistribute instance
|
||||
// that manages the distribution of the sequences.
|
||||
//
|
||||
// Parameters:
|
||||
// - class: A pointer to a BioSequenceClassifier used to classify
|
||||
// the biosequences during distribution.
|
||||
// - sizes: Optional integer values specifying the batch size. If
|
||||
// no sizes are provided, a default batch size of 5000 is used.
|
||||
//
|
||||
// Returns:
|
||||
// An IDistribute instance that contains the outputs of the
|
||||
// classified biosequences, a channel for new data notifications,
|
||||
// and the classifier used for distribution. The method operates
|
||||
// asynchronously, processing the sequences in separate goroutines.
|
||||
// It ensures that the outputs are closed and cleaned up once
|
||||
// processing is complete.
|
||||
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
|
||||
batchsize := obidefault.BatchSize()
|
||||
// Batches are flushed when either BatchSizeMax() sequences or BatchMem()
|
||||
// bytes are accumulated per key, mirroring the RebatchBySize strategy.
|
||||
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier) IDistribute {
|
||||
maxCount := obidefault.BatchSizeMax()
|
||||
maxBytes := obidefault.BatchMem()
|
||||
|
||||
outputs := make(map[int]IBioSequence, 100)
|
||||
slices := make(map[int]*obiseq.BioSequenceSlice, 100)
|
||||
bufBytes := make(map[int]int, 100)
|
||||
orders := make(map[int]int, 100)
|
||||
news := make(chan int)
|
||||
|
||||
if len(sizes) > 0 {
|
||||
batchsize = sizes[0]
|
||||
}
|
||||
|
||||
jobDone := sync.WaitGroup{}
|
||||
lock := sync.Mutex{}
|
||||
|
||||
@@ -115,6 +102,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
|
||||
slice = &s
|
||||
slices[key] = slice
|
||||
orders[key] = 0
|
||||
bufBytes[key] = 0
|
||||
|
||||
lock.Lock()
|
||||
outputs[key] = MakeIBioSequence()
|
||||
@@ -123,14 +111,20 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
|
||||
news <- key
|
||||
}
|
||||
|
||||
*slice = append(*slice, s)
|
||||
|
||||
if len(*slice) == batchsize {
|
||||
sz := s.MemorySize()
|
||||
countFull := maxCount > 0 && len(*slice) >= maxCount
|
||||
memFull := maxBytes > 0 && bufBytes[key]+sz > maxBytes && len(*slice) > 0
|
||||
if countFull || memFull {
|
||||
outputs[key].Push(MakeBioSequenceBatch(source, orders[key], *slice))
|
||||
orders[key]++
|
||||
s := obiseq.MakeBioSequenceSlice()
|
||||
slices[key] = &s
|
||||
slice = &s
|
||||
bufBytes[key] = 0
|
||||
}
|
||||
|
||||
*slice = append(*slice, s)
|
||||
bufBytes[key] += sz
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -31,7 +31,8 @@ func obiseqslice2Lua(interpreter *lua.LState,
|
||||
}
|
||||
|
||||
func newObiSeqSlice(luaState *lua.LState) int {
|
||||
seqslice := obiseq.NewBioSequenceSlice()
|
||||
capacity := luaState.OptInt(1, 0)
|
||||
seqslice := obiseq.NewBioSequenceSlice(capacity)
|
||||
luaState.Push(obiseqslice2Lua(luaState, seqslice))
|
||||
return 1
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ package obioptions
|
||||
// Version is automatically updated by the Makefile from version.txt
|
||||
// The patch number (third digit) is incremented on each push to the repository
|
||||
|
||||
var _Version = "Release 4.4.26"
|
||||
var _Version = "Release 4.4.29"
|
||||
|
||||
// Version returns the version of the obitools package.
|
||||
//
|
||||
|
||||
@@ -104,11 +104,11 @@ func SeqToSliceWorker(worker SeqWorker,
|
||||
for _, s := range input {
|
||||
r, err := worker(s)
|
||||
if err == nil {
|
||||
for _, rs := range r {
|
||||
if i == len(output) {
|
||||
output = slices.Grow(output, cap(output))
|
||||
if i+len(r) > cap(output) {
|
||||
output = slices.Grow(output[:i], len(r))
|
||||
output = output[:cap(output)]
|
||||
}
|
||||
for _, rs := range r {
|
||||
output[i] = rs
|
||||
i++
|
||||
}
|
||||
|
||||
@@ -46,8 +46,7 @@ func CLIDistributeSequence(sequences obiiter.IBioSequence) {
|
||||
formater = obiformats.WriteSequencesToFile
|
||||
}
|
||||
|
||||
dispatcher := sequences.Distribute(CLISequenceClassifier(),
|
||||
obidefault.BatchSize())
|
||||
dispatcher := sequences.Distribute(CLISequenceClassifier())
|
||||
|
||||
obiformats.WriterDispatcher(CLIFileNamePattern(),
|
||||
dispatcher, formater, opts...,
|
||||
|
||||
@@ -1 +1 @@
|
||||
4.4.26
|
||||
4.4.29
|
||||
|
||||
Reference in New Issue
Block a user