Files
obitools4/pkg/obiiter/distribute.go

115 lines
2.0 KiB
Go
Raw Normal View History

package obiiter
import (
"fmt"
"sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
type IDistribute struct {
2023-01-22 22:04:17 +01:00
outputs map[int]IBioSequence
news chan int
classifier *obiseq.BioSequenceClassifier
lock *sync.Mutex
}
2023-01-22 22:04:17 +01:00
func (dist *IDistribute) Outputs(key int) (IBioSequence, error) {
dist.lock.Lock()
iter, ok := dist.outputs[key]
dist.lock.Unlock()
if !ok {
2023-01-22 22:04:17 +01:00
return NilIBioSequence, fmt.Errorf("code %d unknown", key)
}
return iter, nil
}
2022-02-18 22:53:09 +01:00
// News returns the channel on which the distributor announces classification
// codes.
func (dist *IDistribute) News() chan int {
	return dist.news
}
// Classifier returns the sequence classifier attached to the distributor.
func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
	return dist.classifier
}
2023-01-22 22:04:17 +01:00
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
batchsize := 5000
2023-01-22 22:04:17 +01:00
outputs := make(map[int]IBioSequence, 100)
slices := make(map[int]*obiseq.BioSequenceSlice, 100)
2022-02-18 22:53:09 +01:00
orders := make(map[int]int, 100)
news := make(chan int)
if len(sizes) > 0 {
batchsize = sizes[0]
}
jobDone := sync.WaitGroup{}
lock := sync.Mutex{}
jobDone.Add(1)
go func() {
jobDone.Wait()
close(news)
for _, i := range outputs {
i.Close()
}
}()
go func() {
iterator = iterator.SortBatches()
for iterator.Next() {
seqs := iterator.Get()
for _, s := range seqs.Slice() {
2022-02-18 22:53:09 +01:00
key := class.Code(s)
slice, ok := slices[key]
if !ok {
s := obiseq.MakeBioSequenceSlice()
slice = &s
slices[key] = slice
orders[key] = 0
lock.Lock()
outputs[key] = MakeIBioSequence()
lock.Unlock()
news <- key
}
*slice = append(*slice, s)
2022-02-18 10:00:42 +01:00
if len(*slice) == batchsize {
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
orders[key]++
s := obiseq.MakeBioSequenceSlice()
slices[key] = &s
}
}
seqs.Recycle(false)
}
for key, slice := range slices {
if len(*slice) > 0 {
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
}
}
jobDone.Done()
}()
return IDistribute{
outputs,
news,
class,
&lock}
}