2022-02-14 00:01:01 +01:00
|
|
|
package obiseq
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"sync"
|
|
|
|
)
|
|
|
|
|
|
|
|
type IDistribute struct {
|
2022-02-18 22:53:09 +01:00
|
|
|
outputs map[int]IBioSequenceBatch
|
|
|
|
news chan int
|
2022-02-14 00:01:01 +01:00
|
|
|
lock *sync.Mutex
|
|
|
|
}
|
|
|
|
|
2022-02-18 22:53:09 +01:00
|
|
|
func (dist *IDistribute) Outputs(key int) (IBioSequenceBatch, error) {
|
2022-02-14 00:01:01 +01:00
|
|
|
dist.lock.Lock()
|
|
|
|
iter, ok := dist.outputs[key]
|
|
|
|
dist.lock.Unlock()
|
|
|
|
|
|
|
|
if !ok {
|
2022-02-18 22:53:09 +01:00
|
|
|
return NilIBioSequenceBatch, fmt.Errorf("code %d unknown", key)
|
2022-02-14 00:01:01 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return iter, nil
|
|
|
|
}
|
|
|
|
|
2022-02-18 22:53:09 +01:00
|
|
|
func (dist *IDistribute) News() chan int {
|
2022-02-14 00:01:01 +01:00
|
|
|
return dist.news
|
|
|
|
}
|
|
|
|
|
2022-02-18 22:53:09 +01:00
|
|
|
func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes ...int) IDistribute {
|
2022-02-14 00:01:01 +01:00
|
|
|
batchsize := 5000
|
|
|
|
buffsize := 2
|
|
|
|
|
2022-02-18 22:53:09 +01:00
|
|
|
outputs := make(map[int]IBioSequenceBatch, 100)
|
|
|
|
slices := make(map[int]*BioSequenceSlice, 100)
|
|
|
|
orders := make(map[int]int, 100)
|
|
|
|
news := make(chan int)
|
2022-02-14 00:01:01 +01:00
|
|
|
|
|
|
|
if len(sizes) > 0 {
|
|
|
|
batchsize = sizes[0]
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(sizes) > 1 {
|
|
|
|
buffsize = sizes[1]
|
|
|
|
}
|
|
|
|
|
|
|
|
jobDone := sync.WaitGroup{}
|
|
|
|
lock := sync.Mutex{}
|
|
|
|
|
|
|
|
jobDone.Add(1)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
jobDone.Wait()
|
|
|
|
close(news)
|
|
|
|
for _, i := range outputs {
|
|
|
|
close(i.Channel())
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
iterator = iterator.SortBatches()
|
|
|
|
|
|
|
|
for iterator.Next() {
|
|
|
|
seqs := iterator.Get()
|
2022-02-15 00:47:02 +01:00
|
|
|
for _, s := range seqs.Slice() {
|
2022-02-18 22:53:09 +01:00
|
|
|
key := class.Code(s)
|
2022-02-14 00:01:01 +01:00
|
|
|
slice, ok := slices[key]
|
|
|
|
|
|
|
|
if !ok {
|
2022-02-18 22:53:09 +01:00
|
|
|
s := GetBioSequenceSlice()
|
2022-02-14 00:01:01 +01:00
|
|
|
slice = &s
|
|
|
|
slices[key] = slice
|
|
|
|
orders[key] = 0
|
|
|
|
|
|
|
|
lock.Lock()
|
2022-02-15 00:47:02 +01:00
|
|
|
outputs[key] = MakeIBioSequenceBatch(buffsize)
|
2022-02-14 00:01:01 +01:00
|
|
|
lock.Unlock()
|
|
|
|
|
|
|
|
news <- key
|
|
|
|
}
|
|
|
|
|
|
|
|
*slice = append(*slice, s)
|
2022-02-18 10:00:42 +01:00
|
|
|
|
2022-02-14 00:01:01 +01:00
|
|
|
if len(*slice) == batchsize {
|
|
|
|
outputs[key].Channel() <- MakeBioSequenceBatch(orders[key], *slice...)
|
|
|
|
orders[key]++
|
2022-02-18 22:53:09 +01:00
|
|
|
s := GetBioSequenceSlice()
|
2022-02-14 00:01:01 +01:00
|
|
|
slices[key] = &s
|
|
|
|
}
|
|
|
|
}
|
2022-02-18 22:53:09 +01:00
|
|
|
seqs.Recycle()
|
2022-02-14 00:01:01 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
for key, slice := range slices {
|
|
|
|
if len(*slice) > 0 {
|
|
|
|
outputs[key].Channel() <- MakeBioSequenceBatch(orders[key], *slice...)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
jobDone.Done()
|
|
|
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
return IDistribute{
|
|
|
|
outputs,
|
|
|
|
news,
|
|
|
|
&lock}
|
|
|
|
|
|
|
|
}
|