2022-02-24 07:08:40 +01:00
|
|
|
package obiiter
|
2022-01-13 23:27:39 +01:00
|
|
|
|
|
|
|
import (
|
2022-02-24 12:14:52 +01:00
|
|
|
log "github.com/sirupsen/logrus"
|
2022-02-24 07:08:40 +01:00
|
|
|
|
|
|
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
2022-01-13 23:27:39 +01:00
|
|
|
)
|
|
|
|
|
2022-02-24 07:08:40 +01:00
|
|
|
// SeqAnnotator is the signature of a function that receives a pointer to a
// BioSequence and returns nothing.
type SeqAnnotator func(*obiseq.BioSequence)
|
2022-01-13 23:27:39 +01:00
|
|
|
|
2022-02-24 07:08:40 +01:00
|
|
|
// SeqWorker is the signature of a function that receives a pointer to a
// BioSequence and returns a pointer to a BioSequence.
type SeqWorker func(*obiseq.BioSequence) *obiseq.BioSequence
|
|
|
|
// SeqSliceWorker is the signature of a function that receives a
// BioSequenceSlice and returns a BioSequenceSlice.
type SeqSliceWorker func(obiseq.BioSequenceSlice) obiseq.BioSequenceSlice
|
2022-01-13 23:27:39 +01:00
|
|
|
|
|
|
|
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
2022-02-24 07:08:40 +01:00
|
|
|
f := func(seq *obiseq.BioSequence) *obiseq.BioSequence {
|
2022-01-13 23:27:39 +01:00
|
|
|
function(seq)
|
|
|
|
return seq
|
|
|
|
}
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
2022-08-31 20:38:03 +02:00
|
|
|
// That method allows for applying a SeqWorker function on every sequences.
|
|
|
|
//
|
|
|
|
// Sequences are provided by the iterator and modified sequences are pushed
|
|
|
|
// on the returned IBioSequenceBatch.
|
|
|
|
//
|
|
|
|
// Moreover the SeqWorker function, the method accepted two optional integer parameters.
|
|
|
|
// - First is allowing to indicates the number of workers running in parallele (default 4)
|
|
|
|
// - The second the size of the chanel buffer. By default set to the same value than the input buffer.
|
2022-01-13 23:27:39 +01:00
|
|
|
func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch {
|
|
|
|
nworkers := 4
|
|
|
|
buffsize := iterator.BufferSize()
|
|
|
|
|
|
|
|
if len(sizes) > 0 {
|
|
|
|
nworkers = sizes[0]
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(sizes) > 1 {
|
|
|
|
buffsize = sizes[1]
|
|
|
|
}
|
|
|
|
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter := MakeIBioSequenceBatch(buffsize)
|
2022-01-13 23:27:39 +01:00
|
|
|
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.Add(nworkers)
|
2022-01-13 23:27:39 +01:00
|
|
|
|
|
|
|
go func() {
|
2022-02-21 19:00:23 +01:00
|
|
|
newIter.WaitAndClose()
|
2022-02-24 12:14:52 +01:00
|
|
|
log.Debugln("End of the batch workers")
|
2022-01-13 23:27:39 +01:00
|
|
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
f := func(iterator IBioSequenceBatch) {
|
|
|
|
for iterator.Next() {
|
|
|
|
batch := iterator.Get()
|
|
|
|
for i, seq := range batch.slice {
|
|
|
|
batch.slice[i] = worker(seq)
|
|
|
|
}
|
2022-02-21 19:00:23 +01:00
|
|
|
newIter.Push(batch)
|
2022-01-13 23:27:39 +01:00
|
|
|
}
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.Done()
|
2022-01-13 23:27:39 +01:00
|
|
|
}
|
|
|
|
|
2022-02-24 12:14:52 +01:00
|
|
|
log.Debugln("Start of the batch workers")
|
2022-01-14 23:11:36 +01:00
|
|
|
for i := 0; i < nworkers-1; i++ {
|
2022-01-13 23:27:39 +01:00
|
|
|
go f(iterator.Split())
|
|
|
|
}
|
2022-01-14 23:11:36 +01:00
|
|
|
go f(iterator)
|
2022-01-13 23:27:39 +01:00
|
|
|
|
2022-01-14 17:32:12 +01:00
|
|
|
return newIter
|
2022-01-13 23:27:39 +01:00
|
|
|
}
|
|
|
|
|
2022-08-31 20:38:03 +02:00
|
|
|
func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
|
|
|
|
worker SeqWorker, sizes ...int) IBioSequenceBatch {
|
|
|
|
nworkers := 4
|
|
|
|
buffsize := iterator.BufferSize()
|
|
|
|
|
|
|
|
if len(sizes) > 0 {
|
|
|
|
nworkers = sizes[0]
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(sizes) > 1 {
|
|
|
|
buffsize = sizes[1]
|
|
|
|
}
|
|
|
|
|
|
|
|
newIter := MakeIBioSequenceBatch(buffsize)
|
|
|
|
|
|
|
|
newIter.Add(nworkers)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
newIter.WaitAndClose()
|
|
|
|
log.Debugln("End of the batch workers")
|
|
|
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
f := func(iterator IBioSequenceBatch) {
|
|
|
|
for iterator.Next() {
|
|
|
|
batch := iterator.Get()
|
|
|
|
for i, seq := range batch.slice {
|
|
|
|
if predicate(batch.slice[i]) {
|
|
|
|
batch.slice[i] = worker(seq)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
newIter.Push(batch)
|
|
|
|
}
|
|
|
|
newIter.Done()
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Debugln("Start of the batch workers")
|
|
|
|
for i := 0; i < nworkers-1; i++ {
|
|
|
|
go f(iterator.Split())
|
|
|
|
}
|
|
|
|
go f(iterator)
|
|
|
|
|
|
|
|
return newIter
|
|
|
|
}
|
|
|
|
|
2022-01-13 23:27:39 +01:00
|
|
|
func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequenceBatch {
|
|
|
|
nworkers := 4
|
|
|
|
buffsize := iterator.BufferSize()
|
|
|
|
|
|
|
|
if len(sizes) > 0 {
|
|
|
|
nworkers = sizes[0]
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(sizes) > 1 {
|
|
|
|
buffsize = sizes[1]
|
|
|
|
}
|
|
|
|
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter := MakeIBioSequenceBatch(buffsize)
|
2022-01-13 23:27:39 +01:00
|
|
|
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.Add(nworkers)
|
2022-01-13 23:27:39 +01:00
|
|
|
|
|
|
|
go func() {
|
2022-02-21 19:00:23 +01:00
|
|
|
newIter.WaitAndClose()
|
2022-01-13 23:27:39 +01:00
|
|
|
log.Println("End of the batch slice workers")
|
|
|
|
}()
|
|
|
|
|
|
|
|
f := func(iterator IBioSequenceBatch) {
|
|
|
|
for iterator.Next() {
|
|
|
|
batch := iterator.Get()
|
|
|
|
batch.slice = worker(batch.slice)
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.pointer.channel <- batch
|
2022-01-13 23:27:39 +01:00
|
|
|
}
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.Done()
|
2022-01-13 23:27:39 +01:00
|
|
|
}
|
|
|
|
|
2022-08-21 13:41:58 +02:00
|
|
|
log.Printf("Start of the batch slice workers on %d workers (buffer : %d)\n", nworkers, buffsize)
|
2022-01-15 19:10:16 +01:00
|
|
|
for i := 0; i < nworkers-1; i++ {
|
2022-01-13 23:27:39 +01:00
|
|
|
go f(iterator.Split())
|
|
|
|
}
|
2022-01-14 23:11:36 +01:00
|
|
|
go f(iterator)
|
2022-01-13 23:27:39 +01:00
|
|
|
|
2022-01-14 17:32:12 +01:00
|
|
|
return newIter
|
2022-01-13 23:27:39 +01:00
|
|
|
}
|
2022-02-24 07:08:40 +01:00
|
|
|
|
|
|
|
func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
|
|
|
|
buffsize := iterator.BufferSize()
|
|
|
|
|
|
|
|
if len(sizes) > 0 {
|
|
|
|
buffsize = sizes[0]
|
|
|
|
}
|
|
|
|
|
|
|
|
newIter := MakeIBioSequence(buffsize)
|
|
|
|
|
|
|
|
newIter.Add(1)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
newIter.Wait()
|
|
|
|
close(newIter.pointer.channel)
|
|
|
|
}()
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
for iterator.Next() {
|
|
|
|
seq := iterator.Get()
|
|
|
|
seq = worker(seq)
|
|
|
|
newIter.pointer.channel <- seq
|
|
|
|
}
|
|
|
|
newIter.Done()
|
|
|
|
}()
|
|
|
|
|
|
|
|
return newIter
|
|
|
|
}
|
|
|
|
|
|
|
|
func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
|
|
|
|
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
2022-08-21 13:41:58 +02:00
|
|
|
return iterator.MakeIWorker(worker, sizes...)
|
2022-02-24 07:08:40 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|
|
|
|
|
|
|
|
func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
|
|
|
|
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
2022-08-21 13:41:58 +02:00
|
|
|
return iterator.MakeISliceWorker(worker, sizes...)
|
2022-02-24 07:08:40 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return f
|
|
|
|
}
|