2022-02-24 07:08:40 +01:00
package obiiter
2022-01-13 23:27:39 +01:00
import (
2022-02-24 12:14:52 +01:00
log "github.com/sirupsen/logrus"
2022-02-24 07:08:40 +01:00
2023-11-29 12:14:37 +01:00
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obioptions"
"git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
2022-01-13 23:27:39 +01:00
)
2022-08-31 20:38:03 +02:00
// That method allows for applying a SeqWorker function on every sequences.
//
// Sequences are provided by the iterator and modified sequences are pushed
// on the returned IBioSequenceBatch.
//
// Moreover the SeqWorker function, the method accepted two optional integer parameters.
// - First is allowing to indicates the number of workers running in parallele (default 4)
// - The second the size of the chanel buffer. By default set to the same value than the input buffer.
2024-03-02 16:03:46 -04:00
func ( iterator IBioSequence ) MakeIWorker ( worker obiseq . SeqWorker ,
breakOnError bool ,
sizes ... int ) IBioSequence {
2023-08-25 14:36:38 +02:00
nworkers := obioptions . CLIParallelWorkers ( )
2022-01-13 23:27:39 +01:00
if len ( sizes ) > 0 {
nworkers = sizes [ 0 ]
}
2024-03-03 11:16:24 -04:00
sw := obiseq . SeqToSliceWorker ( worker , breakOnError )
2024-04-30 12:22:22 +02:00
return iterator . MakeISliceWorker ( sw , breakOnError , nworkers )
2022-01-13 23:27:39 +01:00
}
2023-11-29 12:14:37 +01:00
// MakeIConditionalWorker applies a given worker function to each sequence in the iterator that satisfies the given predicate.
// It creates a new iterator with the modified sequences and returns it.
//
// Parameters:
// - predicate: A function that takes a sequence and returns a boolean value indicating whether the sequence satisfies a certain condition.
// - worker: A function that takes a sequence and returns a modified version of the sequence.
// - sizes: Optional. One or more integers representing the number of workers to be used for parallel processing. If not provided, the number of workers will be determined by the obioptions.CLIReadParallelWorkers() function.
//
// Return:
// - newIter: A new IBioSequence iterator with the modified sequences.
2023-01-22 22:04:17 +01:00
func ( iterator IBioSequence ) MakeIConditionalWorker ( predicate obiseq . SequencePredicate ,
2024-03-02 16:03:46 -04:00
worker obiseq . SeqWorker , breakOnError bool , sizes ... int ) IBioSequence {
2023-11-22 09:46:30 +01:00
nworkers := obioptions . CLIReadParallelWorkers ( )
2022-08-31 20:38:03 +02:00
if len ( sizes ) > 0 {
nworkers = sizes [ 0 ]
}
2024-03-03 11:16:24 -04:00
sw := obiseq . SeqToSliceConditionalWorker ( predicate , worker , breakOnError )
2024-03-02 16:03:46 -04:00
2024-04-30 12:22:22 +02:00
return iterator . MakeISliceWorker ( sw , breakOnError , nworkers )
2022-08-31 20:38:03 +02:00
}
2023-11-29 12:14:37 +01:00
// MakeISliceWorker applies a SeqSliceWorker function to each slice in the IBioSequence iterator,
// creating a new IBioSequence with the modified slices.
//
// The worker function takes a slice as input and returns a modified slice. It is applied to each
// slice in the iterator.
//
// The sizes argument is optional and specifies the number of workers to use. If sizes is not
// provided, the default number of workers is used.
//
// The function returns a new IBioSequence containing the modified slices.
2024-03-02 16:03:46 -04:00
func ( iterator IBioSequence ) MakeISliceWorker ( worker obiseq . SeqSliceWorker , breakOnError bool , sizes ... int ) IBioSequence {
2023-11-22 09:46:30 +01:00
nworkers := obioptions . CLIParallelWorkers ( )
2022-01-13 23:27:39 +01:00
if len ( sizes ) > 0 {
nworkers = sizes [ 0 ]
}
2023-03-07 11:12:13 +07:00
newIter := MakeIBioSequence ( )
2022-01-13 23:27:39 +01:00
2023-01-22 22:04:17 +01:00
f := func ( iterator IBioSequence ) {
2024-03-02 16:03:46 -04:00
var err error
2022-01-13 23:27:39 +01:00
for iterator . Next ( ) {
batch := iterator . Get ( )
2024-03-02 16:03:46 -04:00
batch . slice , err = worker ( batch . slice )
if err != nil && breakOnError {
log . Fatalf ( "Error on sequence processing : %v" , err )
}
2023-12-03 22:44:13 +01:00
newIter . Push ( batch )
2022-01-13 23:27:39 +01:00
}
2022-01-14 17:32:12 +01:00
newIter . Done ( )
2022-01-13 23:27:39 +01:00
}
2024-04-30 12:22:22 +02:00
log . Debugln ( "Start of the batch workers" )
for i := 1 ; i < nworkers ; i ++ {
newIter . Add ( 1 )
2022-01-13 23:27:39 +01:00
go f ( iterator . Split ( ) )
}
2024-04-30 12:22:22 +02:00
newIter . Add ( 1 )
2022-01-14 23:11:36 +01:00
go f ( iterator )
2022-01-13 23:27:39 +01:00
2024-04-30 12:22:22 +02:00
go func ( ) {
newIter . WaitAndClose ( )
log . Debugln ( "End of the batch workers" )
} ( )
2023-02-23 23:35:58 +01:00
if iterator . IsPaired ( ) {
newIter . MarkAsPaired ( )
}
2022-01-14 17:32:12 +01:00
return newIter
2022-01-13 23:27:39 +01:00
}
2022-02-24 07:08:40 +01:00
2023-11-29 12:14:37 +01:00
// WorkerPipe is a function that takes a SeqWorker and a variadic list of sizes as parameters and returns a Pipeable.
//
// The WorkerPipe function creates a closure that takes an IBioSequence iterator as a parameter and returns an IBioSequence.
// Inside the closure, the MakeIWorker method of the iterator is called with the provided worker and sizes, and the result is returned.
//
// Parameters:
// - worker: A SeqWorker object that represents the worker to be used in the closure.
// - sizes: A variadic list of int values that represents the sizes to be used in the MakeIWorker method.
//
// Return:
// - f: A Pipeable object that represents the closure created by the WorkerPipe function.
2024-03-02 16:03:46 -04:00
func WorkerPipe ( worker obiseq . SeqWorker , breakOnError bool , sizes ... int ) Pipeable {
2023-01-22 22:04:17 +01:00
f := func ( iterator IBioSequence ) IBioSequence {
2024-03-02 16:03:46 -04:00
return iterator . MakeIWorker ( worker , breakOnError , sizes ... )
2022-02-24 07:08:40 +01:00
}
return f
}
2023-11-29 12:14:37 +01:00
// SliceWorkerPipe creates a Pipeable function that applies a SeqSliceWorker to an iterator.
//
// The worker parameter is the SeqSliceWorker to be applied.
// The sizes parameter is a variadic parameter representing the sizes of the slices.
// The function returns a Pipeable function that applies the SeqSliceWorker to the iterator.
2024-03-02 16:03:46 -04:00
func SliceWorkerPipe ( worker obiseq . SeqSliceWorker , breakOnError bool , sizes ... int ) Pipeable {
2023-01-22 22:04:17 +01:00
f := func ( iterator IBioSequence ) IBioSequence {
2024-03-02 16:03:46 -04:00
return iterator . MakeISliceWorker ( worker , breakOnError , sizes ... )
2022-02-24 07:08:40 +01:00
}
return f
}