2022-02-24 07:08:40 +01:00
|
|
|
package obiiter
|
2022-01-13 23:27:39 +01:00
|
|
|
|
|
|
|
import (
|
|
|
|
"sync"
|
2023-02-08 13:14:26 +01:00
|
|
|
"time"
|
2022-02-24 07:08:40 +01:00
|
|
|
|
2022-11-17 11:09:58 +01:00
|
|
|
log "github.com/sirupsen/logrus"
|
|
|
|
|
2022-02-24 07:08:40 +01:00
|
|
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
2022-01-13 23:27:39 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
type PairedBioSequenceBatch struct {
|
2022-02-24 07:08:40 +01:00
|
|
|
forward obiseq.BioSequenceSlice
|
|
|
|
reverse obiseq.BioSequenceSlice
|
2022-01-13 23:27:39 +01:00
|
|
|
order int
|
|
|
|
}
|
|
|
|
|
|
|
|
// NilPairedBioSequenceBatch is the sentinel batch: both slices are nil and
// the order number is -1. IsNil reports true for it.
var NilPairedBioSequenceBatch = PairedBioSequenceBatch{forward: nil, reverse: nil, order: -1}
|
|
|
|
|
|
|
|
func MakePairedBioSequenceBatch(forward, reverse BioSequenceBatch) PairedBioSequenceBatch {
|
|
|
|
if forward.order != reverse.order {
|
|
|
|
log.Fatalf("Forward order : %d and reverse order : %d are not matching",
|
|
|
|
forward.order, reverse.order)
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := range reverse.slice {
|
|
|
|
reverse.slice[i].ReverseComplement(true)
|
|
|
|
}
|
|
|
|
|
|
|
|
return PairedBioSequenceBatch{
|
|
|
|
forward: forward.slice,
|
|
|
|
reverse: reverse.slice,
|
|
|
|
order: forward.order,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (batch PairedBioSequenceBatch) Order() int {
|
|
|
|
return batch.order
|
|
|
|
}
|
|
|
|
|
2022-02-24 07:08:40 +01:00
|
|
|
func (batch PairedBioSequenceBatch) Reorder(newOrder int) PairedBioSequenceBatch {
|
|
|
|
batch.order = newOrder
|
|
|
|
return batch
|
|
|
|
}
|
|
|
|
|
2022-11-17 11:09:58 +01:00
|
|
|
func (batch PairedBioSequenceBatch) Len() int {
|
2022-01-13 23:27:39 +01:00
|
|
|
return len(batch.forward)
|
|
|
|
}
|
|
|
|
|
2022-02-24 07:08:40 +01:00
|
|
|
func (batch PairedBioSequenceBatch) Forward() obiseq.BioSequenceSlice {
|
2022-01-13 23:27:39 +01:00
|
|
|
return batch.forward
|
|
|
|
}
|
|
|
|
|
2022-02-24 07:08:40 +01:00
|
|
|
func (batch PairedBioSequenceBatch) Reverse() obiseq.BioSequenceSlice {
|
2022-01-13 23:27:39 +01:00
|
|
|
return batch.reverse
|
|
|
|
}
|
|
|
|
|
|
|
|
func (batch PairedBioSequenceBatch) IsNil() bool {
|
|
|
|
return batch.forward == nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Structure implementing an iterator over bioseq.BioSequenceBatch
|
|
|
|
// based on a channel.
|
|
|
|
type __ipairedbiosequencebatch__ struct {
|
|
|
|
channel chan PairedBioSequenceBatch
|
|
|
|
current PairedBioSequenceBatch
|
2022-01-14 23:11:36 +01:00
|
|
|
pushBack bool
|
2022-01-13 23:27:39 +01:00
|
|
|
all_done *sync.WaitGroup
|
|
|
|
buffer_size int
|
|
|
|
finished bool
|
|
|
|
p_finished *bool
|
|
|
|
}
|
|
|
|
|
|
|
|
type IPairedBioSequenceBatch struct {
|
|
|
|
pointer *__ipairedbiosequencebatch__
|
|
|
|
}
|
|
|
|
|
|
|
|
// NilIPairedBioSequenceBatch is the iterator handle with a nil state
// pointer; IsNil reports true for it.
var NilIPairedBioSequenceBatch = IPairedBioSequenceBatch{}
|
|
|
|
|
|
|
|
func MakeIPairedBioSequenceBatch(sizes ...int) IPairedBioSequenceBatch {
|
|
|
|
buffsize := 1
|
|
|
|
|
|
|
|
if len(sizes) > 0 {
|
|
|
|
buffsize = sizes[0]
|
|
|
|
}
|
|
|
|
|
|
|
|
i := __ipairedbiosequencebatch__{
|
|
|
|
channel: make(chan PairedBioSequenceBatch, buffsize),
|
|
|
|
current: NilPairedBioSequenceBatch,
|
2022-01-14 23:11:36 +01:00
|
|
|
pushBack: false,
|
2022-01-13 23:27:39 +01:00
|
|
|
buffer_size: buffsize,
|
|
|
|
finished: false,
|
2022-01-14 23:11:36 +01:00
|
|
|
p_finished: nil,
|
|
|
|
}
|
|
|
|
|
2022-01-13 23:27:39 +01:00
|
|
|
i.p_finished = &i.finished
|
|
|
|
waiting := sync.WaitGroup{}
|
|
|
|
i.all_done = &waiting
|
|
|
|
ii := IPairedBioSequenceBatch{&i}
|
2023-02-08 13:14:26 +01:00
|
|
|
|
|
|
|
RegisterAPipe()
|
2022-01-13 23:27:39 +01:00
|
|
|
return ii
|
|
|
|
}
|
|
|
|
|
|
|
|
func (iterator IPairedBioSequenceBatch) Add(n int) {
|
|
|
|
iterator.pointer.all_done.Add(n)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (iterator IPairedBioSequenceBatch) Done() {
|
|
|
|
iterator.pointer.all_done.Done()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (iterator IPairedBioSequenceBatch) Wait() {
|
|
|
|
iterator.pointer.all_done.Wait()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (iterator IPairedBioSequenceBatch) Channel() chan PairedBioSequenceBatch {
|
|
|
|
return iterator.pointer.channel
|
|
|
|
}
|
|
|
|
|
2023-02-08 13:14:26 +01:00
|
|
|
func (iterator IPairedBioSequenceBatch) Close() {
|
|
|
|
close(iterator.pointer.channel)
|
|
|
|
UnregisterPipe()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (iterator IPairedBioSequenceBatch) WaitAndClose() {
|
|
|
|
iterator.Wait()
|
|
|
|
|
|
|
|
for len(iterator.Channel()) > 0 {
|
|
|
|
time.Sleep(time.Millisecond)
|
|
|
|
}
|
|
|
|
|
|
|
|
iterator.Close()
|
|
|
|
}
|
|
|
|
|
2022-01-13 23:27:39 +01:00
|
|
|
func (iterator IPairedBioSequenceBatch) IsNil() bool {
|
|
|
|
return iterator.pointer == nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (iterator IPairedBioSequenceBatch) BufferSize() int {
|
|
|
|
return iterator.pointer.buffer_size
|
|
|
|
}
|
|
|
|
|
|
|
|
func (iterator IPairedBioSequenceBatch) Split() IPairedBioSequenceBatch {
|
|
|
|
i := __ipairedbiosequencebatch__{
|
|
|
|
channel: iterator.pointer.channel,
|
|
|
|
current: NilPairedBioSequenceBatch,
|
2022-01-14 23:11:36 +01:00
|
|
|
pushBack: false,
|
2022-01-13 23:27:39 +01:00
|
|
|
all_done: iterator.pointer.all_done,
|
|
|
|
buffer_size: iterator.pointer.buffer_size,
|
|
|
|
finished: false,
|
|
|
|
p_finished: iterator.pointer.p_finished}
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter := IPairedBioSequenceBatch{&i}
|
|
|
|
return newIter
|
2022-01-13 23:27:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (iterator IPairedBioSequenceBatch) Next() bool {
|
|
|
|
if *(iterator.pointer.p_finished) {
|
|
|
|
return false
|
|
|
|
}
|
2022-01-14 23:11:36 +01:00
|
|
|
|
|
|
|
if iterator.pointer.pushBack {
|
|
|
|
iterator.pointer.pushBack = false
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2022-01-13 23:27:39 +01:00
|
|
|
next, ok := (<-iterator.pointer.channel)
|
|
|
|
|
|
|
|
if ok {
|
|
|
|
iterator.pointer.current = next
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
iterator.pointer.current = NilPairedBioSequenceBatch
|
|
|
|
*iterator.pointer.p_finished = true
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2022-01-14 23:11:36 +01:00
|
|
|
func (iterator IPairedBioSequenceBatch) PushBack() {
|
|
|
|
if !iterator.pointer.current.IsNil() {
|
|
|
|
iterator.pointer.pushBack = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-13 23:27:39 +01:00
|
|
|
// The 'Get' method returns the instance of BioSequenceBatch
|
|
|
|
// currently pointed by the iterator. You have to use the
|
|
|
|
// 'Next' method to move to the next entry before calling
|
|
|
|
// 'Get' to retreive the following instance.
|
|
|
|
func (iterator IPairedBioSequenceBatch) Get() PairedBioSequenceBatch {
|
|
|
|
return iterator.pointer.current
|
|
|
|
}
|
|
|
|
|
|
|
|
// Finished returns 'true' value if no more data is available
|
|
|
|
// from the iterator.
|
|
|
|
func (iterator IPairedBioSequenceBatch) Finished() bool {
|
|
|
|
return *iterator.pointer.p_finished
|
|
|
|
}
|
|
|
|
|
|
|
|
func (iterator IPairedBioSequenceBatch) SortBatches(sizes ...int) IPairedBioSequenceBatch {
|
|
|
|
buffsize := iterator.BufferSize()
|
|
|
|
|
|
|
|
if len(sizes) > 0 {
|
|
|
|
buffsize = sizes[0]
|
|
|
|
}
|
|
|
|
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter := MakeIPairedBioSequenceBatch(buffsize)
|
2022-01-13 23:27:39 +01:00
|
|
|
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.Add(1)
|
2022-01-13 23:27:39 +01:00
|
|
|
|
|
|
|
go func() {
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.Wait()
|
|
|
|
close(newIter.pointer.channel)
|
2022-01-13 23:27:39 +01:00
|
|
|
}()
|
|
|
|
|
|
|
|
next_to_send := 0
|
|
|
|
received := make(map[int]PairedBioSequenceBatch)
|
|
|
|
go func() {
|
|
|
|
for iterator.Next() {
|
|
|
|
batch := iterator.Get()
|
|
|
|
if batch.order == next_to_send {
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.pointer.channel <- batch
|
2022-01-13 23:27:39 +01:00
|
|
|
next_to_send++
|
|
|
|
batch, ok := received[next_to_send]
|
|
|
|
for ok {
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.pointer.channel <- batch
|
2022-01-13 23:27:39 +01:00
|
|
|
delete(received, next_to_send)
|
|
|
|
next_to_send++
|
|
|
|
batch, ok = received[next_to_send]
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
received[batch.order] = batch
|
|
|
|
}
|
|
|
|
}
|
2022-01-14 17:32:12 +01:00
|
|
|
newIter.Done()
|
2022-01-13 23:27:39 +01:00
|
|
|
}()
|
|
|
|
|
2022-01-14 17:32:12 +01:00
|
|
|
return newIter
|
2022-01-13 23:27:39 +01:00
|
|
|
|
|
|
|
}
|