Add some code refactoring from the blackboard branch

This commit is contained in:
Eric Coissac
2024-08-02 12:35:46 +02:00
parent bc1aaaf7d9
commit 1b1cd41fd3
38 changed files with 491 additions and 330 deletions

View File

@ -3,50 +3,118 @@ package obiiter
import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
type BioSequenceBatch struct {
slice obiseq.BioSequenceSlice
order int
source string
slice obiseq.BioSequenceSlice
order int
}
var NilBioSequenceBatch = BioSequenceBatch{nil, -1}
var NilBioSequenceBatch = BioSequenceBatch{"", nil, -1}
func MakeBioSequenceBatch(order int,
// MakeBioSequenceBatch creates a new BioSequenceBatch with the given source, order, and sequences.
//
// Parameters:
// - source: The source of the BioSequenceBatch.
// - order: The order of the BioSequenceBatch.
// - sequences: The slice of BioSequence.
//
// Returns:
// - BioSequenceBatch: The newly created BioSequenceBatch.
func MakeBioSequenceBatch(
source string,
order int,
sequences obiseq.BioSequenceSlice) BioSequenceBatch {
return BioSequenceBatch{
slice: sequences,
order: order,
source: source,
slice: sequences,
order: order,
}
}
// Order reports the order value stored in the BioSequenceBatch.
//
// The batch is received by value and is not modified.
//
// Returns:
//   - int: The content of the batch's order field.
func (batch BioSequenceBatch) Order() int {
	position := batch.order
	return position
}
// Source reports the source label stored in the BioSequenceBatch.
//
// The batch is received by value and is not modified.
//
// Returns:
//   - string: The content of the batch's source field.
func (batch BioSequenceBatch) Source() string {
	origin := batch.source
	return origin
}
// Reorder builds a copy of the BioSequenceBatch carrying a new order value.
//
// The method has a value receiver, so the batch held by the caller keeps its
// previous order; only the returned batch carries newOrder. All other fields
// are copied unchanged.
//
// Parameters:
//   - newOrder: The order value to store in the returned batch.
//
// Returns:
//   - BioSequenceBatch: A copy of the batch whose order field is newOrder.
func (batch BioSequenceBatch) Reorder(newOrder int) BioSequenceBatch {
	updated := batch
	updated.order = newOrder
	return updated
}
// Slice gives access to the sequences held by the BioSequenceBatch.
//
// The slice field is returned as is; no copy of it is made.
//
// Returns:
//   - obiseq.BioSequenceSlice: The content of the batch's slice field.
func (batch BioSequenceBatch) Slice() obiseq.BioSequenceSlice {
	sequences := batch.slice
	return sequences
}
// Len counts the elements stored in the BioSequenceBatch.
//
// The count is the length of the batch's slice field, so a batch whose slice
// is nil reports 0.
//
// Returns:
//   - int: The number of elements in the batch's slice.
func (batch BioSequenceBatch) Len() int {
	count := len(batch.slice)
	return count
}
// NotEmpty tells whether the BioSequenceBatch holds sequences.
//
// The answer is delegated to the NotEmpty method of the batch's slice field.
//
// Returns:
//   - bool: The value returned by the slice's NotEmpty method
//     (presumably true when at least one sequence is present - confirm in obiseq).
func (batch BioSequenceBatch) NotEmpty() bool {
	hasSequences := batch.slice.NotEmpty()
	return hasSequences
}
// Pop0 hands back the result of calling Pop0 on the batch's slice field.
//
// It takes no parameters.
//
// NOTE(review): the receiver is a value, so Pop0 works on a copy of the batch.
// Whether the removal is visible through the caller's batch depends on how
// obiseq's Pop0 is implemented - confirm before relying on it.
//
// Returns:
//   - *obiseq.BioSequence: The pointer returned by the slice's Pop0 method.
func (batch BioSequenceBatch) Pop0() *obiseq.BioSequence {
	first := batch.slice.Pop0()
	return first
}
// IsNil tells whether the BioSequenceBatch has no slice at all.
//
// Only the slice field is inspected; the source and order fields play no
// part in the answer. A batch built with a nil slice, such as
// NilBioSequenceBatch, reports true.
//
// Returns:
//   - bool: True if the batch's slice field is nil, false otherwise.
func (batch BioSequenceBatch) IsNil() bool {
	missing := batch.slice == nil
	return missing
}
// Recycle cleans up the BioSequenceBatch by recycling its elements and resetting its slice.
//
// If including_seq is true, each element of the BioSequenceBatch's slice is recycled using the Recycle method,
// and then set to nil. If including_seq is false, each element is simply set to nil.
//
// This function does not return anything.
func (batch BioSequenceBatch) Recycle(including_seq bool) {
batch.slice.Recycle(including_seq)
batch.slice = nil

View File

@ -424,9 +424,11 @@ func (iterator IBioSequence) Rebatch(size int) IBioSequence {
order := 0
iterator = iterator.SortBatches()
buffer := obiseq.MakeBioSequenceSlice()
source := ""
for iterator.Next() {
seqs := iterator.Get()
source = seqs.Source()
lc := seqs.Len()
remains := lc
i := 0
@ -436,7 +438,7 @@ func (iterator IBioSequence) Rebatch(size int) IBioSequence {
remains = lc - to_push - i
buffer = append(buffer, seqs.Slice()[i:(i+to_push)]...)
if len(buffer) == size {
newIter.Push(MakeBioSequenceBatch(order, buffer))
newIter.Push(MakeBioSequenceBatch(source, order, buffer))
log.Debugf("Rebatch #%d pushd", order)
order++
buffer = obiseq.MakeBioSequenceSlice()
@ -447,7 +449,7 @@ func (iterator IBioSequence) Rebatch(size int) IBioSequence {
}
log.Debug("End of the rebatch loop")
if len(buffer) > 0 {
newIter.Push(MakeBioSequenceBatch(order, buffer))
newIter.Push(MakeBioSequenceBatch(source, order, buffer))
log.Debugf("Final Rebatch #%d pushd", order)
}
@ -526,12 +528,14 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
trueOrder := 0
falseOrder := 0
iterator = iterator.SortBatches()
source := ""
trueSlice := obiseq.MakeBioSequenceSlice()
falseSlice := obiseq.MakeBioSequenceSlice()
for iterator.Next() {
seqs := iterator.Get()
source = seqs.Source()
for _, s := range seqs.slice {
if predicate(s) {
trueSlice = append(trueSlice, s)
@ -540,13 +544,13 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
}
if len(trueSlice) == size {
trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
trueIter.Push(MakeBioSequenceBatch(source, trueOrder, trueSlice))
trueOrder++
trueSlice = obiseq.MakeBioSequenceSlice()
}
if len(falseSlice) == size {
falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
falseIter.Push(MakeBioSequenceBatch(source, falseOrder, falseSlice))
falseOrder++
falseSlice = obiseq.MakeBioSequenceSlice()
}
@ -555,11 +559,11 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
}
if len(trueSlice) > 0 {
trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
trueIter.Push(MakeBioSequenceBatch(source, trueOrder, trueSlice))
}
if len(falseSlice) > 0 {
falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
falseIter.Push(MakeBioSequenceBatch(source, falseOrder, falseSlice))
}
trueIter.Done()
@ -686,17 +690,22 @@ func (iterator IBioSequence) FilterAnd(predicate obiseq.SequencePredicate,
// Load all sequences available from an IBioSequenceBatch iterator into
// a large obiseq.BioSequenceSlice.
func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
func (iterator IBioSequence) Load() (string, obiseq.BioSequenceSlice) {
chunk := obiseq.MakeBioSequenceSlice()
source := ""
chunck := obiseq.MakeBioSequenceSlice()
for iterator.Next() {
b := iterator.Get()
if source == "" {
source = b.Source()
}
log.Debugf("append %d sequences", b.Len())
chunck = append(chunck, b.Slice()...)
chunk = append(chunk, b.Slice()...)
b.Recycle(false)
}
return chunck
return source, chunk
}
// CompleteFileIterator generates a new iterator for reading a complete file.
@ -718,10 +727,10 @@ func (iterator IBioSequence) CompleteFileIterator() IBioSequence {
}()
go func() {
slice := iterator.Load()
source, slice := iterator.Load()
log.Printf("A batch of %d sequence is read", len(slice))
if len(slice) > 0 {
newIter.Push(MakeBioSequenceBatch(0, slice))
newIter.Push(MakeBioSequenceBatch(source, 0, slice))
}
newIter.Done()
}()
@ -735,7 +744,7 @@ func (iterator IBioSequence) CompleteFileIterator() IBioSequence {
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of
// BioSequence objects
func IBatchOver(data obiseq.BioSequenceSlice,
func IBatchOver(source string, data obiseq.BioSequenceSlice,
size int, sizes ...int) IBioSequence {
newIter := MakeIBioSequence()
@ -755,7 +764,7 @@ func IBatchOver(data obiseq.BioSequenceSlice,
if next > ldata {
next = ldata
}
newIter.Push(MakeBioSequenceBatch(batchid, data[i:next]))
newIter.Push(MakeBioSequenceBatch(source, batchid, data[i:next]))
batchid++
}

View File

@ -61,9 +61,12 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
go func() {
iterator = iterator.SortBatches()
source := ""
for iterator.Next() {
seqs := iterator.Get()
source = seqs.Source()
for _, s := range seqs.Slice() {
key := class.Code(s)
slice, ok := slices[key]
@ -84,7 +87,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
*slice = append(*slice, s)
if len(*slice) == batchsize {
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
outputs[key].Push(MakeBioSequenceBatch(source, orders[key], *slice))
orders[key]++
s := obiseq.MakeBioSequenceSlice()
slices[key] = &s
@ -95,7 +98,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
for key, slice := range slices {
if len(*slice) > 0 {
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
outputs[key].Push(MakeBioSequenceBatch(source, orders[key], *slice))
}
}

View File

@ -20,9 +20,11 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
}()
f := func(iterator IBioSequence, id int) {
source := ""
for iterator.Next() {
news := obiseq.MakeBioSequenceSlice()
sl := iterator.Get()
source = sl.Source()
for _, s := range sl.Slice() {
if s.Len() <= minsize {
@ -52,7 +54,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
s.Recycle()
}
} // End of the slice loop
newiter.Push(MakeBioSequenceBatch(sl.Order(), news))
newiter.Push(MakeBioSequenceBatch(source, sl.Order(), news))
sl.Recycle(false)
} // End of the iterator loop

View File

@ -9,9 +9,11 @@ func (b BioSequenceBatch) IsPaired() bool {
}
func (b BioSequenceBatch) PairedWith() BioSequenceBatch {
return MakeBioSequenceBatch(b.order,
*b.slice.PairedWith())
return MakeBioSequenceBatch(
b.Source(),
b.order,
*b.slice.PairedWith(),
)
}
func (b *BioSequenceBatch) PairTo(p *BioSequenceBatch) {