mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Add some code refactoring from the blackboard branch
This commit is contained in:
@ -3,50 +3,118 @@ package obiiter
|
||||
import "git.metabarcoding.org/obitools/obitools4/obitools4/pkg/obiseq"
|
||||
|
||||
// BioSequenceBatch bundles a slice of sequences with an order number and a
// source string.
//
// NOTE(review): this listing interleaves the removed and the added lines of a
// diff, so slice and order appear twice below; the three-field layout
// (source, slice, order) is presumably the current one — confirm against the
// repository.
type BioSequenceBatch struct {
|
||||
slice obiseq.BioSequenceSlice
|
||||
order int
|
||||
// source is returned unchanged by the Source method.
source string
|
||||
// slice holds the sequences carried by the batch.
slice obiseq.BioSequenceSlice
|
||||
// order is the number returned by the Order method.
order int
|
||||
}
|
||||
|
||||
// NilBioSequenceBatch is a batch with a nil slice and an order of -1; the
// second form also sets an empty source string.
//
// NOTE(review): the two declarations are the removed and the added line of a
// diff; the positional literal must follow the struct's field order.
var NilBioSequenceBatch = BioSequenceBatch{nil, -1}
|
||||
var NilBioSequenceBatch = BioSequenceBatch{"", nil, -1}
|
||||
|
||||
// NOTE(review): the two-argument header just below is the removed diff line;
// the three-argument header further down (source, order, sequences) is the
// added one — confirm against the repository.
func MakeBioSequenceBatch(order int,
|
||||
// MakeBioSequenceBatch builds a BioSequenceBatch from a source, an order number and a slice of sequences.
|
||||
//
|
||||
// Parameters:
|
||||
//   - source: the string stored in the batch's source field.
|
||||
//   - order: the number stored in the batch's order field.
|
||||
//   - sequences: the slice stored in the batch; it is assigned as passed, not copied.
|
||||
//
|
||||
// Returns:
|
||||
//   - BioSequenceBatch: a batch value holding the three arguments.
|
||||
func MakeBioSequenceBatch(
|
||||
source string,
|
||||
order int,
|
||||
sequences obiseq.BioSequenceSlice) BioSequenceBatch {
|
||||
|
||||
return BioSequenceBatch{
|
||||
slice: sequences,
|
||||
order: order,
|
||||
source: source,
|
||||
slice: sequences,
|
||||
order: order,
|
||||
}
|
||||
}
|
||||
|
||||
// Order returns the order of the BioSequenceBatch.
|
||||
//
|
||||
// Returns:
|
||||
// - int: The order of the BioSequenceBatch.
|
||||
func (batch BioSequenceBatch) Order() int {
|
||||
return batch.order
|
||||
}
|
||||
|
||||
// Source returns the source of the BioSequenceBatch.
|
||||
//
|
||||
// Returns:
|
||||
// - string: The source of the BioSequenceBatch.
|
||||
func (batch BioSequenceBatch) Source() string {
|
||||
return batch.source
|
||||
}
|
||||
|
||||
// Reorder updates the order of the BioSequenceBatch and returns the updated batch.
|
||||
//
|
||||
// Parameters:
|
||||
// - newOrder: The new order value to assign to the BioSequenceBatch.
|
||||
//
|
||||
// Returns:
|
||||
// - BioSequenceBatch: The updated BioSequenceBatch with the new order value.
|
||||
func (batch BioSequenceBatch) Reorder(newOrder int) BioSequenceBatch {
|
||||
batch.order = newOrder
|
||||
return batch
|
||||
}
|
||||
|
||||
// Slice returns the BioSequenceSlice contained within the BioSequenceBatch.
|
||||
//
|
||||
// Returns:
|
||||
// - obiseq.BioSequenceSlice: The BioSequenceSlice contained within the BioSequenceBatch.
|
||||
func (batch BioSequenceBatch) Slice() obiseq.BioSequenceSlice {
|
||||
return batch.slice
|
||||
}
|
||||
|
||||
// Len returns the number of BioSequence elements in the given BioSequenceBatch.
|
||||
//
|
||||
// Parameters:
|
||||
// - batch: The BioSequenceBatch to get the length from.
|
||||
//
|
||||
// Return type:
|
||||
// - int: The number of BioSequence elements in the BioSequenceBatch.
|
||||
func (batch BioSequenceBatch) Len() int {
|
||||
return len(batch.slice)
|
||||
}
|
||||
|
||||
// NotEmpty returns whether the BioSequenceBatch is empty or not.
|
||||
//
|
||||
// It checks if the BioSequenceSlice contained within the BioSequenceBatch is not empty.
|
||||
//
|
||||
// Returns:
|
||||
// - bool: True if the BioSequenceBatch is not empty, false otherwise.
|
||||
func (batch BioSequenceBatch) NotEmpty() bool {
|
||||
return batch.slice.NotEmpty()
|
||||
}
|
||||
|
||||
// Pop0 returns and removes the first element of the BioSequenceBatch.
|
||||
//
|
||||
// It does not take any parameters.
|
||||
// It returns a pointer to a BioSequence object.
|
||||
func (batch BioSequenceBatch) Pop0() *obiseq.BioSequence {
|
||||
return batch.slice.Pop0()
|
||||
}
|
||||
|
||||
// IsNil checks if the BioSequenceBatch's slice is nil.
|
||||
//
|
||||
// This function takes a BioSequenceBatch as a parameter and returns a boolean value indicating whether the slice of the BioSequenceBatch is nil or not.
|
||||
//
|
||||
// Parameters:
|
||||
// - batch: The BioSequenceBatch to check for nil slice.
|
||||
//
|
||||
// Returns:
|
||||
// - bool: True if the BioSequenceBatch's slice is nil, false otherwise.
|
||||
func (batch BioSequenceBatch) IsNil() bool {
|
||||
return batch.slice == nil
|
||||
}
|
||||
|
||||
// Recycle cleans up the BioSequenceBatch by recycling its elements and resetting its slice.
|
||||
//
|
||||
// If including_seq is true, each element of the BioSequenceBatch's slice is recycled using the Recycle method,
|
||||
// and then set to nil. If including_seq is false, each element is simply set to nil.
|
||||
//
|
||||
// This function does not return anything.
|
||||
func (batch BioSequenceBatch) Recycle(including_seq bool) {
|
||||
batch.slice.Recycle(including_seq)
|
||||
batch.slice = nil
|
||||
|
@ -424,9 +424,11 @@ func (iterator IBioSequence) Rebatch(size int) IBioSequence {
|
||||
order := 0
|
||||
iterator = iterator.SortBatches()
|
||||
buffer := obiseq.MakeBioSequenceSlice()
|
||||
source := ""
|
||||
|
||||
for iterator.Next() {
|
||||
seqs := iterator.Get()
|
||||
source = seqs.Source()
|
||||
lc := seqs.Len()
|
||||
remains := lc
|
||||
i := 0
|
||||
@ -436,7 +438,7 @@ func (iterator IBioSequence) Rebatch(size int) IBioSequence {
|
||||
remains = lc - to_push - i
|
||||
buffer = append(buffer, seqs.Slice()[i:(i+to_push)]...)
|
||||
if len(buffer) == size {
|
||||
newIter.Push(MakeBioSequenceBatch(order, buffer))
|
||||
newIter.Push(MakeBioSequenceBatch(source, order, buffer))
|
||||
log.Debugf("Rebatch #%d pushd", order)
|
||||
order++
|
||||
buffer = obiseq.MakeBioSequenceSlice()
|
||||
@ -447,7 +449,7 @@ func (iterator IBioSequence) Rebatch(size int) IBioSequence {
|
||||
}
|
||||
log.Debug("End of the rebatch loop")
|
||||
if len(buffer) > 0 {
|
||||
newIter.Push(MakeBioSequenceBatch(order, buffer))
|
||||
newIter.Push(MakeBioSequenceBatch(source, order, buffer))
|
||||
log.Debugf("Final Rebatch #%d pushd", order)
|
||||
}
|
||||
|
||||
@ -526,12 +528,14 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
|
||||
trueOrder := 0
|
||||
falseOrder := 0
|
||||
iterator = iterator.SortBatches()
|
||||
source := ""
|
||||
|
||||
trueSlice := obiseq.MakeBioSequenceSlice()
|
||||
falseSlice := obiseq.MakeBioSequenceSlice()
|
||||
|
||||
for iterator.Next() {
|
||||
seqs := iterator.Get()
|
||||
source = seqs.Source()
|
||||
for _, s := range seqs.slice {
|
||||
if predicate(s) {
|
||||
trueSlice = append(trueSlice, s)
|
||||
@ -540,13 +544,13 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
|
||||
}
|
||||
|
||||
if len(trueSlice) == size {
|
||||
trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
|
||||
trueIter.Push(MakeBioSequenceBatch(source, trueOrder, trueSlice))
|
||||
trueOrder++
|
||||
trueSlice = obiseq.MakeBioSequenceSlice()
|
||||
}
|
||||
|
||||
if len(falseSlice) == size {
|
||||
falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
|
||||
falseIter.Push(MakeBioSequenceBatch(source, falseOrder, falseSlice))
|
||||
falseOrder++
|
||||
falseSlice = obiseq.MakeBioSequenceSlice()
|
||||
}
|
||||
@ -555,11 +559,11 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
|
||||
}
|
||||
|
||||
if len(trueSlice) > 0 {
|
||||
trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
|
||||
trueIter.Push(MakeBioSequenceBatch(source, trueOrder, trueSlice))
|
||||
}
|
||||
|
||||
if len(falseSlice) > 0 {
|
||||
falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
|
||||
falseIter.Push(MakeBioSequenceBatch(source, falseOrder, falseSlice))
|
||||
}
|
||||
|
||||
trueIter.Done()
|
||||
@ -686,17 +690,22 @@ func (iterator IBioSequence) FilterAnd(predicate obiseq.SequencePredicate,
|
||||
|
||||
// Load drains the iterator and gathers every available sequence into
|
||||
// one large obiseq.BioSequenceSlice.
//
// In the two-value form it also returns the first non-empty source reported
// by a batch (the empty string if none is seen).
//
// NOTE(review): this listing interleaves removed and added diff lines; the
// single-value signature and the `chunck` statements are presumably the
// removed side — confirm against the repository.
|
||||
func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
|
||||
func (iterator IBioSequence) Load() (string, obiseq.BioSequenceSlice) {
|
||||
|
||||
chunk := obiseq.MakeBioSequenceSlice()
|
||||
source := ""
|
||||
|
||||
chunck := obiseq.MakeBioSequenceSlice()
|
||||
for iterator.Next() {
|
||||
b := iterator.Get()
|
||||
// Keep the first non-empty source; later batches do not override it.
if source == "" {
|
||||
source = b.Source()
|
||||
}
|
||||
log.Debugf("append %d sequences", b.Len())
|
||||
chunck = append(chunck, b.Slice()...)
|
||||
chunk = append(chunk, b.Slice()...)
|
||||
// false: recycle the batch without recycling the sequences themselves,
// which are still referenced by the accumulated slice.
b.Recycle(false)
|
||||
}
|
||||
|
||||
return chunck
|
||||
return source, chunk
|
||||
}
|
||||
|
||||
// CompleteFileIterator generates a new iterator for reading a complete file.
|
||||
@ -718,10 +727,10 @@ func (iterator IBioSequence) CompleteFileIterator() IBioSequence {
|
||||
}()
|
||||
|
||||
go func() {
|
||||
slice := iterator.Load()
|
||||
source, slice := iterator.Load()
|
||||
log.Printf("A batch of %d sequence is read", len(slice))
|
||||
if len(slice) > 0 {
|
||||
newIter.Push(MakeBioSequenceBatch(0, slice))
|
||||
newIter.Push(MakeBioSequenceBatch(source, 0, slice))
|
||||
}
|
||||
newIter.Done()
|
||||
}()
|
||||
@ -735,7 +744,7 @@ func (iterator IBioSequence) CompleteFileIterator() IBioSequence {
|
||||
|
||||
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of
|
||||
// BioSequence objects
|
||||
func IBatchOver(data obiseq.BioSequenceSlice,
|
||||
func IBatchOver(source string, data obiseq.BioSequenceSlice,
|
||||
size int, sizes ...int) IBioSequence {
|
||||
|
||||
newIter := MakeIBioSequence()
|
||||
@ -755,7 +764,7 @@ func IBatchOver(data obiseq.BioSequenceSlice,
|
||||
if next > ldata {
|
||||
next = ldata
|
||||
}
|
||||
newIter.Push(MakeBioSequenceBatch(batchid, data[i:next]))
|
||||
newIter.Push(MakeBioSequenceBatch(source, batchid, data[i:next]))
|
||||
batchid++
|
||||
}
|
||||
|
||||
|
@ -61,9 +61,12 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
|
||||
|
||||
go func() {
|
||||
iterator = iterator.SortBatches()
|
||||
source := ""
|
||||
|
||||
for iterator.Next() {
|
||||
seqs := iterator.Get()
|
||||
source = seqs.Source()
|
||||
|
||||
for _, s := range seqs.Slice() {
|
||||
key := class.Code(s)
|
||||
slice, ok := slices[key]
|
||||
@ -84,7 +87,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
|
||||
*slice = append(*slice, s)
|
||||
|
||||
if len(*slice) == batchsize {
|
||||
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
|
||||
outputs[key].Push(MakeBioSequenceBatch(source, orders[key], *slice))
|
||||
orders[key]++
|
||||
s := obiseq.MakeBioSequenceSlice()
|
||||
slices[key] = &s
|
||||
@ -95,7 +98,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
|
||||
|
||||
for key, slice := range slices {
|
||||
if len(*slice) > 0 {
|
||||
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
|
||||
outputs[key].Push(MakeBioSequenceBatch(source, orders[key], *slice))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -20,9 +20,11 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
||||
}()
|
||||
|
||||
f := func(iterator IBioSequence, id int) {
|
||||
source := ""
|
||||
for iterator.Next() {
|
||||
news := obiseq.MakeBioSequenceSlice()
|
||||
sl := iterator.Get()
|
||||
source = sl.Source()
|
||||
for _, s := range sl.Slice() {
|
||||
|
||||
if s.Len() <= minsize {
|
||||
@ -52,7 +54,7 @@ func IFragments(minsize, length, overlap, size, nworkers int) Pipeable {
|
||||
s.Recycle()
|
||||
}
|
||||
} // End of the slice loop
|
||||
newiter.Push(MakeBioSequenceBatch(sl.Order(), news))
|
||||
newiter.Push(MakeBioSequenceBatch(source, sl.Order(), news))
|
||||
sl.Recycle(false)
|
||||
} // End of the iterator loop
|
||||
|
||||
|
@ -9,9 +9,11 @@ func (b BioSequenceBatch) IsPaired() bool {
|
||||
}
|
||||
|
||||
// PairedWith builds a new batch from the slice returned by
// b.slice.PairedWith(), keeping b's order and, in the three-argument form,
// b's source.
//
// NOTE(review): the two return statements are the removed and the added side
// of a diff. The pointer returned by b.slice.PairedWith() is dereferenced
// without a nil check — confirm it can never be nil here.
func (b BioSequenceBatch) PairedWith() BioSequenceBatch {
|
||||
return MakeBioSequenceBatch(b.order,
|
||||
*b.slice.PairedWith())
|
||||
|
||||
return MakeBioSequenceBatch(
|
||||
b.Source(),
|
||||
b.order,
|
||||
*b.slice.PairedWith(),
|
||||
)
|
||||
}
|
||||
|
||||
func (b *BioSequenceBatch) PairTo(p *BioSequenceBatch) {
|
||||
|
Reference in New Issue
Block a user