Some code refactoring, a new, more memory-efficient version of obiuniq, and a first Makefile for building obitools

This commit is contained in:
2022-02-24 07:08:40 +01:00
parent 2e7c1834b0
commit eaf65fbcce
39 changed files with 1225 additions and 241 deletions

View File

@ -8,6 +8,7 @@ import (
"path/filepath"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@ -33,12 +34,12 @@ func find(root, ext string) []string {
return a
}
func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiseq.IBioSequenceBatch, error) {
sizes ...int) (obiiter.IBioSequenceBatch, error) {
dir, err := tempDir()
if err != nil {
return obiseq.NilIBioSequenceBatch, err
return obiiter.NilIBioSequenceBatch, err
}
bufferSize := iterator.BufferSize()
@ -47,7 +48,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
bufferSize = sizes[0]
}
newIter := obiseq.MakeIBioSequenceBatch(bufferSize)
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
newIter.Add(1)
@ -86,7 +87,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
b.Recycle()
}
newIter.Push(obiseq.MakeBioSequenceBatch(order, chunck))
newIter.Push(obiiter.MakeBioSequenceBatch(order, chunck))
}

View File

@ -4,12 +4,13 @@ import (
"log"
"sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiseq.IBioSequenceBatch, error) {
sizes ...int) (obiiter.IBioSequenceBatch, error) {
bufferSize := iterator.BufferSize()
@ -17,7 +18,7 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
bufferSize = sizes[0]
}
newIter := obiseq.MakeIBioSequenceBatch(bufferSize)
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
newIter.Add(1)
@ -64,7 +65,7 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
for _, chunck := range chunks {
if len(*chunck) > 0 {
newIter.Push(obiseq.MakeBioSequenceBatch(order, *chunck))
newIter.Push(obiiter.MakeBioSequenceBatch(order, *chunck))
order++
}

View File

@ -5,6 +5,7 @@ import (
"sort"
"sync/atomic"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@ -54,9 +55,9 @@ func (by _By) Sort(seqs []sSS) {
// End of the sort interface
//
func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiseq.IBioSequenceBatch, error) {
sizes ...int) (obiiter.IBioSequenceBatch, error) {
bufferSize := iterator.BufferSize()
nworkers := 4
@ -69,7 +70,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
bufferSize = sizes[1]
}
newIter := obiseq.MakeIBioSequenceBatch(bufferSize)
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
newIter.Add(nworkers)
@ -86,7 +87,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
return neworder
}
ff := func(iterator obiseq.IBioSequenceBatch,
ff := func(iterator obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier) {
ordered := make([]sSS, 100)
@ -121,7 +122,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
ss := obiseq.MakeBioSequenceSlice()
for i, v := range ordered {
if v.code != last {
newIter.Push(obiseq.MakeBioSequenceBatch(nextOrder(), ss))
newIter.Push(obiiter.MakeBioSequenceBatch(nextOrder(), ss))
ss = obiseq.MakeBioSequenceSlice()
last = v.code
}
@ -131,7 +132,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
}
if len(ss) > 0 {
newIter.Push(obiseq.MakeBioSequenceBatch(nextOrder(), ss))
newIter.Push(obiiter.MakeBioSequenceBatch(nextOrder(), ss))
}
} else {
newIter.Push(batch.Reorder(nextOrder()))

View File

@ -3,26 +3,27 @@ package obichunk
import (
"sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
var err error
opts := MakeOptions(options)
nworkers := opts.ParallelWorkers()
iUnique := obiseq.MakeIBioSequenceBatch(opts.BufferSize())
iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize())
if opts.SortOnDisk() {
nworkers = 1
iterator, err = ISequenceChunkOnDisk(iterator,
obiseq.HashClassifier(opts.BatchCount()),
opts.BufferSize())
0)
if err != nil {
return obiseq.NilIBioSequenceBatch, err
return obiiter.NilIBioSequenceBatch, err
}
} else {
@ -31,7 +32,7 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
opts.BufferSize())
if err != nil {
return obiseq.NilIBioSequenceBatch, err
return obiiter.NilIBioSequenceBatch, err
}
}
@ -53,12 +54,12 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
return neworder
}
var ff func(obiseq.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int)
var ff func(obiiter.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int)
cat := opts.Categories()
na := opts.NAValue()
ff = func(input obiseq.IBioSequenceBatch,
ff = func(input obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier,
icat int) {
icat--
@ -67,9 +68,9 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
1,
opts.BufferSize())
var next obiseq.IBioSequenceBatch
var next obiiter.IBioSequenceBatch
if icat >= 0 {
next = obiseq.MakeIBioSequenceBatch(opts.BufferSize())
next = obiiter.MakeIBioSequenceBatch(opts.BufferSize())
iUnique.Add(1)
go ff(next,