mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Some code refactoring, a new, more memory-efficient version of obiuniq, and a first Makefile for building obitools
This commit is contained in:
@ -8,6 +8,7 @@ import (
|
||||
"path/filepath"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
@ -33,12 +34,12 @@ func find(root, ext string) []string {
|
||||
return a
|
||||
}
|
||||
|
||||
func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
|
||||
func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
||||
sizes ...int) (obiiter.IBioSequenceBatch, error) {
|
||||
dir, err := tempDir()
|
||||
if err != nil {
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
}
|
||||
|
||||
bufferSize := iterator.BufferSize()
|
||||
@ -47,7 +48,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
|
||||
bufferSize = sizes[0]
|
||||
}
|
||||
|
||||
newIter := obiseq.MakeIBioSequenceBatch(bufferSize)
|
||||
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
@ -86,7 +87,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
|
||||
b.Recycle()
|
||||
}
|
||||
|
||||
newIter.Push(obiseq.MakeBioSequenceBatch(order, chunck))
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(order, chunck))
|
||||
|
||||
}
|
||||
|
||||
|
@ -4,12 +4,13 @@ import (
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
|
||||
func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
||||
sizes ...int) (obiiter.IBioSequenceBatch, error) {
|
||||
|
||||
bufferSize := iterator.BufferSize()
|
||||
|
||||
@ -17,7 +18,7 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
|
||||
bufferSize = sizes[0]
|
||||
}
|
||||
|
||||
newIter := obiseq.MakeIBioSequenceBatch(bufferSize)
|
||||
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
@ -64,7 +65,7 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
|
||||
for _, chunck := range chunks {
|
||||
|
||||
if len(*chunck) > 0 {
|
||||
newIter.Push(obiseq.MakeBioSequenceBatch(order, *chunck))
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(order, *chunck))
|
||||
order++
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,7 @@ import (
|
||||
"sort"
|
||||
"sync/atomic"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
@ -54,9 +55,9 @@ func (by _By) Sort(seqs []sSS) {
|
||||
// End of the sort interface
|
||||
//
|
||||
|
||||
func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
||||
func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
||||
sizes ...int) (obiiter.IBioSequenceBatch, error) {
|
||||
|
||||
bufferSize := iterator.BufferSize()
|
||||
nworkers := 4
|
||||
@ -69,7 +70,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
||||
bufferSize = sizes[1]
|
||||
}
|
||||
|
||||
newIter := obiseq.MakeIBioSequenceBatch(bufferSize)
|
||||
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
|
||||
|
||||
newIter.Add(nworkers)
|
||||
|
||||
@ -86,7 +87,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
||||
return neworder
|
||||
}
|
||||
|
||||
ff := func(iterator obiseq.IBioSequenceBatch,
|
||||
ff := func(iterator obiiter.IBioSequenceBatch,
|
||||
classifier *obiseq.BioSequenceClassifier) {
|
||||
|
||||
ordered := make([]sSS, 100)
|
||||
@ -121,7 +122,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
||||
ss := obiseq.MakeBioSequenceSlice()
|
||||
for i, v := range ordered {
|
||||
if v.code != last {
|
||||
newIter.Push(obiseq.MakeBioSequenceBatch(nextOrder(), ss))
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(nextOrder(), ss))
|
||||
ss = obiseq.MakeBioSequenceSlice()
|
||||
last = v.code
|
||||
}
|
||||
@ -131,7 +132,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
||||
}
|
||||
|
||||
if len(ss) > 0 {
|
||||
newIter.Push(obiseq.MakeBioSequenceBatch(nextOrder(), ss))
|
||||
newIter.Push(obiiter.MakeBioSequenceBatch(nextOrder(), ss))
|
||||
}
|
||||
} else {
|
||||
newIter.Push(batch.Reorder(nextOrder()))
|
||||
|
@ -3,26 +3,27 @@ package obichunk
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||
func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
|
||||
var err error
|
||||
opts := MakeOptions(options)
|
||||
nworkers := opts.ParallelWorkers()
|
||||
|
||||
iUnique := obiseq.MakeIBioSequenceBatch(opts.BufferSize())
|
||||
iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize())
|
||||
|
||||
if opts.SortOnDisk() {
|
||||
nworkers = 1
|
||||
iterator, err = ISequenceChunkOnDisk(iterator,
|
||||
obiseq.HashClassifier(opts.BatchCount()),
|
||||
opts.BufferSize())
|
||||
0)
|
||||
|
||||
if err != nil {
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
}
|
||||
|
||||
} else {
|
||||
@ -31,7 +32,7 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
||||
opts.BufferSize())
|
||||
|
||||
if err != nil {
|
||||
return obiseq.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
}
|
||||
}
|
||||
|
||||
@ -53,12 +54,12 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
||||
return neworder
|
||||
}
|
||||
|
||||
var ff func(obiseq.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int)
|
||||
var ff func(obiiter.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int)
|
||||
|
||||
cat := opts.Categories()
|
||||
na := opts.NAValue()
|
||||
|
||||
ff = func(input obiseq.IBioSequenceBatch,
|
||||
ff = func(input obiiter.IBioSequenceBatch,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
icat int) {
|
||||
icat--
|
||||
@ -67,9 +68,9 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
||||
1,
|
||||
opts.BufferSize())
|
||||
|
||||
var next obiseq.IBioSequenceBatch
|
||||
var next obiiter.IBioSequenceBatch
|
||||
if icat >= 0 {
|
||||
next = obiseq.MakeIBioSequenceBatch(opts.BufferSize())
|
||||
next = obiiter.MakeIBioSequenceBatch(opts.BufferSize())
|
||||
|
||||
iUnique.Add(1)
|
||||
go ff(next,
|
||||
|
Reference in New Issue
Block a user