Refactoring code to remove the buffer size options. And some other changes...

Former-commit-id: 10b57cc1a27446ade3c444217341e9651e89cdce
This commit is contained in:
2023-03-07 11:12:13 +07:00
parent 9811e440b8
commit d88de15cdc
52 changed files with 1172 additions and 421 deletions

View File

@ -36,20 +36,14 @@ func find(root, ext string) []string {
}
func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequence, error) {
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
dir, err := tempDir()
if err != nil {
return obiiter.NilIBioSequence, err
}
bufferSize := iterator.BufferSize()
if len(sizes) > 0 {
bufferSize = sizes[0]
}
newIter := obiiter.MakeIBioSequence(bufferSize)
newIter := obiiter.MakeIBioSequence()
newIter.Add(1)

View File

@ -10,16 +10,9 @@ import (
)
func ISequenceChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequence, error) {
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize()
if len(sizes) > 0 {
bufferSize = sizes[0]
}
newIter := obiiter.MakeIBioSequence(bufferSize)
newIter := obiiter.MakeIBioSequence()
newIter.Add(1)

View File

@ -6,7 +6,6 @@ type __options__ struct {
navalue string
cacheOnDisk bool
batchCount int
bufferSize int
batchSize int
parallelWorkers int
noSingleton bool
@ -25,7 +24,6 @@ func MakeOptions(setters []WithOption) Options {
navalue: "NA",
cacheOnDisk: false,
batchCount: 100,
bufferSize: 2,
batchSize: 5000,
parallelWorkers: 4,
noSingleton: false,
@ -65,10 +63,6 @@ func (opt Options) BatchCount() int {
return opt.pointer.batchCount
}
func (opt Options) BufferSize() int {
return opt.pointer.bufferSize
}
func (opt Options) BatchSize() int {
return opt.pointer.batchSize
}
@ -148,14 +142,6 @@ func OptionsBatchSize(size int) WithOption {
return f
}
func OptionsBufferSize(size int) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.bufferSize = size
})
return f
}
func OptionsNoSingleton() WithOption {
f := WithOption(func(opt Options) {
opt.pointer.noSingleton = true

View File

@ -58,20 +58,13 @@ func (by _By) Sort(seqs []sSS) {
func ISequenceSubChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequence, error) {
nworkers int) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize()
nworkers := 4
if len(sizes) > 0 {
nworkers = sizes[0]
if nworkers <=0 {
nworkers = 4
}
if len(sizes) > 1 {
bufferSize = sizes[1]
}
newIter := obiiter.MakeIBioSequence(bufferSize)
newIter := obiiter.MakeIBioSequence()
newIter.Add(nworkers)

View File

@ -19,7 +19,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
opts := MakeOptions(options)
nworkers := opts.ParallelWorkers()
iUnique := obiiter.MakeIBioSequence(opts.BufferSize())
iUnique := obiiter.MakeIBioSequence()
iterator = iterator.Speed("Splitting data set")
@ -28,8 +28,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
if opts.SortOnDisk() {
nworkers = 1
iterator, err = ISequenceChunkOnDisk(iterator,
obiseq.HashClassifier(opts.BatchCount()),
0)
obiseq.HashClassifier(opts.BatchCount()))
if err != nil {
return obiiter.NilIBioSequence, err
@ -37,8 +36,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
} else {
iterator, err = ISequenceChunk(iterator,
obiseq.HashClassifier(opts.BatchCount()),
opts.BufferSize())
obiseq.HashClassifier(opts.BatchCount()))
if err != nil {
return obiiter.NilIBioSequence, err
@ -78,12 +76,11 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
icat--
input, err = ISequenceSubChunk(input,
classifier,
1,
opts.BufferSize())
1)
var next obiiter.IBioSequence
if icat >= 0 {
next = obiiter.MakeIBioSequence(opts.BufferSize())
next = obiiter.MakeIBioSequence()
iUnique.Add(1)
@ -130,7 +127,6 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
iMerged := iUnique.IMergeSequenceBatch(opts.NAValue(),
opts.StatsOn(),
opts.BufferSize(),
)
return iMerged, nil