Refactoring code to remove the buffer size options, and some other changes...

Former-commit-id: 10b57cc1a27446ade3c444217341e9651e89cdce
This commit is contained in:
2023-03-07 11:12:13 +07:00
parent 9811e440b8
commit d88de15cdc
52 changed files with 1172 additions and 421 deletions

View File

@ -13,6 +13,7 @@ import (
"github.com/barkimedes/go-deepcopy"
)
// InterfaceToInt converts an interface{} to an integer value if possible.
// If not, a "NotAnInteger" error is returned via the err
// return value and val is set to 0.
@ -302,15 +303,6 @@ func ReadLines(path string) (lines []string, err error) {
return
}
// Contains reports whether x is equal to at least one element of arr.
// It returns false for a nil or empty slice.
func Contains[T comparable](arr []T, x T) bool {
	for i := 0; i < len(arr); i++ {
		if arr[i] == x {
			return true
		}
	}

	return false
}
func AtomicCounter(initial ...int) func() int {
counterMutex := sync.Mutex{}
counter := 0

24
pkg/goutils/slices.go Normal file
View File

@ -0,0 +1,24 @@
package goutils
// Contains reports whether the value x occurs in arr.
// The scan stops at the first matching element; a nil or empty
// slice always yields false.
func Contains[T comparable](arr []T, x T) bool {
	found := false
	for idx := 0; idx < len(arr) && !found; idx++ {
		found = arr[idx] == x
	}

	return found
}
// LookFor returns the index of the first element of arr equal to x,
// or -1 when no element matches.
func LookFor[T comparable](arr []T, x T) int {
	for pos := 0; pos < len(arr); pos++ {
		if arr[pos] == x {
			return pos
		}
	}

	return -1
}
// RemoveIndex returns s without the element located at index.
//
// The removal is done in place: the elements following index are
// shifted one position to the left inside the backing array of s, so
// the slice passed by the caller is modified too. The result shares
// that backing array and is one element shorter than s.
//
// The function panics when index is not a valid position in s.
func RemoveIndex[T comparable](s []T, index int) []T {
	copy(s[index:], s[index+1:])
	return s[:len(s)-1]
}

View File

@ -13,7 +13,6 @@ type _Options struct {
circular bool
forwardError int
reverseError int
bufferSize int
batchSize int
parallelWorkers int
forward ApatPattern
@ -66,12 +65,6 @@ func (options Options) Circular() bool {
return options.pointer.circular
}
// BufferSize returns the size of the channel
// buffer specified by the options
func (options Options) BufferSize() int {
return options.pointer.bufferSize
}
// BatchSize returns the size of the
// sequence batch used by the PCR algorithm
func (options Options) BatchSize() int {
@ -95,7 +88,6 @@ func MakeOptions(setters []WithOption) Options {
circular: false,
parallelWorkers: 4,
batchSize: 100,
bufferSize: 100,
forward: NilApatPattern,
cfwd: NilApatPattern,
reverse: NilApatPattern,
@ -188,16 +180,6 @@ func OptionCircular(circular bool) WithOption {
return f
}
// OptionBufferSize sets the requested channel
// buffer size.
func OptionBufferSize(size int) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.bufferSize = size
})
return f
}
// OptionParallelWorkers sets how many search
// jobs will be run in parallel.
func OptionParallelWorkers(nworkers int) WithOption {

View File

@ -36,20 +36,14 @@ func find(root, ext string) []string {
}
func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequence, error) {
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
dir, err := tempDir()
if err != nil {
return obiiter.NilIBioSequence, err
}
bufferSize := iterator.BufferSize()
if len(sizes) > 0 {
bufferSize = sizes[0]
}
newIter := obiiter.MakeIBioSequence(bufferSize)
newIter := obiiter.MakeIBioSequence()
newIter.Add(1)

View File

@ -10,16 +10,9 @@ import (
)
func ISequenceChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequence, error) {
classifier *obiseq.BioSequenceClassifier) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize()
if len(sizes) > 0 {
bufferSize = sizes[0]
}
newIter := obiiter.MakeIBioSequence(bufferSize)
newIter := obiiter.MakeIBioSequence()
newIter.Add(1)

View File

@ -6,7 +6,6 @@ type __options__ struct {
navalue string
cacheOnDisk bool
batchCount int
bufferSize int
batchSize int
parallelWorkers int
noSingleton bool
@ -25,7 +24,6 @@ func MakeOptions(setters []WithOption) Options {
navalue: "NA",
cacheOnDisk: false,
batchCount: 100,
bufferSize: 2,
batchSize: 5000,
parallelWorkers: 4,
noSingleton: false,
@ -65,10 +63,6 @@ func (opt Options) BatchCount() int {
return opt.pointer.batchCount
}
func (opt Options) BufferSize() int {
return opt.pointer.bufferSize
}
func (opt Options) BatchSize() int {
return opt.pointer.batchSize
}
@ -148,14 +142,6 @@ func OptionsBatchSize(size int) WithOption {
return f
}
func OptionsBufferSize(size int) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.bufferSize = size
})
return f
}
func OptionsNoSingleton() WithOption {
f := WithOption(func(opt Options) {
opt.pointer.noSingleton = true

View File

@ -58,20 +58,13 @@ func (by _By) Sort(seqs []sSS) {
func ISequenceSubChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequence, error) {
nworkers int) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize()
nworkers := 4
if len(sizes) > 0 {
nworkers = sizes[0]
if nworkers <=0 {
nworkers = 4
}
if len(sizes) > 1 {
bufferSize = sizes[1]
}
newIter := obiiter.MakeIBioSequence(bufferSize)
newIter := obiiter.MakeIBioSequence()
newIter.Add(nworkers)

View File

@ -19,7 +19,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
opts := MakeOptions(options)
nworkers := opts.ParallelWorkers()
iUnique := obiiter.MakeIBioSequence(opts.BufferSize())
iUnique := obiiter.MakeIBioSequence()
iterator = iterator.Speed("Splitting data set")
@ -28,8 +28,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
if opts.SortOnDisk() {
nworkers = 1
iterator, err = ISequenceChunkOnDisk(iterator,
obiseq.HashClassifier(opts.BatchCount()),
0)
obiseq.HashClassifier(opts.BatchCount()))
if err != nil {
return obiiter.NilIBioSequence, err
@ -37,8 +36,7 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
} else {
iterator, err = ISequenceChunk(iterator,
obiseq.HashClassifier(opts.BatchCount()),
opts.BufferSize())
obiseq.HashClassifier(opts.BatchCount()))
if err != nil {
return obiiter.NilIBioSequence, err
@ -78,12 +76,11 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
icat--
input, err = ISequenceSubChunk(input,
classifier,
1,
opts.BufferSize())
1)
var next obiiter.IBioSequence
if icat >= 0 {
next = obiiter.MakeIBioSequence(opts.BufferSize())
next = obiiter.MakeIBioSequence()
iUnique.Add(1)
@ -130,7 +127,6 @@ func IUniqueSequence(iterator obiiter.IBioSequence,
iMerged := iUnique.IMergeSequenceBatch(opts.NAValue(),
opts.StatsOn(),
opts.BufferSize(),
)
return iMerged, nil

View File

@ -0,0 +1,248 @@
package obiformats
import (
"bytes"
"encoding/csv"
"fmt"
"io"
"os"
"sync"
"time"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
log "github.com/sirupsen/logrus"
)
// CSVRecord builds the list of CSV fields describing sequence.
//
// Fields are emitted in a fixed order, each group being present only
// when the matching option is enabled: id, count, taxid followed by
// scientific name, definition, one field per key of opt.CSVKeys(),
// sequence and finally quality.
//
// A missing attribute is replaced by opt.CSVNAValue(), except for the
// scientific name of taxid 1 which is reported as "root". The quality
// field is the quality scores shifted by opt.QualityShift(), or the NA
// value when the sequence carries no qualities.
func CSVRecord(sequence *obiseq.BioSequence, opt Options) []string {
	keys := opt.CSVKeys()
	fields := make([]string, 0, len(keys)+4)

	if opt.CSVId() {
		fields = append(fields, sequence.Id())
	}

	if opt.CSVCount() {
		fields = append(fields, fmt.Sprint(sequence.Count()))
	}

	if opt.CSVTaxon() {
		taxid := sequence.Taxid()
		name, found := sequence.GetAttribute("scientific_name")

		switch {
		case found:
			// The annotation provides the scientific name.
		case taxid == 1:
			name = "root"
		default:
			name = opt.CSVNAValue()
		}

		fields = append(fields, fmt.Sprint(taxid), fmt.Sprint(name))
	}

	if opt.CSVDefinition() {
		fields = append(fields, sequence.Definition())
	}

	for _, key := range keys {
		value, found := sequence.GetAttribute(key)
		if !found {
			value = opt.CSVNAValue()
		}

		// The conversion error is deliberately ignored, as in the
		// rest of the record building: the returned string is used as is.
		text, _ := goutils.InterfaceToString(value)
		fields = append(fields, text)
	}

	if opt.CSVSequence() {
		fields = append(fields, string(sequence.Sequence()))
	}

	if opt.CSVQuality() {
		if !sequence.HasQualities() {
			fields = append(fields, opt.CSVNAValue())
		} else {
			n := sequence.Len()
			quals := sequence.Qualities()
			encoded := make([]byte, n)
			shift := uint8(opt.QualityShift())

			for j := range encoded {
				encoded[j] = uint8(quals[j]) + shift
			}

			fields = append(fields, string(encoded))
		}
	}

	return fields
}
// CSVHeader builds the header line matching the records produced with
// the same options: the column names appear in the order id, count,
// taxid, scientific_name, definition, every key of opt.CSVKeys(),
// sequence and quality, each optional column being present only when
// its option is enabled.
func CSVHeader(opt Options) []string {
	keys := opt.CSVKeys()
	header := make([]string, 0, len(keys)+4)

	// add appends the column names only when the column is enabled.
	add := func(enabled bool, names ...string) {
		if enabled {
			header = append(header, names...)
		}
	}

	add(opt.CSVId(), "id")
	add(opt.CSVCount(), "count")
	add(opt.CSVTaxon(), "taxid", "scientific_name")
	add(opt.CSVDefinition(), "definition")
	add(true, keys...)
	add(opt.CSVSequence(), "sequence")
	add(opt.CSVQuality(), "quality")

	return header
}
// FormatCVSBatch renders a batch of sequences as CSV text.
//
// The header line is emitted only for the batch of order 0, so that
// the concatenation of the formatted batches in increasing order forms
// a single CSV document. Fields are separated by opt.CSVSeparator()
// when it is a single character accepted by encoding/csv; otherwise
// the default comma is kept.
//
// It returns the bytes of the formatted batch.
func FormatCVSBatch(batch obiiter.BioSequenceBatch, opt Options) []byte {
	buff := new(bytes.Buffer)
	writer := csv.NewWriter(buff)

	// Apply the separator configured through the options. encoding/csv
	// refuses NUL, the quote, CR, LF and the invalid rune as delimiter:
	// in those cases, or when the separator is not exactly one
	// character, the default comma is left untouched.
	if sep := []rune(opt.CSVSeparator()); len(sep) == 1 {
		switch sep[0] {
		case 0, '"', '\r', '\n', '\uFFFD':
		default:
			writer.Comma = sep[0]
		}
	}

	// Write errors are not checked: the delimiter is validated above
	// and the destination is an in-memory buffer whose writes cannot fail.
	if batch.Order() == 0 {
		writer.Write(CSVHeader(opt))
	}

	for _, s := range batch.Slice() {
		writer.Write(CSVRecord(s, opt))
	}

	writer.Flush()

	return buff.Bytes()
}
// WriteCSV formats every batch read from iterator as CSV and writes
// the result to file, in increasing batch order.
//
// Formatting is done by opt.ParallelWorkers() goroutines; a single
// goroutine performs the actual writes and reorders the formatted
// chunks. Each batch is pushed onto the returned iterator once its
// formatted chunk has been handed over to the writing goroutine.
//
// The returned error is always nil in this implementation.
func WriteCSV(iterator obiiter.IBioSequence,
file io.WriteCloser,
options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options)
// NOTE(review): the error returned by CompressStream is discarded.
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
newIter := obiiter.MakeIBioSequence()
nwriters := opt.ParallelWorkers()
obiiter.RegisterAPipe()
// Unbuffered channel carrying the formatted chunks to the writing goroutine.
chunkchan := make(chan FileChunck)
// One Done() is expected from each formatting goroutine.
newIter.Add(nwriters)
var waitWriter sync.WaitGroup
// Closing goroutine: once newIter.WaitAndClose() returns, the chunk
// channel is closed, which ends the range loop of the writing goroutine.
go func() {
newIter.WaitAndClose()
// NOTE(review): chunkchan is unbuffered, so len(chunkchan) is always 0
// and this loop never sleeps.
for len(chunkchan) > 0 {
time.Sleep(time.Millisecond)
}
close(chunkchan)
waitWriter.Wait()
}()
// ff is the formatting worker: it formats each batch, sends the chunk
// tagged with the batch order, then forwards the batch to newIter.
ff := func(iterator obiiter.IBioSequence) {
for iterator.Next() {
batch := iterator.Get()
chunkchan <- FileChunck{
FormatCVSBatch(batch, opt),
batch.Order(),
}
newIter.Push(batch)
}
newIter.Done()
}
log.Debugln("Start of the CSV file writing")
// nwriters workers in total: one on iterator itself and nwriters-1 on
// iterators obtained with Split().
go ff(iterator)
for i := 0; i < nwriters-1; i++ {
go ff(iterator.Split())
}
// Order of the next chunk that has to be written to the file.
next_to_send := 0
// Chunks received ahead of their turn, indexed by their order.
received := make(map[int]FileChunck, 100)
waitWriter.Add(1)
// Writing goroutine: the only one touching file.
go func() {
for chunk := range chunkchan {
if chunk.order == next_to_send {
// NOTE(review): the errors returned by file.Write are ignored.
file.Write(chunk.text)
next_to_send++
// Flush the chunks that were waiting for the one just written.
chunk, ok := received[next_to_send]
for ok {
file.Write(chunk.text)
delete(received, next_to_send)
next_to_send++
chunk, ok = received[next_to_send]
}
} else {
// Out of order chunk: keep it until its turn comes.
received[chunk.order] = chunk
}
}
// NOTE(review): the error returned by file.Close is ignored.
file.Close()
log.Debugln("End of the CSV file writing")
obiiter.UnregisterPipe()
waitWriter.Done()
}()
return newIter, nil
}
// WriteCSVToStdout writes the sequences of iterator in CSV format on
// the standard output. The OptionDontCloseFile option is appended
// after the options supplied by the caller before delegating to WriteCSV.
func WriteCSVToStdout(iterator obiiter.IBioSequence,
	options ...WithOption) (obiiter.IBioSequence, error) {
	return WriteCSV(iterator, os.Stdout, append(options, OptionDontCloseFile())...)
}
// WriteCSVToFile writes the sequences of iterator in CSV format into
// the file named filename, creating it when needed.
//
// When the append option is set, the data are added at the end of an
// existing file; otherwise an existing file is truncated first, so that
// no content of a previous, longer file survives after the new data.
//
// When opt.HaveToSavePaired() is true, the iterator returned by
// iterator.PairedWith() is written the same way into
// opt.PairedFileName().
//
// A failure to open one of the files is reported through log.Fatalf.
// The returned iterator is the one produced by the last WriteCSV call.
func WriteCSVToFile(iterator obiiter.IBioSequence,
	filename string,
	options ...WithOption) (obiiter.IBioSequence, error) {

	opt := MakeOptions(options)

	flags := os.O_WRONLY | os.O_CREATE
	if opt.AppendFile() {
		flags |= os.O_APPEND
	} else {
		// Without O_TRUNC, overwriting a longer existing file would
		// leave its former tail after the newly written records.
		flags |= os.O_TRUNC
	}

	file, err := os.OpenFile(filename, flags, 0660)
	if err != nil {
		log.Fatalf("open file error: %v", err)
		return obiiter.NilIBioSequence, err
	}

	options = append(options, OptionCloseFile())

	iterator, err = WriteCSV(iterator, file, options...)
	if err != nil {
		// Do not start the paired output on top of a failed writer.
		return iterator, err
	}

	if opt.HaveToSavePaired() {
		var revfile *os.File

		revfile, err = os.OpenFile(opt.PairedFileName(), flags, 0660)
		if err != nil {
			log.Fatalf("open file error: %v", err)
			return obiiter.NilIBioSequence, err
		}

		iterator, err = WriteCSV(iterator.PairedWith(), revfile, options...)
	}

	return iterator, err
}

View File

@ -166,7 +166,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter := obiiter.MakeIBioSequence()
newIter.Add(1)
go func() {

View File

@ -244,9 +244,9 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
// <CR>?<LF>//<CR>?<LF>
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize())
entry_channel := make(chan _FileChunk)
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter := obiiter.MakeIBioSequence()
nworkers := opt.ParallelWorkers()
newIter.Add(nworkers)

View File

@ -19,6 +19,5 @@ func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence,
options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
opt.ParallelWorkers(),
opt.BufferSize())
opt.ParallelWorkers())
}

View File

@ -105,7 +105,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
size = -1
}
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter := obiiter.MakeIBioSequence()
newIter.Add(1)
go func() {
@ -127,7 +127,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter := obiiter.MakeIBioSequence()
newIter.Add(1)

View File

@ -71,8 +71,7 @@ func WriteFasta(iterator obiiter.IBioSequence,
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequence(buffsize)
newIter := obiiter.MakeIBioSequence()
nwriters := opt.ParallelWorkers()

View File

@ -60,8 +60,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
file, _ = goutils.CompressStream(file, opt.CompressedFile(), opt.CloseFile())
buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequence(buffsize)
newIter := obiiter.MakeIBioSequence()
nwriters := opt.ParallelWorkers()

View File

@ -113,9 +113,9 @@ func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize())
entry_channel := make(chan _FileChunk)
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter := obiiter.MakeIBioSequence()
nworkers := opt.ParallelWorkers()
newIter.Add(nworkers)

View File

@ -15,10 +15,15 @@ type __options__ struct {
closefile bool
appendfile bool
compressed bool
csv_ids bool
cvs_sequence bool
csv_id bool
csv_sequence bool
csv_quality bool
csv_definition bool
csv_count bool
csv_taxon bool
csv_keys []string
csv_separator string
csv_navalue string
paired_filename string
}
@ -40,11 +45,16 @@ func MakeOptions(setters []WithOption) Options {
closefile: false,
appendfile: false,
compressed: false,
csv_ids: true,
csv_id: true,
csv_definition: false,
cvs_sequence: true,
csv_count: false,
csv_taxon: false,
csv_sequence: true,
csv_quality: false,
csv_separator: ",",
paired_filename: "",
csv_navalue: "NA",
csv_keys: make([]string, 0),
paired_filename: "",
}
opt := Options{&o}
@ -60,10 +70,6 @@ func (opt Options) QualityShift() int {
return opt.pointer.quality_shift
}
func (opt Options) BufferSize() int {
return opt.pointer.buffer_size
}
func (opt Options) BatchSize() int {
return opt.pointer.batch_size
}
@ -96,8 +102,40 @@ func (opt Options) CompressedFile() bool {
return opt.pointer.compressed
}
func (opt Options) CSVIds() bool {
return opt.pointer.csv_ids
func (opt Options) CSVId() bool {
return opt.pointer.csv_id
}
// CSVDefinition returns the value of the csv_definition option flag.
func (opt Options) CSVDefinition() bool {
return opt.pointer.csv_definition
}
// CSVCount returns the value of the csv_count option flag.
func (opt Options) CSVCount() bool {
return opt.pointer.csv_count
}
// CSVTaxon returns the value of the csv_taxon option flag.
func (opt Options) CSVTaxon() bool {
return opt.pointer.csv_taxon
}
// CSVSequence returns the value of the csv_sequence option flag.
func (opt Options) CSVSequence() bool {
return opt.pointer.csv_sequence
}
// CSVQuality returns the value of the csv_quality option flag.
func (opt Options) CSVQuality() bool {
return opt.pointer.csv_quality
}
// CSVKeys returns the list of keys stored in the csv_keys option.
// The slice held by the options is returned directly, not a copy.
func (opt Options) CSVKeys() []string {
return opt.pointer.csv_keys
}
// CSVSeparator returns the string stored in the csv_separator option.
func (opt Options) CSVSeparator() string {
return opt.pointer.csv_separator
}
// CSVNAValue returns the string stored in the csv_navalue option.
func (opt Options) CSVNAValue() string {
return opt.pointer.csv_navalue
}
func (opt Options) HaveToSavePaired() bool {
@ -108,14 +146,6 @@ func (opt Options) PairedFileName() string {
return opt.pointer.paired_filename
}
func OptionsBufferSize(size int) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.buffer_size = size
})
return f
}
func OptionCloseFile() WithOption {
f := WithOption(func(opt Options) {
opt.pointer.closefile = true
@ -247,3 +277,82 @@ func WritePairedReadsTo(filename string) WithOption {
return f
}
// CSVId returns an option setter storing include in the csv_id flag.
func CSVId(include bool) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_id = include
	})
}
// CSVSequence returns an option setter storing include in the
// csv_sequence flag.
func CSVSequence(include bool) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_sequence = include
	})
}
// CSVQuality returns an option setter storing include in the
// csv_quality flag.
func CSVQuality(include bool) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_quality = include
	})
}
// CSVDefinition returns an option setter storing include in the
// csv_definition flag.
func CSVDefinition(include bool) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_definition = include
	})
}
// CSVCount returns an option setter storing include in the csv_count flag.
func CSVCount(include bool) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_count = include
	})
}
// CSVTaxon returns an option setter storing include in the csv_taxon flag.
func CSVTaxon(include bool) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_taxon = include
	})
}
// CSVKey returns an option setter appending key at the end of the
// csv_keys list. Keys already present in the list are kept.
func CSVKey(key string) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_keys = append(opt.pointer.csv_keys, key)
	})
}
// CSVKeys returns an option setter appending all the given keys, in
// order, at the end of the csv_keys list. Keys already present in the
// list are kept.
func CSVKeys(keys []string) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_keys = append(opt.pointer.csv_keys, keys...)
	})
}
// CSVSeparator returns an option setter storing separator in the
// csv_separator option.
func CSVSeparator(separator string) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_separator = separator
	})
}
// CSVNAValue returns an option setter storing navalue in the
// csv_navalue option.
func CSVNAValue(navalue string) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.csv_navalue = navalue
	})
}

View File

@ -60,17 +60,11 @@ type IBioSequence struct {
var NilIBioSequence = IBioSequence{pointer: nil}
func MakeIBioSequence(sizes ...int) IBioSequence {
buffsize := int32(0)
if len(sizes) > 0 {
buffsize = int32(sizes[0])
}
i := _IBioSequence{
channel: make(chan BioSequenceBatch, buffsize),
channel: make(chan BioSequenceBatch),
current: NilBioSequenceBatch,
pushBack: abool.New(),
buffer_size: buffsize,
batch_size: -1,
sequence_format: "",
finished: abool.New(),
@ -160,14 +154,6 @@ func (iterator IBioSequence) IsNil() bool {
return iterator.pointer == nil
}
func (iterator IBioSequence) BufferSize() int {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch")
}
return int(atomic.LoadInt32(&iterator.pointer.buffer_size))
}
func (iterator IBioSequence) BatchSize() int {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch")
@ -279,13 +265,8 @@ func (iterator IBioSequence) Finished() bool {
// Sorting the batches of sequences.
func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
buffsize = sizes[0]
}
newIter := MakeIBioSequence(buffsize)
newIter := MakeIBioSequence()
newIter.Add(1)
@ -338,8 +319,7 @@ func (iterator IBioSequence) Concat(iterators ...IBioSequence) IBioSequence {
allPaired = allPaired && i.IsPaired()
}
buffsize := iterator.BufferSize()
newIter := MakeIBioSequence(buffsize)
newIter := MakeIBioSequence()
newIter.Add(1)
@ -396,8 +376,7 @@ func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
}
nextCounter := goutils.AtomicCounter()
buffsize := iterator.BufferSize()
newIter := MakeIBioSequence(buffsize)
newIter := MakeIBioSequence()
newIter.Add(niterator)
@ -431,13 +410,8 @@ func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
// indicated in parameter. Rebatching implies to sort the
// source IBioSequenceBatch.
func (iterator IBioSequence) Rebatch(size int, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
buffsize = sizes[0]
}
newIter := MakeIBioSequence(buffsize)
newIter := MakeIBioSequence()
newIter.Add(1)
@ -532,14 +506,9 @@ func (iterator IBioSequence) Count(recycle bool) (int, int, int) {
// iterator following the predicate value.
func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) (IBioSequence, IBioSequence) {
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
buffsize = sizes[0]
}
trueIter := MakeIBioSequence(buffsize)
falseIter := MakeIBioSequence(buffsize)
trueIter := MakeIBioSequence()
falseIter := MakeIBioSequence()
trueIter.Add(1)
falseIter.Add(1)
@ -604,18 +573,13 @@ func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
// A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences.
func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize()
nworkers := 4
if len(sizes) > 0 {
nworkers = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
trueIter := MakeIBioSequence(buffsize)
trueIter := MakeIBioSequence()
trueIter.Add(nworkers)
@ -661,18 +625,13 @@ func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
func (iterator IBioSequence) FilterAnd(predicate obiseq.SequencePredicate,
size int, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize()
nworkers := 4
if len(sizes) > 0 {
nworkers = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
trueIter := MakeIBioSequence(buffsize)
trueIter := MakeIBioSequence()
trueIter.Add(nworkers)
@ -740,13 +699,7 @@ func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
func IBatchOver(data obiseq.BioSequenceSlice,
size int, sizes ...int) IBioSequence {
buffsize := 0
if len(sizes) > 0 {
buffsize = sizes[0]
}
newIter := MakeIBioSequence(buffsize)
newIter := MakeIBioSequence()
newIter.Add(1)

View File

@ -36,7 +36,6 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
batchsize := 5000
buffsize := 2
outputs := make(map[int]IBioSequence, 100)
slices := make(map[int]*obiseq.BioSequenceSlice, 100)
@ -47,9 +46,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
batchsize = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
jobDone := sync.WaitGroup{}
lock := sync.Mutex{}
@ -80,7 +77,7 @@ func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, siz
orders[key] = 0
lock.Lock()
outputs[key] = MakeIBioSequence(buffsize)
outputs[key] = MakeIBioSequence()
lock.Unlock()
news <- key

View File

@ -4,16 +4,12 @@ import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
func (iterator IBioSequence) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequence {
batchsize := 100
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
batchsize = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
newIter := MakeIBioSequence(buffsize)
newIter := MakeIBioSequence()
newIter.Add(1)

View File

@ -6,7 +6,6 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
// That method allows for applying a SeqWorker function on every sequences.
//
// Sequences are provided by the iterator and modified sequences are pushed
@ -17,17 +16,12 @@ import (
// - The second the size of the chanel buffer. By default set to the same value than the input buffer.
func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int) IBioSequence {
nworkers := 4
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
nworkers = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
newIter := MakeIBioSequence(buffsize)
newIter := MakeIBioSequence()
newIter.Add(nworkers)
@ -64,17 +58,12 @@ func (iterator IBioSequence) MakeIWorker(worker obiseq.SeqWorker, sizes ...int)
func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
worker obiseq.SeqWorker, sizes ...int) IBioSequence {
nworkers := 4
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
nworkers = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
newIter := MakeIBioSequence(buffsize)
newIter := MakeIBioSequence()
newIter.Add(nworkers)
@ -112,17 +101,12 @@ func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePre
func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, sizes ...int) IBioSequence {
nworkers := 4
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
nworkers = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
newIter := MakeIBioSequence(buffsize)
newIter := MakeIBioSequence()
newIter.Add(nworkers)
@ -140,7 +124,7 @@ func (iterator IBioSequence) MakeISliceWorker(worker obiseq.SeqSliceWorker, size
newIter.Done()
}
log.Printf("Start of the batch slice workers on %d workers (buffer : %d)\n", nworkers, buffsize)
log.Printf("Start of the batch slice workers on %d workers\n", nworkers)
for i := 0; i < nworkers-1; i++ {
go f(iterator.Split())
}
@ -168,4 +152,3 @@ func SliceWorkerPipe(worker obiseq.SeqSliceWorker, sizes ...int) Pipeable {
return f
}

View File

@ -11,7 +11,6 @@ type _Options struct {
withProgressBar bool
parallelWorkers int
batchSize int
bufferSize int
}
// Options stores a set of option usable by the
@ -56,16 +55,6 @@ func OptionAllowedMismatches(count int) WithOption {
return f
}
// OptionBufferSize sets the requested channel
// buffer size.
func OptionBufferSize(size int) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.bufferSize = size
})
return f
}
// OptionParallelWorkers sets how many search
// jobs will be run in parallel.
func OptionParallelWorkers(nworkers int) WithOption {
@ -102,12 +91,6 @@ func (options Options) WithProgressBar() bool {
return options.pointer.withProgressBar
}
// BufferSize returns the size of the channel
// buffer specified by the options
func (options Options) BufferSize() int {
return options.pointer.bufferSize
}
// BatchSize returns the size of the
// sequence batch used by the PCR algorithm
func (options Options) BatchSize() int {
@ -130,7 +113,6 @@ func MakeOptions(setters []WithOption) Options {
withProgressBar: false,
parallelWorkers: 4,
batchSize: 1000,
bufferSize: 100,
}
opt := Options{&o}

View File

@ -11,12 +11,11 @@ import (
)
var _Debug = false
var _ParallelWorkers = runtime.NumCPU() * 2 - 1
var _ParallelWorkers = runtime.NumCPU()*2 - 1
var _MaxAllowedCPU = runtime.NumCPU()
var _BufferSize = 1
var _BatchSize = 5000
type ArgumentParser func([]string) (*getoptions.GetOpt, []string, error)
type ArgumentParser func([]string) (*getoptions.GetOpt, []string)
func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser {
@ -38,16 +37,20 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
o(options)
}
return func(args []string) (*getoptions.GetOpt, []string, error) {
return func(args []string) (*getoptions.GetOpt, []string) {
remaining, err := options.Parse(args[1:])
if err != nil {
log.Fatalf("Error on the commande line : %v",err)
}
// Setup the maximum number of CPU usable by the program
runtime.GOMAXPROCS(_MaxAllowedCPU)
if options.Called("max-cpu") {
log.Printf("CPU number limited to %d", _MaxAllowedCPU)
if ! options.Called("workers") {
_ParallelWorkers=_MaxAllowedCPU * 2 - 1
if !options.Called("workers") {
_ParallelWorkers = _MaxAllowedCPU*2 - 1
log.Printf("Number of workers set %d", _ParallelWorkers)
}
}
@ -67,7 +70,7 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
log.Debugln("Switch to debug level logging")
}
return options, remaining, err
return options, remaining
}
}
@ -88,11 +91,6 @@ func CLIMaxCPU() int {
return _MaxAllowedCPU
}
// CLIBufferSize returns the expeted channel buffer size for obitools
func CLIBufferSize() int {
return _BufferSize
}
// CLIBatchSize returns the expected size of the sequence batches
func CLIBatchSize() int {
return _BatchSize

View File

@ -8,6 +8,15 @@ import (
log "github.com/sirupsen/logrus"
)
// HasAttribute reports whether the annotation map of the sequence
// holds an entry for key. A sequence without annotation map has no
// attribute at all. An entry whose value is nil still counts as present.
func (s *BioSequence) HasAttribute(key string) bool {
	if s.annotations == nil {
		return false
	}

	_, found := s.annotations[key]

	return found
}
// A method that returns the value of the key in the annotation map.
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
var val interface{}

View File

@ -278,3 +278,28 @@ func (s *BioSequence) Clear() {
s.sequence = s.sequence[0:0]
}
// Composition counts the symbols of the sequence.
//
// The returned map always holds the five keys 'a', 'c', 'g', 't' and
// 'o'. The first four give the number of occurrences of the matching
// lower case symbol; 'o' gathers every other symbol, upper case
// letters included.
func (s *BioSequence) Composition() map[byte]int {
	counts := map[byte]int{'a': 0, 'c': 0, 'g': 0, 't': 0, 'o': 0}

	for _, symbol := range s.sequence {
		switch symbol {
		case 'a':
			counts['a']++
		case 'c':
			counts['c']++
		case 'g':
			counts['g']++
		case 't':
			counts['t']++
		default:
			counts['o']++
		}
	}

	return counts
}

View File

@ -316,3 +316,4 @@ func RotateClassifier(size int) *BioSequenceClassifier {
c := BioSequenceClassifier{code, value, reset, clone,"RotateClassifier"}
return &c
}

View File

@ -4,22 +4,21 @@ import (
"context"
"fmt"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obieval"
log "github.com/sirupsen/logrus"
)
func Expression(expression string) func(*BioSequence) (interface{},error) {
func Expression(expression string) func(*BioSequence) (interface{}, error) {
exp, err := obieval.OBILang.NewEvaluable(expression)
exp, err := OBILang.NewEvaluable(expression)
if err != nil {
log.Fatalf("Error in the expression : %s", expression)
}
f := func(sequence *BioSequence) (interface{},error) {
f := func(sequence *BioSequence) (interface{}, error) {
return exp(context.Background(),
map[string]interface{}{
"annotations": sequence.Annotations(),
"sequence": sequence,
"annotations": sequence.Annotations(),
"sequence": sequence,
},
)
}
@ -30,14 +29,14 @@ func Expression(expression string) func(*BioSequence) (interface{},error) {
func EditIdWorker(expression string) SeqWorker {
e := Expression(expression)
f := func(sequence *BioSequence) *BioSequence {
v,err := e(sequence)
v, err := e(sequence)
if err != nil {
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
expression,
sequence.Id())
}
sequence.SetId(fmt.Sprintf("%v",v))
sequence.SetId(fmt.Sprintf("%v", v))
return sequence
}
@ -47,16 +46,16 @@ func EditIdWorker(expression string) SeqWorker {
func EditAttributeWorker(key string, expression string) SeqWorker {
e := Expression(expression)
f := func(sequence *BioSequence) *BioSequence {
v,err := e(sequence)
v, err := e(sequence)
if err != nil {
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
expression,
sequence.Id())
}
sequence.SetAttribute(key,v)
sequence.SetAttribute(key, v)
return sequence
}
return f
}
}

View File

@ -1,4 +1,4 @@
package obieval
package obiseq
import (
"fmt"
@ -174,8 +174,19 @@ var OBILang = gval.NewLanguage(
log.Fatalf("%v cannot be converted to a boolan value", args[0])
}
return val, nil
}),
gval.Function("ifelse", func(args ...interface{}) (interface{}, error) {
if args[0].(bool) {
return args[1], nil
} else {
return args[2], nil
}
}),
gval.Function("gcskew", func(args ...interface{}) (interface{}, error) {
composition := (args[0].(*BioSequence)).Composition()
return float64(composition['g']-composition['c']) / float64(composition['g']+composition['c']), nil
}),
gval.Function("composition", func(args ...interface{}) (interface{}, error) {
return (args[0].(*BioSequence)).Composition(), nil
}))
func Expression(expression string) (gval.Evaluable, error) {
return OBILang.NewEvaluable(expression)
}

View File

@ -5,7 +5,6 @@ import (
"fmt"
"regexp"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obieval"
log "github.com/sirupsen/logrus"
)
@ -256,7 +255,7 @@ func IsIdIn(ids ...string) SequencePredicate {
func ExpressionPredicat(expression string) SequencePredicate {
exp, err := obieval.OBILang.NewEvaluable(expression)
exp, err := OBILang.NewEvaluable(expression)
if err != nil {
log.Fatalf("Error in the expression : %s", expression)
}

View File

@ -0,0 +1,63 @@
package obicleandb
import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obigrep"
)
// ICleanDB builds the cleaning pipeline applied to a sequence iterator.
//
// The steps are, in order:
//   - dereplication of the sequences with obichunk.IUniqueSequence,
//     using "taxid" as sub-category;
//   - selection of the sequences whose taxon is valid and has the
//     required ranks: the ranks selected through obigrep when some are
//     given, species, genus and family otherwise;
//   - annotation of the remaining sequences by the species, genus and
//     family workers of the taxonomy.
//
// A failure of the dereplication step stops the program through log.Fatal.
func ICleanDB(itertator obiiter.IBioSequence) obiiter.IBioSequence {
	var rankPredicate obiseq.SequencePredicate

	// Make sequence dereplication with a constraint on the taxid.
	// To be merged, both sequences must have the same taxid.
	options := make([]obichunk.WithOption, 0, 30)
	options = append(options,
		obichunk.OptionBatchCount(100),
		obichunk.OptionSortOnMemory(),
		obichunk.OptionSubCategory("taxid"),
		obichunk.OptionsParallelWorkers(
			obioptions.CLIParallelWorkers()),
		obichunk.OptionsBatchSize(
			obioptions.CLIBatchSize()),
		obichunk.OptionNAValue("NA"),
	)

	unique, err := obichunk.IUniqueSequence(itertator, options...)
	if err != nil {
		log.Fatal(err)
	}

	taxonomy := obigrep.CLILoadSelectedTaxonomy()

	if len(obigrep.CLIRequiredRanks()) == 0 {
		speciesAndGenus := taxonomy.HasRequiredRank("species").And(taxonomy.HasRequiredRank("genus"))
		rankPredicate = speciesAndGenus.And(taxonomy.HasRequiredRank("family"))
	} else {
		rankPredicate = obigrep.CLIHasRankDefinedPredicate()
	}

	goodTaxa := taxonomy.IsAValidTaxon(CLIUpdateTaxids()).And(rankPredicate)

	usable := unique.FilterOn(goodTaxa,
		obioptions.CLIBatchSize(),
		obioptions.CLIParallelWorkers())

	withSpecies := usable.MakeIWorker(taxonomy.MakeSetSpeciesWorker(),
		obioptions.CLIParallelWorkers())

	withGenus := withSpecies.MakeIWorker(taxonomy.MakeSetGenusWorker(),
		obioptions.CLIParallelWorkers())

	annotated := withGenus.MakeIWorker(taxonomy.MakeSetFamilyWorker(),
		obioptions.CLIParallelWorkers())

	// annotated.MakeIConditionalWorker(obiseq.IsMoreAbundantOrEqualTo(3),1000)

	return annotated
}

View File

@ -60,6 +60,21 @@ func InputOptionSet(options *getoptions.GetOpt) {
}
// OutputModeOptionSet registers on options the command-line flags that
// control how the output is produced:
//
//   - --no-progressbar: disables the progress bar printing;
//   - --compress / -Z: requests a compressed output;
//   - --out / -o FILENAME: file name used for saving the output.
//
// The parsed values are stored in the package-level variables
// __no_progress_bar__, __compressed__ and __output_file_name__.
func OutputModeOptionSet(options *getoptions.GetOpt) {
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
options.Description("Disable the progress bar printing"))
options.BoolVar(&__compressed__, "compress", false,
options.Alias("Z"),
options.Description("Output is compressed"))
options.StringVar(&__output_file_name__, "out", __output_file_name__,
options.Alias("o"),
options.ArgName("FILENAME"),
options.Description("Filename used for saving the output"),
)
}
func OutputOptionSet(options *getoptions.GetOpt) {
options.BoolVar(&__output_in_fasta__, "fasta-output", false,
options.Description("Read data following the ecoPCR output format."))
@ -73,19 +88,7 @@ func OutputOptionSet(options *getoptions.GetOpt) {
options.Alias("O"),
options.Description("output FASTA/FASTQ title line annotations follow OBI format."))
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
options.Description("Disable the progress bar printing"))
options.BoolVar(&__compressed__, "compress", false,
options.Alias("Z"),
options.Description("Output is compressed"))
options.StringVar(&__output_file_name__, "out", __output_file_name__,
options.Alias("o"),
options.ArgName("FILENAME"),
options.Description("Filename used for saving the output"),
)
OutputModeOptionSet(options)
}
func PairedFilesOptionSet(options *getoptions.GetOpt) {
@ -197,4 +200,4 @@ func CLIHasPairedFile() bool {
}
func CLIPairedFileName() string {
return __paired_file_name__
}
}

View File

@ -48,6 +48,10 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
strings.HasSuffix(path, "fasta.gz") ||
strings.HasSuffix(path, "fastq") ||
strings.HasSuffix(path, "fastq.gz") ||
strings.HasSuffix(path, "seq") ||
strings.HasSuffix(path, "seq.gz") ||
strings.HasSuffix(path, "gb") ||
strings.HasSuffix(path, "gb.gz") ||
strings.HasSuffix(path, "dat") ||
strings.HasSuffix(path, "dat.gz") ||
strings.HasSuffix(path, "ecopcr") ||
@ -82,13 +86,12 @@ func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader))
}
nworkers := obioptions.CLIParallelWorkers() // / 4
nworkers := obioptions.CLIParallelWorkers()
if nworkers < 2 {
nworkers = 2
}
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
opts = append(opts, obiformats.OptionsQualityShift(CLIInputQualityShift()))

View File

@ -60,7 +60,6 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
}
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))

View File

@ -0,0 +1,61 @@
package obicsv
import (
"log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
)
// CLIWriteCSV writes the sequences provided by iterator in CSV format,
// configured from the command-line options of the obicsv, obiconvert and
// obioptions packages.
//
// The data are written to filenames[0] when at least one file name is given
// and to the standard output otherwise; additional file names are ignored.
//
// When terminalAction is true, Recycle is called on the iterator returned by
// the writer and obiiter.NilIBioSequence is returned. Otherwise the
// writer's iterator is returned so that it can be chained.
//
// An error reported by the writer is fatal: it is logged with log.Fatalf,
// which terminates the process.
func CLIWriteCSV(iterator obiiter.IBioSequence,
	terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {

	if obiconvert.CLIProgressBar() {
		iterator = iterator.Speed()
	}

	var newIter obiiter.IBioSequence

	opts := make([]obiformats.WithOption, 0, 10)

	// Writing uses a quarter of the configured workers, with a floor of two.
	nworkers := obioptions.CLIParallelWorkers() / 4
	if nworkers < 2 {
		nworkers = 2
	}

	opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
	opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
	opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()))
	opts = append(opts, obiformats.OptionsCompressed(obiconvert.CLICompressed()))

	// CLIToBeKeptAttributes has side effects: it switches on the count and
	// taxon flags when "count", "taxid" or "scientific_name" is among the
	// kept keys. Call arguments are evaluated left to right, so it must run
	// before CLIPrintCount and CLIPrintTaxon are read; otherwise those keys
	// would be dropped without their column being enabled.
	keptKeys := CLIToBeKeptAttributes()

	opts = append(opts,
		obiformats.CSVId(CLIPrintId()),
		obiformats.CSVCount(CLIPrintCount()),
		obiformats.CSVTaxon(CLIPrintTaxon()),
		obiformats.CSVDefinition(CLIPrintDefinition()),
		obiformats.CSVKeys(keptKeys),
	)

	var err error

	if len(filenames) == 0 {
		newIter, err = obiformats.WriteCSVToStdout(iterator, opts...)
	} else {
		newIter, err = obiformats.WriteCSVToFile(iterator, filenames[0], opts...)
	}

	if err != nil {
		// log.Fatalf exits the process; the return below only keeps the
		// function well-formed should the logger ever be made non-fatal.
		log.Fatalf("Write file error: %v", err)
		return obiiter.NilIBioSequence, err
	}

	if terminalAction {
		newIter.Recycle()
		return obiiter.NilIBioSequence, nil
	}

	return newIter, nil
}

View File

@ -0,0 +1,126 @@
package obicsv
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"github.com/DavidGamba/go-getoptions"
)
// Package-level state backing the CSV command-line options. The values
// given here are the defaults used when the matching option is absent.
var (
	// Column selection flags.
	_outputIds        = true
	_outputCount      = false
	_outputTaxon      = false
	_outputSequence   = true
	_outputQuality    = true
	_outputDefinition = false

	// Attribute selection.
	_obipairing  = false
	_autoColumns = false
	_keepOnly    = []string{}

	// String written for non available values.
	_naValue = "NA"

	// _softAttributes maps a tool name to the attribute keys that tool
	// adds to the sequences.
	_softAttributes = map[string][]string{
		"obipairing": {
			"mode",
			"seq_a_single",
			"seq_b_single",
			"ali_dir",
			"score",
			"score_norm",
			"seq_ab_match",
			"pairing_mismatches",
		},
	}
)
// CSVOptionSet registers on options the command-line flags that select what
// is written in the CSV output: the ids, sequence, quality, definition,
// count and taxon columns, the obipairing attribute set, the automatic
// column proposal, the list of kept attribute keys (--keep / -k) and the
// string used for non available values (--na-value).
//
// The parsed values are stored in the package-level option variables and
// are read back through the CLI* accessor functions of this package.
//
// NOTE(review): --ids, --sequence and --quality are registered with a
// default of true. go-getoptions documents that a Bool option found on the
// command line takes the opposite of its default, so passing these flags
// would presumably disable the column although the help text says
// "Prints ..." — confirm the intended defaults.
func CSVOptionSet(options *getoptions.GetOpt) {
	options.BoolVar(&_outputIds, "ids", _outputIds,
		options.Alias("i"),
		options.Description("Prints sequence ids in the output."))

	options.BoolVar(&_outputSequence, "sequence", _outputSequence,
		options.Alias("s"),
		options.Description("Prints sequence itself in the output."))

	options.BoolVar(&_outputQuality, "quality", _outputQuality,
		options.Alias("q"),
		options.Description("Prints sequence quality in the output."))

	options.BoolVar(&_outputDefinition, "definition", _outputDefinition,
		options.Alias("d"),
		options.Description("Prints sequence definition in the output."))

	options.BoolVar(&_autoColumns, "auto", _autoColumns,
		options.Description("Based on the first sequences, propose a list of attributes to print"))

	options.BoolVar(&_outputCount, "count", _outputCount,
		options.Description("Prints the count attribute in the output"))

	options.BoolVar(&_outputTaxon, "taxon", _outputTaxon,
		options.Description("Prints the NCBI taxid and its related scientific name"))

	options.BoolVar(&_obipairing, "obipairing", _obipairing,
		options.Description("Prints the attributes added by obipairing"))

	// Each -k occurrence takes exactly one KEY (min and max arguments = 1).
	options.StringSliceVar(&_keepOnly, "keep", 1, 1,
		options.Alias("k"),
		options.ArgName("KEY"),
		options.Description("Keeps only attribute with key <KEY>. Several -k options can be combined."))

	options.StringVar(&_naValue, "na-value", _naValue,
		options.ArgName("NAVALUE"),
		options.Description("A string representing non available values in the CSV file."))
}
// OptionSet registers on options the full set of command-line flags of the
// CSV tool: first the output mode options provided by
// obiconvert.OutputModeOptionSet, then the CSV specific options of
// CSVOptionSet.
func OptionSet(options *getoptions.GetOpt) {
obiconvert.OutputModeOptionSet(options)
CSVOptionSet(options)
}
// CLIPrintId returns the current value of the flag bound to the --ids / -i
// option (true by default).
func CLIPrintId() bool {
return _outputIds
}
// CLIPrintSequence returns the current value of the flag bound to the
// --sequence / -s option (true by default).
func CLIPrintSequence() bool {
return _outputSequence
}
// CLIPrintCount returns the current value of the count flag. The flag is
// bound to the --count option (false by default) and is also set to true by
// CLIToBeKeptAttributes when "count" is among the kept keys.
func CLIPrintCount() bool {
return _outputCount
}
// CLIPrintTaxon returns the current value of the taxon flag. The flag is
// bound to the --taxon option (false by default) and is also set to true by
// CLIToBeKeptAttributes when "taxid" or "scientific_name" is among the kept
// keys.
func CLIPrintTaxon() bool {
return _outputTaxon
}
// CLIPrintQuality returns the current value of the flag bound to the
// --quality / -q option (true by default).
func CLIPrintQuality() bool {
return _outputQuality
}
// CLIPrintDefinition returns the current value of the flag bound to the
// --definition / -d option (false by default).
func CLIPrintDefinition() bool {
return _outputDefinition
}
// CLIAutoColumns returns the current value of the flag bound to the --auto
// option (false by default).
func CLIAutoColumns() bool {
return _autoColumns
}
// CLIHasToBeKeptAttributes reports whether the list of kept attribute keys
// is currently non-empty.
//
// It only looks at the list as it stands: the attributes implied by
// --obipairing are added to that list by CLIToBeKeptAttributes, so they are
// not counted before that function has been called.
func CLIHasToBeKeptAttributes() bool {
return len(_keepOnly) > 0
}
// CLIToBeKeptAttributes returns the attribute keys to keep in the CSV
// output: the keys given with --keep / -k, extended with the obipairing
// attribute set when --obipairing is given.
//
// The keys "count", "taxid" and "scientific_name" are never returned. When
// present they are removed from the list and switch on the count
// (for "count") or taxon (for "taxid" and "scientific_name") output flag
// instead, as read by CLIPrintCount and CLIPrintTaxon.
//
// The function updates the package-level list but is idempotent: calling it
// several times does not add the obipairing attributes more than once, and
// every occurrence of a special key is removed.
func CLIToBeKeptAttributes() []string {
	if _obipairing {
		// Only add the attributes that are not already requested, so that
		// repeated calls (or an explicit -k of the same key) do not
		// produce duplicated entries.
		for _, attr := range _softAttributes["obipairing"] {
			if !goutils.Contains(_keepOnly, attr) {
				_keepOnly = append(_keepOnly, attr)
			}
		}
	}

	// Build the filtered list in a fresh slice so that a slice returned by
	// a previous call is not modified behind the caller's back.
	kept := make([]string, 0, len(_keepOnly))
	for _, key := range _keepOnly {
		switch key {
		case "count":
			_outputCount = true
		case "taxid", "scientific_name":
			_outputTaxon = true
		default:
			kept = append(kept, key)
		}
	}
	_keepOnly = kept

	return _keepOnly
}
// CLINAValue returns the string bound to the --na-value option, described
// there as representing non available values in the CSV file ("NA" by
// default).
func CLINAValue() string {
return _naValue
}

View File

@ -31,7 +31,6 @@ func DistributeSequence(sequences obiiter.IBioSequence) {
}
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
obiformats.OptionsBufferSize(obioptions.CLIBufferSize()),
obiformats.OptionsBatchSize(obioptions.CLIBatchSize()),
obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()),
obiformats.OptionsAppendFile(CLIAppendSequences()),

View File

@ -39,7 +39,6 @@ func CLIFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
newIter = iterator.FilterOn(predicate,
obioptions.CLIBatchSize(),
obioptions.CLIParallelWorkers(),
obioptions.CLIBufferSize(),
)
}
} else {

View File

@ -20,7 +20,6 @@ func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error
obingslibrary.OptionDiscardErrors(!CLIConservedErrors()),
obingslibrary.OptionParallelWorkers(obioptions.CLIParallelWorkers()),
obingslibrary.OptionBatchSize(obioptions.CLIBatchSize()),
obingslibrary.OptionBufferSize(obioptions.CLIBufferSize()),
)
ngsfilter, err := CLINGSFIlter()

View File

@ -211,17 +211,13 @@ func IAssemblePESequencesBatch(iterator obiiter.IBioSequence,
}
nworkers := obioptions.CLIMaxCPU() * 3 / 2
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
nworkers = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
newIter := obiiter.MakeIBioSequence(buffsize)
newIter := obiiter.MakeIBioSequence()
newIter.Add(nworkers)

View File

@ -51,8 +51,6 @@ func Unique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
options = append(options,
obichunk.OptionsParallelWorkers(
obioptions.CLIParallelWorkers()),
obichunk.OptionsBufferSize(
obioptions.CLIBufferSize()),
obichunk.OptionsBatchSize(
obioptions.CLIBatchSize()),
obichunk.OptionNAValue(CLINAValue()),