rename the iterator class

This commit is contained in:
2023-01-22 22:04:17 +01:00
parent 4592855095
commit f97f92df72
37 changed files with 399 additions and 354 deletions

View File

@ -35,12 +35,12 @@ func find(root, ext string) []string {
return a return a
} }
func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch, func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequenceBatch, error) { sizes ...int) (obiiter.IBioSequence, error) {
dir, err := tempDir() dir, err := tempDir()
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
bufferSize := iterator.BufferSize() bufferSize := iterator.BufferSize()
@ -49,7 +49,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
bufferSize = sizes[0] bufferSize = sizes[0]
} }
newIter := obiiter.MakeIBioSequenceBatch(bufferSize) newIter := obiiter.MakeIBioSequence(bufferSize)
newIter.Add(1) newIter.Add(1)

View File

@ -1,16 +1,17 @@
package obichunk package obichunk
import ( import (
log "github.com/sirupsen/logrus"
"sync" "sync"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
func ISequenceChunk(iterator obiiter.IBioSequenceBatch, func ISequenceChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequenceBatch, error) { sizes ...int) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize() bufferSize := iterator.BufferSize()
@ -18,7 +19,7 @@ func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
bufferSize = sizes[0] bufferSize = sizes[0]
} }
newIter := obiiter.MakeIBioSequenceBatch(bufferSize) newIter := obiiter.MakeIBioSequence(bufferSize)
newIter.Add(1) newIter.Add(1)

View File

@ -56,9 +56,9 @@ func (by _By) Sort(seqs []sSS) {
// End of the sort interface // End of the sort interface
// //
func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch, func ISequenceSubChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequenceBatch, error) { sizes ...int) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize() bufferSize := iterator.BufferSize()
nworkers := 4 nworkers := 4
@ -71,7 +71,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
bufferSize = sizes[1] bufferSize = sizes[1]
} }
newIter := obiiter.MakeIBioSequenceBatch(bufferSize) newIter := obiiter.MakeIBioSequence(bufferSize)
newIter.Add(nworkers) newIter.Add(nworkers)
@ -88,7 +88,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
return neworder return neworder
} }
ff := func(iterator obiiter.IBioSequenceBatch, ff := func(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier) { classifier *obiseq.BioSequenceClassifier) {
ordered := make([]sSS, 100) ordered := make([]sSS, 100)

View File

@ -12,14 +12,14 @@ import (
// Runs dereplication algorithm on a obiiter.IBioSequenceBatch // Runs dereplication algorithm on a obiiter.IBioSequenceBatch
// iterator. // iterator.
func IUniqueSequence(iterator obiiter.IBioSequenceBatch, func IUniqueSequence(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
var err error var err error
opts := MakeOptions(options) opts := MakeOptions(options)
nworkers := opts.ParallelWorkers() nworkers := opts.ParallelWorkers()
iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize()) iUnique := obiiter.MakeIBioSequence(opts.BufferSize())
iterator = iterator.Speed("Splitting data set") iterator = iterator.Speed("Splitting data set")
@ -32,7 +32,7 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
0) 0)
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
} else { } else {
@ -41,7 +41,7 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
opts.BufferSize()) opts.BufferSize())
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
} }
@ -65,14 +65,14 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
return neworder return neworder
} }
var ff func(obiiter.IBioSequenceBatch, var ff func(obiiter.IBioSequence,
*obiseq.BioSequenceClassifier, *obiseq.BioSequenceClassifier,
int) int)
cat := opts.Categories() cat := opts.Categories()
na := opts.NAValue() na := opts.NAValue()
ff = func(input obiiter.IBioSequenceBatch, ff = func(input obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
icat int) { icat int) {
icat-- icat--
@ -81,9 +81,9 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
1, 1,
opts.BufferSize()) opts.BufferSize())
var next obiiter.IBioSequenceBatch var next obiiter.IBioSequence
if icat >= 0 { if icat >= 0 {
next = obiiter.MakeIBioSequenceBatch(opts.BufferSize()) next = obiiter.MakeIBioSequence(opts.BufferSize())
iUnique.Add(1) iUnique.Add(1)

View File

@ -10,13 +10,13 @@ import (
func ReadSequencesBatchFromFiles(filenames []string, func ReadSequencesBatchFromFiles(filenames []string,
reader IBatchReader, reader IBatchReader,
concurrent_readers int, concurrent_readers int,
options ...WithOption) obiiter.IBioSequenceBatch { options ...WithOption) obiiter.IBioSequence {
if reader == nil { if reader == nil {
reader = ReadSequencesFromFile reader = ReadSequencesFromFile
} }
batchiter := obiiter.MakeIBioSequenceBatch(0) batchiter := obiiter.MakeIBioSequence(0)
nextCounter := goutils.AtomicCounter() nextCounter := goutils.AtomicCounter()
batchiter.Add(concurrent_readers) batchiter.Add(concurrent_readers)

View File

@ -2,4 +2,4 @@ package obiformats
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
type IBatchReader func(string, ...WithOption) (obiiter.IBioSequenceBatch, error) type IBatchReader func(string, ...WithOption) (obiiter.IBioSequence, error)

View File

@ -2,15 +2,16 @@ package obiformats
import ( import (
"fmt" "fmt"
log "github.com/sirupsen/logrus"
"sync" "sync"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
) )
type SequenceBatchWriterToFile func(iterator obiiter.IBioSequenceBatch, type SequenceBatchWriterToFile func(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) options ...WithOption) (obiiter.IBioSequence, error)
func WriterDispatcher(prototypename string, func WriterDispatcher(prototypename string,
dispatcher obiiter.IDistribute, dispatcher obiiter.IDistribute,

View File

@ -120,7 +120,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error)
return bseq, nil return bseq, nil
} }
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch { func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
tag := make([]byte, 11) tag := make([]byte, 11)
n, _ := reader.Read(tag) n, _ := reader.Read(tag)
@ -166,7 +166,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBat
opt := MakeOptions(options) opt := MakeOptions(options)
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter.Add(1) newIter.Add(1)
go func() { go func() {
@ -208,7 +208,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBat
return newIter return newIter
} }
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) { func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
var err error var err error
@ -216,7 +216,7 @@ func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IB
reader, err = os.Open(filename) reader, err = os.Open(filename)
if err != nil { if err != nil {
log.Printf("open file error: %+v", err) log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
// Test if the flux is compressed by gzip // Test if the flux is compressed by gzip

View File

@ -84,7 +84,7 @@ func _EndOfLastEntry(buff []byte) int {
return -1 return -1
} }
func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) { func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
for chunks := range input { for chunks := range input {
scanner := bufio.NewScanner(chunks.raw) scanner := bufio.NewScanner(chunks.raw)
@ -201,11 +201,11 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
// 6 5 43 2 1 // 6 5 43 2 1
// //
// <CR>?<LF>//<CR>?<LF> // <CR>?<LF>//<CR>?<LF>
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch { func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize()) entry_channel := make(chan _FileChunk, opt.BufferSize())
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
nworkers := opt.ParallelWorkers() nworkers := opt.ParallelWorkers()
newIter.Add(nworkers) newIter.Add(nworkers)
@ -224,7 +224,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch
return newIter return newIter
} }
func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) { func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
var err error var err error
@ -232,7 +232,7 @@ func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSeque
reader, err = os.Open(filename) reader, err = os.Open(filename)
if err != nil { if err != nil {
log.Printf("open file error: %+v", err) log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
// Test if the flux is compressed by gzip // Test if the flux is compressed by gzip

View File

@ -15,8 +15,8 @@ func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
} }
} }
func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequenceBatch, func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence,
options ...WithOption) obiiter.IBioSequenceBatch { options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()), return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
opt.ParallelWorkers(), opt.ParallelWorkers(),

View File

@ -19,7 +19,7 @@ import (
) )
func _FastseqReader(seqfile C.fast_kseq_p, func _FastseqReader(seqfile C.fast_kseq_p,
iterator obiiter.IBioSequenceBatch, iterator obiiter.IBioSequence,
batch_size int) { batch_size int) {
var comment string var comment string
i := 0 i := 0
@ -72,7 +72,6 @@ func _FastseqReader(seqfile C.fast_kseq_p,
ii = 0 ii = 0
} }
} }
if len(slice) > 0 { if len(slice) > 0 {
iterator.Push(obiiter.MakeBioSequenceBatch(i, slice)) iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
@ -81,7 +80,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
} }
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) { func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
name := C.CString(filename) name := C.CString(filename)
@ -94,7 +93,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
if pointer == nil { if pointer == nil {
err = fmt.Errorf("cannot open file %s", filename) err = fmt.Errorf("cannot open file %s", filename)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
size := int64(-1) size := int64(-1)
@ -106,7 +105,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
size = -1 size = -1
} }
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter.Add(1) newIter.Add(1)
go func() { go func() {
@ -126,9 +125,9 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
return newIter, err return newIter, err
} }
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequenceBatch { func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter.Add(1) newIter.Add(1)

View File

@ -61,13 +61,13 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []b
return bs.Bytes() return bs.Bytes()
} }
func WriteFasta(iterator obiiter.IBioSequenceBatch, func WriteFasta(iterator obiiter.IBioSequence,
file io.Writer, file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequenceBatch(buffsize) newIter := obiiter.MakeIBioSequence(buffsize)
nwriters := opt.ParallelWorkers() nwriters := opt.ParallelWorkers()
@ -83,7 +83,7 @@ func WriteFasta(iterator obiiter.IBioSequenceBatch,
log.Debugln("End of the fasta file writing") log.Debugln("End of the fasta file writing")
}() }()
ff := func(iterator obiiter.IBioSequenceBatch) { ff := func(iterator obiiter.IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
@ -136,21 +136,21 @@ func WriteFasta(iterator obiiter.IBioSequenceBatch,
return newIter, nil return newIter, nil
} }
func WriteFastaToStdout(iterator obiiter.IBioSequenceBatch, func WriteFastaToStdout(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteFasta(iterator, os.Stdout, options...) return WriteFasta(iterator, os.Stdout, options...)
} }
func WriteFastaToFile(iterator obiiter.IBioSequenceBatch, func WriteFastaToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename) file, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
options = append(options, OptionCloseFile()) options = append(options, OptionCloseFile())

View File

@ -51,13 +51,13 @@ type FileChunck struct {
order int order int
} }
func WriteFastq(iterator obiiter.IBioSequenceBatch, func WriteFastq(iterator obiiter.IBioSequence,
file io.Writer, file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequenceBatch(buffsize) newIter := obiiter.MakeIBioSequence(buffsize)
nwriters := opt.ParallelWorkers() nwriters := opt.ParallelWorkers()
@ -77,7 +77,7 @@ func WriteFastq(iterator obiiter.IBioSequenceBatch,
log.Debugln("End of the fastq file writing") log.Debugln("End of the fastq file writing")
}() }()
ff := func(iterator obiiter.IBioSequenceBatch) { ff := func(iterator obiiter.IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
chunk := FileChunck{ chunk := FileChunck{
@ -129,21 +129,21 @@ func WriteFastq(iterator obiiter.IBioSequenceBatch,
return newIter, nil return newIter, nil
} }
func WriteFastqToStdout(iterator obiiter.IBioSequenceBatch, func WriteFastqToStdout(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteFastq(iterator, os.Stdout, options...) return WriteFastq(iterator, os.Stdout, options...)
} }
func WriteFastqToFile(iterator obiiter.IBioSequenceBatch, func WriteFastqToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename) file, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
options = append(options, OptionCloseFile()) options = append(options, OptionCloseFile())

View File

@ -26,7 +26,7 @@ const (
inSequence gbstate = 4 inSequence gbstate = 4
) )
func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) { func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
state := inHeader state := inHeader
@ -107,11 +107,11 @@ func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
} }
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch { func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize()) entry_channel := make(chan _FileChunk, opt.BufferSize())
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
nworkers := opt.ParallelWorkers() nworkers := opt.ParallelWorkers()
newIter.Add(nworkers) newIter.Add(nworkers)
@ -130,7 +130,7 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBa
return newIter return newIter
} }
func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) { func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
var err error var err error
@ -138,7 +138,7 @@ func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSe
reader, err = os.Open(filename) reader, err = os.Open(filename)
if err != nil { if err != nil {
log.Printf("open file error: %+v", err) log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
// Test if the flux is compressed by gzip // Test if the flux is compressed by gzip

View File

@ -43,7 +43,7 @@ func GuessSeqFileType(firstline string) string {
} }
func ReadSequencesFromFile(filename string, func ReadSequencesFromFile(filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
var file *os.File var file *os.File
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
@ -53,7 +53,7 @@ func ReadSequencesFromFile(filename string,
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
reader = file reader = file
@ -72,7 +72,7 @@ func ReadSequencesFromFile(filename string,
tag, _ := breader.Peek(30) tag, _ := breader.Peek(30)
if len(tag) < 30 { if len(tag) < 30 {
newIter := obiiter.MakeIBioSequenceBatch() newIter := obiiter.MakeIBioSequence()
newIter.Close() newIter.Close()
return newIter, nil return newIter, nil
} }
@ -98,5 +98,5 @@ func ReadSequencesFromFile(filename string,
filename, filetype) filename, filetype)
} }
return obiiter.NilIBioSequenceBatch, nil return obiiter.NilIBioSequence, nil
} }

View File

@ -10,9 +10,9 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
) )
func WriteSequence(iterator obiiter.IBioSequenceBatch, func WriteSequence(iterator obiiter.IBioSequence,
file io.Writer, file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
iterator = iterator.Rebatch(1000) iterator = iterator.Rebatch(1000)
@ -22,7 +22,7 @@ func WriteSequence(iterator obiiter.IBioSequenceBatch,
batch := iterator.Get() batch := iterator.Get()
iterator.PushBack() iterator.PushBack()
var newIter obiiter.IBioSequenceBatch var newIter obiiter.IBioSequence
var err error var err error
if len(batch.Slice()) > 0 { if len(batch.Slice()) > 0 {
@ -42,24 +42,24 @@ func WriteSequence(iterator obiiter.IBioSequenceBatch,
return iterator, nil return iterator, nil
} }
return obiiter.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready") return obiiter.NilIBioSequence, fmt.Errorf("input iterator not ready")
} }
func WriteSequencesToStdout(iterator obiiter.IBioSequenceBatch, func WriteSequencesToStdout(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteSequence(iterator, os.Stdout, options...) return WriteSequence(iterator, os.Stdout, options...)
} }
func WriteSequencesToFile(iterator obiiter.IBioSequenceBatch, func WriteSequencesToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename) file, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
options = append(options, OptionCloseFile()) options = append(options, OptionCloseFile())

View File

@ -15,7 +15,7 @@ import (
// Structure implementing an iterator over bioseq.BioSequenceBatch // Structure implementing an iterator over bioseq.BioSequenceBatch
// based on a channel. // based on a channel.
type _IBioSequenceBatch struct { type _IBioSequence struct {
channel chan BioSequenceBatch channel chan BioSequenceBatch
current BioSequenceBatch current BioSequenceBatch
pushBack *abool.AtomicBool pushBack *abool.AtomicBool
@ -27,24 +27,24 @@ type _IBioSequenceBatch struct {
finished *abool.AtomicBool finished *abool.AtomicBool
} }
type IBioSequenceBatch struct { type IBioSequence struct {
pointer *_IBioSequenceBatch pointer *_IBioSequence
} }
// NilIBioSequenceBatch nil instance for IBioSequenceBatch // NilIBioSequence nil instance for IBioSequenceBatch
// //
// NilIBioSequenceBatch is the nil instance for the // NilIBioSequence is the nil instance for the
// IBioSequenceBatch type. // IBioSequenceBatch type.
var NilIBioSequenceBatch = IBioSequenceBatch{pointer: nil} var NilIBioSequence = IBioSequence{pointer: nil}
func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch { func MakeIBioSequence(sizes ...int) IBioSequence {
buffsize := int32(0) buffsize := int32(0)
if len(sizes) > 0 { if len(sizes) > 0 {
buffsize = int32(sizes[0]) buffsize = int32(sizes[0])
} }
i := _IBioSequenceBatch{ i := _IBioSequence{
channel: make(chan BioSequenceBatch, buffsize), channel: make(chan BioSequenceBatch, buffsize),
current: NilBioSequenceBatch, current: NilBioSequenceBatch,
pushBack: abool.New(), pushBack: abool.New(),
@ -58,11 +58,11 @@ func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch {
i.all_done = &waiting i.all_done = &waiting
lock := sync.RWMutex{} lock := sync.RWMutex{}
i.lock = &lock i.lock = &lock
ii := IBioSequenceBatch{&i} ii := IBioSequence{&i}
return ii return ii
} }
func (iterator IBioSequenceBatch) Add(n int) { func (iterator IBioSequence) Add(n int) {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Add method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Add method on NilIBioSequenceBatch")
} }
@ -70,7 +70,7 @@ func (iterator IBioSequenceBatch) Add(n int) {
iterator.pointer.all_done.Add(n) iterator.pointer.all_done.Add(n)
} }
func (iterator IBioSequenceBatch) Done() { func (iterator IBioSequence) Done() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Done method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Done method on NilIBioSequenceBatch")
} }
@ -78,7 +78,7 @@ func (iterator IBioSequenceBatch) Done() {
iterator.pointer.all_done.Done() iterator.pointer.all_done.Done()
} }
func (iterator IBioSequenceBatch) Unlock() { func (iterator IBioSequence) Unlock() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Unlock method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Unlock method on NilIBioSequenceBatch")
} }
@ -86,7 +86,7 @@ func (iterator IBioSequenceBatch) Unlock() {
iterator.pointer.lock.Unlock() iterator.pointer.lock.Unlock()
} }
func (iterator IBioSequenceBatch) Lock() { func (iterator IBioSequence) Lock() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Lock method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Lock method on NilIBioSequenceBatch")
} }
@ -94,7 +94,7 @@ func (iterator IBioSequenceBatch) Lock() {
iterator.pointer.lock.Lock() iterator.pointer.lock.Lock()
} }
func (iterator IBioSequenceBatch) RLock() { func (iterator IBioSequence) RLock() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.RLock method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.RLock method on NilIBioSequenceBatch")
} }
@ -102,7 +102,7 @@ func (iterator IBioSequenceBatch) RLock() {
iterator.pointer.lock.RLock() iterator.pointer.lock.RLock()
} }
func (iterator IBioSequenceBatch) RUnlock() { func (iterator IBioSequence) RUnlock() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.RUnlock method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.RUnlock method on NilIBioSequenceBatch")
} }
@ -110,7 +110,7 @@ func (iterator IBioSequenceBatch) RUnlock() {
iterator.pointer.lock.RUnlock() iterator.pointer.lock.RUnlock()
} }
func (iterator IBioSequenceBatch) Wait() { func (iterator IBioSequence) Wait() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Wait method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Wait method on NilIBioSequenceBatch")
} }
@ -118,7 +118,7 @@ func (iterator IBioSequenceBatch) Wait() {
iterator.pointer.all_done.Wait() iterator.pointer.all_done.Wait()
} }
func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch { func (iterator IBioSequence) Channel() chan BioSequenceBatch {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Channel method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Channel method on NilIBioSequenceBatch")
} }
@ -126,7 +126,7 @@ func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch {
return iterator.pointer.channel return iterator.pointer.channel
} }
func (iterator IBioSequenceBatch) IsNil() bool { func (iterator IBioSequence) IsNil() bool {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.IsNil method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.IsNil method on NilIBioSequenceBatch")
} }
@ -134,7 +134,7 @@ func (iterator IBioSequenceBatch) IsNil() bool {
return iterator.pointer == nil return iterator.pointer == nil
} }
func (iterator IBioSequenceBatch) BufferSize() int { func (iterator IBioSequence) BufferSize() int {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch")
} }
@ -142,7 +142,7 @@ func (iterator IBioSequenceBatch) BufferSize() int {
return int(atomic.LoadInt32(&iterator.pointer.buffer_size)) return int(atomic.LoadInt32(&iterator.pointer.buffer_size))
} }
func (iterator IBioSequenceBatch) BatchSize() int { func (iterator IBioSequence) BatchSize() int {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch")
} }
@ -150,7 +150,7 @@ func (iterator IBioSequenceBatch) BatchSize() int {
return int(atomic.LoadInt32(&iterator.pointer.batch_size)) return int(atomic.LoadInt32(&iterator.pointer.batch_size))
} }
func (iterator IBioSequenceBatch) SetBatchSize(size int) error { func (iterator IBioSequence) SetBatchSize(size int) error {
if size >= 0 { if size >= 0 {
atomic.StoreInt32(&iterator.pointer.batch_size, int32(size)) atomic.StoreInt32(&iterator.pointer.batch_size, int32(size))
return nil return nil
@ -159,10 +159,10 @@ func (iterator IBioSequenceBatch) SetBatchSize(size int) error {
return fmt.Errorf("size (%d) cannot be negative", size) return fmt.Errorf("size (%d) cannot be negative", size)
} }
func (iterator IBioSequenceBatch) Split() IBioSequenceBatch { func (iterator IBioSequence) Split() IBioSequence {
iterator.pointer.lock.RLock() iterator.pointer.lock.RLock()
defer iterator.pointer.lock.RUnlock() defer iterator.pointer.lock.RUnlock()
i := _IBioSequenceBatch{ i := _IBioSequence{
channel: iterator.pointer.channel, channel: iterator.pointer.channel,
current: NilBioSequenceBatch, current: NilBioSequenceBatch,
pushBack: abool.New(), pushBack: abool.New(),
@ -174,11 +174,11 @@ func (iterator IBioSequenceBatch) Split() IBioSequenceBatch {
lock := sync.RWMutex{} lock := sync.RWMutex{}
i.lock = &lock i.lock = &lock
newIter := IBioSequenceBatch{&i} newIter := IBioSequence{&i}
return newIter return newIter
} }
func (iterator IBioSequenceBatch) Next() bool { func (iterator IBioSequence) Next() bool {
if iterator.pointer.pushBack.IsSet() { if iterator.pointer.pushBack.IsSet() {
iterator.pointer.pushBack.UnSet() iterator.pointer.pushBack.UnSet()
return true return true
@ -200,7 +200,7 @@ func (iterator IBioSequenceBatch) Next() bool {
return false return false
} }
func (iterator IBioSequenceBatch) PushBack() { func (iterator IBioSequence) PushBack() {
if !iterator.pointer.current.IsNil() { if !iterator.pointer.current.IsNil() {
iterator.pointer.pushBack.Set() iterator.pointer.pushBack.Set()
} }
@ -210,11 +210,11 @@ func (iterator IBioSequenceBatch) PushBack() {
// currently pointed by the iterator. You have to use the // currently pointed by the iterator. You have to use the
// 'Next' method to move to the next entry before calling // 'Next' method to move to the next entry before calling
// 'Get' to retreive the following instance. // 'Get' to retreive the following instance.
func (iterator IBioSequenceBatch) Get() BioSequenceBatch { func (iterator IBioSequence) Get() BioSequenceBatch {
return iterator.pointer.current return iterator.pointer.current
} }
func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) { func (iterator IBioSequence) Push(batch BioSequenceBatch) {
if batch.IsNil() { if batch.IsNil() {
log.Panicln("A Nil batch is pushed on the channel") log.Panicln("A Nil batch is pushed on the channel")
} }
@ -225,11 +225,11 @@ func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) {
iterator.pointer.channel <- batch iterator.pointer.channel <- batch
} }
func (iterator IBioSequenceBatch) Close() { func (iterator IBioSequence) Close() {
close(iterator.pointer.channel) close(iterator.pointer.channel)
} }
func (iterator IBioSequenceBatch) WaitAndClose() { func (iterator IBioSequence) WaitAndClose() {
iterator.Wait() iterator.Wait()
for len(iterator.Channel()) > 0 { for len(iterator.Channel()) > 0 {
@ -240,18 +240,18 @@ func (iterator IBioSequenceBatch) WaitAndClose() {
// Finished returns 'true' value if no more data is available // Finished returns 'true' value if no more data is available
// from the iterator. // from the iterator.
func (iterator IBioSequenceBatch) Finished() bool { func (iterator IBioSequence) Finished() bool {
return iterator.pointer.finished.IsSet() return iterator.pointer.finished.IsSet()
} }
func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
if len(sizes) > 0 { if len(sizes) > 0 {
buffsize = sizes[0] buffsize = sizes[0]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)
@ -288,14 +288,14 @@ func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch {
} }
func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSequenceBatch { func (iterator IBioSequence) Concat(iterators ...IBioSequence) IBioSequence {
if len(iterators) == 0 { if len(iterators) == 0 {
return iterator return iterator
} }
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)
@ -333,7 +333,7 @@ func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSeq
return newIter return newIter
} }
func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSequenceBatch { func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
niterator := len(iterators) + 1 niterator := len(iterators) + 1
@ -343,7 +343,7 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
nextCounter := goutils.AtomicCounter() nextCounter := goutils.AtomicCounter()
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(niterator) newIter.Add(niterator)
@ -351,7 +351,7 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
newIter.WaitAndClose() newIter.WaitAndClose()
}() }()
ff := func(iterator IBioSequenceBatch) { ff := func(iterator IBioSequence) {
for iterator.Next() { for iterator.Next() {
s := iterator.Get() s := iterator.Get()
@ -372,14 +372,14 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
// IBioSequenceBatch with every batches having the same size // IBioSequenceBatch with every batches having the same size
// indicated in parameter. Rebatching implies to sort the // indicated in parameter. Rebatching implies to sort the
// source IBioSequenceBatch. // source IBioSequenceBatch.
func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) Rebatch(size int, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
if len(sizes) > 0 { if len(sizes) > 0 {
buffsize = sizes[0] buffsize = sizes[0]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)
@ -418,7 +418,7 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
return newIter return newIter
} }
func (iterator IBioSequenceBatch) Recycle() { func (iterator IBioSequence) Recycle() {
log.Debugln("Start recycling of Bioseq objects") log.Debugln("Start recycling of Bioseq objects")
recycled := 0 recycled := 0
@ -434,14 +434,14 @@ func (iterator IBioSequenceBatch) Recycle() {
log.Debugf("End of the recycling of %d Bioseq objects", recycled) log.Debugf("End of the recycling of %d Bioseq objects", recycled)
} }
func (iterator IBioSequenceBatch) Consume() { func (iterator IBioSequence) Consume() {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
batch.Recycle() batch.Recycle()
} }
} }
func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) { func (iterator IBioSequence) Count(recycle bool) (int, int, int) {
variants := 0 variants := 0
reads := 0 reads := 0
nucleotides := 0 nucleotides := 0
@ -465,7 +465,7 @@ func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
return variants, reads, nucleotides return variants, reads, nucleotides
} }
func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch, func (iterator IBioSequence) PairWith(reverse IBioSequence,
sizes ...int) IPairedBioSequenceBatch { sizes ...int) IPairedBioSequenceBatch {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
batchsize := 5000 batchsize := 5000
@ -510,16 +510,16 @@ func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch,
// A function that takes a predicate and returns two IBioSequenceBatch iterators. // A function that takes a predicate and returns two IBioSequenceBatch iterators.
// Sequences extracted from the input iterator are distributed among both the // Sequences extracted from the input iterator are distributed among both the
// iterator following the predicate value. // iterator following the predicate value.
func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate, func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) (IBioSequenceBatch, IBioSequenceBatch) { size int, sizes ...int) (IBioSequence, IBioSequence) {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
if len(sizes) > 0 { if len(sizes) > 0 {
buffsize = sizes[0] buffsize = sizes[0]
} }
trueIter := MakeIBioSequenceBatch(buffsize) trueIter := MakeIBioSequence(buffsize)
falseIter := MakeIBioSequenceBatch(buffsize) falseIter := MakeIBioSequence(buffsize)
trueIter.Add(1) trueIter.Add(1)
falseIter.Add(1) falseIter.Add(1)
@ -578,8 +578,8 @@ func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
// Filtering a batch of sequences. // Filtering a batch of sequences.
// A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences. // A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences.
func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate, func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) IBioSequenceBatch { size int, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
nworkers := 4 nworkers := 4
@ -591,7 +591,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
buffsize = sizes[1] buffsize = sizes[1]
} }
trueIter := MakeIBioSequenceBatch(buffsize) trueIter := MakeIBioSequence(buffsize)
trueIter.Add(nworkers) trueIter.Add(nworkers)
@ -599,7 +599,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
trueIter.WaitAndClose() trueIter.WaitAndClose()
}() }()
ff := func(iterator IBioSequenceBatch) { ff := func(iterator IBioSequence) {
// iterator = iterator.SortBatches() // iterator = iterator.SortBatches()
for iterator.Next() { for iterator.Next() {
@ -633,7 +633,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
// Load all sequences availables from an IBioSequenceBatch iterator into // Load all sequences availables from an IBioSequenceBatch iterator into
// a large obiseq.BioSequenceSlice. // a large obiseq.BioSequenceSlice.
func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice { func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
chunck := obiseq.MakeBioSequenceSlice() chunck := obiseq.MakeBioSequenceSlice()
for iterator.Next() { for iterator.Next() {
@ -648,7 +648,7 @@ func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of // It takes a slice of BioSequence objects, and returns an iterator that will return batches of
// BioSequence objects // BioSequence objects
func IBatchOver(data obiseq.BioSequenceSlice, func IBatchOver(data obiseq.BioSequenceSlice,
size int, sizes ...int) IBioSequenceBatch { size int, sizes ...int) IBioSequence {
buffsize := 0 buffsize := 0
@ -656,7 +656,7 @@ func IBatchOver(data obiseq.BioSequenceSlice,
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)

View File

@ -8,19 +8,19 @@ import (
) )
type IDistribute struct { type IDistribute struct {
outputs map[int]IBioSequenceBatch outputs map[int]IBioSequence
news chan int news chan int
classifier *obiseq.BioSequenceClassifier classifier *obiseq.BioSequenceClassifier
lock *sync.Mutex lock *sync.Mutex
} }
func (dist *IDistribute) Outputs(key int) (IBioSequenceBatch, error) { func (dist *IDistribute) Outputs(key int) (IBioSequence, error) {
dist.lock.Lock() dist.lock.Lock()
iter, ok := dist.outputs[key] iter, ok := dist.outputs[key]
dist.lock.Unlock() dist.lock.Unlock()
if !ok { if !ok {
return NilIBioSequenceBatch, fmt.Errorf("code %d unknown", key) return NilIBioSequence, fmt.Errorf("code %d unknown", key)
} }
return iter, nil return iter, nil
@ -34,11 +34,11 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
return dist.classifier return dist.classifier
} }
func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute { func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
batchsize := 5000 batchsize := 5000
buffsize := 2 buffsize := 2
outputs := make(map[int]IBioSequenceBatch, 100) outputs := make(map[int]IBioSequence, 100)
slices := make(map[int]*obiseq.BioSequenceSlice, 100) slices := make(map[int]*obiseq.BioSequenceSlice, 100)
orders := make(map[int]int, 100) orders := make(map[int]int, 100)
news := make(chan int) news := make(chan int)
@ -80,7 +80,7 @@ func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier
orders[key] = 0 orders[key] = 0
lock.Lock() lock.Lock()
outputs[key] = MakeIBioSequenceBatch(buffsize) outputs[key] = MakeIBioSequence(buffsize)
lock.Unlock() lock.Unlock()
news <- key news <- key

View File

@ -2,7 +2,7 @@ package obiiter
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequence {
batchsize := 100 batchsize := 100
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -13,7 +13,7 @@ func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []strin
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)
@ -41,7 +41,7 @@ func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []strin
} }
func MergePipe(na string, statsOn []string, sizes ...int) Pipeable { func MergePipe(na string, statsOn []string, sizes ...int) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch { f := func(iterator IBioSequence) IBioSequence {
return iterator.IMergeSequenceBatch(na, statsOn, sizes...) return iterator.IMergeSequenceBatch(na, statsOn, sizes...)
} }

View File

@ -1,12 +1,11 @@
package obiiter package obiiter
type Pipeable func(input IBioSequence) IBioSequence
type Pipeable func(input IBioSequenceBatch) IBioSequenceBatch func Pipeline(start Pipeable, parts ...Pipeable) Pipeable {
p := func(input IBioSequence) IBioSequence {
func Pipeline(start Pipeable,parts ...Pipeable) Pipeable {
p := func (input IBioSequenceBatch) IBioSequenceBatch {
data := start(input) data := start(input)
for _,part := range parts { for _, part := range parts {
data = part(data) data = part(data)
} }
return data return data
@ -15,17 +14,16 @@ func Pipeline(start Pipeable,parts ...Pipeable) Pipeable {
return p return p
} }
func (input IBioSequenceBatch) Pipe(start Pipeable, parts ...Pipeable) IBioSequenceBatch { func (input IBioSequence) Pipe(start Pipeable, parts ...Pipeable) IBioSequence {
p := Pipeline(start,parts...) p := Pipeline(start, parts...)
return p(input) return p(input)
} }
type Teeable func(input IBioSequence) (IBioSequence, IBioSequence)
type Teeable func(input IBioSequenceBatch) (IBioSequenceBatch,IBioSequenceBatch) func (input IBioSequence) CopyTee() (IBioSequence, IBioSequence) {
first := MakeIBioSequence()
func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch,IBioSequenceBatch) { second := MakeIBioSequence()
first := MakeIBioSequenceBatch()
second:= MakeIBioSequenceBatch()
first.Add(1) first.Add(1)
@ -36,11 +34,11 @@ func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch,IBioSequenceBatch) {
go func() { go func() {
for input.Next() { for input.Next() {
b:=input.Get() b := input.Get()
first.Push(b) first.Push(b)
second.Push(b) second.Push(b)
} }
}() }()
return first,second return first, second
} }

View File

@ -6,8 +6,8 @@ import (
"github.com/schollz/progressbar/v3" "github.com/schollz/progressbar/v3"
) )
func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch { func (iterator IBioSequence) Speed(message ...string) IBioSequence {
newIter := MakeIBioSequenceBatch() newIter := MakeIBioSequence()
newIter.Add(1) newIter.Add(1)
@ -51,7 +51,7 @@ func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch {
} }
func SpeedPipe(message ...string) Pipeable { func SpeedPipe(message ...string) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch { f := func(iterator IBioSequence) IBioSequence {
return iterator.Speed(message...) return iterator.Speed(message...)
} }

View File

@ -27,7 +27,7 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
// In addition to the SeqWorker function, the method accepts two optional integer parameters. // In addition to the SeqWorker function, the method accepts two optional integer parameters.
// - The first indicates the number of workers running in parallel (default 4). // - The first indicates the number of workers running in parallel (default 4).
// - The second sets the size of the channel buffer; by default it is the same as the input iterator's buffer size. // - The second sets the size of the channel buffer; by default it is the same as the input iterator's buffer size.
func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -39,7 +39,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(nworkers) newIter.Add(nworkers)
@ -49,7 +49,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
}() }()
f := func(iterator IBioSequenceBatch) { f := func(iterator IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
for i, seq := range batch.slice { for i, seq := range batch.slice {
@ -69,8 +69,8 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
return newIter return newIter
} }
func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.SequencePredicate, func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
worker SeqWorker, sizes ...int) IBioSequenceBatch { worker SeqWorker, sizes ...int) IBioSequence {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -82,7 +82,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(nworkers) newIter.Add(nworkers)
@ -92,7 +92,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
}() }()
f := func(iterator IBioSequenceBatch) { f := func(iterator IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
for i, seq := range batch.slice { for i, seq := range batch.slice {
@ -114,7 +114,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
return newIter return newIter
} }
func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequence {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -126,7 +126,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(nworkers) newIter.Add(nworkers)
@ -135,7 +135,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
log.Println("End of the batch slice workers") log.Println("End of the batch slice workers")
}() }()
f := func(iterator IBioSequenceBatch) { f := func(iterator IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
batch.slice = worker(batch.slice) batch.slice = worker(batch.slice)
@ -154,7 +154,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
} }
func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable { func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch { f := func(iterator IBioSequence) IBioSequence {
return iterator.MakeIWorker(worker, sizes...) return iterator.MakeIWorker(worker, sizes...)
} }
@ -162,7 +162,7 @@ func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
} }
func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable { func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch { f := func(iterator IBioSequence) IBioSequence {
return iterator.MakeISliceWorker(worker, sizes...) return iterator.MakeISliceWorker(worker, sizes...)
} }

193
pkg/obiseq/attributes.go Normal file
View File

@ -0,0 +1,193 @@
package obiseq
import (
"fmt"
"strconv"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
log "github.com/sirupsen/logrus"
)
// GetAttribute looks up key in the sequence's annotation map.
//
// It returns the stored value and true when the key is present. When the
// sequence has no annotation map, or the key is absent, the value is the
// zero value and the boolean is false.
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
	if s.annotations == nil {
		return nil, false
	}

	value, found := s.annotations[key]
	return value, found
}
// SetAttribute stores value under key in the annotation map returned by
// Annotations, replacing any value previously associated with key.
func (s *BioSequence) SetAttribute(key string, value interface{}) {
	s.Annotations()[key] = value
}
// GetIntAttribute returns the attribute stored under key converted to an int.
//
// The boolean result is true only when the key exists and
// goutils.InterfaceToInt converts its value without error.
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return 0, false
	}

	number, err := goutils.InterfaceToInt(raw)
	return number, err == nil
}
// DeleteAttribute removes key from the annotation map obtained through
// Annotations. Deleting a key that is not present has no effect.
func (s *BioSequence) DeleteAttribute(key string) {
	annotations := s.Annotations()
	delete(annotations, key)
}
// RenameAttribute moves the value stored under oldName to newName.
//
// Note the parameter order: the new name comes first. If oldName is not
// present the sequence is left untouched. Any value already stored under
// newName is overwritten.
//
// Renaming an attribute to its own name is a no-op. Without this guard the
// value would be written under the name and then immediately deleted,
// silently dropping the attribute.
func (s *BioSequence) RenameAttribute(newName, oldName string) {
	if newName == oldName {
		return
	}

	value, found := s.GetAttribute(oldName)
	if !found {
		return
	}

	s.SetAttribute(newName, value)
	s.DeleteAttribute(oldName)
}
// GetNumericAttribute returns the attribute stored under key converted to a
// float64.
//
// The boolean result is true only when the key exists and
// goutils.InterfaceToFloat64 converts its value without error.
func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return 0, false
	}

	number, err := goutils.InterfaceToFloat64(raw)
	return number, err == nil
}
// GetStringAttribute returns the attribute stored under key rendered as a
// string with fmt.Sprint.
//
// When the key is absent it returns the empty string and false.
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return "", false
	}

	return fmt.Sprint(raw), true
}
// GetBoolAttribute returns the attribute stored under key converted to a
// bool.
//
// The second result is true only when the key exists and
// goutils.InterfaceToBool converts its value without error.
func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return false, false
	}

	flag, err := goutils.InterfaceToBool(raw)
	return flag, err == nil
}
// GetIntMap returns the attribute stored under key converted to a
// map[string]int.
//
// The boolean result is true only when the key exists and
// goutils.InterfaceToIntMap converts its value without error. When the key
// is absent the returned map is nil.
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return nil, false
	}

	counts, err := goutils.InterfaceToIntMap(raw)
	return counts, err == nil
}
// Count returns the integer value of the "count" attribute.
//
// It falls back to 1 when the attribute is missing or cannot be read as an
// int by GetIntAttribute.
func (s *BioSequence) Count() int {
	if n, found := s.GetIntAttribute("count"); found {
		return n
	}

	return 1
}
// SetCount stores count under the "count" key of the annotation map
// returned by Annotations.
func (s *BioSequence) SetCount(count int) {
	s.Annotations()["count"] = count
}
// Taxid returns the integer value of the "taxid" attribute.
//
// It falls back to 1 when the attribute is missing or cannot be read as an
// int by GetIntAttribute.
func (s *BioSequence) Taxid() int {
	if id, found := s.GetIntAttribute("taxid"); found {
		return id
	}

	return 1
}
// SetTaxid stores taxid under the "taxid" key of the annotation map
// returned by Annotations.
func (s *BioSequence) SetTaxid(taxid int) {
	s.Annotations()["taxid"] = taxid
}
// OBITagRefIndex returns the "obitag_ref_index" attribute as a
// map[int]string.
//
// It returns nil when the attribute is not set. A value already typed as
// map[int]string is returned as is. Values typed as map[string]interface{}
// or map[string]string are copied into a new map whose keys are parsed with
// strconv.Atoi; for the former, each value is additionally converted with
// goutils.InterfaceToString. A key or value that cannot be converted, or an
// attribute of any other type, is reported through log.Panicln.
func (s *BioSequence) OBITagRefIndex() map[int]string {
	raw, found := s.GetAttribute("obitag_ref_index")
	if !found {
		return nil
	}

	var index map[int]string

	switch typed := raw.(type) {
	case map[int]string:
		// Already in the target representation: hand it back unchanged.
		index = typed

	case map[string]interface{}:
		index = make(map[int]string, len(typed))
		for textKey, item := range typed {
			intKey, err := strconv.Atoi(textKey)
			if err != nil {
				log.Panicln(err)
			}

			index[intKey], err = goutils.InterfaceToString(item)
			if err != nil {
				log.Panicln(err)
			}
		}

	case map[string]string:
		index = make(map[int]string, len(typed))
		for textKey, text := range typed {
			intKey, err := strconv.Atoi(textKey)
			if err != nil {
				log.Panicln(err)
			}

			index[intKey] = text
		}

	default:
		log.Panicln("value of attribute obitag_ref_index cannot be casted to a map[int]string")
	}

	return index
}

View File

@ -13,20 +13,17 @@ package obiseq
import ( import (
"bytes" "bytes"
"crypto/md5" "crypto/md5"
"fmt"
"strconv"
"sync/atomic" "sync/atomic"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
) )
var _NewSeq = int32(0) var _NewSeq = int32(0)
var _RecycleSeq = int32(0) var _RecycleSeq = int32(0)
var _InMemSeq = int32(0) var _InMemSeq = int32(0)
var _MaxInMemSeq = int32(0)
var _BioLogRate = int(100000) // var _MaxInMemSeq = int32(0)
// var _BioLogRate = int(100000)
func LogBioSeqStatus() { func LogBioSeqStatus() {
log.Debugf("Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq) log.Debugf("Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq)
@ -200,154 +197,11 @@ func (s *BioSequence) Annotations() Annotation {
return s.annotations return s.annotations
} }
// GetAttribute looks up key in the sequence's annotation map.
//
// It returns the stored value and true when the key is present. When the
// sequence has no annotation map, or the key is absent, the value is the
// zero value and the boolean is false.
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
	if s.annotations == nil {
		return nil, false
	}

	value, found := s.annotations[key]
	return value, found
}
// SetAttribute stores value under key in the annotation map returned by
// Annotations, replacing any value previously associated with key.
func (s *BioSequence) SetAttribute(key string, value interface{}) {
	s.Annotations()[key] = value
}
// GetIntAttribute returns the attribute stored under key converted to an int.
//
// The boolean result is true only when the key exists and
// goutils.InterfaceToInt converts its value without error.
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return 0, false
	}

	number, err := goutils.InterfaceToInt(raw)
	return number, err == nil
}
// GetStringAttribute returns the attribute stored under key rendered as a
// string with fmt.Sprint.
//
// When the key is absent it returns the empty string and false.
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return "", false
	}

	return fmt.Sprint(raw), true
}
// GetBool returns the attribute stored under key converted to a bool.
//
// The second result is true only when the key exists and
// goutils.InterfaceToBool converts its value without error.
func (s *BioSequence) GetBool(key string) (bool, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return false, false
	}

	flag, err := goutils.InterfaceToBool(raw)
	return flag, err == nil
}
// GetIntMap returns the attribute stored under key converted to a
// map[string]int.
//
// The boolean result is true only when the key exists and
// goutils.InterfaceToIntMap converts its value without error. When the key
// is absent the returned map is nil.
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return nil, false
	}

	counts, err := goutils.InterfaceToIntMap(raw)
	return counts, err == nil
}
// Returning the MD5 hash of the sequence. // Returning the MD5 hash of the sequence.
func (s *BioSequence) MD5() [16]byte { func (s *BioSequence) MD5() [16]byte {
return md5.Sum(s.sequence) return md5.Sum(s.sequence)
} }
// Count returns the integer value of the "count" attribute.
//
// It falls back to 1 when the attribute is missing or cannot be read as an
// int by GetIntAttribute.
func (s *BioSequence) Count() int {
	if n, found := s.GetIntAttribute("count"); found {
		return n
	}

	return 1
}
// Taxid returns the integer value of the "taxid" attribute.
//
// It falls back to 1 when the attribute is missing or cannot be read as an
// int by GetIntAttribute.
func (s *BioSequence) Taxid() int {
	if id, found := s.GetIntAttribute("taxid"); found {
		return id
	}

	return 1
}
// OBITagRefIndex returns the "obitag_ref_index" attribute as a
// map[int]string.
//
// It returns nil when the attribute is not set. A value already typed as
// map[int]string is returned as is. Values typed as map[string]interface{}
// or map[string]string are copied into a new map whose keys are parsed with
// strconv.Atoi; for the former, each value is additionally converted with
// goutils.InterfaceToString. A key or value that cannot be converted, or an
// attribute of any other type, is reported through log.Panicln.
func (s *BioSequence) OBITagRefIndex() map[int]string {
	raw, found := s.GetAttribute("obitag_ref_index")
	if !found {
		return nil
	}

	var index map[int]string

	switch typed := raw.(type) {
	case map[int]string:
		// Already in the target representation: hand it back unchanged.
		index = typed

	case map[string]interface{}:
		index = make(map[int]string, len(typed))
		for textKey, item := range typed {
			intKey, err := strconv.Atoi(textKey)
			if err != nil {
				log.Panicln(err)
			}

			index[intKey], err = goutils.InterfaceToString(item)
			if err != nil {
				log.Panicln(err)
			}
		}

	case map[string]string:
		index = make(map[int]string, len(typed))
		for textKey, text := range typed {
			intKey, err := strconv.Atoi(textKey)
			if err != nil {
				log.Panicln(err)
			}

			index[intKey] = text
		}

	default:
		log.Panicln("value of attribute obitag_ref_index cannot be casted to a map[int]string")
	}

	return index
}
// SetTaxid stores taxid under the "taxid" key of the annotation map
// returned by Annotations.
func (s *BioSequence) SetTaxid(taxid int) {
	s.Annotations()["taxid"] = taxid
}
// Setting the id of the BioSequence. // Setting the id of the BioSequence.
func (s *BioSequence) SetId(id string) { func (s *BioSequence) SetId(id string) {
s.id = id s.id = id

View File

@ -0,0 +1,2 @@
package obiannotate

View File

@ -19,7 +19,7 @@ type seqPCR struct {
SonCount int SonCount int
AddedSons int AddedSons int
Edges []Edge Edges []Edge
Cluster map[int]bool // used as the set of head sequences associated to that sequence Cluster map[int]bool // used as the set of head sequences associated to that sequence
} }
// buildSamples sorts the sequences by samples // buildSamples sorts the sequences by samples
@ -58,7 +58,7 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
func annotateOBIClean(dataset obiseq.BioSequenceSlice, func annotateOBIClean(dataset obiseq.BioSequenceSlice,
sample map[string]*([]*seqPCR), sample map[string]*([]*seqPCR),
tag, NAValue string) obiiter.IBioSequenceBatch { tag, NAValue string) obiiter.IBioSequence {
batchsize := 1000 batchsize := 1000
var annot = func(data obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { var annot = func(data obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
@ -207,7 +207,6 @@ func GetCluster(sequence *obiseq.BioSequence) map[string]string {
return cluster return cluster
} }
// func Cluster(sample map[string]*([]*seqPCR)) { // func Cluster(sample map[string]*([]*seqPCR)) {
// for _, graph := range sample { // for _, graph := range sample {
// for _, s := range *graph { // for _, s := range *graph {
@ -215,7 +214,7 @@ func GetCluster(sequence *obiseq.BioSequence) map[string]string {
// if len(s.Edges) > 0 { // if len(s.Edges) > 0 {
// for _, f := range s.Edges { // for _, f := range s.Edges {
// } // }
// } else { // } else {
// cluster // cluster
// } // }
@ -286,7 +285,7 @@ func Weight(sequence *obiseq.BioSequence) map[string]int {
return weight return weight
} }
func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func IOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
db := itertator.Load() db := itertator.Load()
@ -318,7 +317,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
} }
} }
Mutation(samples) Mutation(samples)
pbopt := make([]progressbar.Option, 0, 5) pbopt := make([]progressbar.Option, 0, 5)
@ -352,7 +350,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
EmpiricalDistCsv(RatioTableFilename(), all_ratio) EmpiricalDistCsv(RatioTableFilename(), all_ratio)
} }
iter := annotateOBIClean(db, samples, SampleAttribute(), "NA") iter := annotateOBIClean(db, samples, SampleAttribute(), "NA")
if OnlyHead() { if OnlyHead() {

View File

@ -67,9 +67,9 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
return list_of_files, nil return list_of_files, nil
} }
func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) { func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
var iterator obiiter.IBioSequenceBatch var iterator obiiter.IBioSequence
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequenceBatch, error) var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequence, error)
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)
@ -109,7 +109,7 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
list_of_files, err := _ExpandListOfFiles(false, filenames...) list_of_files, err := _ExpandListOfFiles(false, filenames...)
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
switch CLIInputFormat() { switch CLIInputFormat() {
@ -140,7 +140,7 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
iterator, err = reader(list_of_files[0], opts...) iterator, err = reader(list_of_files[0], opts...)
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
} }

View File

@ -8,10 +8,10 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
) )
func WriteBioSequences(iterator obiiter.IBioSequenceBatch, func WriteBioSequences(iterator obiiter.IBioSequence,
terminalAction bool, filenames ...string) (obiiter.IBioSequenceBatch, error) { terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
var newIter obiiter.IBioSequenceBatch var newIter obiiter.IBioSequence
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)
@ -62,12 +62,12 @@ func WriteBioSequences(iterator obiiter.IBioSequenceBatch,
if err != nil { if err != nil {
log.Fatalf("Write file error: %v", err) log.Fatalf("Write file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
if terminalAction { if terminalAction {
newIter.Recycle() newIter.Recycle()
return obiiter.NilIBioSequenceBatch, nil return obiiter.NilIBioSequence, nil
} }
return newIter, nil return newIter, nil

View File

@ -9,7 +9,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
) )
func DistributeSequence(sequences obiiter.IBioSequenceBatch) { func DistributeSequence(sequences obiiter.IBioSequence) {
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)

View File

@ -8,14 +8,14 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
) )
func IFilterSequence(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func IFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
var newIter obiiter.IBioSequenceBatch var newIter obiiter.IBioSequence
predicate := CLISequenceSelectionPredicate() predicate := CLISequenceSelectionPredicate()
if predicate != nil { if predicate != nil {
if CLISaveDiscardedSequences() { if CLISaveDiscardedSequences() {
var discarded obiiter.IBioSequenceBatch var discarded obiiter.IBioSequence
log.Printf("Discarded sequences saved in file: %s\n", CLIDiscardedFileName()) log.Printf("Discarded sequences saved in file: %s\n", CLIDiscardedFileName())
newIter, discarded = iterator.DivideOn(predicate, newIter, discarded = iterator.DivideOn(predicate,

View File

@ -10,7 +10,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
) )
func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) { func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
opts := make([]obingslibrary.WithOption, 0, 10) opts := make([]obingslibrary.WithOption, 0, 10)
@ -37,7 +37,7 @@ func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBa
newIter = newIter.Rebatch(obioptions.CLIBatchSize()) newIter = newIter.Rebatch(obioptions.CLIBatchSize())
} }
var unidentified obiiter.IBioSequenceBatch var unidentified obiiter.IBioSequence
if CLIUnidentifiedFileName() != "" { if CLIUnidentifiedFileName() != "" {
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName()) log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"), unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),

View File

@ -206,7 +206,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch, func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
gap float64, delta, minOverlap int, gap float64, delta, minOverlap int,
minIdentity float64, minIdentity float64,
withStats bool, sizes ...int) obiiter.IBioSequenceBatch { withStats bool, sizes ...int) obiiter.IBioSequence {
nworkers := runtime.NumCPU() * 3 / 2 nworkers := runtime.NumCPU() * 3 / 2
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -219,7 +219,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := obiiter.MakeIBioSequenceBatch(buffsize) newIter := obiiter.MakeIBioSequence(buffsize)
newIter.Add(nworkers) newIter.Add(nworkers)

View File

@ -9,7 +9,7 @@ import (
// PCR iterates over sequences provided by a obiseq.IBioSequenceBatch // PCR iterates over sequences provided by a obiseq.IBioSequenceBatch
// and returns an other obiseq.IBioSequenceBatch distributing // and returns an other obiseq.IBioSequenceBatch distributing
// obiseq.BioSequenceBatch containing the selected amplicon sequences. // obiseq.BioSequenceBatch containing the selected amplicon sequences.
func PCR(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) { func PCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
opts := make([]obiapat.WithOption, 0, 10) opts := make([]obiapat.WithOption, 0, 10)

View File

@ -32,7 +32,7 @@ func IndexSequence(seqidx int,
// r := 0 // r := 0
// w := 0 // w := 0
for i, ref := range references { for i, ref := range references {
lcs, alilength := obialign.FastLCSScore(sequence, ref, -1 , &matrix) lcs, alilength := obialign.FastLCSScore(sequence, ref, -1, &matrix)
score[i] = alilength - lcs score[i] = alilength - lcs
} }
@ -88,7 +88,7 @@ func IndexSequence(seqidx int,
return obitag_index return obitag_index
} }
func IndexReferenceDB(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
references := iterator.Load() references := iterator.Load()
refcounts := make( refcounts := make(
@ -118,7 +118,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBa
bar := progressbar.NewOptions(len(references), pbopt...) bar := progressbar.NewOptions(len(references), pbopt...)
limits := make(chan [2]int) limits := make(chan [2]int)
indexed := obiiter.MakeIBioSequenceBatch() indexed := obiiter.MakeIBioSequence()
go func() { go func() {
for i := 0; i < len(references); i += 10 { for i := 0; i < len(references); i += 10 {
limits <- [2]int{i, goutils.MinInt(i+10, len(references))} limits <- [2]int{i, goutils.MinInt(i+10, len(references))}

View File

@ -171,7 +171,7 @@ func IdentifySeqWorker(references obiseq.BioSequenceSlice,
} }
} }
func AssignTaxonomy(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func AssignTaxonomy(iterator obiiter.IBioSequence) obiiter.IBioSequence {
references := CLIRefDB() references := CLIRefDB()
refcounts := make( refcounts := make(

View File

@ -8,7 +8,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
) )
func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func Unique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
options := make([]obichunk.WithOption, 0, 30) options := make([]obichunk.WithOption, 0, 30)

BIN
sample/.DS_Store vendored Normal file

Binary file not shown.