rename the iterator class

This commit is contained in:
2023-01-22 22:04:17 +01:00
parent 4592855095
commit f97f92df72
37 changed files with 399 additions and 354 deletions
+4 -4
View File
@@ -35,12 +35,12 @@ func find(root, ext string) []string {
return a return a
} }
func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch, func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequenceBatch, error) { sizes ...int) (obiiter.IBioSequence, error) {
dir, err := tempDir() dir, err := tempDir()
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
bufferSize := iterator.BufferSize() bufferSize := iterator.BufferSize()
@@ -49,7 +49,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
bufferSize = sizes[0] bufferSize = sizes[0]
} }
newIter := obiiter.MakeIBioSequenceBatch(bufferSize) newIter := obiiter.MakeIBioSequence(bufferSize)
newIter.Add(1) newIter.Add(1)
+5 -4
View File
@@ -1,16 +1,17 @@
package obichunk package obichunk
import ( import (
log "github.com/sirupsen/logrus"
"sync" "sync"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
func ISequenceChunk(iterator obiiter.IBioSequenceBatch, func ISequenceChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequenceBatch, error) { sizes ...int) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize() bufferSize := iterator.BufferSize()
@@ -18,7 +19,7 @@ func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
bufferSize = sizes[0] bufferSize = sizes[0]
} }
newIter := obiiter.MakeIBioSequenceBatch(bufferSize) newIter := obiiter.MakeIBioSequence(bufferSize)
newIter.Add(1) newIter.Add(1)
+4 -4
View File
@@ -56,9 +56,9 @@ func (by _By) Sort(seqs []sSS) {
// End of the sort interface // End of the sort interface
// //
func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch, func ISequenceSubChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequenceBatch, error) { sizes ...int) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize() bufferSize := iterator.BufferSize()
nworkers := 4 nworkers := 4
@@ -71,7 +71,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
bufferSize = sizes[1] bufferSize = sizes[1]
} }
newIter := obiiter.MakeIBioSequenceBatch(bufferSize) newIter := obiiter.MakeIBioSequence(bufferSize)
newIter.Add(nworkers) newIter.Add(nworkers)
@@ -88,7 +88,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
return neworder return neworder
} }
ff := func(iterator obiiter.IBioSequenceBatch, ff := func(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier) { classifier *obiseq.BioSequenceClassifier) {
ordered := make([]sSS, 100) ordered := make([]sSS, 100)
+9 -9
View File
@@ -12,14 +12,14 @@ import (
// Runs dereplication algorithm on a obiiter.IBioSequenceBatch // Runs dereplication algorithm on a obiiter.IBioSequenceBatch
// iterator. // iterator.
func IUniqueSequence(iterator obiiter.IBioSequenceBatch, func IUniqueSequence(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
var err error var err error
opts := MakeOptions(options) opts := MakeOptions(options)
nworkers := opts.ParallelWorkers() nworkers := opts.ParallelWorkers()
iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize()) iUnique := obiiter.MakeIBioSequence(opts.BufferSize())
iterator = iterator.Speed("Splitting data set") iterator = iterator.Speed("Splitting data set")
@@ -32,7 +32,7 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
0) 0)
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
} else { } else {
@@ -41,7 +41,7 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
opts.BufferSize()) opts.BufferSize())
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
} }
@@ -65,14 +65,14 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
return neworder return neworder
} }
var ff func(obiiter.IBioSequenceBatch, var ff func(obiiter.IBioSequence,
*obiseq.BioSequenceClassifier, *obiseq.BioSequenceClassifier,
int) int)
cat := opts.Categories() cat := opts.Categories()
na := opts.NAValue() na := opts.NAValue()
ff = func(input obiiter.IBioSequenceBatch, ff = func(input obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
icat int) { icat int) {
icat-- icat--
@@ -81,9 +81,9 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
1, 1,
opts.BufferSize()) opts.BufferSize())
var next obiiter.IBioSequenceBatch var next obiiter.IBioSequence
if icat >= 0 { if icat >= 0 {
next = obiiter.MakeIBioSequenceBatch(opts.BufferSize()) next = obiiter.MakeIBioSequence(opts.BufferSize())
iUnique.Add(1) iUnique.Add(1)
+2 -2
View File
@@ -10,13 +10,13 @@ import (
func ReadSequencesBatchFromFiles(filenames []string, func ReadSequencesBatchFromFiles(filenames []string,
reader IBatchReader, reader IBatchReader,
concurrent_readers int, concurrent_readers int,
options ...WithOption) obiiter.IBioSequenceBatch { options ...WithOption) obiiter.IBioSequence {
if reader == nil { if reader == nil {
reader = ReadSequencesFromFile reader = ReadSequencesFromFile
} }
batchiter := obiiter.MakeIBioSequenceBatch(0) batchiter := obiiter.MakeIBioSequence(0)
nextCounter := goutils.AtomicCounter() nextCounter := goutils.AtomicCounter()
batchiter.Add(concurrent_readers) batchiter.Add(concurrent_readers)
+1 -1
View File
@@ -2,4 +2,4 @@ package obiformats
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
type IBatchReader func(string, ...WithOption) (obiiter.IBioSequenceBatch, error) type IBatchReader func(string, ...WithOption) (obiiter.IBioSequence, error)
+4 -3
View File
@@ -2,15 +2,16 @@ package obiformats
import ( import (
"fmt" "fmt"
log "github.com/sirupsen/logrus"
"sync" "sync"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
) )
type SequenceBatchWriterToFile func(iterator obiiter.IBioSequenceBatch, type SequenceBatchWriterToFile func(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) options ...WithOption) (obiiter.IBioSequence, error)
func WriterDispatcher(prototypename string, func WriterDispatcher(prototypename string,
dispatcher obiiter.IDistribute, dispatcher obiiter.IDistribute,
+4 -4
View File
@@ -120,7 +120,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error)
return bseq, nil return bseq, nil
} }
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch { func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
tag := make([]byte, 11) tag := make([]byte, 11)
n, _ := reader.Read(tag) n, _ := reader.Read(tag)
@@ -166,7 +166,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBat
opt := MakeOptions(options) opt := MakeOptions(options)
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter.Add(1) newIter.Add(1)
go func() { go func() {
@@ -208,7 +208,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBat
return newIter return newIter
} }
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) { func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
var err error var err error
@@ -216,7 +216,7 @@ func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IB
reader, err = os.Open(filename) reader, err = os.Open(filename)
if err != nil { if err != nil {
log.Printf("open file error: %+v", err) log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
// Test if the flux is compressed by gzip // Test if the flux is compressed by gzip
+5 -5
View File
@@ -84,7 +84,7 @@ func _EndOfLastEntry(buff []byte) int {
return -1 return -1
} }
func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) { func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
for chunks := range input { for chunks := range input {
scanner := bufio.NewScanner(chunks.raw) scanner := bufio.NewScanner(chunks.raw)
@@ -201,11 +201,11 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
// 6 5 43 2 1 // 6 5 43 2 1
// //
// <CR>?<LF>//<CR>?<LF> // <CR>?<LF>//<CR>?<LF>
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch { func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize()) entry_channel := make(chan _FileChunk, opt.BufferSize())
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
nworkers := opt.ParallelWorkers() nworkers := opt.ParallelWorkers()
newIter.Add(nworkers) newIter.Add(nworkers)
@@ -224,7 +224,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch
return newIter return newIter
} }
func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) { func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
var err error var err error
@@ -232,7 +232,7 @@ func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSeque
reader, err = os.Open(filename) reader, err = os.Open(filename)
if err != nil { if err != nil {
log.Printf("open file error: %+v", err) log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
// Test if the flux is compressed by gzip // Test if the flux is compressed by gzip
+2 -2
View File
@@ -15,8 +15,8 @@ func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
} }
} }
func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequenceBatch, func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence,
options ...WithOption) obiiter.IBioSequenceBatch { options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()), return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
opt.ParallelWorkers(), opt.ParallelWorkers(),
+6 -7
View File
@@ -19,7 +19,7 @@ import (
) )
func _FastseqReader(seqfile C.fast_kseq_p, func _FastseqReader(seqfile C.fast_kseq_p,
iterator obiiter.IBioSequenceBatch, iterator obiiter.IBioSequence,
batch_size int) { batch_size int) {
var comment string var comment string
i := 0 i := 0
@@ -72,7 +72,6 @@ func _FastseqReader(seqfile C.fast_kseq_p,
ii = 0 ii = 0
} }
} }
if len(slice) > 0 { if len(slice) > 0 {
iterator.Push(obiiter.MakeBioSequenceBatch(i, slice)) iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
@@ -81,7 +80,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
} }
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) { func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
name := C.CString(filename) name := C.CString(filename)
@@ -94,7 +93,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
if pointer == nil { if pointer == nil {
err = fmt.Errorf("cannot open file %s", filename) err = fmt.Errorf("cannot open file %s", filename)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
size := int64(-1) size := int64(-1)
@@ -106,7 +105,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
size = -1 size = -1
} }
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter.Add(1) newIter.Add(1)
go func() { go func() {
@@ -126,9 +125,9 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
return newIter, err return newIter, err
} }
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequenceBatch { func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter.Add(1) newIter.Add(1)
+9 -9
View File
@@ -61,13 +61,13 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []b
return bs.Bytes() return bs.Bytes()
} }
func WriteFasta(iterator obiiter.IBioSequenceBatch, func WriteFasta(iterator obiiter.IBioSequence,
file io.Writer, file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequenceBatch(buffsize) newIter := obiiter.MakeIBioSequence(buffsize)
nwriters := opt.ParallelWorkers() nwriters := opt.ParallelWorkers()
@@ -83,7 +83,7 @@ func WriteFasta(iterator obiiter.IBioSequenceBatch,
log.Debugln("End of the fasta file writing") log.Debugln("End of the fasta file writing")
}() }()
ff := func(iterator obiiter.IBioSequenceBatch) { ff := func(iterator obiiter.IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
@@ -136,21 +136,21 @@ func WriteFasta(iterator obiiter.IBioSequenceBatch,
return newIter, nil return newIter, nil
} }
func WriteFastaToStdout(iterator obiiter.IBioSequenceBatch, func WriteFastaToStdout(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteFasta(iterator, os.Stdout, options...) return WriteFasta(iterator, os.Stdout, options...)
} }
func WriteFastaToFile(iterator obiiter.IBioSequenceBatch, func WriteFastaToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename) file, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
options = append(options, OptionCloseFile()) options = append(options, OptionCloseFile())
+9 -9
View File
@@ -51,13 +51,13 @@ type FileChunck struct {
order int order int
} }
func WriteFastq(iterator obiiter.IBioSequenceBatch, func WriteFastq(iterator obiiter.IBioSequence,
file io.Writer, file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequenceBatch(buffsize) newIter := obiiter.MakeIBioSequence(buffsize)
nwriters := opt.ParallelWorkers() nwriters := opt.ParallelWorkers()
@@ -77,7 +77,7 @@ func WriteFastq(iterator obiiter.IBioSequenceBatch,
log.Debugln("End of the fastq file writing") log.Debugln("End of the fastq file writing")
}() }()
ff := func(iterator obiiter.IBioSequenceBatch) { ff := func(iterator obiiter.IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
chunk := FileChunck{ chunk := FileChunck{
@@ -129,21 +129,21 @@ func WriteFastq(iterator obiiter.IBioSequenceBatch,
return newIter, nil return newIter, nil
} }
func WriteFastqToStdout(iterator obiiter.IBioSequenceBatch, func WriteFastqToStdout(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteFastq(iterator, os.Stdout, options...) return WriteFastq(iterator, os.Stdout, options...)
} }
func WriteFastqToFile(iterator obiiter.IBioSequenceBatch, func WriteFastqToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename) file, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
options = append(options, OptionCloseFile()) options = append(options, OptionCloseFile())
+5 -5
View File
@@ -26,7 +26,7 @@ const (
inSequence gbstate = 4 inSequence gbstate = 4
) )
func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) { func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
state := inHeader state := inHeader
@@ -107,11 +107,11 @@ func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
} }
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch { func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize()) entry_channel := make(chan _FileChunk, opt.BufferSize())
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequence(opt.BufferSize())
nworkers := opt.ParallelWorkers() nworkers := opt.ParallelWorkers()
newIter.Add(nworkers) newIter.Add(nworkers)
@@ -130,7 +130,7 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBa
return newIter return newIter
} }
func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) { func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
var err error var err error
@@ -138,7 +138,7 @@ func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSe
reader, err = os.Open(filename) reader, err = os.Open(filename)
if err != nil { if err != nil {
log.Printf("open file error: %+v", err) log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
// Test if the flux is compressed by gzip // Test if the flux is compressed by gzip
+4 -4
View File
@@ -43,7 +43,7 @@ func GuessSeqFileType(firstline string) string {
} }
func ReadSequencesFromFile(filename string, func ReadSequencesFromFile(filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
var file *os.File var file *os.File
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
@@ -53,7 +53,7 @@ func ReadSequencesFromFile(filename string,
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
reader = file reader = file
@@ -72,7 +72,7 @@ func ReadSequencesFromFile(filename string,
tag, _ := breader.Peek(30) tag, _ := breader.Peek(30)
if len(tag) < 30 { if len(tag) < 30 {
newIter := obiiter.MakeIBioSequenceBatch() newIter := obiiter.MakeIBioSequence()
newIter.Close() newIter.Close()
return newIter, nil return newIter, nil
} }
@@ -98,5 +98,5 @@ func ReadSequencesFromFile(filename string,
filename, filetype) filename, filetype)
} }
return obiiter.NilIBioSequenceBatch, nil return obiiter.NilIBioSequence, nil
} }
+9 -9
View File
@@ -10,9 +10,9 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
) )
func WriteSequence(iterator obiiter.IBioSequenceBatch, func WriteSequence(iterator obiiter.IBioSequence,
file io.Writer, file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
iterator = iterator.Rebatch(1000) iterator = iterator.Rebatch(1000)
@@ -22,7 +22,7 @@ func WriteSequence(iterator obiiter.IBioSequenceBatch,
batch := iterator.Get() batch := iterator.Get()
iterator.PushBack() iterator.PushBack()
var newIter obiiter.IBioSequenceBatch var newIter obiiter.IBioSequence
var err error var err error
if len(batch.Slice()) > 0 { if len(batch.Slice()) > 0 {
@@ -42,24 +42,24 @@ func WriteSequence(iterator obiiter.IBioSequenceBatch,
return iterator, nil return iterator, nil
} }
return obiiter.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready") return obiiter.NilIBioSequence, fmt.Errorf("input iterator not ready")
} }
func WriteSequencesToStdout(iterator obiiter.IBioSequenceBatch, func WriteSequencesToStdout(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteSequence(iterator, os.Stdout, options...) return WriteSequence(iterator, os.Stdout, options...)
} }
func WriteSequencesToFile(iterator obiiter.IBioSequenceBatch, func WriteSequencesToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename) file, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
options = append(options, OptionCloseFile()) options = append(options, OptionCloseFile())
+55 -55
View File
@@ -15,7 +15,7 @@ import (
// Structure implementing an iterator over bioseq.BioSequenceBatch // Structure implementing an iterator over bioseq.BioSequenceBatch
// based on a channel. // based on a channel.
type _IBioSequenceBatch struct { type _IBioSequence struct {
channel chan BioSequenceBatch channel chan BioSequenceBatch
current BioSequenceBatch current BioSequenceBatch
pushBack *abool.AtomicBool pushBack *abool.AtomicBool
@@ -27,24 +27,24 @@ type _IBioSequenceBatch struct {
finished *abool.AtomicBool finished *abool.AtomicBool
} }
type IBioSequenceBatch struct { type IBioSequence struct {
pointer *_IBioSequenceBatch pointer *_IBioSequence
} }
// NilIBioSequenceBatch nil instance for IBioSequenceBatch // NilIBioSequence nil instance for IBioSequenceBatch
// //
// NilIBioSequenceBatch is the nil instance for the // NilIBioSequence is the nil instance for the
// IBioSequenceBatch type. // IBioSequenceBatch type.
var NilIBioSequenceBatch = IBioSequenceBatch{pointer: nil} var NilIBioSequence = IBioSequence{pointer: nil}
func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch { func MakeIBioSequence(sizes ...int) IBioSequence {
buffsize := int32(0) buffsize := int32(0)
if len(sizes) > 0 { if len(sizes) > 0 {
buffsize = int32(sizes[0]) buffsize = int32(sizes[0])
} }
i := _IBioSequenceBatch{ i := _IBioSequence{
channel: make(chan BioSequenceBatch, buffsize), channel: make(chan BioSequenceBatch, buffsize),
current: NilBioSequenceBatch, current: NilBioSequenceBatch,
pushBack: abool.New(), pushBack: abool.New(),
@@ -58,11 +58,11 @@ func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch {
i.all_done = &waiting i.all_done = &waiting
lock := sync.RWMutex{} lock := sync.RWMutex{}
i.lock = &lock i.lock = &lock
ii := IBioSequenceBatch{&i} ii := IBioSequence{&i}
return ii return ii
} }
func (iterator IBioSequenceBatch) Add(n int) { func (iterator IBioSequence) Add(n int) {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Add method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Add method on NilIBioSequenceBatch")
} }
@@ -70,7 +70,7 @@ func (iterator IBioSequenceBatch) Add(n int) {
iterator.pointer.all_done.Add(n) iterator.pointer.all_done.Add(n)
} }
func (iterator IBioSequenceBatch) Done() { func (iterator IBioSequence) Done() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Done method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Done method on NilIBioSequenceBatch")
} }
@@ -78,7 +78,7 @@ func (iterator IBioSequenceBatch) Done() {
iterator.pointer.all_done.Done() iterator.pointer.all_done.Done()
} }
func (iterator IBioSequenceBatch) Unlock() { func (iterator IBioSequence) Unlock() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Unlock method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Unlock method on NilIBioSequenceBatch")
} }
@@ -86,7 +86,7 @@ func (iterator IBioSequenceBatch) Unlock() {
iterator.pointer.lock.Unlock() iterator.pointer.lock.Unlock()
} }
func (iterator IBioSequenceBatch) Lock() { func (iterator IBioSequence) Lock() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Lock method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Lock method on NilIBioSequenceBatch")
} }
@@ -94,7 +94,7 @@ func (iterator IBioSequenceBatch) Lock() {
iterator.pointer.lock.Lock() iterator.pointer.lock.Lock()
} }
func (iterator IBioSequenceBatch) RLock() { func (iterator IBioSequence) RLock() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.RLock method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.RLock method on NilIBioSequenceBatch")
} }
@@ -102,7 +102,7 @@ func (iterator IBioSequenceBatch) RLock() {
iterator.pointer.lock.RLock() iterator.pointer.lock.RLock()
} }
func (iterator IBioSequenceBatch) RUnlock() { func (iterator IBioSequence) RUnlock() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.RUnlock method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.RUnlock method on NilIBioSequenceBatch")
} }
@@ -110,7 +110,7 @@ func (iterator IBioSequenceBatch) RUnlock() {
iterator.pointer.lock.RUnlock() iterator.pointer.lock.RUnlock()
} }
func (iterator IBioSequenceBatch) Wait() { func (iterator IBioSequence) Wait() {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Wait method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Wait method on NilIBioSequenceBatch")
} }
@@ -118,7 +118,7 @@ func (iterator IBioSequenceBatch) Wait() {
iterator.pointer.all_done.Wait() iterator.pointer.all_done.Wait()
} }
func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch { func (iterator IBioSequence) Channel() chan BioSequenceBatch {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Channel method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.Channel method on NilIBioSequenceBatch")
} }
@@ -126,7 +126,7 @@ func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch {
return iterator.pointer.channel return iterator.pointer.channel
} }
func (iterator IBioSequenceBatch) IsNil() bool { func (iterator IBioSequence) IsNil() bool {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.IsNil method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.IsNil method on NilIBioSequenceBatch")
} }
@@ -134,7 +134,7 @@ func (iterator IBioSequenceBatch) IsNil() bool {
return iterator.pointer == nil return iterator.pointer == nil
} }
func (iterator IBioSequenceBatch) BufferSize() int { func (iterator IBioSequence) BufferSize() int {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch")
} }
@@ -142,7 +142,7 @@ func (iterator IBioSequenceBatch) BufferSize() int {
return int(atomic.LoadInt32(&iterator.pointer.buffer_size)) return int(atomic.LoadInt32(&iterator.pointer.buffer_size))
} }
func (iterator IBioSequenceBatch) BatchSize() int { func (iterator IBioSequence) BatchSize() int {
if iterator.pointer == nil { if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch") log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch")
} }
@@ -150,7 +150,7 @@ func (iterator IBioSequenceBatch) BatchSize() int {
return int(atomic.LoadInt32(&iterator.pointer.batch_size)) return int(atomic.LoadInt32(&iterator.pointer.batch_size))
} }
func (iterator IBioSequenceBatch) SetBatchSize(size int) error { func (iterator IBioSequence) SetBatchSize(size int) error {
if size >= 0 { if size >= 0 {
atomic.StoreInt32(&iterator.pointer.batch_size, int32(size)) atomic.StoreInt32(&iterator.pointer.batch_size, int32(size))
return nil return nil
@@ -159,10 +159,10 @@ func (iterator IBioSequenceBatch) SetBatchSize(size int) error {
return fmt.Errorf("size (%d) cannot be negative", size) return fmt.Errorf("size (%d) cannot be negative", size)
} }
func (iterator IBioSequenceBatch) Split() IBioSequenceBatch { func (iterator IBioSequence) Split() IBioSequence {
iterator.pointer.lock.RLock() iterator.pointer.lock.RLock()
defer iterator.pointer.lock.RUnlock() defer iterator.pointer.lock.RUnlock()
i := _IBioSequenceBatch{ i := _IBioSequence{
channel: iterator.pointer.channel, channel: iterator.pointer.channel,
current: NilBioSequenceBatch, current: NilBioSequenceBatch,
pushBack: abool.New(), pushBack: abool.New(),
@@ -174,11 +174,11 @@ func (iterator IBioSequenceBatch) Split() IBioSequenceBatch {
lock := sync.RWMutex{} lock := sync.RWMutex{}
i.lock = &lock i.lock = &lock
newIter := IBioSequenceBatch{&i} newIter := IBioSequence{&i}
return newIter return newIter
} }
func (iterator IBioSequenceBatch) Next() bool { func (iterator IBioSequence) Next() bool {
if iterator.pointer.pushBack.IsSet() { if iterator.pointer.pushBack.IsSet() {
iterator.pointer.pushBack.UnSet() iterator.pointer.pushBack.UnSet()
return true return true
@@ -200,7 +200,7 @@ func (iterator IBioSequenceBatch) Next() bool {
return false return false
} }
func (iterator IBioSequenceBatch) PushBack() { func (iterator IBioSequence) PushBack() {
if !iterator.pointer.current.IsNil() { if !iterator.pointer.current.IsNil() {
iterator.pointer.pushBack.Set() iterator.pointer.pushBack.Set()
} }
@@ -210,11 +210,11 @@ func (iterator IBioSequenceBatch) PushBack() {
// currently pointed by the iterator. You have to use the // currently pointed by the iterator. You have to use the
// 'Next' method to move to the next entry before calling // 'Next' method to move to the next entry before calling
// 'Get' to retreive the following instance. // 'Get' to retreive the following instance.
func (iterator IBioSequenceBatch) Get() BioSequenceBatch { func (iterator IBioSequence) Get() BioSequenceBatch {
return iterator.pointer.current return iterator.pointer.current
} }
func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) { func (iterator IBioSequence) Push(batch BioSequenceBatch) {
if batch.IsNil() { if batch.IsNil() {
log.Panicln("A Nil batch is pushed on the channel") log.Panicln("A Nil batch is pushed on the channel")
} }
@@ -225,11 +225,11 @@ func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) {
iterator.pointer.channel <- batch iterator.pointer.channel <- batch
} }
func (iterator IBioSequenceBatch) Close() { func (iterator IBioSequence) Close() {
close(iterator.pointer.channel) close(iterator.pointer.channel)
} }
func (iterator IBioSequenceBatch) WaitAndClose() { func (iterator IBioSequence) WaitAndClose() {
iterator.Wait() iterator.Wait()
for len(iterator.Channel()) > 0 { for len(iterator.Channel()) > 0 {
@@ -240,18 +240,18 @@ func (iterator IBioSequenceBatch) WaitAndClose() {
// Finished returns 'true' value if no more data is available // Finished returns 'true' value if no more data is available
// from the iterator. // from the iterator.
func (iterator IBioSequenceBatch) Finished() bool { func (iterator IBioSequence) Finished() bool {
return iterator.pointer.finished.IsSet() return iterator.pointer.finished.IsSet()
} }
func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
if len(sizes) > 0 { if len(sizes) > 0 {
buffsize = sizes[0] buffsize = sizes[0]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)
@@ -288,14 +288,14 @@ func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch {
} }
func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSequenceBatch { func (iterator IBioSequence) Concat(iterators ...IBioSequence) IBioSequence {
if len(iterators) == 0 { if len(iterators) == 0 {
return iterator return iterator
} }
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)
@@ -333,7 +333,7 @@ func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSeq
return newIter return newIter
} }
func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSequenceBatch { func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
niterator := len(iterators) + 1 niterator := len(iterators) + 1
@@ -343,7 +343,7 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
nextCounter := goutils.AtomicCounter() nextCounter := goutils.AtomicCounter()
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(niterator) newIter.Add(niterator)
@@ -351,7 +351,7 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
newIter.WaitAndClose() newIter.WaitAndClose()
}() }()
ff := func(iterator IBioSequenceBatch) { ff := func(iterator IBioSequence) {
for iterator.Next() { for iterator.Next() {
s := iterator.Get() s := iterator.Get()
@@ -372,14 +372,14 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
// IBioSequenceBatch with every batches having the same size // IBioSequenceBatch with every batches having the same size
// indicated in parameter. Rebatching implies to sort the // indicated in parameter. Rebatching implies to sort the
// source IBioSequenceBatch. // source IBioSequenceBatch.
func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) Rebatch(size int, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
if len(sizes) > 0 { if len(sizes) > 0 {
buffsize = sizes[0] buffsize = sizes[0]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)
@@ -418,7 +418,7 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
return newIter return newIter
} }
func (iterator IBioSequenceBatch) Recycle() { func (iterator IBioSequence) Recycle() {
log.Debugln("Start recycling of Bioseq objects") log.Debugln("Start recycling of Bioseq objects")
recycled := 0 recycled := 0
@@ -434,14 +434,14 @@ func (iterator IBioSequenceBatch) Recycle() {
log.Debugf("End of the recycling of %d Bioseq objects", recycled) log.Debugf("End of the recycling of %d Bioseq objects", recycled)
} }
func (iterator IBioSequenceBatch) Consume() { func (iterator IBioSequence) Consume() {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
batch.Recycle() batch.Recycle()
} }
} }
func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) { func (iterator IBioSequence) Count(recycle bool) (int, int, int) {
variants := 0 variants := 0
reads := 0 reads := 0
nucleotides := 0 nucleotides := 0
@@ -465,7 +465,7 @@ func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
return variants, reads, nucleotides return variants, reads, nucleotides
} }
func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch, func (iterator IBioSequence) PairWith(reverse IBioSequence,
sizes ...int) IPairedBioSequenceBatch { sizes ...int) IPairedBioSequenceBatch {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
batchsize := 5000 batchsize := 5000
@@ -510,16 +510,16 @@ func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch,
// A function that takes a predicate and returns two IBioSequenceBatch iterators. // A function that takes a predicate and returns two IBioSequenceBatch iterators.
// Sequences extracted from the input iterator are distributed among both the // Sequences extracted from the input iterator are distributed among both the
// iterator following the predicate value. // iterator following the predicate value.
func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate, func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) (IBioSequenceBatch, IBioSequenceBatch) { size int, sizes ...int) (IBioSequence, IBioSequence) {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
if len(sizes) > 0 { if len(sizes) > 0 {
buffsize = sizes[0] buffsize = sizes[0]
} }
trueIter := MakeIBioSequenceBatch(buffsize) trueIter := MakeIBioSequence(buffsize)
falseIter := MakeIBioSequenceBatch(buffsize) falseIter := MakeIBioSequence(buffsize)
trueIter.Add(1) trueIter.Add(1)
falseIter.Add(1) falseIter.Add(1)
@@ -578,8 +578,8 @@ func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
// Filtering a batch of sequences. // Filtering a batch of sequences.
// A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences. // A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences.
func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate, func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) IBioSequenceBatch { size int, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
nworkers := 4 nworkers := 4
@@ -591,7 +591,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
buffsize = sizes[1] buffsize = sizes[1]
} }
trueIter := MakeIBioSequenceBatch(buffsize) trueIter := MakeIBioSequence(buffsize)
trueIter.Add(nworkers) trueIter.Add(nworkers)
@@ -599,7 +599,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
trueIter.WaitAndClose() trueIter.WaitAndClose()
}() }()
ff := func(iterator IBioSequenceBatch) { ff := func(iterator IBioSequence) {
// iterator = iterator.SortBatches() // iterator = iterator.SortBatches()
for iterator.Next() { for iterator.Next() {
@@ -633,7 +633,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
// Load all sequences available from an IBioSequenceBatch iterator into // Load all sequences available from an IBioSequenceBatch iterator into
// a large obiseq.BioSequenceSlice. // a large obiseq.BioSequenceSlice.
func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice { func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
chunck := obiseq.MakeBioSequenceSlice() chunck := obiseq.MakeBioSequenceSlice()
for iterator.Next() { for iterator.Next() {
@@ -648,7 +648,7 @@ func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of // It takes a slice of BioSequence objects, and returns an iterator that will return batches of
// BioSequence objects // BioSequence objects
func IBatchOver(data obiseq.BioSequenceSlice, func IBatchOver(data obiseq.BioSequenceSlice,
size int, sizes ...int) IBioSequenceBatch { size int, sizes ...int) IBioSequence {
buffsize := 0 buffsize := 0
@@ -656,7 +656,7 @@ func IBatchOver(data obiseq.BioSequenceSlice,
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)
+6 -6
View File
@@ -8,19 +8,19 @@ import (
) )
type IDistribute struct { type IDistribute struct {
outputs map[int]IBioSequenceBatch outputs map[int]IBioSequence
news chan int news chan int
classifier *obiseq.BioSequenceClassifier classifier *obiseq.BioSequenceClassifier
lock *sync.Mutex lock *sync.Mutex
} }
func (dist *IDistribute) Outputs(key int) (IBioSequenceBatch, error) { func (dist *IDistribute) Outputs(key int) (IBioSequence, error) {
dist.lock.Lock() dist.lock.Lock()
iter, ok := dist.outputs[key] iter, ok := dist.outputs[key]
dist.lock.Unlock() dist.lock.Unlock()
if !ok { if !ok {
return NilIBioSequenceBatch, fmt.Errorf("code %d unknown", key) return NilIBioSequence, fmt.Errorf("code %d unknown", key)
} }
return iter, nil return iter, nil
@@ -34,11 +34,11 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
return dist.classifier return dist.classifier
} }
func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute { func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
batchsize := 5000 batchsize := 5000
buffsize := 2 buffsize := 2
outputs := make(map[int]IBioSequenceBatch, 100) outputs := make(map[int]IBioSequence, 100)
slices := make(map[int]*obiseq.BioSequenceSlice, 100) slices := make(map[int]*obiseq.BioSequenceSlice, 100)
orders := make(map[int]int, 100) orders := make(map[int]int, 100)
news := make(chan int) news := make(chan int)
@@ -80,7 +80,7 @@ func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier
orders[key] = 0 orders[key] = 0
lock.Lock() lock.Lock()
outputs[key] = MakeIBioSequenceBatch(buffsize) outputs[key] = MakeIBioSequence(buffsize)
lock.Unlock() lock.Unlock()
news <- key news <- key
+3 -3
View File
@@ -2,7 +2,7 @@ package obiiter
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequence {
batchsize := 100 batchsize := 100
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@@ -13,7 +13,7 @@ func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []strin
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(1) newIter.Add(1)
@@ -41,7 +41,7 @@ func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []strin
} }
func MergePipe(na string, statsOn []string, sizes ...int) Pipeable { func MergePipe(na string, statsOn []string, sizes ...int) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch { f := func(iterator IBioSequence) IBioSequence {
return iterator.IMergeSequenceBatch(na, statsOn, sizes...) return iterator.IMergeSequenceBatch(na, statsOn, sizes...)
} }
+12 -14
View File
@@ -1,12 +1,11 @@
package obiiter package obiiter
type Pipeable func(input IBioSequence) IBioSequence
type Pipeable func(input IBioSequenceBatch) IBioSequenceBatch func Pipeline(start Pipeable, parts ...Pipeable) Pipeable {
p := func(input IBioSequence) IBioSequence {
func Pipeline(start Pipeable,parts ...Pipeable) Pipeable {
p := func (input IBioSequenceBatch) IBioSequenceBatch {
data := start(input) data := start(input)
for _,part := range parts { for _, part := range parts {
data = part(data) data = part(data)
} }
return data return data
@@ -15,17 +14,16 @@ func Pipeline(start Pipeable,parts ...Pipeable) Pipeable {
return p return p
} }
func (input IBioSequenceBatch) Pipe(start Pipeable, parts ...Pipeable) IBioSequenceBatch { func (input IBioSequence) Pipe(start Pipeable, parts ...Pipeable) IBioSequence {
p := Pipeline(start,parts...) p := Pipeline(start, parts...)
return p(input) return p(input)
} }
type Teeable func(input IBioSequence) (IBioSequence, IBioSequence)
type Teeable func(input IBioSequenceBatch) (IBioSequenceBatch,IBioSequenceBatch) func (input IBioSequence) CopyTee() (IBioSequence, IBioSequence) {
first := MakeIBioSequence()
func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch,IBioSequenceBatch) { second := MakeIBioSequence()
first := MakeIBioSequenceBatch()
second:= MakeIBioSequenceBatch()
first.Add(1) first.Add(1)
@@ -36,11 +34,11 @@ func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch,IBioSequenceBatch) {
go func() { go func() {
for input.Next() { for input.Next() {
b:=input.Get() b := input.Get()
first.Push(b) first.Push(b)
second.Push(b) second.Push(b)
} }
}() }()
return first,second return first, second
} }
+3 -3
View File
@@ -6,8 +6,8 @@ import (
"github.com/schollz/progressbar/v3" "github.com/schollz/progressbar/v3"
) )
func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch { func (iterator IBioSequence) Speed(message ...string) IBioSequence {
newIter := MakeIBioSequenceBatch() newIter := MakeIBioSequence()
newIter.Add(1) newIter.Add(1)
@@ -51,7 +51,7 @@ func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch {
} }
func SpeedPipe(message ...string) Pipeable { func SpeedPipe(message ...string) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch { f := func(iterator IBioSequence) IBioSequence {
return iterator.Speed(message...) return iterator.Speed(message...)
} }
+12 -12
View File
@@ -27,7 +27,7 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
// In addition to the SeqWorker function, the method accepts two optional integer parameters. // In addition to the SeqWorker function, the method accepts two optional integer parameters.
// - The first indicates the number of workers running in parallel (default 4). // - The first indicates the number of workers running in parallel (default 4).
// - The second sets the size of the channel buffer; by default it is the same as the input buffer. // - The second sets the size of the channel buffer; by default it is the same as the input buffer.
func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@@ -39,7 +39,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(nworkers) newIter.Add(nworkers)
@@ -49,7 +49,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
}() }()
f := func(iterator IBioSequenceBatch) { f := func(iterator IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
for i, seq := range batch.slice { for i, seq := range batch.slice {
@@ -69,8 +69,8 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
return newIter return newIter
} }
func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.SequencePredicate, func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
worker SeqWorker, sizes ...int) IBioSequenceBatch { worker SeqWorker, sizes ...int) IBioSequence {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@@ -82,7 +82,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(nworkers) newIter.Add(nworkers)
@@ -92,7 +92,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
}() }()
f := func(iterator IBioSequenceBatch) { f := func(iterator IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
for i, seq := range batch.slice { for i, seq := range batch.slice {
@@ -114,7 +114,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
return newIter return newIter
} }
func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequenceBatch { func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequence {
nworkers := 4 nworkers := 4
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@@ -126,7 +126,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := MakeIBioSequenceBatch(buffsize) newIter := MakeIBioSequence(buffsize)
newIter.Add(nworkers) newIter.Add(nworkers)
@@ -135,7 +135,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
log.Println("End of the batch slice workers") log.Println("End of the batch slice workers")
}() }()
f := func(iterator IBioSequenceBatch) { f := func(iterator IBioSequence) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
batch.slice = worker(batch.slice) batch.slice = worker(batch.slice)
@@ -154,7 +154,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
} }
func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable { func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch { f := func(iterator IBioSequence) IBioSequence {
return iterator.MakeIWorker(worker, sizes...) return iterator.MakeIWorker(worker, sizes...)
} }
@@ -162,7 +162,7 @@ func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
} }
func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable { func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch { f := func(iterator IBioSequence) IBioSequence {
return iterator.MakeISliceWorker(worker, sizes...) return iterator.MakeISliceWorker(worker, sizes...)
} }
+193
View File
@@ -0,0 +1,193 @@
package obiseq
import (
"fmt"
"strconv"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
log "github.com/sirupsen/logrus"
)
// GetAttribute looks up key in the annotation map of the sequence.
// It returns the stored value and true when the key is present. When the
// sequence has no annotation map, or the key is absent, it returns nil
// and false.
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
	// No annotation map at all: nothing can be found.
	if s.annotations == nil {
		return nil, false
	}

	value, found := s.annotations[key]
	return value, found
}
// SetAttribute stores value under key in the annotation map returned by
// s.Annotations(), replacing any value previously held for that key.
func (s *BioSequence) SetAttribute(key string, value interface{}) {
	s.Annotations()[key] = value
}
// GetIntAttribute returns the annotation stored under key converted to an
// int by goutils.InterfaceToInt. The boolean result is true only when the
// key exists and the conversion reported no error.
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return 0, false
	}

	number, err := goutils.InterfaceToInt(raw)
	return number, err == nil
}
// DeleteAttribute removes key from the annotation map returned by
// s.Annotations(). Deleting a key that is not present is a no-op.
func (s *BioSequence) DeleteAttribute(key string) {
	annotations := s.Annotations()
	delete(annotations, key)
}
// RenameAttribute moves the annotation stored under oldName to newName.
//
// Note the parameter order: the new name comes first, the old name second.
// If oldName is not present the sequence is left untouched. Any value
// already stored under newName is overwritten.
func (s *BioSequence) RenameAttribute(newName, oldName string) {
	// Renaming a key to itself must be a no-op: without this guard the
	// value would be re-set and then immediately deleted, losing it.
	if newName == oldName {
		return
	}

	val, ok := s.GetAttribute(oldName)
	if !ok {
		return
	}

	s.SetAttribute(newName, val)
	s.DeleteAttribute(oldName)
}
// GetNumericAttribute returns the annotation stored under key converted to
// a float64 by goutils.InterfaceToFloat64. The boolean result is true only
// when the key exists and the conversion reported no error.
func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return 0, false
	}

	number, err := goutils.InterfaceToFloat64(raw)
	return number, err == nil
}
// GetStringAttribute returns the annotation stored under key formatted as
// a string with fmt.Sprint. The boolean result reports whether the key
// exists; when it does not, the empty string is returned.
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return "", false
	}

	return fmt.Sprint(raw), true
}
// GetBoolAttribute returns the annotation stored under key converted to a
// bool by goutils.InterfaceToBool. The second result is true only when the
// key exists and the conversion reported no error.
func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return false, false
	}

	flag, err := goutils.InterfaceToBool(raw)
	return flag, err == nil
}
// GetIntMap returns the annotation stored under key converted to a
// map[string]int by goutils.InterfaceToIntMap. The boolean result is true
// only when the key exists and the conversion reported no error.
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return nil, false
	}

	counts, err := goutils.InterfaceToIntMap(raw)
	return counts, err == nil
}
// Count returns the number of times the sequence has been observed, read
// from the "count" annotation. It defaults to 1 when that annotation is
// missing or cannot be read as an int.
func (s *BioSequence) Count() int {
	if observed, found := s.GetIntAttribute("count"); found {
		return observed
	}

	return 1
}
// SetCount records the number of times the sequence has been observed in
// the "count" annotation.
func (s *BioSequence) SetCount(count int) {
	s.Annotations()["count"] = count
}
// Taxid returns the taxid of the sequence, read from the "taxid"
// annotation. It defaults to 1 when that annotation is missing or cannot
// be read as an int.
func (s *BioSequence) Taxid() int {
	if id, found := s.GetIntAttribute("taxid"); found {
		return id
	}

	return 1
}
// SetTaxid records the taxid of the sequence in the "taxid" annotation.
func (s *BioSequence) SetTaxid(taxid int) {
	s.Annotations()["taxid"] = taxid
}
// OBITagRefIndex returns the "obitag_ref_index" annotation as a
// map[int]string, or nil when the annotation is absent.
//
// Three stored representations are accepted:
//   - map[int]string is returned as is (not copied);
//   - map[string]interface{} has its keys parsed with strconv.Atoi and its
//     values converted with goutils.InterfaceToString;
//   - map[string]string has its keys parsed with strconv.Atoi.
//
// Any key or value that cannot be converted, and any other stored type,
// is reported through log.Panicln.
func (s *BioSequence) OBITagRefIndex() map[int]string {
	raw, found := s.GetAttribute("obitag_ref_index")
	if !found {
		return nil
	}

	switch typed := raw.(type) {
	case map[int]string:
		return typed

	case map[string]interface{}:
		index := make(map[int]string, len(typed))
		for key, value := range typed {
			score, err := strconv.Atoi(key)
			if err != nil {
				log.Panicln(err)
			}

			index[score], err = goutils.InterfaceToString(value)
			if err != nil {
				log.Panicln(err)
			}
		}
		return index

	case map[string]string:
		index := make(map[int]string, len(typed))
		for key, value := range typed {
			score, err := strconv.Atoi(key)
			if err != nil {
				log.Panicln(err)
			}

			index[score] = value
		}
		return index

	default:
		log.Panicln("value of attribute obitag_ref_index cannot be casted to a map[int]string")
	}

	return nil
}
+3 -149
View File
@@ -13,20 +13,17 @@ package obiseq
import ( import (
"bytes" "bytes"
"crypto/md5" "crypto/md5"
"fmt"
"strconv"
"sync/atomic" "sync/atomic"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
) )
var _NewSeq = int32(0) var _NewSeq = int32(0)
var _RecycleSeq = int32(0) var _RecycleSeq = int32(0)
var _InMemSeq = int32(0) var _InMemSeq = int32(0)
var _MaxInMemSeq = int32(0)
var _BioLogRate = int(100000) // var _MaxInMemSeq = int32(0)
// var _BioLogRate = int(100000)
func LogBioSeqStatus() { func LogBioSeqStatus() {
log.Debugf("Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq) log.Debugf("Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq)
@@ -200,154 +197,11 @@ func (s *BioSequence) Annotations() Annotation {
return s.annotations return s.annotations
} }
// A method that returns the value of the key in the annotation map.
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
var val interface{}
ok := s.annotations != nil
if ok {
val, ok = s.annotations[key]
}
return val, ok
}
func (s *BioSequence) SetAttribute(key string, value interface{}) {
annot := s.Annotations()
annot[key] = value
}
// A method that returns the value of the key in the annotation map.
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
var val int
var err error
v, ok := s.GetAttribute(key)
if ok {
val, err = goutils.InterfaceToInt(v)
ok = err == nil
}
return val, ok
}
// A method that returns the value of the key in the annotation map.
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
var val string
v, ok := s.GetAttribute(key)
if ok {
val = fmt.Sprint(v)
}
return val, ok
}
// A method that returns the value of the key in the annotation map.
func (s *BioSequence) GetBool(key string) (bool, bool) {
var val bool
var err error
v, ok := s.GetAttribute(key)
if ok {
val, err = goutils.InterfaceToBool(v)
ok = err == nil
}
return val, ok
}
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
var val map[string]int
var err error
v, ok := s.GetAttribute(key)
if ok {
val, err = goutils.InterfaceToIntMap(v)
ok = err == nil
}
return val, ok
}
// Returning the MD5 hash of the sequence. // Returning the MD5 hash of the sequence.
func (s *BioSequence) MD5() [16]byte { func (s *BioSequence) MD5() [16]byte {
return md5.Sum(s.sequence) return md5.Sum(s.sequence)
} }
// Returning the number of times the sequence has been observed.
func (s *BioSequence) Count() int {
count, ok := s.GetIntAttribute("count")
if !ok {
count = 1
}
return count
}
// Returning the taxid of the sequence.
func (s *BioSequence) Taxid() int {
taxid, ok := s.GetIntAttribute("taxid")
if !ok {
taxid = 1
}
return taxid
}
func (s *BioSequence) OBITagRefIndex() map[int]string {
var val map[int]string
i, ok := s.GetAttribute("obitag_ref_index")
if !ok {
return nil
}
switch i := i.(type) {
case map[int]string:
val = i
case map[string]interface{}:
val = make(map[int]string, len(i))
for k, v := range i {
score, err := strconv.Atoi(k)
if err != nil {
log.Panicln(err)
}
val[score], err = goutils.InterfaceToString(v)
if err != nil {
log.Panicln(err)
}
}
case map[string]string:
val = make(map[int]string, len(i))
for k, v := range i {
score, err := strconv.Atoi(k)
if err != nil {
log.Panicln(err)
}
val[score] = v
}
default:
log.Panicln("value of attribute obitag_ref_index cannot be casted to a map[int]string")
}
return val
}
func (s *BioSequence) SetTaxid(taxid int) {
annot := s.Annotations()
annot["taxid"] = taxid
}
// Setting the id of the BioSequence. // Setting the id of the BioSequence.
func (s *BioSequence) SetId(id string) { func (s *BioSequence) SetId(id string) {
s.id = id s.id = id
+2
View File
@@ -0,0 +1,2 @@
package obiannotate
+4 -7
View File
@@ -19,7 +19,7 @@ type seqPCR struct {
SonCount int SonCount int
AddedSons int AddedSons int
Edges []Edge Edges []Edge
Cluster map[int]bool // used as the set of head sequences associated to that sequence Cluster map[int]bool // used as the set of head sequences associated to that sequence
} }
// buildSamples sorts the sequences by samples // buildSamples sorts the sequences by samples
@@ -58,7 +58,7 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
func annotateOBIClean(dataset obiseq.BioSequenceSlice, func annotateOBIClean(dataset obiseq.BioSequenceSlice,
sample map[string]*([]*seqPCR), sample map[string]*([]*seqPCR),
tag, NAValue string) obiiter.IBioSequenceBatch { tag, NAValue string) obiiter.IBioSequence {
batchsize := 1000 batchsize := 1000
var annot = func(data obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { var annot = func(data obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
@@ -207,7 +207,6 @@ func GetCluster(sequence *obiseq.BioSequence) map[string]string {
return cluster return cluster
} }
// func Cluster(sample map[string]*([]*seqPCR)) { // func Cluster(sample map[string]*([]*seqPCR)) {
// for _, graph := range sample { // for _, graph := range sample {
// for _, s := range *graph { // for _, s := range *graph {
@@ -215,7 +214,7 @@ func GetCluster(sequence *obiseq.BioSequence) map[string]string {
// if len(s.Edges) > 0 { // if len(s.Edges) > 0 {
// for _, f := range s.Edges { // for _, f := range s.Edges {
// } // }
// } else { // } else {
// cluster // cluster
// } // }
@@ -286,7 +285,7 @@ func Weight(sequence *obiseq.BioSequence) map[string]int {
return weight return weight
} }
func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func IOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
db := itertator.Load() db := itertator.Load()
@@ -318,7 +317,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
} }
} }
Mutation(samples) Mutation(samples)
pbopt := make([]progressbar.Option, 0, 5) pbopt := make([]progressbar.Option, 0, 5)
@@ -352,7 +350,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
EmpiricalDistCsv(RatioTableFilename(), all_ratio) EmpiricalDistCsv(RatioTableFilename(), all_ratio)
} }
iter := annotateOBIClean(db, samples, SampleAttribute(), "NA") iter := annotateOBIClean(db, samples, SampleAttribute(), "NA")
if OnlyHead() { if OnlyHead() {
+5 -5
View File
@@ -67,9 +67,9 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
return list_of_files, nil return list_of_files, nil
} }
func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) { func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
var iterator obiiter.IBioSequenceBatch var iterator obiiter.IBioSequence
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequenceBatch, error) var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequence, error)
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)
@@ -109,7 +109,7 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
list_of_files, err := _ExpandListOfFiles(false, filenames...) list_of_files, err := _ExpandListOfFiles(false, filenames...)
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
switch CLIInputFormat() { switch CLIInputFormat() {
@@ -140,7 +140,7 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
iterator, err = reader(list_of_files[0], opts...) iterator, err = reader(list_of_files[0], opts...)
if err != nil { if err != nil {
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
} }
+5 -5
View File
@@ -8,10 +8,10 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
) )
func WriteBioSequences(iterator obiiter.IBioSequenceBatch, func WriteBioSequences(iterator obiiter.IBioSequence,
terminalAction bool, filenames ...string) (obiiter.IBioSequenceBatch, error) { terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
var newIter obiiter.IBioSequenceBatch var newIter obiiter.IBioSequence
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)
@@ -62,12 +62,12 @@ func WriteBioSequences(iterator obiiter.IBioSequenceBatch,
if err != nil { if err != nil {
log.Fatalf("Write file error: %v", err) log.Fatalf("Write file error: %v", err)
return obiiter.NilIBioSequenceBatch, err return obiiter.NilIBioSequence, err
} }
if terminalAction { if terminalAction {
newIter.Recycle() newIter.Recycle()
return obiiter.NilIBioSequenceBatch, nil return obiiter.NilIBioSequence, nil
} }
return newIter, nil return newIter, nil
+1 -1
View File
@@ -9,7 +9,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
) )
func DistributeSequence(sequences obiiter.IBioSequenceBatch) { func DistributeSequence(sequences obiiter.IBioSequence) {
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)
+3 -3
View File
@@ -8,14 +8,14 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
) )
func IFilterSequence(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func IFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
var newIter obiiter.IBioSequenceBatch var newIter obiiter.IBioSequence
predicate := CLISequenceSelectionPredicate() predicate := CLISequenceSelectionPredicate()
if predicate != nil { if predicate != nil {
if CLISaveDiscardedSequences() { if CLISaveDiscardedSequences() {
var discarded obiiter.IBioSequenceBatch var discarded obiiter.IBioSequence
log.Printf("Discarded sequences saved in file: %s\n", CLIDiscardedFileName()) log.Printf("Discarded sequences saved in file: %s\n", CLIDiscardedFileName())
newIter, discarded = iterator.DivideOn(predicate, newIter, discarded = iterator.DivideOn(predicate,
+2 -2
View File
@@ -10,7 +10,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
) )
func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) { func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
opts := make([]obingslibrary.WithOption, 0, 10) opts := make([]obingslibrary.WithOption, 0, 10)
@@ -37,7 +37,7 @@ func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBa
newIter = newIter.Rebatch(obioptions.CLIBatchSize()) newIter = newIter.Rebatch(obioptions.CLIBatchSize())
} }
var unidentified obiiter.IBioSequenceBatch var unidentified obiiter.IBioSequence
if CLIUnidentifiedFileName() != "" { if CLIUnidentifiedFileName() != "" {
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName()) log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"), unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),
+2 -2
View File
@@ -206,7 +206,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch, func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
gap float64, delta, minOverlap int, gap float64, delta, minOverlap int,
minIdentity float64, minIdentity float64,
withStats bool, sizes ...int) obiiter.IBioSequenceBatch { withStats bool, sizes ...int) obiiter.IBioSequence {
nworkers := runtime.NumCPU() * 3 / 2 nworkers := runtime.NumCPU() * 3 / 2
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@@ -219,7 +219,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := obiiter.MakeIBioSequenceBatch(buffsize) newIter := obiiter.MakeIBioSequence(buffsize)
newIter.Add(nworkers) newIter.Add(nworkers)
+1 -1
View File
@@ -9,7 +9,7 @@ import (
// PCR iterates over sequences provided by an obiseq.IBioSequenceBatch // PCR iterates over sequences provided by an obiseq.IBioSequenceBatch
// and returns another obiseq.IBioSequenceBatch distributing // and returns another obiseq.IBioSequenceBatch distributing
// obiseq.BioSequenceBatch containing the selected amplicon sequences. // obiseq.BioSequenceBatch containing the selected amplicon sequences.
func PCR(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) { func PCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
opts := make([]obiapat.WithOption, 0, 10) opts := make([]obiapat.WithOption, 0, 10)
+3 -3
View File
@@ -32,7 +32,7 @@ func IndexSequence(seqidx int,
// r := 0 // r := 0
// w := 0 // w := 0
for i, ref := range references { for i, ref := range references {
lcs, alilength := obialign.FastLCSScore(sequence, ref, -1 , &matrix) lcs, alilength := obialign.FastLCSScore(sequence, ref, -1, &matrix)
score[i] = alilength - lcs score[i] = alilength - lcs
} }
@@ -88,7 +88,7 @@ func IndexSequence(seqidx int,
return obitag_index return obitag_index
} }
func IndexReferenceDB(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
references := iterator.Load() references := iterator.Load()
refcounts := make( refcounts := make(
@@ -118,7 +118,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBa
bar := progressbar.NewOptions(len(references), pbopt...) bar := progressbar.NewOptions(len(references), pbopt...)
limits := make(chan [2]int) limits := make(chan [2]int)
indexed := obiiter.MakeIBioSequenceBatch() indexed := obiiter.MakeIBioSequence()
go func() { go func() {
for i := 0; i < len(references); i += 10 { for i := 0; i < len(references); i += 10 {
limits <- [2]int{i, goutils.MinInt(i+10, len(references))} limits <- [2]int{i, goutils.MinInt(i+10, len(references))}
+1 -1
View File
@@ -171,7 +171,7 @@ func IdentifySeqWorker(references obiseq.BioSequenceSlice,
} }
} }
func AssignTaxonomy(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func AssignTaxonomy(iterator obiiter.IBioSequence) obiiter.IBioSequence {
references := CLIRefDB() references := CLIRefDB()
refcounts := make( refcounts := make(
+1 -1
View File
@@ -8,7 +8,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
) )
func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch { func Unique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
options := make([]obichunk.WithOption, 0, 30) options := make([]obichunk.WithOption, 0, 30)
BIN
View File
Binary file not shown.