mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
rename the iterator class
This commit is contained in:
@ -35,12 +35,12 @@ func find(root, ext string) []string {
|
||||
return a
|
||||
}
|
||||
|
||||
func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
|
||||
func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
sizes ...int) (obiiter.IBioSequenceBatch, error) {
|
||||
sizes ...int) (obiiter.IBioSequence, error) {
|
||||
dir, err := tempDir()
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
bufferSize := iterator.BufferSize()
|
||||
@ -49,7 +49,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
|
||||
bufferSize = sizes[0]
|
||||
}
|
||||
|
||||
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
|
||||
newIter := obiiter.MakeIBioSequence(bufferSize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
|
@ -1,16 +1,17 @@
|
||||
package obichunk
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
"sync"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
)
|
||||
|
||||
func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
|
||||
func ISequenceChunk(iterator obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
sizes ...int) (obiiter.IBioSequenceBatch, error) {
|
||||
sizes ...int) (obiiter.IBioSequence, error) {
|
||||
|
||||
bufferSize := iterator.BufferSize()
|
||||
|
||||
@ -18,7 +19,7 @@ func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
|
||||
bufferSize = sizes[0]
|
||||
}
|
||||
|
||||
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
|
||||
newIter := obiiter.MakeIBioSequence(bufferSize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
|
@ -56,9 +56,9 @@ func (by _By) Sort(seqs []sSS) {
|
||||
// End of the sort interface
|
||||
//
|
||||
|
||||
func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
|
||||
func ISequenceSubChunk(iterator obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
sizes ...int) (obiiter.IBioSequenceBatch, error) {
|
||||
sizes ...int) (obiiter.IBioSequence, error) {
|
||||
|
||||
bufferSize := iterator.BufferSize()
|
||||
nworkers := 4
|
||||
@ -71,7 +71,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
|
||||
bufferSize = sizes[1]
|
||||
}
|
||||
|
||||
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
|
||||
newIter := obiiter.MakeIBioSequence(bufferSize)
|
||||
|
||||
newIter.Add(nworkers)
|
||||
|
||||
@ -88,7 +88,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
|
||||
return neworder
|
||||
}
|
||||
|
||||
ff := func(iterator obiiter.IBioSequenceBatch,
|
||||
ff := func(iterator obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier) {
|
||||
|
||||
ordered := make([]sSS, 100)
|
||||
|
@ -12,14 +12,14 @@ import (
|
||||
// Runs dereplication algorithm on a obiiter.IBioSequenceBatch
|
||||
// iterator.
|
||||
|
||||
func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
func IUniqueSequence(iterator obiiter.IBioSequence,
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
|
||||
var err error
|
||||
opts := MakeOptions(options)
|
||||
nworkers := opts.ParallelWorkers()
|
||||
|
||||
iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize())
|
||||
iUnique := obiiter.MakeIBioSequence(opts.BufferSize())
|
||||
|
||||
iterator = iterator.Speed("Splitting data set")
|
||||
|
||||
@ -32,7 +32,7 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
|
||||
0)
|
||||
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
} else {
|
||||
@ -41,7 +41,7 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
|
||||
opts.BufferSize())
|
||||
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
}
|
||||
|
||||
@ -65,14 +65,14 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
|
||||
return neworder
|
||||
}
|
||||
|
||||
var ff func(obiiter.IBioSequenceBatch,
|
||||
var ff func(obiiter.IBioSequence,
|
||||
*obiseq.BioSequenceClassifier,
|
||||
int)
|
||||
|
||||
cat := opts.Categories()
|
||||
na := opts.NAValue()
|
||||
|
||||
ff = func(input obiiter.IBioSequenceBatch,
|
||||
ff = func(input obiiter.IBioSequence,
|
||||
classifier *obiseq.BioSequenceClassifier,
|
||||
icat int) {
|
||||
icat--
|
||||
@ -81,9 +81,9 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
|
||||
1,
|
||||
opts.BufferSize())
|
||||
|
||||
var next obiiter.IBioSequenceBatch
|
||||
var next obiiter.IBioSequence
|
||||
if icat >= 0 {
|
||||
next = obiiter.MakeIBioSequenceBatch(opts.BufferSize())
|
||||
next = obiiter.MakeIBioSequence(opts.BufferSize())
|
||||
|
||||
iUnique.Add(1)
|
||||
|
||||
|
@ -10,13 +10,13 @@ import (
|
||||
func ReadSequencesBatchFromFiles(filenames []string,
|
||||
reader IBatchReader,
|
||||
concurrent_readers int,
|
||||
options ...WithOption) obiiter.IBioSequenceBatch {
|
||||
options ...WithOption) obiiter.IBioSequence {
|
||||
|
||||
if reader == nil {
|
||||
reader = ReadSequencesFromFile
|
||||
}
|
||||
|
||||
batchiter := obiiter.MakeIBioSequenceBatch(0)
|
||||
batchiter := obiiter.MakeIBioSequence(0)
|
||||
nextCounter := goutils.AtomicCounter()
|
||||
|
||||
batchiter.Add(concurrent_readers)
|
||||
|
@ -2,4 +2,4 @@ package obiformats
|
||||
|
||||
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
|
||||
type IBatchReader func(string, ...WithOption) (obiiter.IBioSequenceBatch, error)
|
||||
type IBatchReader func(string, ...WithOption) (obiiter.IBioSequence, error)
|
||||
|
@ -2,15 +2,16 @@ package obiformats
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"sync"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
)
|
||||
|
||||
type SequenceBatchWriterToFile func(iterator obiiter.IBioSequenceBatch,
|
||||
type SequenceBatchWriterToFile func(iterator obiiter.IBioSequence,
|
||||
filename string,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error)
|
||||
options ...WithOption) (obiiter.IBioSequence, error)
|
||||
|
||||
func WriterDispatcher(prototypename string,
|
||||
dispatcher obiiter.IDistribute,
|
||||
|
@ -120,7 +120,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error)
|
||||
return bseq, nil
|
||||
}
|
||||
|
||||
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
|
||||
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||
tag := make([]byte, 11)
|
||||
n, _ := reader.Read(tag)
|
||||
|
||||
@ -166,7 +166,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBat
|
||||
|
||||
opt := MakeOptions(options)
|
||||
|
||||
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
||||
newIter.Add(1)
|
||||
|
||||
go func() {
|
||||
@ -208,7 +208,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBat
|
||||
return newIter
|
||||
}
|
||||
|
||||
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
var reader io.Reader
|
||||
var greader io.Reader
|
||||
var err error
|
||||
@ -216,7 +216,7 @@ func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IB
|
||||
reader, err = os.Open(filename)
|
||||
if err != nil {
|
||||
log.Printf("open file error: %+v", err)
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
// Test if the flux is compressed by gzip
|
||||
|
@ -84,7 +84,7 @@ func _EndOfLastEntry(buff []byte) int {
|
||||
return -1
|
||||
}
|
||||
|
||||
func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
|
||||
func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
|
||||
|
||||
for chunks := range input {
|
||||
scanner := bufio.NewScanner(chunks.raw)
|
||||
@ -201,11 +201,11 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
||||
// 6 5 43 2 1
|
||||
//
|
||||
// <CR>?<LF>//<CR>?<LF>
|
||||
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
|
||||
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||
opt := MakeOptions(options)
|
||||
entry_channel := make(chan _FileChunk, opt.BufferSize())
|
||||
|
||||
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
||||
|
||||
nworkers := opt.ParallelWorkers()
|
||||
newIter.Add(nworkers)
|
||||
@ -224,7 +224,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch
|
||||
return newIter
|
||||
}
|
||||
|
||||
func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
var reader io.Reader
|
||||
var greader io.Reader
|
||||
var err error
|
||||
@ -232,7 +232,7 @@ func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSeque
|
||||
reader, err = os.Open(filename)
|
||||
if err != nil {
|
||||
log.Printf("open file error: %+v", err)
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
// Test if the flux is compressed by gzip
|
||||
|
@ -15,8 +15,8 @@ func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
|
||||
}
|
||||
}
|
||||
|
||||
func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequenceBatch,
|
||||
options ...WithOption) obiiter.IBioSequenceBatch {
|
||||
func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence,
|
||||
options ...WithOption) obiiter.IBioSequence {
|
||||
opt := MakeOptions(options)
|
||||
return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
|
||||
opt.ParallelWorkers(),
|
||||
|
@ -19,7 +19,7 @@ import (
|
||||
)
|
||||
|
||||
func _FastseqReader(seqfile C.fast_kseq_p,
|
||||
iterator obiiter.IBioSequenceBatch,
|
||||
iterator obiiter.IBioSequence,
|
||||
batch_size int) {
|
||||
var comment string
|
||||
i := 0
|
||||
@ -72,7 +72,6 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
||||
ii = 0
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if len(slice) > 0 {
|
||||
iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
|
||||
@ -81,7 +80,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
||||
|
||||
}
|
||||
|
||||
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
opt := MakeOptions(options)
|
||||
|
||||
name := C.CString(filename)
|
||||
@ -94,7 +93,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
||||
|
||||
if pointer == nil {
|
||||
err = fmt.Errorf("cannot open file %s", filename)
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
size := int64(-1)
|
||||
@ -106,7 +105,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
||||
size = -1
|
||||
}
|
||||
|
||||
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
||||
newIter.Add(1)
|
||||
|
||||
go func() {
|
||||
@ -126,9 +125,9 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
||||
return newIter, err
|
||||
}
|
||||
|
||||
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequenceBatch {
|
||||
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
|
||||
opt := MakeOptions(options)
|
||||
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
|
@ -61,13 +61,13 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []b
|
||||
return bs.Bytes()
|
||||
}
|
||||
|
||||
func WriteFasta(iterator obiiter.IBioSequenceBatch,
|
||||
func WriteFasta(iterator obiiter.IBioSequence,
|
||||
file io.Writer,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
opt := MakeOptions(options)
|
||||
|
||||
buffsize := iterator.BufferSize()
|
||||
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
|
||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
||||
|
||||
nwriters := opt.ParallelWorkers()
|
||||
|
||||
@ -83,7 +83,7 @@ func WriteFasta(iterator obiiter.IBioSequenceBatch,
|
||||
log.Debugln("End of the fasta file writing")
|
||||
}()
|
||||
|
||||
ff := func(iterator obiiter.IBioSequenceBatch) {
|
||||
ff := func(iterator obiiter.IBioSequence) {
|
||||
for iterator.Next() {
|
||||
|
||||
batch := iterator.Get()
|
||||
@ -136,21 +136,21 @@ func WriteFasta(iterator obiiter.IBioSequenceBatch,
|
||||
return newIter, nil
|
||||
}
|
||||
|
||||
func WriteFastaToStdout(iterator obiiter.IBioSequenceBatch,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
func WriteFastaToStdout(iterator obiiter.IBioSequence,
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
options = append(options, OptionDontCloseFile())
|
||||
return WriteFasta(iterator, os.Stdout, options...)
|
||||
}
|
||||
|
||||
func WriteFastaToFile(iterator obiiter.IBioSequenceBatch,
|
||||
func WriteFastaToFile(iterator obiiter.IBioSequence,
|
||||
filename string,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
|
||||
file, err := os.Create(filename)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
options = append(options, OptionCloseFile())
|
||||
|
@ -51,13 +51,13 @@ type FileChunck struct {
|
||||
order int
|
||||
}
|
||||
|
||||
func WriteFastq(iterator obiiter.IBioSequenceBatch,
|
||||
func WriteFastq(iterator obiiter.IBioSequence,
|
||||
file io.Writer,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
opt := MakeOptions(options)
|
||||
|
||||
buffsize := iterator.BufferSize()
|
||||
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
|
||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
||||
|
||||
nwriters := opt.ParallelWorkers()
|
||||
|
||||
@ -77,7 +77,7 @@ func WriteFastq(iterator obiiter.IBioSequenceBatch,
|
||||
log.Debugln("End of the fastq file writing")
|
||||
}()
|
||||
|
||||
ff := func(iterator obiiter.IBioSequenceBatch) {
|
||||
ff := func(iterator obiiter.IBioSequence) {
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
chunk := FileChunck{
|
||||
@ -129,21 +129,21 @@ func WriteFastq(iterator obiiter.IBioSequenceBatch,
|
||||
return newIter, nil
|
||||
}
|
||||
|
||||
func WriteFastqToStdout(iterator obiiter.IBioSequenceBatch,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
func WriteFastqToStdout(iterator obiiter.IBioSequence,
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
options = append(options, OptionDontCloseFile())
|
||||
return WriteFastq(iterator, os.Stdout, options...)
|
||||
}
|
||||
|
||||
func WriteFastqToFile(iterator obiiter.IBioSequenceBatch,
|
||||
func WriteFastqToFile(iterator obiiter.IBioSequence,
|
||||
filename string,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
|
||||
file, err := os.Create(filename)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
options = append(options, OptionCloseFile())
|
||||
|
@ -26,7 +26,7 @@ const (
|
||||
inSequence gbstate = 4
|
||||
)
|
||||
|
||||
func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
|
||||
func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
|
||||
|
||||
state := inHeader
|
||||
|
||||
@ -107,11 +107,11 @@ func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
|
||||
|
||||
}
|
||||
|
||||
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
|
||||
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||
opt := MakeOptions(options)
|
||||
entry_channel := make(chan _FileChunk, opt.BufferSize())
|
||||
|
||||
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
|
||||
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
|
||||
|
||||
nworkers := opt.ParallelWorkers()
|
||||
newIter.Add(nworkers)
|
||||
@ -130,7 +130,7 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBa
|
||||
return newIter
|
||||
}
|
||||
|
||||
func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
var reader io.Reader
|
||||
var greader io.Reader
|
||||
var err error
|
||||
@ -138,7 +138,7 @@ func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSe
|
||||
reader, err = os.Open(filename)
|
||||
if err != nil {
|
||||
log.Printf("open file error: %+v", err)
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
// Test if the flux is compressed by gzip
|
||||
|
@ -43,7 +43,7 @@ func GuessSeqFileType(firstline string) string {
|
||||
}
|
||||
|
||||
func ReadSequencesFromFile(filename string,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
var file *os.File
|
||||
var reader io.Reader
|
||||
var greader io.Reader
|
||||
@ -53,7 +53,7 @@ func ReadSequencesFromFile(filename string,
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
reader = file
|
||||
@ -72,7 +72,7 @@ func ReadSequencesFromFile(filename string,
|
||||
tag, _ := breader.Peek(30)
|
||||
|
||||
if len(tag) < 30 {
|
||||
newIter := obiiter.MakeIBioSequenceBatch()
|
||||
newIter := obiiter.MakeIBioSequence()
|
||||
newIter.Close()
|
||||
return newIter, nil
|
||||
}
|
||||
@ -98,5 +98,5 @@ func ReadSequencesFromFile(filename string,
|
||||
filename, filetype)
|
||||
}
|
||||
|
||||
return obiiter.NilIBioSequenceBatch, nil
|
||||
return obiiter.NilIBioSequence, nil
|
||||
}
|
||||
|
@ -10,9 +10,9 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||
)
|
||||
|
||||
func WriteSequence(iterator obiiter.IBioSequenceBatch,
|
||||
func WriteSequence(iterator obiiter.IBioSequence,
|
||||
file io.Writer,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
|
||||
iterator = iterator.Rebatch(1000)
|
||||
|
||||
@ -22,7 +22,7 @@ func WriteSequence(iterator obiiter.IBioSequenceBatch,
|
||||
batch := iterator.Get()
|
||||
iterator.PushBack()
|
||||
|
||||
var newIter obiiter.IBioSequenceBatch
|
||||
var newIter obiiter.IBioSequence
|
||||
var err error
|
||||
|
||||
if len(batch.Slice()) > 0 {
|
||||
@ -42,24 +42,24 @@ func WriteSequence(iterator obiiter.IBioSequenceBatch,
|
||||
return iterator, nil
|
||||
}
|
||||
|
||||
return obiiter.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready")
|
||||
return obiiter.NilIBioSequence, fmt.Errorf("input iterator not ready")
|
||||
}
|
||||
|
||||
func WriteSequencesToStdout(iterator obiiter.IBioSequenceBatch,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
func WriteSequencesToStdout(iterator obiiter.IBioSequence,
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
options = append(options, OptionDontCloseFile())
|
||||
return WriteSequence(iterator, os.Stdout, options...)
|
||||
}
|
||||
|
||||
func WriteSequencesToFile(iterator obiiter.IBioSequenceBatch,
|
||||
func WriteSequencesToFile(iterator obiiter.IBioSequence,
|
||||
filename string,
|
||||
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||
|
||||
file, err := os.Create(filename)
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("open file error: %v", err)
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
options = append(options, OptionCloseFile())
|
||||
|
@ -15,7 +15,7 @@ import (
|
||||
|
||||
// Structure implementing an iterator over bioseq.BioSequenceBatch
|
||||
// based on a channel.
|
||||
type _IBioSequenceBatch struct {
|
||||
type _IBioSequence struct {
|
||||
channel chan BioSequenceBatch
|
||||
current BioSequenceBatch
|
||||
pushBack *abool.AtomicBool
|
||||
@ -27,24 +27,24 @@ type _IBioSequenceBatch struct {
|
||||
finished *abool.AtomicBool
|
||||
}
|
||||
|
||||
type IBioSequenceBatch struct {
|
||||
pointer *_IBioSequenceBatch
|
||||
type IBioSequence struct {
|
||||
pointer *_IBioSequence
|
||||
}
|
||||
|
||||
// NilIBioSequenceBatch nil instance for IBioSequenceBatch
|
||||
// NilIBioSequence nil instance for IBioSequenceBatch
|
||||
//
|
||||
// NilIBioSequenceBatch is the nil instance for the
|
||||
// NilIBioSequence is the nil instance for the
|
||||
// IBioSequenceBatch type.
|
||||
var NilIBioSequenceBatch = IBioSequenceBatch{pointer: nil}
|
||||
var NilIBioSequence = IBioSequence{pointer: nil}
|
||||
|
||||
func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch {
|
||||
func MakeIBioSequence(sizes ...int) IBioSequence {
|
||||
buffsize := int32(0)
|
||||
|
||||
if len(sizes) > 0 {
|
||||
buffsize = int32(sizes[0])
|
||||
}
|
||||
|
||||
i := _IBioSequenceBatch{
|
||||
i := _IBioSequence{
|
||||
channel: make(chan BioSequenceBatch, buffsize),
|
||||
current: NilBioSequenceBatch,
|
||||
pushBack: abool.New(),
|
||||
@ -58,11 +58,11 @@ func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch {
|
||||
i.all_done = &waiting
|
||||
lock := sync.RWMutex{}
|
||||
i.lock = &lock
|
||||
ii := IBioSequenceBatch{&i}
|
||||
ii := IBioSequence{&i}
|
||||
return ii
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Add(n int) {
|
||||
func (iterator IBioSequence) Add(n int) {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.Add method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -70,7 +70,7 @@ func (iterator IBioSequenceBatch) Add(n int) {
|
||||
iterator.pointer.all_done.Add(n)
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Done() {
|
||||
func (iterator IBioSequence) Done() {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.Done method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -78,7 +78,7 @@ func (iterator IBioSequenceBatch) Done() {
|
||||
iterator.pointer.all_done.Done()
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Unlock() {
|
||||
func (iterator IBioSequence) Unlock() {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.Unlock method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -86,7 +86,7 @@ func (iterator IBioSequenceBatch) Unlock() {
|
||||
iterator.pointer.lock.Unlock()
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Lock() {
|
||||
func (iterator IBioSequence) Lock() {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.Lock method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -94,7 +94,7 @@ func (iterator IBioSequenceBatch) Lock() {
|
||||
iterator.pointer.lock.Lock()
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) RLock() {
|
||||
func (iterator IBioSequence) RLock() {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.RLock method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -102,7 +102,7 @@ func (iterator IBioSequenceBatch) RLock() {
|
||||
iterator.pointer.lock.RLock()
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) RUnlock() {
|
||||
func (iterator IBioSequence) RUnlock() {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.RUnlock method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -110,7 +110,7 @@ func (iterator IBioSequenceBatch) RUnlock() {
|
||||
iterator.pointer.lock.RUnlock()
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Wait() {
|
||||
func (iterator IBioSequence) Wait() {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.Wait method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -118,7 +118,7 @@ func (iterator IBioSequenceBatch) Wait() {
|
||||
iterator.pointer.all_done.Wait()
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch {
|
||||
func (iterator IBioSequence) Channel() chan BioSequenceBatch {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.Channel method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -126,7 +126,7 @@ func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch {
|
||||
return iterator.pointer.channel
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) IsNil() bool {
|
||||
func (iterator IBioSequence) IsNil() bool {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.IsNil method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -134,7 +134,7 @@ func (iterator IBioSequenceBatch) IsNil() bool {
|
||||
return iterator.pointer == nil
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) BufferSize() int {
|
||||
func (iterator IBioSequence) BufferSize() int {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -142,7 +142,7 @@ func (iterator IBioSequenceBatch) BufferSize() int {
|
||||
return int(atomic.LoadInt32(&iterator.pointer.buffer_size))
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) BatchSize() int {
|
||||
func (iterator IBioSequence) BatchSize() int {
|
||||
if iterator.pointer == nil {
|
||||
log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch")
|
||||
}
|
||||
@ -150,7 +150,7 @@ func (iterator IBioSequenceBatch) BatchSize() int {
|
||||
return int(atomic.LoadInt32(&iterator.pointer.batch_size))
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) SetBatchSize(size int) error {
|
||||
func (iterator IBioSequence) SetBatchSize(size int) error {
|
||||
if size >= 0 {
|
||||
atomic.StoreInt32(&iterator.pointer.batch_size, int32(size))
|
||||
return nil
|
||||
@ -159,10 +159,10 @@ func (iterator IBioSequenceBatch) SetBatchSize(size int) error {
|
||||
return fmt.Errorf("size (%d) cannot be negative", size)
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Split() IBioSequenceBatch {
|
||||
func (iterator IBioSequence) Split() IBioSequence {
|
||||
iterator.pointer.lock.RLock()
|
||||
defer iterator.pointer.lock.RUnlock()
|
||||
i := _IBioSequenceBatch{
|
||||
i := _IBioSequence{
|
||||
channel: iterator.pointer.channel,
|
||||
current: NilBioSequenceBatch,
|
||||
pushBack: abool.New(),
|
||||
@ -174,11 +174,11 @@ func (iterator IBioSequenceBatch) Split() IBioSequenceBatch {
|
||||
lock := sync.RWMutex{}
|
||||
i.lock = &lock
|
||||
|
||||
newIter := IBioSequenceBatch{&i}
|
||||
newIter := IBioSequence{&i}
|
||||
return newIter
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Next() bool {
|
||||
func (iterator IBioSequence) Next() bool {
|
||||
if iterator.pointer.pushBack.IsSet() {
|
||||
iterator.pointer.pushBack.UnSet()
|
||||
return true
|
||||
@ -200,7 +200,7 @@ func (iterator IBioSequenceBatch) Next() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) PushBack() {
|
||||
func (iterator IBioSequence) PushBack() {
|
||||
if !iterator.pointer.current.IsNil() {
|
||||
iterator.pointer.pushBack.Set()
|
||||
}
|
||||
@ -210,11 +210,11 @@ func (iterator IBioSequenceBatch) PushBack() {
|
||||
// currently pointed by the iterator. You have to use the
|
||||
// 'Next' method to move to the next entry before calling
|
||||
// 'Get' to retreive the following instance.
|
||||
func (iterator IBioSequenceBatch) Get() BioSequenceBatch {
|
||||
func (iterator IBioSequence) Get() BioSequenceBatch {
|
||||
return iterator.pointer.current
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) {
|
||||
func (iterator IBioSequence) Push(batch BioSequenceBatch) {
|
||||
if batch.IsNil() {
|
||||
log.Panicln("A Nil batch is pushed on the channel")
|
||||
}
|
||||
@ -225,11 +225,11 @@ func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) {
|
||||
iterator.pointer.channel <- batch
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Close() {
|
||||
func (iterator IBioSequence) Close() {
|
||||
close(iterator.pointer.channel)
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) WaitAndClose() {
|
||||
func (iterator IBioSequence) WaitAndClose() {
|
||||
iterator.Wait()
|
||||
|
||||
for len(iterator.Channel()) > 0 {
|
||||
@ -240,18 +240,18 @@ func (iterator IBioSequenceBatch) WaitAndClose() {
|
||||
|
||||
// Finished returns 'true' value if no more data is available
|
||||
// from the iterator.
|
||||
func (iterator IBioSequenceBatch) Finished() bool {
|
||||
func (iterator IBioSequence) Finished() bool {
|
||||
return iterator.pointer.finished.IsSet()
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch {
|
||||
func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
buffsize = sizes[0]
|
||||
}
|
||||
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
@ -288,14 +288,14 @@ func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch {
|
||||
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSequenceBatch {
|
||||
func (iterator IBioSequence) Concat(iterators ...IBioSequence) IBioSequence {
|
||||
|
||||
if len(iterators) == 0 {
|
||||
return iterator
|
||||
}
|
||||
|
||||
buffsize := iterator.BufferSize()
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
@ -333,7 +333,7 @@ func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSeq
|
||||
return newIter
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSequenceBatch {
|
||||
func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
|
||||
|
||||
niterator := len(iterators) + 1
|
||||
|
||||
@ -343,7 +343,7 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
|
||||
|
||||
nextCounter := goutils.AtomicCounter()
|
||||
buffsize := iterator.BufferSize()
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(niterator)
|
||||
|
||||
@ -351,7 +351,7 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
|
||||
newIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
ff := func(iterator IBioSequenceBatch) {
|
||||
ff := func(iterator IBioSequence) {
|
||||
|
||||
for iterator.Next() {
|
||||
s := iterator.Get()
|
||||
@ -372,14 +372,14 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
|
||||
// IBioSequenceBatch with every batches having the same size
|
||||
// indicated in parameter. Rebatching implies to sort the
|
||||
// source IBioSequenceBatch.
|
||||
func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBatch {
|
||||
func (iterator IBioSequence) Rebatch(size int, sizes ...int) IBioSequence {
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
buffsize = sizes[0]
|
||||
}
|
||||
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
@ -418,7 +418,7 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
|
||||
return newIter
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Recycle() {
|
||||
func (iterator IBioSequence) Recycle() {
|
||||
|
||||
log.Debugln("Start recycling of Bioseq objects")
|
||||
recycled := 0
|
||||
@ -434,14 +434,14 @@ func (iterator IBioSequenceBatch) Recycle() {
|
||||
log.Debugf("End of the recycling of %d Bioseq objects", recycled)
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Consume() {
|
||||
func (iterator IBioSequence) Consume() {
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
batch.Recycle()
|
||||
}
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
|
||||
func (iterator IBioSequence) Count(recycle bool) (int, int, int) {
|
||||
variants := 0
|
||||
reads := 0
|
||||
nucleotides := 0
|
||||
@ -465,7 +465,7 @@ func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
|
||||
return variants, reads, nucleotides
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch,
|
||||
func (iterator IBioSequence) PairWith(reverse IBioSequence,
|
||||
sizes ...int) IPairedBioSequenceBatch {
|
||||
buffsize := iterator.BufferSize()
|
||||
batchsize := 5000
|
||||
@ -510,16 +510,16 @@ func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch,
|
||||
// DivideOn takes a predicate and returns two IBioSequence iterators.
|
||||
// Sequences extracted from the input iterator are distributed between the
|
||||
// two iterators according to the predicate value.
|
||||
func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
|
||||
size int, sizes ...int) (IBioSequenceBatch, IBioSequenceBatch) {
|
||||
func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
|
||||
size int, sizes ...int) (IBioSequence, IBioSequence) {
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
if len(sizes) > 0 {
|
||||
buffsize = sizes[0]
|
||||
}
|
||||
|
||||
trueIter := MakeIBioSequenceBatch(buffsize)
|
||||
falseIter := MakeIBioSequenceBatch(buffsize)
|
||||
trueIter := MakeIBioSequence(buffsize)
|
||||
falseIter := MakeIBioSequence(buffsize)
|
||||
|
||||
trueIter.Add(1)
|
||||
falseIter.Add(1)
|
||||
@ -578,8 +578,8 @@ func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
|
||||
|
||||
// Filtering a batch of sequences.
|
||||
// A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences.
|
||||
func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
|
||||
size int, sizes ...int) IBioSequenceBatch {
|
||||
func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
|
||||
size int, sizes ...int) IBioSequence {
|
||||
buffsize := iterator.BufferSize()
|
||||
nworkers := 4
|
||||
|
||||
@ -591,7 +591,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
trueIter := MakeIBioSequenceBatch(buffsize)
|
||||
trueIter := MakeIBioSequence(buffsize)
|
||||
|
||||
trueIter.Add(nworkers)
|
||||
|
||||
@ -599,7 +599,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
|
||||
trueIter.WaitAndClose()
|
||||
}()
|
||||
|
||||
ff := func(iterator IBioSequenceBatch) {
|
||||
ff := func(iterator IBioSequence) {
|
||||
// iterator = iterator.SortBatches()
|
||||
|
||||
for iterator.Next() {
|
||||
@ -633,7 +633,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
|
||||
|
||||
// Load reads all the sequences available from an IBioSequence iterator into
|
||||
// a large obiseq.BioSequenceSlice.
|
||||
func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
|
||||
func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
|
||||
|
||||
chunck := obiseq.MakeBioSequenceSlice()
|
||||
for iterator.Next() {
|
||||
@ -648,7 +648,7 @@ func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
|
||||
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of
|
||||
// BioSequence objects
|
||||
func IBatchOver(data obiseq.BioSequenceSlice,
|
||||
size int, sizes ...int) IBioSequenceBatch {
|
||||
size int, sizes ...int) IBioSequence {
|
||||
|
||||
buffsize := 0
|
||||
|
||||
@ -656,7 +656,7 @@ func IBatchOver(data obiseq.BioSequenceSlice,
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
|
@ -8,19 +8,19 @@ import (
|
||||
)
|
||||
|
||||
type IDistribute struct {
|
||||
outputs map[int]IBioSequenceBatch
|
||||
outputs map[int]IBioSequence
|
||||
news chan int
|
||||
classifier *obiseq.BioSequenceClassifier
|
||||
lock *sync.Mutex
|
||||
}
|
||||
|
||||
func (dist *IDistribute) Outputs(key int) (IBioSequenceBatch, error) {
|
||||
func (dist *IDistribute) Outputs(key int) (IBioSequence, error) {
|
||||
dist.lock.Lock()
|
||||
iter, ok := dist.outputs[key]
|
||||
dist.lock.Unlock()
|
||||
|
||||
if !ok {
|
||||
return NilIBioSequenceBatch, fmt.Errorf("code %d unknown", key)
|
||||
return NilIBioSequence, fmt.Errorf("code %d unknown", key)
|
||||
}
|
||||
|
||||
return iter, nil
|
||||
@ -34,11 +34,11 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
|
||||
return dist.classifier
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
|
||||
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
|
||||
batchsize := 5000
|
||||
buffsize := 2
|
||||
|
||||
outputs := make(map[int]IBioSequenceBatch, 100)
|
||||
outputs := make(map[int]IBioSequence, 100)
|
||||
slices := make(map[int]*obiseq.BioSequenceSlice, 100)
|
||||
orders := make(map[int]int, 100)
|
||||
news := make(chan int)
|
||||
@ -80,7 +80,7 @@ func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier
|
||||
orders[key] = 0
|
||||
|
||||
lock.Lock()
|
||||
outputs[key] = MakeIBioSequenceBatch(buffsize)
|
||||
outputs[key] = MakeIBioSequence(buffsize)
|
||||
lock.Unlock()
|
||||
|
||||
news <- key
|
||||
|
@ -2,7 +2,7 @@ package obiiter
|
||||
|
||||
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||
|
||||
func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequenceBatch {
|
||||
func (iterator IBioSequence) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequence {
|
||||
batchsize := 100
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
@ -13,7 +13,7 @@ func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []strin
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
@ -41,7 +41,7 @@ func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []strin
|
||||
}
|
||||
|
||||
func MergePipe(na string, statsOn []string, sizes ...int) Pipeable {
|
||||
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
||||
f := func(iterator IBioSequence) IBioSequence {
|
||||
return iterator.IMergeSequenceBatch(na, statsOn, sizes...)
|
||||
}
|
||||
|
||||
|
@ -1,12 +1,11 @@
|
||||
package obiiter
|
||||
|
||||
type Pipeable func(input IBioSequence) IBioSequence
|
||||
|
||||
type Pipeable func(input IBioSequenceBatch) IBioSequenceBatch
|
||||
|
||||
func Pipeline(start Pipeable,parts ...Pipeable) Pipeable {
|
||||
p := func (input IBioSequenceBatch) IBioSequenceBatch {
|
||||
func Pipeline(start Pipeable, parts ...Pipeable) Pipeable {
|
||||
p := func(input IBioSequence) IBioSequence {
|
||||
data := start(input)
|
||||
for _,part := range parts {
|
||||
for _, part := range parts {
|
||||
data = part(data)
|
||||
}
|
||||
return data
|
||||
@ -15,17 +14,16 @@ func Pipeline(start Pipeable,parts ...Pipeable) Pipeable {
|
||||
return p
|
||||
}
|
||||
|
||||
func (input IBioSequenceBatch) Pipe(start Pipeable, parts ...Pipeable) IBioSequenceBatch {
|
||||
p := Pipeline(start,parts...)
|
||||
func (input IBioSequence) Pipe(start Pipeable, parts ...Pipeable) IBioSequence {
|
||||
p := Pipeline(start, parts...)
|
||||
return p(input)
|
||||
}
|
||||
|
||||
type Teeable func(input IBioSequence) (IBioSequence, IBioSequence)
|
||||
|
||||
type Teeable func(input IBioSequenceBatch) (IBioSequenceBatch,IBioSequenceBatch)
|
||||
|
||||
func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch,IBioSequenceBatch) {
|
||||
first := MakeIBioSequenceBatch()
|
||||
second:= MakeIBioSequenceBatch()
|
||||
func (input IBioSequence) CopyTee() (IBioSequence, IBioSequence) {
|
||||
first := MakeIBioSequence()
|
||||
second := MakeIBioSequence()
|
||||
|
||||
first.Add(1)
|
||||
|
||||
@ -36,11 +34,11 @@ func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch,IBioSequenceBatch) {
|
||||
|
||||
go func() {
|
||||
for input.Next() {
|
||||
b:=input.Get()
|
||||
b := input.Get()
|
||||
first.Push(b)
|
||||
second.Push(b)
|
||||
}
|
||||
}()
|
||||
|
||||
return first,second
|
||||
return first, second
|
||||
}
|
||||
|
@ -6,8 +6,8 @@ import (
|
||||
"github.com/schollz/progressbar/v3"
|
||||
)
|
||||
|
||||
func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch {
|
||||
newIter := MakeIBioSequenceBatch()
|
||||
func (iterator IBioSequence) Speed(message ...string) IBioSequence {
|
||||
newIter := MakeIBioSequence()
|
||||
|
||||
newIter.Add(1)
|
||||
|
||||
@ -51,7 +51,7 @@ func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch {
|
||||
}
|
||||
|
||||
func SpeedPipe(message ...string) Pipeable {
|
||||
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
||||
f := func(iterator IBioSequence) IBioSequence {
|
||||
return iterator.Speed(message...)
|
||||
}
|
||||
|
||||
|
@ -27,7 +27,7 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||
// In addition to the SeqWorker function, the method accepts two optional integer parameters.
|
||||
// - The first indicates the number of workers running in parallel (default 4).
|
||||
// - The second is the size of the channel buffer. By default it is set to the same value as the input buffer.
|
||||
func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch {
|
||||
func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
|
||||
nworkers := 4
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
@ -39,7 +39,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(nworkers)
|
||||
|
||||
@ -49,7 +49,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
|
||||
|
||||
}()
|
||||
|
||||
f := func(iterator IBioSequenceBatch) {
|
||||
f := func(iterator IBioSequence) {
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
for i, seq := range batch.slice {
|
||||
@ -69,8 +69,8 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
|
||||
return newIter
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
|
||||
worker SeqWorker, sizes ...int) IBioSequenceBatch {
|
||||
func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
|
||||
worker SeqWorker, sizes ...int) IBioSequence {
|
||||
nworkers := 4
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
@ -82,7 +82,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(nworkers)
|
||||
|
||||
@ -92,7 +92,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
|
||||
|
||||
}()
|
||||
|
||||
f := func(iterator IBioSequenceBatch) {
|
||||
f := func(iterator IBioSequence) {
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
for i, seq := range batch.slice {
|
||||
@ -114,7 +114,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
|
||||
return newIter
|
||||
}
|
||||
|
||||
func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequenceBatch {
|
||||
func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequence {
|
||||
nworkers := 4
|
||||
buffsize := iterator.BufferSize()
|
||||
|
||||
@ -126,7 +126,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
newIter := MakeIBioSequenceBatch(buffsize)
|
||||
newIter := MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(nworkers)
|
||||
|
||||
@ -135,7 +135,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
|
||||
log.Println("End of the batch slice workers")
|
||||
}()
|
||||
|
||||
f := func(iterator IBioSequenceBatch) {
|
||||
f := func(iterator IBioSequence) {
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
batch.slice = worker(batch.slice)
|
||||
@ -154,7 +154,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
|
||||
}
|
||||
|
||||
func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
|
||||
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
||||
f := func(iterator IBioSequence) IBioSequence {
|
||||
return iterator.MakeIWorker(worker, sizes...)
|
||||
}
|
||||
|
||||
@ -162,7 +162,7 @@ func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
|
||||
}
|
||||
|
||||
func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
|
||||
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
||||
f := func(iterator IBioSequence) IBioSequence {
|
||||
return iterator.MakeISliceWorker(worker, sizes...)
|
||||
}
|
||||
|
||||
|
193
pkg/obiseq/attributes.go
Normal file
193
pkg/obiseq/attributes.go
Normal file
@ -0,0 +1,193 @@
|
||||
package obiseq
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
||||
var val interface{}
|
||||
ok := s.annotations != nil
|
||||
|
||||
if ok {
|
||||
val, ok = s.annotations[key]
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// A method that sets the value of the key in the annotation map.
|
||||
func (s *BioSequence) SetAttribute(key string, value interface{}) {
|
||||
annot := s.Annotations()
|
||||
annot[key] = value
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
|
||||
var val int
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = goutils.InterfaceToInt(v)
|
||||
ok = err == nil
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// Deleting the key from the annotation map.
|
||||
func (s *BioSequence) DeleteAttribute(key string) {
|
||||
delete(s.Annotations(),key)
|
||||
}
|
||||
|
||||
// Renaming the key in the annotation map.
|
||||
func (s *BioSequence) RenameAttribute(newName, oldName string) {
|
||||
val,ok := s.GetAttribute(oldName)
|
||||
|
||||
if (ok) {
|
||||
s.SetAttribute(newName,val)
|
||||
s.DeleteAttribute(oldName)
|
||||
}
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) {
|
||||
var val float64
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = goutils.InterfaceToFloat64(v)
|
||||
ok = err == nil
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
|
||||
var val string
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val = fmt.Sprint(v)
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) {
|
||||
var val bool
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = goutils.InterfaceToBool(v)
|
||||
ok = err == nil
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
|
||||
var val map[string]int
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = goutils.InterfaceToIntMap(v)
|
||||
ok = err == nil
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// Returning the number of times the sequence has been observed.
|
||||
func (s *BioSequence) Count() int {
|
||||
count, ok := s.GetIntAttribute("count")
|
||||
|
||||
if !ok {
|
||||
count = 1
|
||||
}
|
||||
|
||||
return count
|
||||
}
|
||||
|
||||
// Setting the number of times the sequence has been observed.
|
||||
func (s *BioSequence) SetCount(count int) {
|
||||
annot := s.Annotations()
|
||||
annot["count"] = count
|
||||
}
|
||||
|
||||
|
||||
// Returning the taxid of the sequence.
|
||||
func (s *BioSequence) Taxid() int {
|
||||
taxid, ok := s.GetIntAttribute("taxid")
|
||||
|
||||
if !ok {
|
||||
taxid = 1
|
||||
}
|
||||
|
||||
return taxid
|
||||
}
|
||||
|
||||
// Setting the taxid of the sequence.
|
||||
func (s *BioSequence) SetTaxid(taxid int) {
|
||||
annot := s.Annotations()
|
||||
annot["taxid"] = taxid
|
||||
}
|
||||
|
||||
|
||||
func (s *BioSequence) OBITagRefIndex() map[int]string {
|
||||
|
||||
var val map[int]string
|
||||
|
||||
i, ok := s.GetAttribute("obitag_ref_index")
|
||||
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch i := i.(type) {
|
||||
case map[int]string:
|
||||
val = i
|
||||
case map[string]interface{}:
|
||||
val = make(map[int]string, len(i))
|
||||
for k, v := range i {
|
||||
score, err := strconv.Atoi(k)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
|
||||
val[score], err = goutils.InterfaceToString(v)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
}
|
||||
case map[string]string:
|
||||
val = make(map[int]string, len(i))
|
||||
for k, v := range i {
|
||||
score, err := strconv.Atoi(k)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
val[score] = v
|
||||
|
||||
}
|
||||
default:
|
||||
log.Panicln("value of attribute obitag_ref_index cannot be casted to a map[int]string")
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
@ -13,20 +13,17 @@ package obiseq
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/md5"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"sync/atomic"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||
)
|
||||
|
||||
var _NewSeq = int32(0)
|
||||
var _RecycleSeq = int32(0)
|
||||
var _InMemSeq = int32(0)
|
||||
var _MaxInMemSeq = int32(0)
|
||||
var _BioLogRate = int(100000)
|
||||
|
||||
// var _MaxInMemSeq = int32(0)
|
||||
// var _BioLogRate = int(100000)
|
||||
|
||||
func LogBioSeqStatus() {
|
||||
log.Debugf("Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq)
|
||||
@ -200,154 +197,11 @@ func (s *BioSequence) Annotations() Annotation {
|
||||
return s.annotations
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
|
||||
var val interface{}
|
||||
ok := s.annotations != nil
|
||||
|
||||
if ok {
|
||||
val, ok = s.annotations[key]
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
func (s *BioSequence) SetAttribute(key string, value interface{}) {
|
||||
annot := s.Annotations()
|
||||
annot[key] = value
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
|
||||
var val int
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = goutils.InterfaceToInt(v)
|
||||
ok = err == nil
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
|
||||
var val string
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val = fmt.Sprint(v)
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// A method that returns the value of the key in the annotation map.
|
||||
func (s *BioSequence) GetBool(key string) (bool, bool) {
|
||||
var val bool
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = goutils.InterfaceToBool(v)
|
||||
ok = err == nil
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
|
||||
var val map[string]int
|
||||
var err error
|
||||
|
||||
v, ok := s.GetAttribute(key)
|
||||
|
||||
if ok {
|
||||
val, err = goutils.InterfaceToIntMap(v)
|
||||
ok = err == nil
|
||||
}
|
||||
|
||||
return val, ok
|
||||
}
|
||||
|
||||
// Returning the MD5 hash of the sequence.
|
||||
func (s *BioSequence) MD5() [16]byte {
|
||||
return md5.Sum(s.sequence)
|
||||
}
|
||||
|
||||
// Returning the number of times the sequence has been observed.
|
||||
func (s *BioSequence) Count() int {
|
||||
count, ok := s.GetIntAttribute("count")
|
||||
|
||||
if !ok {
|
||||
count = 1
|
||||
}
|
||||
|
||||
return count
|
||||
}
|
||||
|
||||
// Returning the taxid of the sequence.
|
||||
func (s *BioSequence) Taxid() int {
|
||||
taxid, ok := s.GetIntAttribute("taxid")
|
||||
|
||||
if !ok {
|
||||
taxid = 1
|
||||
}
|
||||
|
||||
return taxid
|
||||
}
|
||||
|
||||
func (s *BioSequence) OBITagRefIndex() map[int]string {
|
||||
|
||||
var val map[int]string
|
||||
|
||||
i, ok := s.GetAttribute("obitag_ref_index")
|
||||
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch i := i.(type) {
|
||||
case map[int]string:
|
||||
val = i
|
||||
case map[string]interface{}:
|
||||
val = make(map[int]string, len(i))
|
||||
for k, v := range i {
|
||||
score, err := strconv.Atoi(k)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
|
||||
val[score], err = goutils.InterfaceToString(v)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
}
|
||||
case map[string]string:
|
||||
val = make(map[int]string, len(i))
|
||||
for k, v := range i {
|
||||
score, err := strconv.Atoi(k)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
val[score] = v
|
||||
|
||||
}
|
||||
default:
|
||||
log.Panicln("value of attribute obitag_ref_index cannot be casted to a map[int]string")
|
||||
}
|
||||
|
||||
return val
|
||||
}
|
||||
|
||||
func (s *BioSequence) SetTaxid(taxid int) {
|
||||
annot := s.Annotations()
|
||||
annot["taxid"] = taxid
|
||||
}
|
||||
|
||||
// Setting the id of the BioSequence.
|
||||
func (s *BioSequence) SetId(id string) {
|
||||
s.id = id
|
||||
|
2
pkg/obitools/obiannotate/obiannotate.go
Normal file
2
pkg/obitools/obiannotate/obiannotate.go
Normal file
@ -0,0 +1,2 @@
|
||||
package obiannotate
|
||||
|
@ -19,7 +19,7 @@ type seqPCR struct {
|
||||
SonCount int
|
||||
AddedSons int
|
||||
Edges []Edge
|
||||
Cluster map[int]bool // used as the set of head sequences associated to that sequence
|
||||
Cluster map[int]bool // used as the set of head sequences associated to that sequence
|
||||
}
|
||||
|
||||
// buildSamples sorts the sequences by samples
|
||||
@ -58,7 +58,7 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
|
||||
|
||||
func annotateOBIClean(dataset obiseq.BioSequenceSlice,
|
||||
sample map[string]*([]*seqPCR),
|
||||
tag, NAValue string) obiiter.IBioSequenceBatch {
|
||||
tag, NAValue string) obiiter.IBioSequence {
|
||||
batchsize := 1000
|
||||
var annot = func(data obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||
|
||||
@ -207,7 +207,6 @@ func GetCluster(sequence *obiseq.BioSequence) map[string]string {
|
||||
return cluster
|
||||
}
|
||||
|
||||
|
||||
// func Cluster(sample map[string]*([]*seqPCR)) {
|
||||
// for _, graph := range sample {
|
||||
// for _, s := range *graph {
|
||||
@ -215,7 +214,7 @@ func GetCluster(sequence *obiseq.BioSequence) map[string]string {
|
||||
// if len(s.Edges) > 0 {
|
||||
// for _, f := range s.Edges {
|
||||
|
||||
// }
|
||||
// }
|
||||
// } else {
|
||||
// cluster
|
||||
// }
|
||||
@ -286,7 +285,7 @@ func Weight(sequence *obiseq.BioSequence) map[string]int {
|
||||
return weight
|
||||
}
|
||||
|
||||
func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
func IOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
db := itertator.Load()
|
||||
|
||||
@ -318,7 +317,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Mutation(samples)
|
||||
|
||||
pbopt := make([]progressbar.Option, 0, 5)
|
||||
@ -352,7 +350,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
EmpiricalDistCsv(RatioTableFilename(), all_ratio)
|
||||
}
|
||||
|
||||
|
||||
iter := annotateOBIClean(db, samples, SampleAttribute(), "NA")
|
||||
|
||||
if OnlyHead() {
|
||||
|
@ -67,9 +67,9 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||
return list_of_files, nil
|
||||
}
|
||||
|
||||
func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
|
||||
var iterator obiiter.IBioSequenceBatch
|
||||
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequenceBatch, error)
|
||||
func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
||||
var iterator obiiter.IBioSequence
|
||||
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequence, error)
|
||||
|
||||
opts := make([]obiformats.WithOption, 0, 10)
|
||||
|
||||
@ -109,7 +109,7 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
|
||||
|
||||
list_of_files, err := _ExpandListOfFiles(false, filenames...)
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
switch CLIInputFormat() {
|
||||
@ -140,7 +140,7 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
|
||||
iterator, err = reader(list_of_files[0], opts...)
|
||||
|
||||
if err != nil {
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,10 +8,10 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
)
|
||||
|
||||
func WriteBioSequences(iterator obiiter.IBioSequenceBatch,
|
||||
terminalAction bool, filenames ...string) (obiiter.IBioSequenceBatch, error) {
|
||||
func WriteBioSequences(iterator obiiter.IBioSequence,
|
||||
terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
|
||||
|
||||
var newIter obiiter.IBioSequenceBatch
|
||||
var newIter obiiter.IBioSequence
|
||||
|
||||
opts := make([]obiformats.WithOption, 0, 10)
|
||||
|
||||
@ -62,12 +62,12 @@ func WriteBioSequences(iterator obiiter.IBioSequenceBatch,
|
||||
|
||||
if err != nil {
|
||||
log.Fatalf("Write file error: %v", err)
|
||||
return obiiter.NilIBioSequenceBatch, err
|
||||
return obiiter.NilIBioSequence, err
|
||||
}
|
||||
|
||||
if terminalAction {
|
||||
newIter.Recycle()
|
||||
return obiiter.NilIBioSequenceBatch, nil
|
||||
return obiiter.NilIBioSequence, nil
|
||||
}
|
||||
|
||||
return newIter, nil
|
||||
|
@ -9,7 +9,7 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
)
|
||||
|
||||
func DistributeSequence(sequences obiiter.IBioSequenceBatch) {
|
||||
func DistributeSequence(sequences obiiter.IBioSequence) {
|
||||
|
||||
opts := make([]obiformats.WithOption, 0, 10)
|
||||
|
||||
|
@ -8,14 +8,14 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
)
|
||||
|
||||
func IFilterSequence(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
var newIter obiiter.IBioSequenceBatch
|
||||
func IFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
var newIter obiiter.IBioSequence
|
||||
|
||||
predicate := CLISequenceSelectionPredicate()
|
||||
|
||||
if predicate != nil {
|
||||
if CLISaveDiscardedSequences() {
|
||||
var discarded obiiter.IBioSequenceBatch
|
||||
var discarded obiiter.IBioSequence
|
||||
|
||||
log.Printf("Discarded sequences saved in file: %s\n", CLIDiscardedFileName())
|
||||
newIter, discarded = iterator.DivideOn(predicate,
|
||||
|
@ -10,7 +10,7 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||
)
|
||||
|
||||
func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
|
||||
func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
|
||||
|
||||
opts := make([]obingslibrary.WithOption, 0, 10)
|
||||
|
||||
@ -37,7 +37,7 @@ func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBa
|
||||
newIter = newIter.Rebatch(obioptions.CLIBatchSize())
|
||||
}
|
||||
|
||||
var unidentified obiiter.IBioSequenceBatch
|
||||
var unidentified obiiter.IBioSequence
|
||||
if CLIUnidentifiedFileName() != "" {
|
||||
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
|
||||
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),
|
||||
|
@ -206,7 +206,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
||||
func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
|
||||
gap float64, delta, minOverlap int,
|
||||
minIdentity float64,
|
||||
withStats bool, sizes ...int) obiiter.IBioSequenceBatch {
|
||||
withStats bool, sizes ...int) obiiter.IBioSequence {
|
||||
|
||||
nworkers := runtime.NumCPU() * 3 / 2
|
||||
buffsize := iterator.BufferSize()
|
||||
@ -219,7 +219,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
|
||||
buffsize = sizes[1]
|
||||
}
|
||||
|
||||
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
|
||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
||||
|
||||
newIter.Add(nworkers)
|
||||
|
||||
|
@ -9,7 +9,7 @@ import (
|
||||
// PCR iterates over sequences provided by an obiseq.IBioSequenceBatch
|
||||
// and returns another obiseq.IBioSequenceBatch distributing
|
||||
// obiseq.BioSequenceBatch containing the selected amplicon sequences.
|
||||
func PCR(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
|
||||
func PCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
|
||||
|
||||
opts := make([]obiapat.WithOption, 0, 10)
|
||||
|
||||
|
@ -32,7 +32,7 @@ func IndexSequence(seqidx int,
|
||||
// r := 0
|
||||
// w := 0
|
||||
for i, ref := range references {
|
||||
lcs, alilength := obialign.FastLCSScore(sequence, ref, -1 , &matrix)
|
||||
lcs, alilength := obialign.FastLCSScore(sequence, ref, -1, &matrix)
|
||||
score[i] = alilength - lcs
|
||||
}
|
||||
|
||||
@ -88,7 +88,7 @@ func IndexSequence(seqidx int,
|
||||
return obitag_index
|
||||
}
|
||||
|
||||
func IndexReferenceDB(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
references := iterator.Load()
|
||||
refcounts := make(
|
||||
@ -118,7 +118,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBa
|
||||
bar := progressbar.NewOptions(len(references), pbopt...)
|
||||
|
||||
limits := make(chan [2]int)
|
||||
indexed := obiiter.MakeIBioSequenceBatch()
|
||||
indexed := obiiter.MakeIBioSequence()
|
||||
go func() {
|
||||
for i := 0; i < len(references); i += 10 {
|
||||
limits <- [2]int{i, goutils.MinInt(i+10, len(references))}
|
||||
|
@ -171,7 +171,7 @@ func IdentifySeqWorker(references obiseq.BioSequenceSlice,
|
||||
}
|
||||
}
|
||||
|
||||
func AssignTaxonomy(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
func AssignTaxonomy(iterator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
references := CLIRefDB()
|
||||
refcounts := make(
|
||||
|
@ -8,7 +8,7 @@ import (
|
||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||
)
|
||||
|
||||
func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||
func Unique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
options := make([]obichunk.WithOption, 0, 30)
|
||||
|
||||
|
BIN
sample/.DS_Store
vendored
Normal file
BIN
sample/.DS_Store
vendored
Normal file
Binary file not shown.
Reference in New Issue
Block a user