rename the iterator class

This commit is contained in:
2023-01-22 22:04:17 +01:00
parent 4592855095
commit f97f92df72
37 changed files with 399 additions and 354 deletions

View File

@ -35,12 +35,12 @@ func find(root, ext string) []string {
return a
}
func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
func ISequenceChunkOnDisk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequenceBatch, error) {
sizes ...int) (obiiter.IBioSequence, error) {
dir, err := tempDir()
if err != nil {
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
bufferSize := iterator.BufferSize()
@ -49,7 +49,7 @@ func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
bufferSize = sizes[0]
}
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
newIter := obiiter.MakeIBioSequence(bufferSize)
newIter.Add(1)

View File

@ -1,16 +1,17 @@
package obichunk
import (
log "github.com/sirupsen/logrus"
"sync"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
func ISequenceChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequenceBatch, error) {
sizes ...int) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize()
@ -18,7 +19,7 @@ func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
bufferSize = sizes[0]
}
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
newIter := obiiter.MakeIBioSequence(bufferSize)
newIter.Add(1)

View File

@ -56,9 +56,9 @@ func (by _By) Sort(seqs []sSS) {
// End of the sort interface
//
func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
func ISequenceSubChunk(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiiter.IBioSequenceBatch, error) {
sizes ...int) (obiiter.IBioSequence, error) {
bufferSize := iterator.BufferSize()
nworkers := 4
@ -71,7 +71,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
bufferSize = sizes[1]
}
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
newIter := obiiter.MakeIBioSequence(bufferSize)
newIter.Add(nworkers)
@ -88,7 +88,7 @@ func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
return neworder
}
ff := func(iterator obiiter.IBioSequenceBatch,
ff := func(iterator obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier) {
ordered := make([]sSS, 100)

View File

@ -12,14 +12,14 @@ import (
// Runs the dereplication algorithm on an obiiter.IBioSequenceBatch
// iterator.
func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func IUniqueSequence(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequence, error) {
var err error
opts := MakeOptions(options)
nworkers := opts.ParallelWorkers()
iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize())
iUnique := obiiter.MakeIBioSequence(opts.BufferSize())
iterator = iterator.Speed("Splitting data set")
@ -32,7 +32,7 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
0)
if err != nil {
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
} else {
@ -41,7 +41,7 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
opts.BufferSize())
if err != nil {
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
}
@ -65,14 +65,14 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
return neworder
}
var ff func(obiiter.IBioSequenceBatch,
var ff func(obiiter.IBioSequence,
*obiseq.BioSequenceClassifier,
int)
cat := opts.Categories()
na := opts.NAValue()
ff = func(input obiiter.IBioSequenceBatch,
ff = func(input obiiter.IBioSequence,
classifier *obiseq.BioSequenceClassifier,
icat int) {
icat--
@ -81,9 +81,9 @@ func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
1,
opts.BufferSize())
var next obiiter.IBioSequenceBatch
var next obiiter.IBioSequence
if icat >= 0 {
next = obiiter.MakeIBioSequenceBatch(opts.BufferSize())
next = obiiter.MakeIBioSequence(opts.BufferSize())
iUnique.Add(1)

View File

@ -10,13 +10,13 @@ import (
func ReadSequencesBatchFromFiles(filenames []string,
reader IBatchReader,
concurrent_readers int,
options ...WithOption) obiiter.IBioSequenceBatch {
options ...WithOption) obiiter.IBioSequence {
if reader == nil {
reader = ReadSequencesFromFile
}
batchiter := obiiter.MakeIBioSequenceBatch(0)
batchiter := obiiter.MakeIBioSequence(0)
nextCounter := goutils.AtomicCounter()
batchiter.Add(concurrent_readers)

View File

@ -2,4 +2,4 @@ package obiformats
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
type IBatchReader func(string, ...WithOption) (obiiter.IBioSequenceBatch, error)
type IBatchReader func(string, ...WithOption) (obiiter.IBioSequence, error)

View File

@ -2,15 +2,16 @@ package obiformats
import (
"fmt"
log "github.com/sirupsen/logrus"
"sync"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
)
type SequenceBatchWriterToFile func(iterator obiiter.IBioSequenceBatch,
type SequenceBatchWriterToFile func(iterator obiiter.IBioSequence,
filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error)
options ...WithOption) (obiiter.IBioSequence, error)
func WriterDispatcher(prototypename string,
dispatcher obiiter.IDistribute,

View File

@ -120,7 +120,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error)
return bseq, nil
}
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
tag := make([]byte, 11)
n, _ := reader.Read(tag)
@ -166,7 +166,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBat
opt := MakeOptions(options)
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter.Add(1)
go func() {
@ -208,7 +208,7 @@ func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBat
return newIter
}
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader
var greader io.Reader
var err error
@ -216,7 +216,7 @@ func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IB
reader, err = os.Open(filename)
if err != nil {
log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
// Test if the stream is compressed by gzip

View File

@ -84,7 +84,7 @@ func _EndOfLastEntry(buff []byte) int {
return -1
}
func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
for chunks := range input {
scanner := bufio.NewScanner(chunks.raw)
@ -201,11 +201,11 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
// 6 5 43 2 1
//
// <CR>?<LF>//<CR>?<LF>
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize())
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
nworkers := opt.ParallelWorkers()
newIter.Add(nworkers)
@ -224,7 +224,7 @@ func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch
return newIter
}
func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader
var greader io.Reader
var err error
@ -232,7 +232,7 @@ func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSeque
reader, err = os.Open(filename)
if err != nil {
log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
// Test if the stream is compressed by gzip

View File

@ -15,8 +15,8 @@ func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
}
}
func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequenceBatch,
options ...WithOption) obiiter.IBioSequenceBatch {
func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequence,
options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
opt.ParallelWorkers(),

View File

@ -19,7 +19,7 @@ import (
)
func _FastseqReader(seqfile C.fast_kseq_p,
iterator obiiter.IBioSequenceBatch,
iterator obiiter.IBioSequence,
batch_size int) {
var comment string
i := 0
@ -72,7 +72,6 @@ func _FastseqReader(seqfile C.fast_kseq_p,
ii = 0
}
}
if len(slice) > 0 {
iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
@ -81,7 +80,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
}
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options)
name := C.CString(filename)
@ -94,7 +93,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
if pointer == nil {
err = fmt.Errorf("cannot open file %s", filename)
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
size := int64(-1)
@ -106,7 +105,7 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
size = -1
}
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter.Add(1)
go func() {
@ -126,9 +125,9 @@ func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSe
return newIter, err
}
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequenceBatch {
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
newIter.Add(1)

View File

@ -61,13 +61,13 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []b
return bs.Bytes()
}
func WriteFasta(iterator obiiter.IBioSequenceBatch,
func WriteFasta(iterator obiiter.IBioSequence,
file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options)
buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
newIter := obiiter.MakeIBioSequence(buffsize)
nwriters := opt.ParallelWorkers()
@ -83,7 +83,7 @@ func WriteFasta(iterator obiiter.IBioSequenceBatch,
log.Debugln("End of the fasta file writing")
}()
ff := func(iterator obiiter.IBioSequenceBatch) {
ff := func(iterator obiiter.IBioSequence) {
for iterator.Next() {
batch := iterator.Get()
@ -136,21 +136,21 @@ func WriteFasta(iterator obiiter.IBioSequenceBatch,
return newIter, nil
}
func WriteFastaToStdout(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func WriteFastaToStdout(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequence, error) {
options = append(options, OptionDontCloseFile())
return WriteFasta(iterator, os.Stdout, options...)
}
func WriteFastaToFile(iterator obiiter.IBioSequenceBatch,
func WriteFastaToFile(iterator obiiter.IBioSequence,
filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename)
if err != nil {
log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
options = append(options, OptionCloseFile())

View File

@ -51,13 +51,13 @@ type FileChunck struct {
order int
}
func WriteFastq(iterator obiiter.IBioSequenceBatch,
func WriteFastq(iterator obiiter.IBioSequence,
file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options)
buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
newIter := obiiter.MakeIBioSequence(buffsize)
nwriters := opt.ParallelWorkers()
@ -77,7 +77,7 @@ func WriteFastq(iterator obiiter.IBioSequenceBatch,
log.Debugln("End of the fastq file writing")
}()
ff := func(iterator obiiter.IBioSequenceBatch) {
ff := func(iterator obiiter.IBioSequence) {
for iterator.Next() {
batch := iterator.Get()
chunk := FileChunck{
@ -129,21 +129,21 @@ func WriteFastq(iterator obiiter.IBioSequenceBatch,
return newIter, nil
}
func WriteFastqToStdout(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func WriteFastqToStdout(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequence, error) {
options = append(options, OptionDontCloseFile())
return WriteFastq(iterator, os.Stdout, options...)
}
func WriteFastqToFile(iterator obiiter.IBioSequenceBatch,
func WriteFastqToFile(iterator obiiter.IBioSequence,
filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename)
if err != nil {
log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
options = append(options, OptionCloseFile())

View File

@ -26,7 +26,7 @@ const (
inSequence gbstate = 4
)
func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequence) {
state := inHeader
@ -107,11 +107,11 @@ func _ParseGenbankFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
}
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize())
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
newIter := obiiter.MakeIBioSequence(opt.BufferSize())
nworkers := opt.ParallelWorkers()
newIter.Add(nworkers)
@ -130,7 +130,7 @@ func ReadGenbank(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBa
return newIter
}
func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
var reader io.Reader
var greader io.Reader
var err error
@ -138,7 +138,7 @@ func ReadGenbankFromFile(filename string, options ...WithOption) (obiiter.IBioSe
reader, err = os.Open(filename)
if err != nil {
log.Printf("open file error: %+v", err)
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
// Test if the stream is compressed by gzip

View File

@ -43,7 +43,7 @@ func GuessSeqFileType(firstline string) string {
}
func ReadSequencesFromFile(filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options ...WithOption) (obiiter.IBioSequence, error) {
var file *os.File
var reader io.Reader
var greader io.Reader
@ -53,7 +53,7 @@ func ReadSequencesFromFile(filename string,
if err != nil {
log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
reader = file
@ -72,7 +72,7 @@ func ReadSequencesFromFile(filename string,
tag, _ := breader.Peek(30)
if len(tag) < 30 {
newIter := obiiter.MakeIBioSequenceBatch()
newIter := obiiter.MakeIBioSequence()
newIter.Close()
return newIter, nil
}
@ -98,5 +98,5 @@ func ReadSequencesFromFile(filename string,
filename, filetype)
}
return obiiter.NilIBioSequenceBatch, nil
return obiiter.NilIBioSequence, nil
}

View File

@ -10,9 +10,9 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
)
func WriteSequence(iterator obiiter.IBioSequenceBatch,
func WriteSequence(iterator obiiter.IBioSequence,
file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options ...WithOption) (obiiter.IBioSequence, error) {
iterator = iterator.Rebatch(1000)
@ -22,7 +22,7 @@ func WriteSequence(iterator obiiter.IBioSequenceBatch,
batch := iterator.Get()
iterator.PushBack()
var newIter obiiter.IBioSequenceBatch
var newIter obiiter.IBioSequence
var err error
if len(batch.Slice()) > 0 {
@ -42,24 +42,24 @@ func WriteSequence(iterator obiiter.IBioSequenceBatch,
return iterator, nil
}
return obiiter.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready")
return obiiter.NilIBioSequence, fmt.Errorf("input iterator not ready")
}
func WriteSequencesToStdout(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
func WriteSequencesToStdout(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequence, error) {
options = append(options, OptionDontCloseFile())
return WriteSequence(iterator, os.Stdout, options...)
}
func WriteSequencesToFile(iterator obiiter.IBioSequenceBatch,
func WriteSequencesToFile(iterator obiiter.IBioSequence,
filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename)
if err != nil {
log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
options = append(options, OptionCloseFile())

View File

@ -15,7 +15,7 @@ import (
// Structure implementing an iterator over bioseq.BioSequenceBatch
// based on a channel.
type _IBioSequenceBatch struct {
type _IBioSequence struct {
channel chan BioSequenceBatch
current BioSequenceBatch
pushBack *abool.AtomicBool
@ -27,24 +27,24 @@ type _IBioSequenceBatch struct {
finished *abool.AtomicBool
}
type IBioSequenceBatch struct {
pointer *_IBioSequenceBatch
type IBioSequence struct {
pointer *_IBioSequence
}
// NilIBioSequenceBatch nil instance for IBioSequenceBatch
// NilIBioSequence nil instance for IBioSequenceBatch
//
// NilIBioSequenceBatch is the nil instance for the
// NilIBioSequence is the nil instance for the
// IBioSequenceBatch type.
var NilIBioSequenceBatch = IBioSequenceBatch{pointer: nil}
var NilIBioSequence = IBioSequence{pointer: nil}
func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch {
func MakeIBioSequence(sizes ...int) IBioSequence {
buffsize := int32(0)
if len(sizes) > 0 {
buffsize = int32(sizes[0])
}
i := _IBioSequenceBatch{
i := _IBioSequence{
channel: make(chan BioSequenceBatch, buffsize),
current: NilBioSequenceBatch,
pushBack: abool.New(),
@ -58,11 +58,11 @@ func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch {
i.all_done = &waiting
lock := sync.RWMutex{}
i.lock = &lock
ii := IBioSequenceBatch{&i}
ii := IBioSequence{&i}
return ii
}
func (iterator IBioSequenceBatch) Add(n int) {
func (iterator IBioSequence) Add(n int) {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Add method on NilIBioSequenceBatch")
}
@ -70,7 +70,7 @@ func (iterator IBioSequenceBatch) Add(n int) {
iterator.pointer.all_done.Add(n)
}
func (iterator IBioSequenceBatch) Done() {
func (iterator IBioSequence) Done() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Done method on NilIBioSequenceBatch")
}
@ -78,7 +78,7 @@ func (iterator IBioSequenceBatch) Done() {
iterator.pointer.all_done.Done()
}
func (iterator IBioSequenceBatch) Unlock() {
func (iterator IBioSequence) Unlock() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Unlock method on NilIBioSequenceBatch")
}
@ -86,7 +86,7 @@ func (iterator IBioSequenceBatch) Unlock() {
iterator.pointer.lock.Unlock()
}
func (iterator IBioSequenceBatch) Lock() {
func (iterator IBioSequence) Lock() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Lock method on NilIBioSequenceBatch")
}
@ -94,7 +94,7 @@ func (iterator IBioSequenceBatch) Lock() {
iterator.pointer.lock.Lock()
}
func (iterator IBioSequenceBatch) RLock() {
func (iterator IBioSequence) RLock() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.RLock method on NilIBioSequenceBatch")
}
@ -102,7 +102,7 @@ func (iterator IBioSequenceBatch) RLock() {
iterator.pointer.lock.RLock()
}
func (iterator IBioSequenceBatch) RUnlock() {
func (iterator IBioSequence) RUnlock() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.RUnlock method on NilIBioSequenceBatch")
}
@ -110,7 +110,7 @@ func (iterator IBioSequenceBatch) RUnlock() {
iterator.pointer.lock.RUnlock()
}
func (iterator IBioSequenceBatch) Wait() {
func (iterator IBioSequence) Wait() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Wait method on NilIBioSequenceBatch")
}
@ -118,7 +118,7 @@ func (iterator IBioSequenceBatch) Wait() {
iterator.pointer.all_done.Wait()
}
func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch {
func (iterator IBioSequence) Channel() chan BioSequenceBatch {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Channel method on NilIBioSequenceBatch")
}
@ -126,7 +126,7 @@ func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch {
return iterator.pointer.channel
}
func (iterator IBioSequenceBatch) IsNil() bool {
func (iterator IBioSequence) IsNil() bool {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.IsNil method on NilIBioSequenceBatch")
}
@ -134,7 +134,7 @@ func (iterator IBioSequenceBatch) IsNil() bool {
return iterator.pointer == nil
}
func (iterator IBioSequenceBatch) BufferSize() int {
func (iterator IBioSequence) BufferSize() int {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch")
}
@ -142,7 +142,7 @@ func (iterator IBioSequenceBatch) BufferSize() int {
return int(atomic.LoadInt32(&iterator.pointer.buffer_size))
}
func (iterator IBioSequenceBatch) BatchSize() int {
func (iterator IBioSequence) BatchSize() int {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch")
}
@ -150,7 +150,7 @@ func (iterator IBioSequenceBatch) BatchSize() int {
return int(atomic.LoadInt32(&iterator.pointer.batch_size))
}
func (iterator IBioSequenceBatch) SetBatchSize(size int) error {
func (iterator IBioSequence) SetBatchSize(size int) error {
if size >= 0 {
atomic.StoreInt32(&iterator.pointer.batch_size, int32(size))
return nil
@ -159,10 +159,10 @@ func (iterator IBioSequenceBatch) SetBatchSize(size int) error {
return fmt.Errorf("size (%d) cannot be negative", size)
}
func (iterator IBioSequenceBatch) Split() IBioSequenceBatch {
func (iterator IBioSequence) Split() IBioSequence {
iterator.pointer.lock.RLock()
defer iterator.pointer.lock.RUnlock()
i := _IBioSequenceBatch{
i := _IBioSequence{
channel: iterator.pointer.channel,
current: NilBioSequenceBatch,
pushBack: abool.New(),
@ -174,11 +174,11 @@ func (iterator IBioSequenceBatch) Split() IBioSequenceBatch {
lock := sync.RWMutex{}
i.lock = &lock
newIter := IBioSequenceBatch{&i}
newIter := IBioSequence{&i}
return newIter
}
func (iterator IBioSequenceBatch) Next() bool {
func (iterator IBioSequence) Next() bool {
if iterator.pointer.pushBack.IsSet() {
iterator.pointer.pushBack.UnSet()
return true
@ -200,7 +200,7 @@ func (iterator IBioSequenceBatch) Next() bool {
return false
}
func (iterator IBioSequenceBatch) PushBack() {
func (iterator IBioSequence) PushBack() {
if !iterator.pointer.current.IsNil() {
iterator.pointer.pushBack.Set()
}
@ -210,11 +210,11 @@ func (iterator IBioSequenceBatch) PushBack() {
// currently pointed by the iterator. You have to use the
// 'Next' method to move to the next entry before calling
// 'Get' to retrieve the following instance.
func (iterator IBioSequenceBatch) Get() BioSequenceBatch {
func (iterator IBioSequence) Get() BioSequenceBatch {
return iterator.pointer.current
}
func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) {
func (iterator IBioSequence) Push(batch BioSequenceBatch) {
if batch.IsNil() {
log.Panicln("A Nil batch is pushed on the channel")
}
@ -225,11 +225,11 @@ func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) {
iterator.pointer.channel <- batch
}
func (iterator IBioSequenceBatch) Close() {
func (iterator IBioSequence) Close() {
close(iterator.pointer.channel)
}
func (iterator IBioSequenceBatch) WaitAndClose() {
func (iterator IBioSequence) WaitAndClose() {
iterator.Wait()
for len(iterator.Channel()) > 0 {
@ -240,18 +240,18 @@ func (iterator IBioSequenceBatch) WaitAndClose() {
// Finished returns 'true' value if no more data is available
// from the iterator.
func (iterator IBioSequenceBatch) Finished() bool {
func (iterator IBioSequence) Finished() bool {
return iterator.pointer.finished.IsSet()
}
func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch {
func (iterator IBioSequence) SortBatches(sizes ...int) IBioSequence {
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
buffsize = sizes[0]
}
newIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequence(buffsize)
newIter.Add(1)
@ -288,14 +288,14 @@ func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch {
}
func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSequenceBatch {
func (iterator IBioSequence) Concat(iterators ...IBioSequence) IBioSequence {
if len(iterators) == 0 {
return iterator
}
buffsize := iterator.BufferSize()
newIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequence(buffsize)
newIter.Add(1)
@ -333,7 +333,7 @@ func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSeq
return newIter
}
func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSequenceBatch {
func (iterator IBioSequence) Pool(iterators ...IBioSequence) IBioSequence {
niterator := len(iterators) + 1
@ -343,7 +343,7 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
nextCounter := goutils.AtomicCounter()
buffsize := iterator.BufferSize()
newIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequence(buffsize)
newIter.Add(niterator)
@ -351,7 +351,7 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
newIter.WaitAndClose()
}()
ff := func(iterator IBioSequenceBatch) {
ff := func(iterator IBioSequence) {
for iterator.Next() {
s := iterator.Get()
@ -372,14 +372,14 @@ func (iterator IBioSequenceBatch) Pool(iterators ...IBioSequenceBatch) IBioSeque
// IBioSequenceBatch with every batch having the size
// given as parameter. Rebatching implies sorting the
// source IBioSequenceBatch.
func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBatch {
func (iterator IBioSequence) Rebatch(size int, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
buffsize = sizes[0]
}
newIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequence(buffsize)
newIter.Add(1)
@ -418,7 +418,7 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
return newIter
}
func (iterator IBioSequenceBatch) Recycle() {
func (iterator IBioSequence) Recycle() {
log.Debugln("Start recycling of Bioseq objects")
recycled := 0
@ -434,14 +434,14 @@ func (iterator IBioSequenceBatch) Recycle() {
log.Debugf("End of the recycling of %d Bioseq objects", recycled)
}
func (iterator IBioSequenceBatch) Consume() {
func (iterator IBioSequence) Consume() {
for iterator.Next() {
batch := iterator.Get()
batch.Recycle()
}
}
func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
func (iterator IBioSequence) Count(recycle bool) (int, int, int) {
variants := 0
reads := 0
nucleotides := 0
@ -465,7 +465,7 @@ func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
return variants, reads, nucleotides
}
func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch,
func (iterator IBioSequence) PairWith(reverse IBioSequence,
sizes ...int) IPairedBioSequenceBatch {
buffsize := iterator.BufferSize()
batchsize := 5000
@ -510,16 +510,16 @@ func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch,
// A function that takes a predicate and returns two IBioSequenceBatch iterators.
// Sequences extracted from the input iterator are distributed between the two
// iterators according to the predicate value.
func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) (IBioSequenceBatch, IBioSequenceBatch) {
func (iterator IBioSequence) DivideOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) (IBioSequence, IBioSequence) {
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
buffsize = sizes[0]
}
trueIter := MakeIBioSequenceBatch(buffsize)
falseIter := MakeIBioSequenceBatch(buffsize)
trueIter := MakeIBioSequence(buffsize)
falseIter := MakeIBioSequence(buffsize)
trueIter.Add(1)
falseIter.Add(1)
@ -578,8 +578,8 @@ func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
// Filtering a batch of sequences.
// A function that takes a predicate and a batch of sequences and returns a filtered batch of sequences.
func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) IBioSequenceBatch {
func (iterator IBioSequence) FilterOn(predicate obiseq.SequencePredicate,
size int, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize()
nworkers := 4
@ -591,7 +591,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
buffsize = sizes[1]
}
trueIter := MakeIBioSequenceBatch(buffsize)
trueIter := MakeIBioSequence(buffsize)
trueIter.Add(nworkers)
@ -599,7 +599,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
trueIter.WaitAndClose()
}()
ff := func(iterator IBioSequenceBatch) {
ff := func(iterator IBioSequence) {
// iterator = iterator.SortBatches()
for iterator.Next() {
@ -633,7 +633,7 @@ func (iterator IBioSequenceBatch) FilterOn(predicate obiseq.SequencePredicate,
// Load all sequences available from an IBioSequenceBatch iterator into
// a large obiseq.BioSequenceSlice.
func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
func (iterator IBioSequence) Load() obiseq.BioSequenceSlice {
chunck := obiseq.MakeBioSequenceSlice()
for iterator.Next() {
@ -648,7 +648,7 @@ func (iterator IBioSequenceBatch) Load() obiseq.BioSequenceSlice {
// It takes a slice of BioSequence objects, and returns an iterator that will return batches of
// BioSequence objects
func IBatchOver(data obiseq.BioSequenceSlice,
size int, sizes ...int) IBioSequenceBatch {
size int, sizes ...int) IBioSequence {
buffsize := 0
@ -656,7 +656,7 @@ func IBatchOver(data obiseq.BioSequenceSlice,
buffsize = sizes[1]
}
newIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequence(buffsize)
newIter.Add(1)

View File

@ -8,19 +8,19 @@ import (
)
type IDistribute struct {
outputs map[int]IBioSequenceBatch
outputs map[int]IBioSequence
news chan int
classifier *obiseq.BioSequenceClassifier
lock *sync.Mutex
}
func (dist *IDistribute) Outputs(key int) (IBioSequenceBatch, error) {
func (dist *IDistribute) Outputs(key int) (IBioSequence, error) {
dist.lock.Lock()
iter, ok := dist.outputs[key]
dist.lock.Unlock()
if !ok {
return NilIBioSequenceBatch, fmt.Errorf("code %d unknown", key)
return NilIBioSequence, fmt.Errorf("code %d unknown", key)
}
return iter, nil
@ -34,11 +34,11 @@ func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
return dist.classifier
}
func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
func (iterator IBioSequence) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
batchsize := 5000
buffsize := 2
outputs := make(map[int]IBioSequenceBatch, 100)
outputs := make(map[int]IBioSequence, 100)
slices := make(map[int]*obiseq.BioSequenceSlice, 100)
orders := make(map[int]int, 100)
news := make(chan int)
@ -80,7 +80,7 @@ func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier
orders[key] = 0
lock.Lock()
outputs[key] = MakeIBioSequenceBatch(buffsize)
outputs[key] = MakeIBioSequence(buffsize)
lock.Unlock()
news <- key

View File

@ -2,7 +2,7 @@ package obiiter
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequenceBatch {
func (iterator IBioSequence) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequence {
batchsize := 100
buffsize := iterator.BufferSize()
@ -13,7 +13,7 @@ func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []strin
buffsize = sizes[1]
}
newIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequence(buffsize)
newIter.Add(1)
@ -41,7 +41,7 @@ func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []strin
}
func MergePipe(na string, statsOn []string, sizes ...int) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
f := func(iterator IBioSequence) IBioSequence {
return iterator.IMergeSequenceBatch(na, statsOn, sizes...)
}

View File

@ -1,12 +1,11 @@
package obiiter
type Pipeable func(input IBioSequence) IBioSequence
type Pipeable func(input IBioSequenceBatch) IBioSequenceBatch
func Pipeline(start Pipeable,parts ...Pipeable) Pipeable {
p := func (input IBioSequenceBatch) IBioSequenceBatch {
func Pipeline(start Pipeable, parts ...Pipeable) Pipeable {
p := func(input IBioSequence) IBioSequence {
data := start(input)
for _,part := range parts {
for _, part := range parts {
data = part(data)
}
return data
@ -15,17 +14,16 @@ func Pipeline(start Pipeable,parts ...Pipeable) Pipeable {
return p
}
func (input IBioSequenceBatch) Pipe(start Pipeable, parts ...Pipeable) IBioSequenceBatch {
p := Pipeline(start,parts...)
func (input IBioSequence) Pipe(start Pipeable, parts ...Pipeable) IBioSequence {
p := Pipeline(start, parts...)
return p(input)
}
type Teeable func(input IBioSequence) (IBioSequence, IBioSequence)
type Teeable func(input IBioSequenceBatch) (IBioSequenceBatch,IBioSequenceBatch)
func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch,IBioSequenceBatch) {
first := MakeIBioSequenceBatch()
second:= MakeIBioSequenceBatch()
func (input IBioSequence) CopyTee() (IBioSequence, IBioSequence) {
first := MakeIBioSequence()
second := MakeIBioSequence()
first.Add(1)
@ -36,11 +34,11 @@ func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch,IBioSequenceBatch) {
go func() {
for input.Next() {
b:=input.Get()
b := input.Get()
first.Push(b)
second.Push(b)
}
}()
return first,second
return first, second
}

View File

@ -6,8 +6,8 @@ import (
"github.com/schollz/progressbar/v3"
)
func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch {
newIter := MakeIBioSequenceBatch()
func (iterator IBioSequence) Speed(message ...string) IBioSequence {
newIter := MakeIBioSequence()
newIter.Add(1)
@ -51,7 +51,7 @@ func (iterator IBioSequenceBatch) Speed(message ...string) IBioSequenceBatch {
}
func SpeedPipe(message ...string) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
f := func(iterator IBioSequence) IBioSequence {
return iterator.Speed(message...)
}

View File

@ -27,7 +27,7 @@ func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
// In addition to the SeqWorker function, the method accepts two optional integer parameters:
// - the first indicates the number of workers running in parallel (default 4);
// - the second is the size of the channel buffer, set by default to the same value as the input buffer.
func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch {
func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
nworkers := 4
buffsize := iterator.BufferSize()
@ -39,7 +39,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
buffsize = sizes[1]
}
newIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequence(buffsize)
newIter.Add(nworkers)
@ -49,7 +49,7 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
}()
f := func(iterator IBioSequenceBatch) {
f := func(iterator IBioSequence) {
for iterator.Next() {
batch := iterator.Get()
for i, seq := range batch.slice {
@ -69,8 +69,8 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
return newIter
}
func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
worker SeqWorker, sizes ...int) IBioSequenceBatch {
func (iterator IBioSequence) MakeIConditionalWorker(predicate obiseq.SequencePredicate,
worker SeqWorker, sizes ...int) IBioSequence {
nworkers := 4
buffsize := iterator.BufferSize()
@ -82,7 +82,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
buffsize = sizes[1]
}
newIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequence(buffsize)
newIter.Add(nworkers)
@ -92,7 +92,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
}()
f := func(iterator IBioSequenceBatch) {
f := func(iterator IBioSequence) {
for iterator.Next() {
batch := iterator.Get()
for i, seq := range batch.slice {
@ -114,7 +114,7 @@ func (iterator IBioSequenceBatch) MakeIConditionalWorker(predicate obiseq.Sequen
return newIter
}
func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequenceBatch {
func (iterator IBioSequence) MakeISliceWorker(worker SeqSliceWorker, sizes ...int) IBioSequence {
nworkers := 4
buffsize := iterator.BufferSize()
@ -126,7 +126,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
buffsize = sizes[1]
}
newIter := MakeIBioSequenceBatch(buffsize)
newIter := MakeIBioSequence(buffsize)
newIter.Add(nworkers)
@ -135,7 +135,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
log.Println("End of the batch slice workers")
}()
f := func(iterator IBioSequenceBatch) {
f := func(iterator IBioSequence) {
for iterator.Next() {
batch := iterator.Get()
batch.slice = worker(batch.slice)
@ -154,7 +154,7 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
}
func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
f := func(iterator IBioSequence) IBioSequence {
return iterator.MakeIWorker(worker, sizes...)
}
@ -162,7 +162,7 @@ func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
}
func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
f := func(iterator IBioSequence) IBioSequence {
return iterator.MakeISliceWorker(worker, sizes...)
}

193
pkg/obiseq/attributes.go Normal file
View File

@ -0,0 +1,193 @@
package obiseq
import (
"fmt"
"strconv"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
log "github.com/sirupsen/logrus"
)
// GetAttribute looks up key in the annotations of the sequence.
//
// It returns the stored value and true when the key is present. When the
// sequence has no annotation map, or when the key is missing from it, it
// returns nil and false.
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
	if s.annotations == nil {
		return nil, false
	}

	value, found := s.annotations[key]
	return value, found
}
// SetAttribute stores value under key in the annotation map obtained from
// Annotations(), replacing any value previously associated with that key.
func (s *BioSequence) SetAttribute(key string, value interface{}) {
	s.Annotations()[key] = value
}
// GetIntAttribute returns the attribute stored under key converted to an int.
//
// The returned boolean is true only when the key exists and
// goutils.InterfaceToInt converts its value without error. When the key is
// absent the result is 0 and false.
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return 0, false
	}

	number, err := goutils.InterfaceToInt(raw)
	return number, err == nil
}
// DeleteAttribute removes key from the annotation map obtained from
// Annotations(). Deleting a key that is not present is a no-op.
func (s *BioSequence) DeleteAttribute(key string) {
	annotations := s.Annotations()
	delete(annotations, key)
}
// RenameAttribute moves the value stored under oldName to newName.
//
// Note the parameter order: the new name comes first. Nothing happens when
// oldName is absent from the annotations. A value already stored under
// newName is overwritten.
//
// When both names are identical the attribute is left untouched. Without that
// guard the value would be re-set under the same key and then immediately
// deleted, silently dropping the attribute.
func (s *BioSequence) RenameAttribute(newName, oldName string) {
	if newName == oldName {
		return
	}

	val, ok := s.GetAttribute(oldName)
	if !ok {
		return
	}

	s.SetAttribute(newName, val)
	s.DeleteAttribute(oldName)
}
// GetNumericAttribute returns the attribute stored under key converted to a
// float64.
//
// The returned boolean is true only when the key exists and
// goutils.InterfaceToFloat64 converts its value without error. When the key
// is absent the result is 0 and false.
func (s *BioSequence) GetNumericAttribute(key string) (float64, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return 0, false
	}

	number, err := goutils.InterfaceToFloat64(raw)
	return number, err == nil
}
// GetStringAttribute returns the attribute stored under key formatted as a
// string with fmt.Sprint.
//
// The returned boolean reports whether the key exists; when it does not, the
// result is the empty string and false.
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return "", false
	}

	return fmt.Sprint(raw), true
}
// GetBoolAttribute returns the attribute stored under key converted to a
// bool.
//
// The returned boolean flag (second result) is true only when the key exists
// and goutils.InterfaceToBool converts its value without error. When the key
// is absent the result is false and false.
func (s *BioSequence) GetBoolAttribute(key string) (bool, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return false, false
	}

	flag, err := goutils.InterfaceToBool(raw)
	return flag, err == nil
}
// GetIntMap returns the attribute stored under key converted to a
// map[string]int.
//
// The returned boolean is true only when the key exists and
// goutils.InterfaceToIntMap converts its value without error. When the key is
// absent the result is a nil map and false.
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return nil, false
	}

	counts, err := goutils.InterfaceToIntMap(raw)
	return counts, err == nil
}
// Count returns the number of times the sequence has been observed, as read
// from the "count" attribute. It defaults to 1 when GetIntAttribute cannot
// provide that attribute as an int.
func (s *BioSequence) Count() int {
	if observed, ok := s.GetIntAttribute("count"); ok {
		return observed
	}

	return 1
}
// SetCount records, under the "count" attribute, the number of times the
// sequence has been observed.
func (s *BioSequence) SetCount(count int) {
	s.Annotations()["count"] = count
}
// Taxid returns the taxid of the sequence, as read from the "taxid"
// attribute. It defaults to 1 when GetIntAttribute cannot provide that
// attribute as an int.
func (s *BioSequence) Taxid() int {
	if id, ok := s.GetIntAttribute("taxid"); ok {
		return id
	}

	return 1
}
// SetTaxid records the taxid of the sequence under the "taxid" attribute.
func (s *BioSequence) SetTaxid(taxid int) {
	s.Annotations()["taxid"] = taxid
}
// OBITagRefIndex returns the content of the "obitag_ref_index" attribute as a
// map[int]string.
//
// It returns nil when the attribute is absent. A value that already is a
// map[int]string is returned as is. Values of type map[string]interface{} or
// map[string]string are copied into a new map whose keys are parsed with
// strconv.Atoi.
//
// The method panics through log.Panicln when a key is not an integer, when a
// value cannot be converted by goutils.InterfaceToString, or when the
// attribute holds any other type.
func (s *BioSequence) OBITagRefIndex() map[int]string {
	raw, found := s.GetAttribute("obitag_ref_index")
	if !found {
		return nil
	}

	// parseKey turns a textual map key into its integer form and panics on
	// a key that is not a valid integer.
	parseKey := func(key string) int {
		score, err := strconv.Atoi(key)
		if err != nil {
			log.Panicln(err)
		}
		return score
	}

	switch index := raw.(type) {
	case map[int]string:
		return index

	case map[string]interface{}:
		converted := make(map[int]string, len(index))
		for key, value := range index {
			// The key is parsed before the value is converted, so a bad
			// key is reported first.
			score := parseKey(key)
			text, err := goutils.InterfaceToString(value)
			if err != nil {
				log.Panicln(err)
			}
			converted[score] = text
		}
		return converted

	case map[string]string:
		converted := make(map[int]string, len(index))
		for key, text := range index {
			converted[parseKey(key)] = text
		}
		return converted

	default:
		log.Panicln("value of attribute obitag_ref_index cannot be casted to a map[int]string")
	}

	return nil
}

View File

@ -13,20 +13,17 @@ package obiseq
import (
"bytes"
"crypto/md5"
"fmt"
"strconv"
"sync/atomic"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
)
var _NewSeq = int32(0)
var _RecycleSeq = int32(0)
var _InMemSeq = int32(0)
var _MaxInMemSeq = int32(0)
var _BioLogRate = int(100000)
// var _MaxInMemSeq = int32(0)
// var _BioLogRate = int(100000)
func LogBioSeqStatus() {
log.Debugf("Created seq : %d Destroyed : %d In Memory : %d", _NewSeq, _RecycleSeq, _InMemSeq)
@ -200,154 +197,11 @@ func (s *BioSequence) Annotations() Annotation {
return s.annotations
}
// GetAttribute looks up key in the annotations of the sequence.
//
// It returns the stored value and true when the key is present. When the
// sequence has no annotation map, or when the key is missing from it, it
// returns nil and false.
func (s *BioSequence) GetAttribute(key string) (interface{}, bool) {
	if s.annotations == nil {
		return nil, false
	}

	value, found := s.annotations[key]
	return value, found
}
// SetAttribute stores value under key in the annotation map obtained from
// Annotations(), replacing any value previously associated with that key.
func (s *BioSequence) SetAttribute(key string, value interface{}) {
	s.Annotations()[key] = value
}
// GetIntAttribute returns the attribute stored under key converted to an int.
//
// The returned boolean is true only when the key exists and
// goutils.InterfaceToInt converts its value without error. When the key is
// absent the result is 0 and false.
func (s *BioSequence) GetIntAttribute(key string) (int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return 0, false
	}

	number, err := goutils.InterfaceToInt(raw)
	return number, err == nil
}
// GetStringAttribute returns the attribute stored under key formatted as a
// string with fmt.Sprint.
//
// The returned boolean reports whether the key exists; when it does not, the
// result is the empty string and false.
func (s *BioSequence) GetStringAttribute(key string) (string, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return "", false
	}

	return fmt.Sprint(raw), true
}
// GetBool returns the attribute stored under key converted to a bool.
//
// The second result is true only when the key exists and
// goutils.InterfaceToBool converts its value without error. When the key is
// absent the result is false and false.
func (s *BioSequence) GetBool(key string) (bool, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return false, false
	}

	flag, err := goutils.InterfaceToBool(raw)
	return flag, err == nil
}
// GetIntMap returns the attribute stored under key converted to a
// map[string]int.
//
// The returned boolean is true only when the key exists and
// goutils.InterfaceToIntMap converts its value without error. When the key is
// absent the result is a nil map and false.
func (s *BioSequence) GetIntMap(key string) (map[string]int, bool) {
	raw, found := s.GetAttribute(key)
	if !found {
		return nil, false
	}

	counts, err := goutils.InterfaceToIntMap(raw)
	return counts, err == nil
}
// MD5 returns the MD5 checksum of the raw sequence bytes.
func (s *BioSequence) MD5() [16]byte {
	digest := md5.Sum(s.sequence)
	return digest
}
// Count returns the number of times the sequence has been observed, as read
// from the "count" attribute. It defaults to 1 when GetIntAttribute cannot
// provide that attribute as an int.
func (s *BioSequence) Count() int {
	if observed, ok := s.GetIntAttribute("count"); ok {
		return observed
	}

	return 1
}
// Taxid returns the taxid of the sequence, as read from the "taxid"
// attribute. It defaults to 1 when GetIntAttribute cannot provide that
// attribute as an int.
func (s *BioSequence) Taxid() int {
	if id, ok := s.GetIntAttribute("taxid"); ok {
		return id
	}

	return 1
}
// OBITagRefIndex returns the content of the "obitag_ref_index" attribute as a
// map[int]string.
//
// It returns nil when the attribute is absent. A value that already is a
// map[int]string is returned as is. Values of type map[string]interface{} or
// map[string]string are copied into a new map whose keys are parsed with
// strconv.Atoi.
//
// The method panics through log.Panicln when a key is not an integer, when a
// value cannot be converted by goutils.InterfaceToString, or when the
// attribute holds any other type.
func (s *BioSequence) OBITagRefIndex() map[int]string {
	raw, found := s.GetAttribute("obitag_ref_index")
	if !found {
		return nil
	}

	// parseKey turns a textual map key into its integer form and panics on
	// a key that is not a valid integer.
	parseKey := func(key string) int {
		score, err := strconv.Atoi(key)
		if err != nil {
			log.Panicln(err)
		}
		return score
	}

	switch index := raw.(type) {
	case map[int]string:
		return index

	case map[string]interface{}:
		converted := make(map[int]string, len(index))
		for key, value := range index {
			// The key is parsed before the value is converted, so a bad
			// key is reported first.
			score := parseKey(key)
			text, err := goutils.InterfaceToString(value)
			if err != nil {
				log.Panicln(err)
			}
			converted[score] = text
		}
		return converted

	case map[string]string:
		converted := make(map[int]string, len(index))
		for key, text := range index {
			converted[parseKey(key)] = text
		}
		return converted

	default:
		log.Panicln("value of attribute obitag_ref_index cannot be casted to a map[int]string")
	}

	return nil
}
// SetTaxid records the taxid of the sequence under the "taxid" attribute.
func (s *BioSequence) SetTaxid(taxid int) {
	s.Annotations()["taxid"] = taxid
}
// Setting the id of the BioSequence.
func (s *BioSequence) SetId(id string) {
s.id = id

View File

@ -0,0 +1,2 @@
package obiannotate

View File

@ -19,7 +19,7 @@ type seqPCR struct {
SonCount int
AddedSons int
Edges []Edge
Cluster map[int]bool // used as the set of head sequences associated to that sequence
Cluster map[int]bool // used as the set of head sequences associated to that sequence
}
// buildSamples sorts the sequences by samples
@ -58,7 +58,7 @@ func buildSamples(dataset obiseq.BioSequenceSlice,
func annotateOBIClean(dataset obiseq.BioSequenceSlice,
sample map[string]*([]*seqPCR),
tag, NAValue string) obiiter.IBioSequenceBatch {
tag, NAValue string) obiiter.IBioSequence {
batchsize := 1000
var annot = func(data obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
@ -207,7 +207,6 @@ func GetCluster(sequence *obiseq.BioSequence) map[string]string {
return cluster
}
// func Cluster(sample map[string]*([]*seqPCR)) {
// for _, graph := range sample {
// for _, s := range *graph {
@ -215,7 +214,7 @@ func GetCluster(sequence *obiseq.BioSequence) map[string]string {
// if len(s.Edges) > 0 {
// for _, f := range s.Edges {
// }
// }
// } else {
// cluster
// }
@ -286,7 +285,7 @@ func Weight(sequence *obiseq.BioSequence) map[string]int {
return weight
}
func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
func IOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
db := itertator.Load()
@ -318,7 +317,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
}
}
Mutation(samples)
pbopt := make([]progressbar.Option, 0, 5)
@ -352,7 +350,6 @@ func IOBIClean(itertator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
EmpiricalDistCsv(RatioTableFilename(), all_ratio)
}
iter := annotateOBIClean(db, samples, SampleAttribute(), "NA")
if OnlyHead() {

View File

@ -67,9 +67,9 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
return list_of_files, nil
}
func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
var iterator obiiter.IBioSequenceBatch
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequenceBatch, error)
func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
var iterator obiiter.IBioSequence
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequence, error)
opts := make([]obiformats.WithOption, 0, 10)
@ -109,7 +109,7 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
list_of_files, err := _ExpandListOfFiles(false, filenames...)
if err != nil {
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
switch CLIInputFormat() {
@ -140,7 +140,7 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequenceBatch, error) {
iterator, err = reader(list_of_files[0], opts...)
if err != nil {
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
}

View File

@ -8,10 +8,10 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
)
func WriteBioSequences(iterator obiiter.IBioSequenceBatch,
terminalAction bool, filenames ...string) (obiiter.IBioSequenceBatch, error) {
func WriteBioSequences(iterator obiiter.IBioSequence,
terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
var newIter obiiter.IBioSequenceBatch
var newIter obiiter.IBioSequence
opts := make([]obiformats.WithOption, 0, 10)
@ -62,12 +62,12 @@ func WriteBioSequences(iterator obiiter.IBioSequenceBatch,
if err != nil {
log.Fatalf("Write file error: %v", err)
return obiiter.NilIBioSequenceBatch, err
return obiiter.NilIBioSequence, err
}
if terminalAction {
newIter.Recycle()
return obiiter.NilIBioSequenceBatch, nil
return obiiter.NilIBioSequence, nil
}
return newIter, nil

View File

@ -9,7 +9,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
)
func DistributeSequence(sequences obiiter.IBioSequenceBatch) {
func DistributeSequence(sequences obiiter.IBioSequence) {
opts := make([]obiformats.WithOption, 0, 10)

View File

@ -8,14 +8,14 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
)
func IFilterSequence(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
var newIter obiiter.IBioSequenceBatch
func IFilterSequence(iterator obiiter.IBioSequence) obiiter.IBioSequence {
var newIter obiiter.IBioSequence
predicate := CLISequenceSelectionPredicate()
if predicate != nil {
if CLISaveDiscardedSequences() {
var discarded obiiter.IBioSequenceBatch
var discarded obiiter.IBioSequence
log.Printf("Discarded sequences saved in file: %s\n", CLIDiscardedFileName())
newIter, discarded = iterator.DivideOn(predicate,

View File

@ -10,7 +10,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
)
func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
func IExtractBarcode(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
opts := make([]obingslibrary.WithOption, 0, 10)
@ -37,7 +37,7 @@ func IExtractBarcode(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBa
newIter = newIter.Rebatch(obioptions.CLIBatchSize())
}
var unidentified obiiter.IBioSequenceBatch
var unidentified obiiter.IBioSequence
if CLIUnidentifiedFileName() != "" {
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),

View File

@ -206,7 +206,7 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
gap float64, delta, minOverlap int,
minIdentity float64,
withStats bool, sizes ...int) obiiter.IBioSequenceBatch {
withStats bool, sizes ...int) obiiter.IBioSequence {
nworkers := runtime.NumCPU() * 3 / 2
buffsize := iterator.BufferSize()
@ -219,7 +219,7 @@ func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
buffsize = sizes[1]
}
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
newIter := obiiter.MakeIBioSequence(buffsize)
newIter.Add(nworkers)

View File

@ -9,7 +9,7 @@ import (
// PCR iterates over sequences provided by an obiiter.IBioSequence
// and returns another obiiter.IBioSequence distributing
// obiseq.BioSequenceBatch containing the selected amplicon sequences.
func PCR(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
func PCR(iterator obiiter.IBioSequence) (obiiter.IBioSequence, error) {
opts := make([]obiapat.WithOption, 0, 10)

View File

@ -32,7 +32,7 @@ func IndexSequence(seqidx int,
// r := 0
// w := 0
for i, ref := range references {
lcs, alilength := obialign.FastLCSScore(sequence, ref, -1 , &matrix)
lcs, alilength := obialign.FastLCSScore(sequence, ref, -1, &matrix)
score[i] = alilength - lcs
}
@ -88,7 +88,7 @@ func IndexSequence(seqidx int,
return obitag_index
}
func IndexReferenceDB(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
func IndexReferenceDB(iterator obiiter.IBioSequence) obiiter.IBioSequence {
references := iterator.Load()
refcounts := make(
@ -118,7 +118,7 @@ func IndexReferenceDB(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBa
bar := progressbar.NewOptions(len(references), pbopt...)
limits := make(chan [2]int)
indexed := obiiter.MakeIBioSequenceBatch()
indexed := obiiter.MakeIBioSequence()
go func() {
for i := 0; i < len(references); i += 10 {
limits <- [2]int{i, goutils.MinInt(i+10, len(references))}

View File

@ -171,7 +171,7 @@ func IdentifySeqWorker(references obiseq.BioSequenceSlice,
}
}
func AssignTaxonomy(iterator obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
func AssignTaxonomy(iterator obiiter.IBioSequence) obiiter.IBioSequence {
references := CLIRefDB()
refcounts := make(

View File

@ -8,7 +8,7 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
)
func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
func Unique(sequences obiiter.IBioSequence) obiiter.IBioSequence {
options := make([]obichunk.WithOption, 0, 30)

BIN
sample/.DS_Store vendored Normal file

Binary file not shown.