Some code refactoring, a new, more memory-efficient version of obiuniq, and a first Makefile for building obitools

This commit is contained in:
2022-02-24 07:08:40 +01:00
parent 2e7c1834b0
commit eaf65fbcce
39 changed files with 1225 additions and 241 deletions

View File

@ -2,7 +2,6 @@ package main
import ( import (
"fmt" "fmt"
"log"
"os" "os"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
@ -34,20 +33,8 @@ func main() {
_, args, _ := optionParser(os.Args) _, args, _ := optionParser(os.Args)
fs, _ := obiconvert.ReadBioSequences(args...) fs, _ := obiconvert.ReadBioSequencesBatch(args...)
nread := 0 nread, nvariant, nsymbol := fs.Count(true)
nvariant := 0
nsymbol := 0
for fs.Next() {
s := fs.Get()
if s==nil {
log.Panicln("Read sequence is nil")
}
nread += s.Count()
nvariant++
nsymbol += s.Length()
s.Recycle()
}
if obicount.CLIIsPrintingVariantCount() { if obicount.CLIIsPrintingVariantCount() {
fmt.Printf(" %d", nvariant) fmt.Printf(" %d", nvariant)

View File

@ -4,6 +4,7 @@ import (
"log" "log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -429,7 +430,7 @@ func PCRSlice(sequences obiseq.BioSequenceSlice,
// PCRSliceWorker is a worker function builder which produce // PCRSliceWorker is a worker function builder which produce
// job function usable by the obiseq.MakeISliceWorker function. // job function usable by the obiseq.MakeISliceWorker function.
func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker { func PCRSliceWorker(options ...WithOption) obiiter.SeqSliceWorker {
opt := MakeOptions(options) opt := MakeOptions(options)
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice { worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {

View File

@ -8,6 +8,7 @@ import (
"path/filepath" "path/filepath"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -33,12 +34,12 @@ func find(root, ext string) []string {
return a return a
} }
func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch, func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiseq.IBioSequenceBatch, error) { sizes ...int) (obiiter.IBioSequenceBatch, error) {
dir, err := tempDir() dir, err := tempDir()
if err != nil { if err != nil {
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
bufferSize := iterator.BufferSize() bufferSize := iterator.BufferSize()
@ -47,7 +48,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
bufferSize = sizes[0] bufferSize = sizes[0]
} }
newIter := obiseq.MakeIBioSequenceBatch(bufferSize) newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
newIter.Add(1) newIter.Add(1)
@ -86,7 +87,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
b.Recycle() b.Recycle()
} }
newIter.Push(obiseq.MakeBioSequenceBatch(order, chunck)) newIter.Push(obiiter.MakeBioSequenceBatch(order, chunck))
} }

View File

@ -4,12 +4,13 @@ import (
"log" "log"
"sync" "sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
func ISequenceChunk(iterator obiseq.IBioSequenceBatch, func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiseq.IBioSequenceBatch, error) { sizes ...int) (obiiter.IBioSequenceBatch, error) {
bufferSize := iterator.BufferSize() bufferSize := iterator.BufferSize()
@ -17,7 +18,7 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
bufferSize = sizes[0] bufferSize = sizes[0]
} }
newIter := obiseq.MakeIBioSequenceBatch(bufferSize) newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
newIter.Add(1) newIter.Add(1)
@ -64,7 +65,7 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
for _, chunck := range chunks { for _, chunck := range chunks {
if len(*chunck) > 0 { if len(*chunck) > 0 {
newIter.Push(obiseq.MakeBioSequenceBatch(order, *chunck)) newIter.Push(obiiter.MakeBioSequenceBatch(order, *chunck))
order++ order++
} }

View File

@ -5,6 +5,7 @@ import (
"sort" "sort"
"sync/atomic" "sync/atomic"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -54,9 +55,9 @@ func (by _By) Sort(seqs []sSS) {
// End of the sort interface // End of the sort interface
// //
func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch, func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
sizes ...int) (obiseq.IBioSequenceBatch, error) { sizes ...int) (obiiter.IBioSequenceBatch, error) {
bufferSize := iterator.BufferSize() bufferSize := iterator.BufferSize()
nworkers := 4 nworkers := 4
@ -69,7 +70,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
bufferSize = sizes[1] bufferSize = sizes[1]
} }
newIter := obiseq.MakeIBioSequenceBatch(bufferSize) newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
newIter.Add(nworkers) newIter.Add(nworkers)
@ -86,7 +87,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
return neworder return neworder
} }
ff := func(iterator obiseq.IBioSequenceBatch, ff := func(iterator obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier) { classifier *obiseq.BioSequenceClassifier) {
ordered := make([]sSS, 100) ordered := make([]sSS, 100)
@ -121,7 +122,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
ss := obiseq.MakeBioSequenceSlice() ss := obiseq.MakeBioSequenceSlice()
for i, v := range ordered { for i, v := range ordered {
if v.code != last { if v.code != last {
newIter.Push(obiseq.MakeBioSequenceBatch(nextOrder(), ss)) newIter.Push(obiiter.MakeBioSequenceBatch(nextOrder(), ss))
ss = obiseq.MakeBioSequenceSlice() ss = obiseq.MakeBioSequenceSlice()
last = v.code last = v.code
} }
@ -131,7 +132,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
} }
if len(ss) > 0 { if len(ss) > 0 {
newIter.Push(obiseq.MakeBioSequenceBatch(nextOrder(), ss)) newIter.Push(obiiter.MakeBioSequenceBatch(nextOrder(), ss))
} }
} else { } else {
newIter.Push(batch.Reorder(nextOrder())) newIter.Push(batch.Reorder(nextOrder()))

View File

@ -3,26 +3,27 @@ package obichunk
import ( import (
"sync" "sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
func IUniqueSequence(iterator obiseq.IBioSequenceBatch, func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiseq.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequenceBatch, error) {
var err error var err error
opts := MakeOptions(options) opts := MakeOptions(options)
nworkers := opts.ParallelWorkers() nworkers := opts.ParallelWorkers()
iUnique := obiseq.MakeIBioSequenceBatch(opts.BufferSize()) iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize())
if opts.SortOnDisk() { if opts.SortOnDisk() {
nworkers = 1 nworkers = 1
iterator, err = ISequenceChunkOnDisk(iterator, iterator, err = ISequenceChunkOnDisk(iterator,
obiseq.HashClassifier(opts.BatchCount()), obiseq.HashClassifier(opts.BatchCount()),
opts.BufferSize()) 0)
if err != nil { if err != nil {
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
} else { } else {
@ -31,7 +32,7 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
opts.BufferSize()) opts.BufferSize())
if err != nil { if err != nil {
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
} }
@ -53,12 +54,12 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
return neworder return neworder
} }
var ff func(obiseq.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int) var ff func(obiiter.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int)
cat := opts.Categories() cat := opts.Categories()
na := opts.NAValue() na := opts.NAValue()
ff = func(input obiseq.IBioSequenceBatch, ff = func(input obiiter.IBioSequenceBatch,
classifier *obiseq.BioSequenceClassifier, classifier *obiseq.BioSequenceClassifier,
icat int) { icat int) {
icat-- icat--
@ -67,9 +68,9 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
1, 1,
opts.BufferSize()) opts.BufferSize())
var next obiseq.IBioSequenceBatch var next obiiter.IBioSequenceBatch
if icat >= 0 { if icat >= 0 {
next = obiseq.MakeIBioSequenceBatch(opts.BufferSize()) next = obiiter.MakeIBioSequenceBatch(opts.BufferSize())
iUnique.Add(1) iUnique.Add(1)
go ff(next, go ff(next,

View File

@ -5,15 +5,15 @@ import (
"log" "log"
"sync" "sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
) )
type SequenceBatchWriterToFile func(iterator obiseq.IBioSequenceBatch, type SequenceBatchWriterToFile func(iterator obiiter.IBioSequenceBatch,
filename string, filename string,
options ...WithOption) (obiseq.IBioSequenceBatch, error) options ...WithOption) (obiiter.IBioSequenceBatch, error)
func WriterDispatcher(prototypename string, func WriterDispatcher(prototypename string,
dispatcher obiseq.IDistribute, dispatcher obiiter.IDistribute,
formater SequenceBatchWriterToFile, formater SequenceBatchWriterToFile,
options ...WithOption) { options ...WithOption) {

View File

@ -10,6 +10,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -117,7 +118,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error)
return bseq, nil return bseq, nil
} }
func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceBatch { func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
tag := make([]byte, 11) tag := make([]byte, 11)
n, _ := reader.Read(tag) n, _ := reader.Read(tag)
@ -163,7 +164,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
opt := MakeOptions(options) opt := MakeOptions(options)
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
newIter.Add(1) newIter.Add(1)
go func() { go func() {
@ -181,7 +182,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
slice = append(slice, seq) slice = append(slice, seq)
ii++ ii++
if ii >= opt.BatchSize() { if ii >= opt.BatchSize() {
newIter.Push(obiseq.MakeBioSequenceBatch(i, slice)) newIter.Push(obiiter.MakeBioSequenceBatch(i, slice))
slice = obiseq.MakeBioSequenceSlice() slice = obiseq.MakeBioSequenceSlice()
i++ i++
ii = 0 ii = 0
@ -191,7 +192,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
} }
if len(slice) > 0 { if len(slice) > 0 {
newIter.Push(obiseq.MakeBioSequenceBatch(i, slice)) newIter.Push(obiiter.MakeBioSequenceBatch(i, slice))
} }
newIter.Done() newIter.Done()
@ -205,12 +206,12 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
return newIter return newIter
} }
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiseq.IBioSequence { func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
ib := ReadEcoPCRBatch(reader, options...) ib := ReadEcoPCRBatch(reader, options...)
return ib.SortBatches().IBioSequence() return ib.SortBatches().IBioSequence()
} }
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) { func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
var err error var err error
@ -218,7 +219,7 @@ func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiseq.IBi
reader, err = os.Open(filename) reader, err = os.Open(filename)
if err != nil { if err != nil {
log.Printf("open file error: %+v", err) log.Printf("open file error: %+v", err)
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
// Test if the flux is compressed by gzip // Test if the flux is compressed by gzip
@ -230,7 +231,7 @@ func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiseq.IBi
return ReadEcoPCRBatch(reader, options...), nil return ReadEcoPCRBatch(reader, options...), nil
} }
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) { func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
ib, err := ReadEcoPCRBatchFromFile(filename, options...) ib, err := ReadEcoPCRBatchFromFile(filename, options...)
return ib.SortBatches().IBioSequence(), err return ib.SortBatches().IBioSequence(), err

View File

@ -10,6 +10,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -80,7 +81,7 @@ func _EndOfLastEntry(buff []byte) int {
return -1 return -1
} }
func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) { func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
for chunks := range input { for chunks := range input {
scanner := bufio.NewScanner(chunks.raw) scanner := bufio.NewScanner(chunks.raw)
@ -139,7 +140,7 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
seqBytes = new(bytes.Buffer) seqBytes = new(bytes.Buffer)
} }
} }
out.Push(obiseq.MakeBioSequenceBatch(order, sequences)) out.Push(obiiter.MakeBioSequenceBatch(order, sequences))
} }
out.Done() out.Done()
@ -176,11 +177,11 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
// 6 5 43 2 1 // 6 5 43 2 1
// <CR>?<LF>//<CR>?<LF> // <CR>?<LF>//<CR>?<LF>
func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceBatch { func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
opt := MakeOptions(options) opt := MakeOptions(options)
entry_channel := make(chan _FileChunk, opt.BufferSize()) entry_channel := make(chan _FileChunk, opt.BufferSize())
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
nworkers := opt.ParallelWorkers() nworkers := opt.ParallelWorkers()
newIter.Add(nworkers) newIter.Add(nworkers)
@ -199,12 +200,12 @@ func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceB
return newIter return newIter
} }
func ReadEMBL(reader io.Reader, options ...WithOption) obiseq.IBioSequence { func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
ib := ReadEMBLBatch(reader, options...) ib := ReadEMBLBatch(reader, options...)
return ib.SortBatches().IBioSequence() return ib.SortBatches().IBioSequence()
} }
func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) { func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
var err error var err error
@ -212,7 +213,7 @@ func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiseq.IBioS
reader, err = os.Open(filename) reader, err = os.Open(filename)
if err != nil { if err != nil {
log.Printf("open file error: %+v", err) log.Printf("open file error: %+v", err)
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
// Test if the flux is compressed by gzip // Test if the flux is compressed by gzip
@ -224,7 +225,7 @@ func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiseq.IBioS
return ReadEMBLBatch(reader, options...), nil return ReadEMBLBatch(reader, options...), nil
} }
func ReadEMBLFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) { func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
ib, err := ReadEMBLBatchFromFile(filename, options...) ib, err := ReadEMBLBatchFromFile(filename, options...)
return ib.SortBatches().IBioSequence(), err return ib.SortBatches().IBioSequence(), err

View File

@ -3,6 +3,7 @@ package obiformats
import ( import (
"strings" "strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -14,14 +15,16 @@ func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
} }
} }
func IParseFastSeqHeaderBatch(iterator obiseq.IBioSequenceBatch, options ...WithOption) obiseq.IBioSequenceBatch { func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequenceBatch,
options ...WithOption) obiiter.IBioSequenceBatch {
opt := MakeOptions(options) opt := MakeOptions(options)
return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()), return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
opt.ParallelWorkers(), opt.ParallelWorkers(),
opt.BufferSize()) opt.BufferSize())
} }
func IParseFastSeqHeader(iterator obiseq.IBioSequence, options ...WithOption) obiseq.IBioSequence { func IParseFastSeqHeader(iterator obiiter.IBioSequence,
options ...WithOption) obiiter.IBioSequence {
opt := MakeOptions(options) opt := MakeOptions(options)
return IParseFastSeqHeaderBatch(iterator.IBioSequenceBatch(opt.BatchSize(), return IParseFastSeqHeaderBatch(iterator.IBioSequenceBatch(opt.BatchSize(),

View File

@ -13,11 +13,12 @@ import (
"unsafe" "unsafe"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/cutils" "git.metabarcoding.org/lecasofts/go/obitools/pkg/cutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
func _FastseqReader(seqfile C.fast_kseq_p, func _FastseqReader(seqfile C.fast_kseq_p,
iterator obiseq.IBioSequenceBatch, iterator obiiter.IBioSequenceBatch,
batch_size int) { batch_size int) {
var comment string var comment string
i := 0 i := 0
@ -63,7 +64,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
// log.Printf("\n==> Pushing sequence batch\n") // log.Printf("\n==> Pushing sequence batch\n")
// start := time.Now() // start := time.Now()
iterator.Push(obiseq.MakeBioSequenceBatch(i, slice)) iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
// elapsed := time.Since(start) // elapsed := time.Since(start)
// log.Printf("\n==>sequences pushed after %s\n", elapsed) // log.Printf("\n==>sequences pushed after %s\n", elapsed)
@ -73,13 +74,13 @@ func _FastseqReader(seqfile C.fast_kseq_p,
} }
} }
if len(slice) > 0 { if len(slice) > 0 {
iterator.Push(obiseq.MakeBioSequenceBatch(i, slice)) iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
} }
iterator.Done() iterator.Done()
} }
func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) { func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
name := C.CString(filename) name := C.CString(filename)
@ -92,7 +93,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
if pointer == nil { if pointer == nil {
err = fmt.Errorf("cannot open file %s", filename) err = fmt.Errorf("cannot open file %s", filename)
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
size := int64(-1) size := int64(-1)
@ -104,7 +105,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
size = -1 size = -1
} }
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
newIter.Add(1) newIter.Add(1)
go func() { go func() {
@ -124,14 +125,14 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
return newIter, err return newIter, err
} }
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) { func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
ib, err := ReadFastSeqBatchFromFile(filename, options...) ib, err := ReadFastSeqBatchFromFile(filename, options...)
return ib.SortBatches().IBioSequence(), err return ib.SortBatches().IBioSequence(), err
} }
func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch { func ReadFastSeqBatchFromStdin(options ...WithOption) obiiter.IBioSequenceBatch {
opt := MakeOptions(options) opt := MakeOptions(options)
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize()) newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
newIter.Add(1) newIter.Add(1)
@ -139,12 +140,13 @@ func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
newIter.WaitAndClose() newIter.WaitAndClose()
}() }()
go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize()) go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())),
newIter, opt.BatchSize())
return newIter return newIter
} }
func ReadFastSeqFromStdin(options ...WithOption) obiseq.IBioSequence { func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
ib := ReadFastSeqBatchFromStdin(options...) ib := ReadFastSeqBatchFromStdin(options...)
return ib.SortBatches().IBioSequence() return ib.SortBatches().IBioSequence()
} }

View File

@ -8,6 +8,7 @@ import (
"os" "os"
"strings" "strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -21,7 +22,7 @@ func min(x, y int) int {
func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string { func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
var fragments strings.Builder var fragments strings.Builder
if seq==nil { if seq == nil {
log.Panicln("try to format a nil BioSequence") log.Panicln("try to format a nil BioSequence")
} }
@ -44,7 +45,7 @@ func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
folded) folded)
} }
func FormatFastaBatch(batch obiseq.BioSequenceBatch, formater FormatHeader) []byte { func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []byte {
var bs bytes.Buffer var bs bytes.Buffer
for _, seq := range batch.Slice() { for _, seq := range batch.Slice() {
bs.WriteString(FormatFasta(seq, formater)) bs.WriteString(FormatFasta(seq, formater))
@ -53,7 +54,7 @@ func FormatFastaBatch(batch obiseq.BioSequenceBatch, formater FormatHeader) []by
return bs.Bytes() return bs.Bytes()
} }
func WriteFasta(iterator obiseq.IBioSequence, file io.Writer, options ...WithOption) error { func WriteFasta(iterator obiiter.IBioSequence, file io.Writer, options ...WithOption) error {
opt := MakeOptions(options) opt := MakeOptions(options)
header_format := opt.FormatFastSeqHeader() header_format := opt.FormatFastSeqHeader()
@ -73,7 +74,7 @@ func WriteFasta(iterator obiseq.IBioSequence, file io.Writer, options ...WithOpt
return nil return nil
} }
func WriteFastaToFile(iterator obiseq.IBioSequence, func WriteFastaToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) error { options ...WithOption) error {
@ -89,16 +90,18 @@ func WriteFastaToFile(iterator obiseq.IBioSequence,
return WriteFasta(iterator, file, options...) return WriteFasta(iterator, file, options...)
} }
func WriteFastaToStdout(iterator obiseq.IBioSequence, options ...WithOption) error { func WriteFastaToStdout(iterator obiiter.IBioSequence, options ...WithOption) error {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteFasta(iterator, os.Stdout, options...) return WriteFasta(iterator, os.Stdout, options...)
} }
func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) (obiseq.IBioSequenceBatch, error) { func WriteFastaBatch(iterator obiiter.IBioSequenceBatch,
file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := obiseq.MakeIBioSequenceBatch(buffsize) newIter := obiiter.MakeIBioSequenceBatch(buffsize)
nwriters := opt.ParallelWorkers() nwriters := opt.ParallelWorkers()
@ -113,7 +116,7 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
close(chunkchan) close(chunkchan)
}() }()
ff := func(iterator obiseq.IBioSequenceBatch) { ff := func(iterator obiiter.IBioSequenceBatch) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
chunkchan <- FileChunck{ chunkchan <- FileChunck{
@ -164,20 +167,21 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
return newIter, nil return newIter, nil
} }
func WriteFastaBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) (obiseq.IBioSequenceBatch, error) { func WriteFastaBatchToStdout(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteFastaBatch(iterator, os.Stdout, options...) return WriteFastaBatch(iterator, os.Stdout, options...)
} }
func WriteFastaBatchToFile(iterator obiseq.IBioSequenceBatch, func WriteFastaBatchToFile(iterator obiiter.IBioSequenceBatch,
filename string, filename string,
options ...WithOption) (obiseq.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequenceBatch, error) {
file, err := os.Create(filename) file, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
options = append(options, OptionCloseFile()) options = append(options, OptionCloseFile())

View File

@ -8,6 +8,7 @@ import (
"os" "os"
"time" "time"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
@ -34,7 +35,7 @@ func FormatFastq(seq *obiseq.BioSequence, quality_shift int, formater FormatHead
) )
} }
func FormatFastqBatch(batch obiseq.BioSequenceBatch, quality_shift int, func FormatFastqBatch(batch obiiter.BioSequenceBatch, quality_shift int,
formater FormatHeader) []byte { formater FormatHeader) []byte {
var bs bytes.Buffer var bs bytes.Buffer
for _, seq := range batch.Slice() { for _, seq := range batch.Slice() {
@ -44,7 +45,7 @@ func FormatFastqBatch(batch obiseq.BioSequenceBatch, quality_shift int,
return bs.Bytes() return bs.Bytes()
} }
func WriteFastq(iterator obiseq.IBioSequence, file io.Writer, options ...WithOption) error { func WriteFastq(iterator obiiter.IBioSequence, file io.Writer, options ...WithOption) error {
opt := MakeOptions(options) opt := MakeOptions(options)
header_format := opt.FormatFastSeqHeader() header_format := opt.FormatFastSeqHeader()
@ -65,7 +66,7 @@ func WriteFastq(iterator obiseq.IBioSequence, file io.Writer, options ...WithOpt
return nil return nil
} }
func WriteFastqToFile(iterator obiseq.IBioSequence, func WriteFastqToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) error { options ...WithOption) error {
@ -80,7 +81,7 @@ func WriteFastqToFile(iterator obiseq.IBioSequence,
return WriteFastq(iterator, file, options...) return WriteFastq(iterator, file, options...)
} }
func WriteFastqToStdout(iterator obiseq.IBioSequence, options ...WithOption) error { func WriteFastqToStdout(iterator obiiter.IBioSequence, options ...WithOption) error {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteFastq(iterator, os.Stdout, options...) return WriteFastq(iterator, os.Stdout, options...)
} }
@ -90,11 +91,13 @@ type FileChunck struct {
order int order int
} }
func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) (obiseq.IBioSequenceBatch, error) { func WriteFastqBatch(iterator obiiter.IBioSequenceBatch,
file io.Writer,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
opt := MakeOptions(options) opt := MakeOptions(options)
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
newIter := obiseq.MakeIBioSequenceBatch(buffsize) newIter := obiiter.MakeIBioSequenceBatch(buffsize)
nwriters := opt.ParallelWorkers() nwriters := opt.ParallelWorkers()
@ -113,7 +116,7 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
close(chunkchan) close(chunkchan)
}() }()
ff := func(iterator obiseq.IBioSequenceBatch) { ff := func(iterator obiiter.IBioSequenceBatch) {
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
chunk := FileChunck{ chunk := FileChunck{
@ -165,20 +168,21 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
return newIter, nil return newIter, nil
} }
func WriteFastqBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) (obiseq.IBioSequenceBatch, error) { func WriteFastqBatchToStdout(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteFastqBatch(iterator, os.Stdout, options...) return WriteFastqBatch(iterator, os.Stdout, options...)
} }
func WriteFastqBatchToFile(iterator obiseq.IBioSequenceBatch, func WriteFastqBatchToFile(iterator obiiter.IBioSequenceBatch,
filename string, filename string,
options ...WithOption) (obiseq.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequenceBatch, error) {
file, err := os.Create(filename) file, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
options = append(options, OptionCloseFile()) options = append(options, OptionCloseFile())

View File

@ -1,11 +1,12 @@
package obiformats package obiformats
import ( import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
type __options__ struct { type __options__ struct {
fastseq_header_parser obiseq.SeqAnnotator fastseq_header_parser obiiter.SeqAnnotator
fastseq_header_writer func(*obiseq.BioSequence) string fastseq_header_writer func(*obiseq.BioSequence) string
with_progress_bar bool with_progress_bar bool
buffer_size int buffer_size int
@ -58,7 +59,7 @@ func (opt Options) ParallelWorkers() int {
return opt.pointer.parallel_workers return opt.pointer.parallel_workers
} }
func (opt Options) ParseFastSeqHeader() obiseq.SeqAnnotator { func (opt Options) ParseFastSeqHeader() obiiter.SeqAnnotator {
return opt.pointer.fastseq_header_parser return opt.pointer.fastseq_header_parser
} }
@ -123,7 +124,7 @@ func OptionsQualitySolexa() WithOption {
return OptionsQualityShift(64) return OptionsQualityShift(64)
} }
func OptionsFastSeqHeaderParser(parser obiseq.SeqAnnotator) WithOption { func OptionsFastSeqHeaderParser(parser obiiter.SeqAnnotator) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.fastseq_header_parser = parser opt.pointer.fastseq_header_parser = parser
}) })

View File

@ -8,7 +8,7 @@ import (
"os" "os"
"strings" "strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
) )
func GuessSeqFileType(firstline string) string { func GuessSeqFileType(firstline string) string {
@ -36,7 +36,8 @@ func GuessSeqFileType(firstline string) string {
} }
} }
func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) { func ReadSequencesBatchFromFile(filename string,
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
var file *os.File var file *os.File
var reader io.Reader var reader io.Reader
var greader io.Reader var greader io.Reader
@ -46,7 +47,7 @@ func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
reader = file reader = file
@ -65,7 +66,7 @@ func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.
tag, _ := breader.Peek(30) tag, _ := breader.Peek(30)
if len(tag) < 30 { if len(tag) < 30 {
newIter := obiseq.MakeIBioSequenceBatch() newIter := obiiter.MakeIBioSequenceBatch()
newIter.Close() newIter.Close()
return newIter, nil return newIter, nil
} }
@ -89,10 +90,11 @@ func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.
filename, filetype) filename, filetype)
} }
return obiseq.NilIBioSequenceBatch, nil return obiiter.NilIBioSequenceBatch, nil
} }
func ReadSequencesFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) { func ReadSequencesFromFile(filename string,
options ...WithOption) (obiiter.IBioSequence, error) {
ib, err := ReadSequencesBatchFromFile(filename, options...) ib, err := ReadSequencesBatchFromFile(filename, options...)
return ib.SortBatches().IBioSequence(), err return ib.SortBatches().IBioSequence(), err

View File

@ -6,10 +6,10 @@ import (
"log" "log"
"os" "os"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
) )
func WriteSequences(iterator obiseq.IBioSequence, func WriteSequences(iterator obiiter.IBioSequence,
file io.Writer, file io.Writer,
options ...WithOption) error { options ...WithOption) error {
@ -34,7 +34,7 @@ func WriteSequences(iterator obiseq.IBioSequence,
return nil return nil
} }
func WriteSequencesToFile(iterator obiseq.IBioSequence, func WriteSequencesToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) error { options ...WithOption) error {
@ -48,13 +48,13 @@ func WriteSequencesToFile(iterator obiseq.IBioSequence,
return WriteSequences(iterator, file, options...) return WriteSequences(iterator, file, options...)
} }
func WriteSequencesToStdout(iterator obiseq.IBioSequence, options ...WithOption) error { func WriteSequencesToStdout(iterator obiiter.IBioSequence, options ...WithOption) error {
return WriteSequences(iterator, os.Stdout, options...) return WriteSequences(iterator, os.Stdout, options...)
} }
func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch, func WriteSequenceBatch(iterator obiiter.IBioSequenceBatch,
file io.Writer, file io.Writer,
options ...WithOption) (obiseq.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequenceBatch, error) {
iterator = iterator.Rebatch(1000) iterator = iterator.Rebatch(1000)
@ -64,7 +64,7 @@ func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
batch := iterator.Get() batch := iterator.Get()
iterator.PushBack() iterator.PushBack()
var newIter obiseq.IBioSequenceBatch var newIter obiiter.IBioSequenceBatch
var err error var err error
if len(batch.Slice()) > 0 { if len(batch.Slice()) > 0 {
@ -84,24 +84,24 @@ func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
return iterator, nil return iterator, nil
} }
return obiseq.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready") return obiiter.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready")
} }
func WriteSequencesBatchToStdout(iterator obiseq.IBioSequenceBatch, func WriteSequencesBatchToStdout(iterator obiiter.IBioSequenceBatch,
options ...WithOption) (obiseq.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequenceBatch, error) {
options = append(options, OptionDontCloseFile()) options = append(options, OptionDontCloseFile())
return WriteSequenceBatch(iterator, os.Stdout, options...) return WriteSequenceBatch(iterator, os.Stdout, options...)
} }
func WriteSequencesBatchToFile(iterator obiseq.IBioSequenceBatch, func WriteSequencesBatchToFile(iterator obiiter.IBioSequenceBatch,
filename string, filename string,
options ...WithOption) (obiseq.IBioSequenceBatch, error) { options ...WithOption) (obiiter.IBioSequenceBatch, error) {
file, err := os.Create(filename) file, err := os.Create(filename)
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
options = append(options, OptionCloseFile()) options = append(options, OptionCloseFile())

53
pkg/obiiter/batch.go Normal file
View File

@ -0,0 +1,53 @@
package obiiter
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
// BioSequenceBatch groups a slice of sequences with an integer order
// number identifying the batch.
type BioSequenceBatch struct {
// slice holds the sequences carried by the batch.
slice obiseq.BioSequenceSlice
// order is the batch number; SortBatches uses it to re-emit batches
// in increasing order.
order int
}
// NilBioSequenceBatch is the nil batch: a nil slice with order -1.
var NilBioSequenceBatch = BioSequenceBatch{nil, -1}
// MakeBioSequenceBatch builds a batch carrying sequences and tagged
// with the given order number.
func MakeBioSequenceBatch(order int,
	sequences obiseq.BioSequenceSlice) BioSequenceBatch {
	var batch BioSequenceBatch
	batch.order = order
	batch.slice = sequences
	return batch
}
// Order returns the order number stored in the batch.
func (batch BioSequenceBatch) Order() int {
return batch.order
}
// Reorder returns the batch with its order number replaced by newOrder.
// The receiver is passed by value, so the caller's batch keeps its
// original order number.
func (batch BioSequenceBatch) Reorder(newOrder int) BioSequenceBatch {
batch.order = newOrder
return batch
}
// Slice returns the sequence slice carried by the batch.
func (batch BioSequenceBatch) Slice() obiseq.BioSequenceSlice {
return batch.slice
}
// Length returns the number of sequences in the batch.
func (batch BioSequenceBatch) Length() int {
return len(batch.slice)
}
// NotEmpty forwards to the NotEmpty method of the underlying slice.
func (batch BioSequenceBatch) NotEmpty() bool {
return batch.slice.NotEmpty()
}
// Pop0 forwards to the Pop0 method of the underlying slice and returns
// its result.
//
// NOTE(review): the receiver is a copy of the caller's batch, so any
// change Pop0 makes to the slice header (its length) is not visible to
// the caller. Confirm against obiseq.BioSequenceSlice.Pop0 before
// relying on this method to consume a batch.
func (batch BioSequenceBatch) Pop0() *obiseq.BioSequence {
return batch.slice.Pop0()
}
// IsNil reports whether the batch carries a nil slice, as
// NilBioSequenceBatch does.
func (batch BioSequenceBatch) IsNil() bool {
return batch.slice == nil
}
// Recycle hands the batch's slice to its own Recycle method.
//
// The receiver is a copy of the caller's batch, so the caller's slice
// field cannot be reset from here (the former `batch.slice = nil` was a
// dead store). Callers must not use the batch again after this call.
func (batch BioSequenceBatch) Recycle() {
	batch.slice.Recycle()
}

View File

@ -0,0 +1,560 @@
package obiiter
import (
"fmt"
"log"
"sync"
"sync/atomic"
"time"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"github.com/tevino/abool/v2"
)
// _IBioSequenceBatch holds the state of a channel-based iterator over
// BioSequenceBatch values.
type _IBioSequenceBatch struct {
// channel carries the batches: Push sends on it, Next receives from it.
channel chan BioSequenceBatch
// current is the batch most recently received by Next and returned by Get.
current BioSequenceBatch
// pushBack, when set, makes the next call to Next keep current in place.
pushBack *abool.AtomicBool
// all_done is the WaitGroup driven by Add, Done and Wait.
all_done *sync.WaitGroup
// lock is the RWMutex exposed through Lock, Unlock, RLock and RUnlock.
lock *sync.RWMutex
// buffer_size is the capacity the channel was created with.
buffer_size int32
// batch_size is -1 at creation and is changed through SetBatchSize.
batch_size int32
// sequence_format is initialised to the empty string.
sequence_format string
// finished is set by Next once the channel has been closed and drained.
finished *abool.AtomicBool
}
// IBioSequenceBatch is a handle on an iterator over BioSequenceBatch
// values. Copies of the handle share the same underlying state through
// pointer.
type IBioSequenceBatch struct {
pointer *_IBioSequenceBatch
}
// NilIBioSequenceBatch nil instance for IBioSequenceBatch
//
// NilIBioSequenceBatch is the nil instance for the
// IBioSequenceBatch type.
//
var NilIBioSequenceBatch = IBioSequenceBatch{pointer: nil}
// MakeIBioSequenceBatch creates a batch iterator backed by a buffered
// channel. The optional first value of sizes is the channel capacity
// (1 when omitted). The batch size starts at -1 and the sequence format
// is empty.
func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch {
	capacity := int32(1)
	if len(sizes) > 0 {
		capacity = int32(sizes[0])
	}

	state := &_IBioSequenceBatch{
		channel:         make(chan BioSequenceBatch, capacity),
		current:         NilBioSequenceBatch,
		pushBack:        abool.New(),
		all_done:        new(sync.WaitGroup),
		lock:            new(sync.RWMutex),
		buffer_size:     capacity,
		batch_size:      -1,
		sequence_format: "",
		finished:        abool.New(),
	}

	return IBioSequenceBatch{state}
}
// Add adds n to the iterator's WaitGroup counter.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) Add(n int) {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Add method on NilIBioSequenceBatch")
}
iterator.pointer.all_done.Add(n)
}
// Done decrements the iterator's WaitGroup counter by one.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) Done() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Done method on NilIBioSequenceBatch")
}
iterator.pointer.all_done.Done()
}
// Unlock releases the write lock of the iterator's RWMutex.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) Unlock() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Unlock method on NilIBioSequenceBatch")
}
iterator.pointer.lock.Unlock()
}
// Lock acquires the write lock of the iterator's RWMutex.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) Lock() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Lock method on NilIBioSequenceBatch")
}
iterator.pointer.lock.Lock()
}
// RLock acquires the read lock of the iterator's RWMutex.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) RLock() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.RLock method on NilIBioSequenceBatch")
}
iterator.pointer.lock.RLock()
}
// RUnlock releases the read lock of the iterator's RWMutex.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) RUnlock() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.RUnlock method on NilIBioSequenceBatch")
}
iterator.pointer.lock.RUnlock()
}
// Wait blocks until the iterator's WaitGroup counter reaches zero.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) Wait() {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Wait method on NilIBioSequenceBatch")
}
iterator.pointer.all_done.Wait()
}
// Channel returns the channel on which the iterator's batches travel.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.Channel method on NilIBioSequenceBatch")
}
return iterator.pointer.channel
}
// IsNil reports whether the iterator is the nil instance
// (NilIBioSequenceBatch), that is, whether it has no underlying state.
//
// Unlike the other methods it must not panic on the nil instance:
// answering true for it is the whole purpose of the test.
func (iterator IBioSequenceBatch) IsNil() bool {
	return iterator.pointer == nil
}
// BufferSize returns the recorded channel capacity, read atomically.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) BufferSize() int {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch")
}
return int(atomic.LoadInt32(&iterator.pointer.buffer_size))
}
// BatchSize returns the recorded batch size, read atomically. The value
// is -1 until SetBatchSize has been called.
// It panics when called on NilIBioSequenceBatch.
func (iterator IBioSequenceBatch) BatchSize() int {
if iterator.pointer == nil {
log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch")
}
return int(atomic.LoadInt32(&iterator.pointer.batch_size))
}
// SetBatchSize atomically records size as the iterator's batch size.
// A negative size is rejected with an error and leaves the recorded
// value untouched.
func (iterator IBioSequenceBatch) SetBatchSize(size int) error {
	if size < 0 {
		return fmt.Errorf("size (%d) cannot be negative", size)
	}

	atomic.StoreInt32(&iterator.pointer.batch_size, int32(size))
	return nil
}
// Split returns a new iterator reading from the same channel as the
// receiver. The channel, the WaitGroup and the finished flag are
// shared; the current batch, the push-back flag and the lock are
// private to the new iterator. The receiver's read lock is held while
// its fields are copied.
func (iterator IBioSequenceBatch) Split() IBioSequenceBatch {
	source := iterator.pointer

	source.lock.RLock()
	defer source.lock.RUnlock()

	twin := &_IBioSequenceBatch{
		channel:         source.channel,
		current:         NilBioSequenceBatch,
		pushBack:        abool.New(),
		all_done:        source.all_done,
		lock:            new(sync.RWMutex),
		buffer_size:     source.buffer_size,
		batch_size:      source.batch_size,
		sequence_format: source.sequence_format,
		finished:        source.finished,
	}

	return IBioSequenceBatch{twin}
}
// Next advances the iterator and reports whether a batch is available
// through Get.
//
// A pending push-back is consumed first: the current batch is kept and
// true is returned. Otherwise, unless the iterator is already finished,
// one batch is received from the channel. When the channel is closed
// the current batch becomes NilBioSequenceBatch, the finished flag is
// set and false is returned.
func (iterator IBioSequenceBatch) Next() bool {
	state := iterator.pointer

	if state.pushBack.IsSet() {
		state.pushBack.UnSet()
		return true
	}

	if state.finished.IsSet() {
		return false
	}

	batch, open := <-state.channel
	if !open {
		state.current = NilBioSequenceBatch
		state.finished.Set()
		return false
	}

	state.current = batch
	return true
}
// PushBack marks the current batch as pushed back, so that the next
// call to Next keeps it in place instead of receiving a new one.
// Nothing happens when the current batch is nil.
func (iterator IBioSequenceBatch) PushBack() {
if !iterator.pointer.current.IsNil() {
iterator.pointer.pushBack.Set()
}
}
// Get returns the BioSequenceBatch currently pointed to by the
// iterator. Call Next to move to the next batch before calling Get
// again to retrieve it.
func (iterator IBioSequenceBatch) Get() BioSequenceBatch {
return iterator.pointer.current
}
// Push sends batch on the iterator's channel. It panics when the batch
// is nil or contains no sequence.
func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) {
	switch {
	case batch.IsNil():
		log.Panicln("An Nil batch is pushed on the channel")
	case batch.Length() == 0:
		log.Panicln("An empty batch is pushed on the channel")
	}

	iterator.pointer.channel <- batch
}
// Close closes the iterator's channel.
func (iterator IBioSequenceBatch) Close() {
close(iterator.pointer.channel)
}
// WaitAndClose waits for the WaitGroup counter to reach zero, then
// polls once per millisecond until the channel buffer is empty, and
// finally closes the channel.
func (iterator IBioSequenceBatch) WaitAndClose() {
iterator.Wait()
for len(iterator.Channel()) > 0 {
time.Sleep(time.Millisecond)
}
iterator.Close()
}
// Finished reports whether the finished flag is set, which Next does
// once the channel has been closed and no more batches are available.
func (iterator IBioSequenceBatch) Finished() bool {
return iterator.pointer.finished.IsSet()
}
// IBioSequence converts the batch iterator into an iterator over
// individual sequences. The optional first value of sizes is the buffer
// size of the new iterator; it defaults to the receiver's buffer size.
//
// Every batch read from the receiver is emitted sequence by sequence,
// in slice order, and then recycled. The output channel is closed once
// the receiver is exhausted.
func (iterator IBioSequenceBatch) IBioSequence(sizes ...int) IBioSequence {
	buffsize := iterator.BufferSize()

	if len(sizes) > 0 {
		buffsize = sizes[0]
	}

	newIter := MakeIBioSequence(buffsize)

	newIter.Add(1)

	go func() {
		newIter.Wait()
		close(newIter.Channel())
	}()

	go func() {
		for iterator.Next() {
			batch := iterator.Get()

			// Range over the slice rather than looping on
			// NotEmpty/Pop0: BioSequenceBatch.Pop0 has a value
			// receiver, so it cannot shorten this local batch and a
			// loop conditioned on it would not make progress.
			for _, seq := range batch.Slice() {
				newIter.Channel() <- seq
			}
			batch.Recycle()
		}
		newIter.Done()
	}()

	return newIter
}
// SortBatches returns a new iterator delivering the receiver's batches
// in increasing order number, starting at 0. The optional first value
// of sizes is the buffer size of the new iterator; it defaults to the
// receiver's buffer size.
//
// A batch arriving ahead of its turn is parked until every lower order
// number has been sent. Batches still parked when the input ends (for
// instance because an order number is missing or repeated) are flushed
// in ascending order instead of being silently dropped.
func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch {
	buffsize := iterator.BufferSize()

	if len(sizes) > 0 {
		buffsize = sizes[0]
	}

	newIter := MakeIBioSequenceBatch(buffsize)

	newIter.Add(1)

	go func() {
		newIter.Wait()
		close(newIter.pointer.channel)
	}()

	go func() {
		nextToSend := 0
		pending := make(map[int]BioSequenceBatch)

		for iterator.Next() {
			batch := iterator.Get()

			if batch.order != nextToSend {
				pending[batch.order] = batch
				continue
			}

			newIter.pointer.channel <- batch
			nextToSend++

			// Release every parked batch that is now in sequence.
			for queued, ok := pending[nextToSend]; ok; queued, ok = pending[nextToSend] {
				newIter.pointer.channel <- queued
				delete(pending, nextToSend)
				nextToSend++
			}
		}

		// The input is exhausted: whatever is still parked can no
		// longer be completed, so emit it by increasing order number.
		for len(pending) > 0 {
			lowest, first := 0, true
			for order := range pending {
				if first || order < lowest {
					lowest, first = order, false
				}
			}
			newIter.pointer.channel <- pending[lowest]
			delete(pending, lowest)
		}

		newIter.Done()
	}()

	return newIter
}
// Concat returns an iterator delivering the batches of the receiver
// followed by those of each iterator in iterators, in argument order.
// With no argument the receiver itself is returned.
//
// Order numbers are shifted so that the batches of one source come
// after those of the previous ones: each source is offset by one more
// than the highest order number emitted so far.
func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSequenceBatch {
	if len(iterators) == 0 {
		return iterator
	}

	joined := MakeIBioSequenceBatch(iterator.BufferSize())
	joined.Add(1)

	go func() {
		joined.Wait()
		close(joined.Channel())
	}()

	go func() {
		sources := make([]IBioSequenceBatch, 0, len(iterators)+1)
		sources = append(sources, iterator)
		sources = append(sources, iterators...)

		offset := 0
		highest := 0

		for _, source := range sources {
			for source.Next() {
				batch := source.Get()
				shifted := batch.order + offset
				if shifted > highest {
					highest = shifted
				}
				joined.Push(batch.Reorder(shifted))
			}
			offset = highest + 1
		}

		joined.Done()
	}()

	return joined
}
// Rebatch redistributes the sequences of the receiver into a new
// iterator whose batches all hold size sequences, except possibly the
// last one. The receiver is first passed through SortBatches. The
// optional first value of sizes is the buffer size of the new iterator;
// it defaults to the receiver's buffer size.
func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBatch {
	capacity := iterator.BufferSize()
	if len(sizes) > 0 {
		capacity = sizes[0]
	}

	out := MakeIBioSequenceBatch(capacity)
	out.Add(1)

	go func() {
		out.Wait()
		close(out.pointer.channel)
	}()

	go func() {
		sorted := iterator.SortBatches()
		pending := obiseq.MakeBioSequenceSlice()
		nextOrder := 0

		for sorted.Next() {
			incoming := sorted.Get()
			for i := range incoming.slice {
				pending = append(pending, incoming.slice[i])
				if len(pending) != size {
					continue
				}
				// The buffer is full: emit it and start a new one.
				out.Push(MakeBioSequenceBatch(nextOrder, pending))
				nextOrder++
				pending = obiseq.MakeBioSequenceSlice()
			}
			incoming.Recycle()
		}

		// Emit the trailing, partially filled batch.
		if len(pending) > 0 {
			out.Push(MakeBioSequenceBatch(nextOrder, pending))
		}

		out.Done()
	}()

	return out
}
// Recycle consumes the whole iterator, recycling every sequence and
// then every batch, and logs the number of sequences recycled.
func (iterator IBioSequenceBatch) Recycle() {
	log.Println("Start recycling of Bioseq objects")

	count := 0
	for iterator.Next() {
		batch := iterator.Get()
		sequences := batch.Slice()
		for i := range sequences {
			sequences[i].Recycle()
			count++
		}
		batch.Recycle()
	}

	log.Printf("End of the recycling of %d Bioseq objects", count)
}
// Count consumes the whole iterator and returns three totals, in this
// order:
//
//   - variants: the number of sequence objects read,
//   - reads: the sum of Count() over these sequences,
//   - nucleotides: the sum of Length() over these sequences.
//
// The three results share the type int, so a caller that destructures
// them in another order still compiles; the results are named to make
// the order explicit at the call site.
//
// When recycle is true each sequence is recycled after being counted.
// Every batch is recycled whatever the value of recycle.
func (iterator IBioSequenceBatch) Count(recycle bool) (variants, reads, nucleotides int) {
	log.Println("Start counting of Bioseq objects")

	for iterator.Next() {
		batch := iterator.Get()
		for _, seq := range batch.Slice() {
			variants++
			reads += seq.Count()
			nucleotides += seq.Length()

			if recycle {
				seq.Recycle()
			}
		}
		batch.Recycle()
	}

	log.Printf("End of the counting of %d Bioseq objects", variants)
	return variants, reads, nucleotides
}
// PairWith associates the batches of the receiver (forward reads) with
// those of reverse and returns an iterator over the resulting paired
// batches.
//
// The optional values of sizes are, in order, the batch size used to
// rebatch both inputs (5000 by default) and the buffer size of the new
// iterator (the receiver's buffer size by default).
func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch,
sizes ...int) IPairedBioSequenceBatch {
buffsize := iterator.BufferSize()
batchsize := 5000
if len(sizes) > 0 {
batchsize = sizes[0]
}
if len(sizes) > 1 {
buffsize = sizes[1]
}
// Both inputs are rebatched to the same size so that batches of the
// same rank can be associated.
iterator = iterator.Rebatch(batchsize)
reverse = reverse.Rebatch(batchsize)
newIter := MakeIPairedBioSequenceBatch(buffsize)
newIter.Add(1)
go func() {
newIter.Wait()
close(newIter.Channel())
log.Println("End of association of paired reads")
}()
log.Println("Start association of paired reads")
go func() {
for iterator.Next() {
// A forward batch without a matching reverse batch is a fatal
// inconsistency: the goroutine panics.
if !reverse.Next() {
log.Panicln("Etrange reverse pas prêt")
}
newIter.Channel() <- MakePairedBioSequenceBatch(iterator.Get(),
reverse.Get())
}
// Batches remaining in reverse once the forward iterator is
// exhausted are not read.
newIter.Done()
}()
return newIter
}
// DivideOn splits the sequences of the receiver between two new
// iterators according to predicate: the first returned iterator
// receives the sequences for which predicate is true, the second one
// the others.
//
// The receiver is first passed through SortBatches. Output batches hold
// size sequences, except possibly the last one of each output. The
// optional first value of sizes is the buffer size of both outputs; it
// defaults to the receiver's buffer size.
func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
	size int, sizes ...int) (IBioSequenceBatch, IBioSequenceBatch) {
	buffsize := iterator.BufferSize()

	if len(sizes) > 0 {
		buffsize = sizes[0]
	}

	trueIter := MakeIBioSequenceBatch(buffsize)
	falseIter := MakeIBioSequenceBatch(buffsize)

	trueIter.Add(1)
	falseIter.Add(1)

	// Each output gets its own closing goroutine. WaitAndClose also
	// waits for the output's buffer to be drained, so closing the two
	// outputs one after the other would keep the second one open until
	// the first has been fully read, blocking a consumer that reads
	// the second output first.
	go trueIter.WaitAndClose()
	go falseIter.WaitAndClose()

	go func() {
		trueOrder := 0
		falseOrder := 0
		iterator = iterator.SortBatches()

		trueSlice := obiseq.MakeBioSequenceSlice()
		falseSlice := obiseq.MakeBioSequenceSlice()

		for iterator.Next() {
			seqs := iterator.Get()
			for _, s := range seqs.slice {
				if predicate(s) {
					trueSlice = append(trueSlice, s)
				} else {
					falseSlice = append(falseSlice, s)
				}

				if len(trueSlice) == size {
					trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
					trueOrder++
					trueSlice = obiseq.MakeBioSequenceSlice()
				}

				if len(falseSlice) == size {
					falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
					falseOrder++
					falseSlice = obiseq.MakeBioSequenceSlice()
				}
			}
			seqs.Recycle()
		}

		// Emit the trailing, partially filled batches.
		if len(trueSlice) > 0 {
			trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
		}

		if len(falseSlice) > 0 {
			falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
		}

		trueIter.Done()
		falseIter.Done()
	}()

	return trueIter, falseIter
}

View File

@ -1,14 +1,16 @@
package obiseq package obiiter
import ( import (
"fmt" "fmt"
"sync" "sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
type IDistribute struct { type IDistribute struct {
outputs map[int]IBioSequenceBatch outputs map[int]IBioSequenceBatch
news chan int news chan int
classifier *BioSequenceClassifier classifier *obiseq.BioSequenceClassifier
lock *sync.Mutex lock *sync.Mutex
} }
@ -28,16 +30,16 @@ func (dist *IDistribute) News() chan int {
return dist.news return dist.news
} }
func (dist *IDistribute) Classifier() *BioSequenceClassifier { func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
return dist.classifier return dist.classifier
} }
func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes ...int) IDistribute { func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
batchsize := 5000 batchsize := 5000
buffsize := 2 buffsize := 2
outputs := make(map[int]IBioSequenceBatch, 100) outputs := make(map[int]IBioSequenceBatch, 100)
slices := make(map[int]*BioSequenceSlice, 100) slices := make(map[int]*obiseq.BioSequenceSlice, 100)
orders := make(map[int]int, 100) orders := make(map[int]int, 100)
news := make(chan int) news := make(chan int)
@ -72,7 +74,7 @@ func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes
slice, ok := slices[key] slice, ok := slices[key]
if !ok { if !ok {
s := MakeBioSequenceSlice() s := obiseq.MakeBioSequenceSlice()
slice = &s slice = &s
slices[key] = slice slices[key] = slice
orders[key] = 0 orders[key] = 0
@ -89,7 +91,7 @@ func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes
if len(*slice) == batchsize { if len(*slice) == batchsize {
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice)) outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
orders[key]++ orders[key]++
s := MakeBioSequenceSlice() s := obiseq.MakeBioSequenceSlice()
slices[key] = &s slices[key] = &s
} }
} }

View File

@ -1,14 +1,16 @@
package obiseq package obiiter
import ( import (
"sync" "sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
// Private structure implementing an iterator over // Private structure implementing an iterator over
// bioseq.BioSequence based on a channel. // bioseq.BioSequence based on a channel.
type __ibiosequence__ struct { type __ibiosequence__ struct {
channel chan *BioSequence channel chan *obiseq.BioSequence
current *BioSequence current *obiseq.BioSequence
pushBack bool pushBack bool
all_done *sync.WaitGroup all_done *sync.WaitGroup
buffer_size int buffer_size int
@ -38,10 +40,10 @@ func (iterator IBioSequence) Wait() {
iterator.pointer.all_done.Wait() iterator.pointer.all_done.Wait()
} }
func (iterator IBioSequence) Channel() chan *BioSequence { func (iterator IBioSequence) Channel() chan *obiseq.BioSequence {
return iterator.pointer.channel return iterator.pointer.channel
} }
func (iterator IBioSequence) PChannel() *chan *BioSequence { func (iterator IBioSequence) PChannel() *chan *obiseq.BioSequence {
return &(iterator.pointer.channel) return &(iterator.pointer.channel)
} }
@ -53,7 +55,7 @@ func MakeIBioSequence(sizes ...int) IBioSequence {
} }
i := __ibiosequence__{ i := __ibiosequence__{
channel: make(chan *BioSequence, buffsize), channel: make(chan *obiseq.BioSequence, buffsize),
current: nil, current: nil,
pushBack: false, pushBack: false,
buffer_size: buffsize, buffer_size: buffsize,
@ -117,7 +119,7 @@ func (iterator IBioSequence) PushBack() {
// currently pointed by the iterator. You have to use the // currently pointed by the iterator. You have to use the
// 'Next' method to move to the next entry before calling // 'Next' method to move to the next entry before calling
// 'Get' to retreive the following instance. // 'Get' to retreive the following instance.
func (iterator IBioSequence) Get() *BioSequence { func (iterator IBioSequence) Get() *obiseq.BioSequence {
return iterator.pointer.current return iterator.pointer.current
} }
@ -161,7 +163,7 @@ func (iterator IBioSequence) IBioSequenceBatch(sizes ...int) IBioSequenceBatch {
go func() { go func() {
for j := 0; !iterator.Finished(); j++ { for j := 0; !iterator.Finished(); j++ {
batch := BioSequenceBatch{ batch := BioSequenceBatch{
slice: MakeBioSequenceSlice(), slice: obiseq.MakeBioSequenceSlice(),
order: j} order: j}
for i := 0; i < batchsize && iterator.Next(); i++ { for i := 0; i < batchsize && iterator.Next(); i++ {
seq := iterator.Get() seq := iterator.Get()
@ -275,7 +277,7 @@ func (iterator IBioSequence) Tail(n int, sizes ...int) IBioSequence {
} }
newIter := MakeIBioSequence(buffsize) newIter := MakeIBioSequence(buffsize)
buffseq := MakeBioSequenceSlice() buffseq := obiseq.MakeBioSequenceSlice()
newIter.Add(1) newIter.Add(1)

50
pkg/obiiter/merge.go Normal file
View File

@ -0,0 +1,50 @@
package obiiter
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
// IMergeSequenceBatch merges every batch of the receiver into a single
// sequence, by calling Merge(na, statsOn) on the batch's slice, and
// returns an iterator over batches of these merged sequences.
//
// The optional values of sizes are, in order, the number of merged
// sequences per output batch (100 by default) and the buffer size of
// the new iterator (the receiver's buffer size by default).
func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequenceBatch {
	perBatch := 100
	capacity := iterator.BufferSize()

	switch {
	case len(sizes) > 1:
		perBatch, capacity = sizes[0], sizes[1]
	case len(sizes) > 0:
		perBatch = sizes[0]
	}

	merged := MakeIBioSequenceBatch(capacity)
	merged.Add(1)

	go merged.WaitAndClose()

	go func() {
		for order := 0; !iterator.Finished(); order++ {
			out := BioSequenceBatch{
				slice: obiseq.MakeBioSequenceSlice(),
				order: order,
			}

			for n := 0; n < perBatch && iterator.Next(); n++ {
				group := iterator.Get()
				out.slice = append(out.slice, group.slice.Merge(na, statsOn))
			}

			// An empty batch can only appear once the input is
			// exhausted; it is not pushed.
			if out.Length() > 0 {
				merged.Push(out)
			}
		}
		merged.Done()
	}()

	return merged
}
// MergePipe wraps IMergeSequenceBatch, with the given arguments bound,
// into a Pipeable.
func MergePipe(na string, statsOn []string, sizes ...int) Pipeable {
	return func(iterator IBioSequenceBatch) IBioSequenceBatch {
		return iterator.IMergeSequenceBatch(na, statsOn, sizes...)
	}
}

View File

@ -0,0 +1,221 @@
package obiiter
import (
"log"
"sync"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
// PairedBioSequenceBatch groups a slice of forward sequences and a
// slice of reverse sequences under a common order number.
type PairedBioSequenceBatch struct {
forward obiseq.BioSequenceSlice
reverse obiseq.BioSequenceSlice
order int
}
var NilPairedBioSequenceBatch = PairedBioSequenceBatch{nil, nil, -1}
// MakePairedBioSequenceBatch builds a paired batch from a forward and a
// reverse batch sharing the same order number; the program is
// terminated through log.Fatalf when the order numbers differ.
//
// ReverseComplement(true) is called on every sequence of the reverse
// batch before the paired batch is assembled.
func MakePairedBioSequenceBatch(forward, reverse BioSequenceBatch) PairedBioSequenceBatch {
	if forward.order != reverse.order {
		log.Fatalf("Forward order : %d and reverse order : %d are not matching",
			forward.order, reverse.order)
	}

	reads := reverse.slice
	for idx := 0; idx < len(reads); idx++ {
		reads[idx].ReverseComplement(true)
	}

	var paired PairedBioSequenceBatch
	paired.forward = forward.slice
	paired.reverse = reads
	paired.order = forward.order

	return paired
}
// Order returns the order number stored in the paired batch.
func (batch PairedBioSequenceBatch) Order() int {
return batch.order
}
// Reorder returns the paired batch with its order number replaced by
// newOrder. The receiver is passed by value, so the caller's batch
// keeps its original order number.
func (batch PairedBioSequenceBatch) Reorder(newOrder int) PairedBioSequenceBatch {
batch.order = newOrder
return batch
}
// Length returns the number of sequences in the forward slice.
func (batch PairedBioSequenceBatch) Length() int {
return len(batch.forward)
}
// Forward returns the slice of forward sequences.
func (batch PairedBioSequenceBatch) Forward() obiseq.BioSequenceSlice {
return batch.forward
}
// Reverse returns the slice of reverse sequences.
func (batch PairedBioSequenceBatch) Reverse() obiseq.BioSequenceSlice {
return batch.reverse
}
// IsNil reports whether the forward slice is nil, as it is in
// NilPairedBioSequenceBatch. The reverse slice is not examined.
func (batch PairedBioSequenceBatch) IsNil() bool {
return batch.forward == nil
}
// __ipairedbiosequencebatch__ holds the state of a channel-based
// iterator over PairedBioSequenceBatch values.
type __ipairedbiosequencebatch__ struct {
// channel carries the paired batches read by Next.
channel chan PairedBioSequenceBatch
// current is the batch most recently received by Next and returned by Get.
current PairedBioSequenceBatch
// pushBack, when true, makes the next call to Next keep current in place.
pushBack bool
// all_done is the WaitGroup driven by Add, Done and Wait.
all_done *sync.WaitGroup
// buffer_size is the capacity the channel was created with.
buffer_size int
// finished is the storage p_finished points to for an iterator built
// by MakeIPairedBioSequenceBatch.
finished bool
// p_finished points to the end-of-data flag; Split copies the pointer,
// so split iterators share one flag.
p_finished *bool
}
// IPairedBioSequenceBatch is a handle on an iterator over
// PairedBioSequenceBatch values. Copies of the handle share the same
// underlying state through pointer.
type IPairedBioSequenceBatch struct {
pointer *__ipairedbiosequencebatch__
}
var NilIPairedBioSequenceBatch = IPairedBioSequenceBatch{pointer: nil}
// MakeIPairedBioSequenceBatch creates a paired-batch iterator backed by
// a buffered channel. The optional first value of sizes is the channel
// capacity (1 when omitted).
func MakeIPairedBioSequenceBatch(sizes ...int) IPairedBioSequenceBatch {
	capacity := 1
	if len(sizes) > 0 {
		capacity = sizes[0]
	}

	state := &__ipairedbiosequencebatch__{
		channel:     make(chan PairedBioSequenceBatch, capacity),
		current:     NilPairedBioSequenceBatch,
		pushBack:    false,
		all_done:    new(sync.WaitGroup),
		buffer_size: capacity,
		finished:    false,
	}
	// The end-of-data flag is reached through a pointer so that it can
	// be shared with iterators produced by Split.
	state.p_finished = &state.finished

	return IPairedBioSequenceBatch{state}
}
// Add adds n to the iterator's WaitGroup counter.
func (iterator IPairedBioSequenceBatch) Add(n int) {
iterator.pointer.all_done.Add(n)
}
// Done decrements the iterator's WaitGroup counter by one.
func (iterator IPairedBioSequenceBatch) Done() {
iterator.pointer.all_done.Done()
}
// Wait blocks until the iterator's WaitGroup counter reaches zero.
func (iterator IPairedBioSequenceBatch) Wait() {
iterator.pointer.all_done.Wait()
}
// Channel returns the channel on which the paired batches travel.
func (iterator IPairedBioSequenceBatch) Channel() chan PairedBioSequenceBatch {
return iterator.pointer.channel
}
// IsNil reports whether the iterator is the nil instance
// (NilIPairedBioSequenceBatch).
func (iterator IPairedBioSequenceBatch) IsNil() bool {
return iterator.pointer == nil
}
// BufferSize returns the recorded channel capacity.
func (iterator IPairedBioSequenceBatch) BufferSize() int {
return iterator.pointer.buffer_size
}
// Split returns a new iterator reading from the same channel as the
// receiver. The channel, the WaitGroup and the end-of-data flag (through
// p_finished) are shared; the current batch and the push-back flag are
// private to the new iterator.
//
// NOTE(review): the shared end-of-data flag is a plain bool accessed
// without any synchronization visible here — confirm that split
// iterators are never advanced from several goroutines at once.
func (iterator IPairedBioSequenceBatch) Split() IPairedBioSequenceBatch {
i := __ipairedbiosequencebatch__{
channel: iterator.pointer.channel,
current: NilPairedBioSequenceBatch,
pushBack: false,
all_done: iterator.pointer.all_done,
buffer_size: iterator.pointer.buffer_size,
finished: false,
p_finished: iterator.pointer.p_finished}
newIter := IPairedBioSequenceBatch{&i}
return newIter
}
// Next advances the iterator and reports whether a paired batch is
// available through Get.
//
// A finished iterator always answers false. Otherwise a pending
// push-back is consumed (the current batch is kept), or one batch is
// received from the channel. When the channel is closed the current
// batch becomes NilPairedBioSequenceBatch and the end-of-data flag is
// raised.
func (iterator IPairedBioSequenceBatch) Next() bool {
	state := iterator.pointer

	switch {
	case *state.p_finished:
		return false
	case state.pushBack:
		state.pushBack = false
		return true
	}

	pair, open := <-state.channel
	if !open {
		state.current = NilPairedBioSequenceBatch
		*state.p_finished = true
		return false
	}

	state.current = pair
	return true
}
// PushBack marks the current batch as pushed back, so that the next
// call to Next keeps it in place instead of receiving a new one.
// Nothing happens when the current batch is nil.
func (iterator IPairedBioSequenceBatch) PushBack() {
if !iterator.pointer.current.IsNil() {
iterator.pointer.pushBack = true
}
}
// Get returns the PairedBioSequenceBatch currently pointed to by the
// iterator. Call Next to move to the next batch before calling Get
// again to retrieve it.
func (iterator IPairedBioSequenceBatch) Get() PairedBioSequenceBatch {
return iterator.pointer.current
}
// Finished reports whether the end-of-data flag is raised, which Next
// does once the channel has been closed and no more batches are
// available.
func (iterator IPairedBioSequenceBatch) Finished() bool {
return *iterator.pointer.p_finished
}
// SortBatches returns a new iterator delivering the receiver's paired
// batches in increasing order number, starting at 0. The optional first
// value of sizes is the buffer size of the new iterator; it defaults to
// the receiver's buffer size.
//
// A batch arriving ahead of its turn is parked until every lower order
// number has been sent. Batches still parked when the input ends (for
// instance because an order number is missing or repeated) are flushed
// in ascending order instead of being silently dropped.
func (iterator IPairedBioSequenceBatch) SortBatches(sizes ...int) IPairedBioSequenceBatch {
	buffsize := iterator.BufferSize()

	if len(sizes) > 0 {
		buffsize = sizes[0]
	}

	newIter := MakeIPairedBioSequenceBatch(buffsize)

	newIter.Add(1)

	go func() {
		newIter.Wait()
		close(newIter.pointer.channel)
	}()

	go func() {
		nextToSend := 0
		pending := make(map[int]PairedBioSequenceBatch)

		for iterator.Next() {
			batch := iterator.Get()

			if batch.order != nextToSend {
				pending[batch.order] = batch
				continue
			}

			newIter.pointer.channel <- batch
			nextToSend++

			// Release every parked batch that is now in sequence.
			for queued, ok := pending[nextToSend]; ok; queued, ok = pending[nextToSend] {
				newIter.pointer.channel <- queued
				delete(pending, nextToSend)
				nextToSend++
			}
		}

		// The input is exhausted: whatever is still parked can no
		// longer be completed, so emit it by increasing order number.
		for len(pending) > 0 {
			lowest, first := 0, true
			for order := range pending {
				if first || order < lowest {
					lowest, first = order, false
				}
			}
			newIter.pointer.channel <- pending[lowest]
			delete(pending, lowest)
		}

		newIter.Done()
	}()

	return newIter
}

46
pkg/obiiter/pipe.go Normal file
View File

@ -0,0 +1,46 @@
package obiiter
// Pipeable is a processing step transforming one batch iterator into
// another.
type Pipeable func(input IBioSequenceBatch) IBioSequenceBatch
// Pipeline chains start and then each of parts, in argument order, into
// a single Pipeable: the output of one step is the input of the next.
func Pipeline(start Pipeable, parts ...Pipeable) Pipeable {
	return func(input IBioSequenceBatch) IBioSequenceBatch {
		stream := start(input)
		for i := range parts {
			stream = parts[i](stream)
		}
		return stream
	}
}
// Pipe feeds the receiver through the pipeline made of start followed
// by parts and returns the resulting iterator.
func (input IBioSequenceBatch) Pipe(start Pipeable, parts ...Pipeable) IBioSequenceBatch {
	return Pipeline(start, parts...)(input)
}
// Teeable is a processing step turning one batch iterator into two.
type Teeable func(input IBioSequenceBatch) (IBioSequenceBatch,IBioSequenceBatch)
// CopyTee duplicates the stream of batches: every batch read from the
// receiver is pushed on both returned iterators.
//
// The very same batch value is pushed twice, so both outputs refer to
// the same underlying slice; no copy of the sequences is made here.
//
// Each output has its own WaitGroup slot, released by the producer once
// the input is exhausted, and its own closing goroutine. Without the
// release the closing goroutine would wait forever and neither output
// would ever be closed.
func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch, IBioSequenceBatch) {
	first := MakeIBioSequenceBatch()
	second := MakeIBioSequenceBatch()

	first.Add(1)
	second.Add(1)

	// Independent closers: WaitAndClose also waits for the output's
	// buffer to drain, so one output must not wait on the other.
	go first.WaitAndClose()
	go second.WaitAndClose()

	go func() {
		for input.Next() {
			b := input.Get()
			first.Push(b)
			second.Push(b)
		}
		first.Done()
		second.Done()
	}()

	return first, second
}

View File

@ -1,4 +1,4 @@
package obiseq package obiiter
import ( import (
"os" "os"
@ -37,3 +37,12 @@ func (iterator IBioSequenceBatch) Speed() IBioSequenceBatch {
return newIter return newIter
} }
// SpeedPipe wraps the Speed method of the batch iterator into a
// Pipeable.
func SpeedPipe() Pipeable {
	return func(iterator IBioSequenceBatch) IBioSequenceBatch {
		return iterator.Speed()
	}
}

View File

@ -1,49 +1,25 @@
package obiseq package obiiter
import ( import (
"log" "log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
type SeqAnnotator func(*BioSequence) type SeqAnnotator func(*obiseq.BioSequence)
type SeqWorker func(*BioSequence) *BioSequence type SeqWorker func(*obiseq.BioSequence) *obiseq.BioSequence
type SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice type SeqSliceWorker func(obiseq.BioSequenceSlice) obiseq.BioSequenceSlice
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker { func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
f := func(seq *BioSequence) *BioSequence { f := func(seq *obiseq.BioSequence) *obiseq.BioSequence {
function(seq) function(seq)
return seq return seq
} }
return f return f
} }
func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
buffsize := iterator.BufferSize()
if len(sizes) > 0 {
buffsize = sizes[0]
}
newIter := MakeIBioSequence(buffsize)
newIter.Add(1)
go func() {
newIter.Wait()
close(newIter.pointer.channel)
}()
go func() {
for iterator.Next() {
seq := iterator.Get()
seq = worker(seq)
newIter.pointer.channel <- seq
}
newIter.Done()
}()
return newIter
}
func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch { func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch {
nworkers := 4 nworkers := 4
@ -125,3 +101,48 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
return newIter return newIter
} }
// MakeIWorker returns a new sequence iterator delivering the result of
// worker applied to each sequence of the receiver, in reading order.
// The optional first value of sizes is the buffer size of the new
// iterator; it defaults to the receiver's buffer size.
func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
	capacity := iterator.BufferSize()
	if len(sizes) > 0 {
		capacity = sizes[0]
	}

	out := MakeIBioSequence(capacity)
	out.Add(1)

	// Close the output once the producing goroutine has finished.
	go func() {
		out.Wait()
		close(out.pointer.channel)
	}()

	go func() {
		for iterator.Next() {
			out.pointer.channel <- worker(iterator.Get())
		}
		out.Done()
	}()

	return out
}
// WorkerPipe builds a Pipeable which, given an IBioSequenceBatch, returns
// the result of calling its MakeIWorker method with worker and the sizes
// supplied here.
func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
	return func(iterator IBioSequenceBatch) IBioSequenceBatch {
		return iterator.MakeIWorker(worker, sizes...)
	}
}
// SliceWorkerPipe builds a Pipeable which, given an IBioSequenceBatch,
// returns the result of calling its MakeISliceWorker method with worker and
// the sizes supplied here.
func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
	return func(iterator IBioSequenceBatch) IBioSequenceBatch {
		return iterator.MakeISliceWorker(worker, sizes...)
	}
}

View File

@ -1,6 +1,9 @@
package obingslibrary package obingslibrary
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
type _Options struct { type _Options struct {
discardErrors bool discardErrors bool
@ -167,7 +170,7 @@ func ExtractBarcodeSlice(ngslibrary NGSLibrary,
} }
func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary, func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
options ...WithOption) obiseq.SeqSliceWorker { options ...WithOption) obiiter.SeqSliceWorker {
opt := MakeOptions(options) opt := MakeOptions(options)
@ -179,3 +182,5 @@ func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
return worker return worker
} }

View File

@ -165,41 +165,3 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn []string) *BioSequenc
return seq return seq
} }
// IMergeSequenceBatch returns a new IBioSequenceBatch in which every batch
// read from iterator has been collapsed into a single sequence by calling
// Merge(na, statsOn) on its slice. Merged sequences are grouped into output
// batches holding at most batchsize of them.
//
// sizes is optional: sizes[0] overrides the default batch size of 100 and
// sizes[1] overrides the buffer size, which otherwise is
// iterator.BufferSize().
func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequenceBatch {
	batchsize, buffsize := 100, iterator.BufferSize()
	switch {
	case len(sizes) > 1:
		batchsize, buffsize = sizes[0], sizes[1]
	case len(sizes) > 0:
		batchsize = sizes[0]
	}

	newIter := MakeIBioSequenceBatch(buffsize)

	// A single producer goroutine feeds the new iterator.
	newIter.Add(1)

	go func() {
		newIter.WaitAndClose()
	}()

	go func() {
		// order is incremented on every pass, including passes that end
		// up producing no batch.
		for order := 0; !iterator.Finished(); order++ {
			merged := BioSequenceBatch{
				slice: MakeBioSequenceSlice(),
				order: order,
			}

			for n := 0; n < batchsize && iterator.Next(); n++ {
				source := iterator.Get()
				merged.slice = append(merged.slice, source.slice.Merge(na, statsOn))
			}

			// Empty batches are not pushed.
			if merged.Length() > 0 {
				newIter.Push(merged)
			}
		}

		newIter.Done()
	}()

	return newIter
}

View File

@ -1,5 +1,12 @@
package obiseq package obiseq
import (
"context"
"log"
"github.com/PaesslerAG/gval"
)
type SequencePredicate func(*BioSequence) bool type SequencePredicate func(*BioSequence) bool
func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate { func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate {
@ -73,3 +80,33 @@ func IsShorterOrEqualTo(length int) SequencePredicate {
return f return f
} }
// ExrpessionPredicat compiles expression with the gval "Full" language and
// returns a SequencePredicate that evaluates it as a boolean on each
// sequence.
//
// The expression can refer to the following variables:
//   - annot:    the value returned by sequence.Annotations()
//   - count:    the value returned by sequence.Count()
//   - length:   the value returned by sequence.Length()
//   - sequence: the *BioSequence itself
//
// The program is terminated through log.Fatalf when the expression cannot
// be compiled, or when its evaluation on a sequence fails. In both cases the
// underlying error is included in the message so that the cause of the
// failure can be diagnosed.
func ExrpessionPredicat(expression string) SequencePredicate {
	exp, err := gval.Full().NewEvaluable(expression)
	if err != nil {
		log.Fatalf("Error in the expression : %s : %v", expression, err)
	}

	f := func(sequence *BioSequence) bool {
		value, err := exp.EvalBool(context.Background(),
			map[string]interface{}{
				"annot":    sequence.Annotations(),
				"count":    sequence.Count(),
				"length":   sequence.Length(),
				"sequence": sequence,
			},
		)

		if err != nil {
			log.Fatalf("Expression '%s' cannot be evaluated on sequence %s : %v",
				expression,
				sequence.Id(),
				err)
		}

		return value
	}

	return f
}

2
pkg/obiseq/worker.go Normal file
View File

@ -0,0 +1,2 @@
package obiseq

View File

@ -7,8 +7,8 @@ import (
"strings" "strings"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) { func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
@ -66,9 +66,9 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
return list_of_files, nil return list_of_files, nil
} }
func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error) { func ReadBioSequencesBatch(filenames ...string) (obiiter.IBioSequenceBatch, error) {
var iterator obiseq.IBioSequenceBatch var iterator obiiter.IBioSequenceBatch
var reader func(string, ...obiformats.WithOption) (obiseq.IBioSequenceBatch, error) var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequenceBatch, error)
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)
@ -106,7 +106,7 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
list_of_files, err := _ExpandListOfFiles(false, filenames...) list_of_files, err := _ExpandListOfFiles(false, filenames...)
if err != nil { if err != nil {
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
switch InputFormat() { switch InputFormat() {
@ -121,16 +121,16 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
iterator, err = reader(list_of_files[0], opts...) iterator, err = reader(list_of_files[0], opts...)
if err != nil { if err != nil {
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
list_of_files = list_of_files[1:] list_of_files = list_of_files[1:]
others := make([]obiseq.IBioSequenceBatch, 0, len(list_of_files)) others := make([]obiiter.IBioSequenceBatch, 0, len(list_of_files))
for _, fn := range list_of_files { for _, fn := range list_of_files {
r, err := reader(fn, opts...) r, err := reader(fn, opts...)
if err != nil { if err != nil {
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
others = append(others, r) others = append(others, r)
} }
@ -152,7 +152,7 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
return iterator, nil return iterator, nil
} }
func ReadBioSequences(filenames ...string) (obiseq.IBioSequence, error) { func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
ib, err := ReadBioSequencesBatch(filenames...) ib, err := ReadBioSequencesBatch(filenames...)
return ib.SortBatches().IBioSequence(), err return ib.SortBatches().IBioSequence(), err

View File

@ -4,11 +4,11 @@ import (
"log" "log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error { func WriteBioSequences(iterator obiiter.IBioSequence, filenames ...string) error {
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)
@ -65,10 +65,10 @@ func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error
return nil return nil
} }
func WriteBioSequencesBatch(iterator obiseq.IBioSequenceBatch, func WriteBioSequencesBatch(iterator obiiter.IBioSequenceBatch,
terminalAction bool, filenames ...string) (obiseq.IBioSequenceBatch, error) { terminalAction bool, filenames ...string) (obiiter.IBioSequenceBatch, error) {
var newIter obiseq.IBioSequenceBatch var newIter obiiter.IBioSequenceBatch
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)
@ -119,12 +119,12 @@ func WriteBioSequencesBatch(iterator obiseq.IBioSequenceBatch,
if err != nil { if err != nil {
log.Fatalf("Write file error: %v", err) log.Fatalf("Write file error: %v", err)
return obiseq.NilIBioSequenceBatch, err return obiiter.NilIBioSequenceBatch, err
} }
if terminalAction { if terminalAction {
newIter.Recycle() newIter.Recycle()
return obiseq.NilIBioSequenceBatch, nil return obiiter.NilIBioSequenceBatch, nil
} }
return newIter, nil return newIter, nil

View File

@ -4,12 +4,12 @@ import (
"log" "log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
) )
func DistributeSequence(sequences obiseq.IBioSequenceBatch) { func DistributeSequence(sequences obiiter.IBioSequenceBatch) {
opts := make([]obiformats.WithOption, 0, 10) opts := make([]obiformats.WithOption, 0, 10)

View File

@ -3,13 +3,14 @@ package obimultiplex
import ( import (
"log" "log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
) )
func IExtractBarcodeBatches(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) { func IExtractBarcodeBatches(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
opts := make([]obingslibrary.WithOption, 0, 10) opts := make([]obingslibrary.WithOption, 0, 10)
@ -36,7 +37,7 @@ func IExtractBarcodeBatches(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSeque
newIter = newIter.Rebatch(obioptions.CLIBatchSize()) newIter = newIter.Rebatch(obioptions.CLIBatchSize())
} }
var unidentified obiseq.IBioSequenceBatch var unidentified obiiter.IBioSequenceBatch
if CLIUnidentifiedFileName() != "" { if CLIUnidentifiedFileName() != "" {
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName()) log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"), unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),

View File

@ -1,7 +1,7 @@
package obipairing package obipairing
import ( import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"github.com/DavidGamba/go-getoptions" "github.com/DavidGamba/go-getoptions"
) )
@ -47,15 +47,15 @@ func OptionSet(options *getoptions.GetOpt) {
PairingOptionSet(options) PairingOptionSet(options)
} }
func IBatchPairedSequence() (obiseq.IPairedBioSequenceBatch, error) { func IBatchPairedSequence() (obiiter.IPairedBioSequenceBatch, error) {
forward, err := obiconvert.ReadBioSequencesBatch(_ForwardFiles...) forward, err := obiconvert.ReadBioSequencesBatch(_ForwardFiles...)
if err != nil { if err != nil {
return obiseq.NilIPairedBioSequenceBatch, err return obiiter.NilIPairedBioSequenceBatch, err
} }
reverse, err := obiconvert.ReadBioSequencesBatch(_ReverseFiles...) reverse, err := obiconvert.ReadBioSequencesBatch(_ReverseFiles...)
if err != nil { if err != nil {
return obiseq.NilIPairedBioSequenceBatch, err return obiiter.NilIPairedBioSequenceBatch, err
} }
paired := forward.PairWith(reverse) paired := forward.PairWith(reverse)

View File

@ -7,6 +7,7 @@ import (
"runtime" "runtime"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"github.com/schollz/progressbar/v3" "github.com/schollz/progressbar/v3"
) )
@ -202,8 +203,10 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
// The function returns an iterator over batches of obiseq.Biosequence object. // The function returns an iterator over batches of obiseq.Biosequence object.
// each pair of processed sequences produces one sequence in the result iterator. // each pair of processed sequences produces one sequence in the result iterator.
// //
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch, func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
gap float64, delta, minOverlap int, minIdentity float64, withStats bool, sizes ...int) obiseq.IBioSequenceBatch { gap float64, delta, minOverlap int,
minIdentity float64,
withStats bool, sizes ...int) obiiter.IBioSequenceBatch {
nworkers := runtime.NumCPU() * 3 / 2 nworkers := runtime.NumCPU() * 3 / 2
buffsize := iterator.BufferSize() buffsize := iterator.BufferSize()
@ -216,7 +219,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
buffsize = sizes[1] buffsize = sizes[1]
} }
newIter := obiseq.MakeIBioSequenceBatch(buffsize) newIter := obiiter.MakeIBioSequenceBatch(buffsize)
newIter.Add(nworkers) newIter.Add(nworkers)
@ -233,7 +236,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
progressbar.OptionShowIts(), progressbar.OptionShowIts(),
progressbar.OptionSetDescription("[Sequence Pairing]")) progressbar.OptionSetDescription("[Sequence Pairing]"))
f := func(iterator obiseq.IPairedBioSequenceBatch, wid int) { f := func(iterator obiiter.IPairedBioSequenceBatch, wid int) {
arena := obialign.MakePEAlignArena(150, 150) arena := obialign.MakePEAlignArena(150, 150)
for iterator.Next() { for iterator.Next() {
@ -249,7 +252,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
} }
} }
bar.Add(batch.Length() - processed) bar.Add(batch.Length() - processed)
newIter.Push(obiseq.MakeBioSequenceBatch( newIter.Push(obiiter.MakeBioSequenceBatch(
batch.Order(), batch.Order(),
cons, cons,
)) ))

View File

@ -2,13 +2,13 @@ package obipcr
import ( import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
) )
// PCR iterates over sequences provided by a obiseq.IBioSequenceBatch // PCR iterates over sequences provided by a obiseq.IBioSequenceBatch
// and returns an other obiseq.IBioSequenceBatch distributing // and returns an other obiseq.IBioSequenceBatch distributing
// obiseq.BioSequenceBatch containing the selected amplicon sequences. // obiseq.BioSequenceBatch containing the selected amplicon sequences.
func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) { func PCR(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
opts := make([]obiapat.WithOption, 0, 10) opts := make([]obiapat.WithOption, 0, 10)

View File

@ -4,11 +4,11 @@ import (
"log" "log"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
) )
func Unique(sequences obiseq.IBioSequenceBatch) obiseq.IBioSequenceBatch { func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
options := make([]obichunk.WithOption, 0, 30) options := make([]obichunk.WithOption, 0, 30)