mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Some code refactoring, a new, more memory-efficient version of obiuniq, and a first Makefile for building obitools
This commit is contained in:
@ -2,7 +2,6 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
@ -34,20 +33,8 @@ func main() {
|
|||||||
|
|
||||||
_, args, _ := optionParser(os.Args)
|
_, args, _ := optionParser(os.Args)
|
||||||
|
|
||||||
fs, _ := obiconvert.ReadBioSequences(args...)
|
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
|
||||||
nread := 0
|
nread, nvariant, nsymbol := fs.Count(true)
|
||||||
nvariant := 0
|
|
||||||
nsymbol := 0
|
|
||||||
for fs.Next() {
|
|
||||||
s := fs.Get()
|
|
||||||
if s==nil {
|
|
||||||
log.Panicln("Read sequence is nil")
|
|
||||||
}
|
|
||||||
nread += s.Count()
|
|
||||||
nvariant++
|
|
||||||
nsymbol += s.Length()
|
|
||||||
s.Recycle()
|
|
||||||
}
|
|
||||||
|
|
||||||
if obicount.CLIIsPrintingVariantCount() {
|
if obicount.CLIIsPrintingVariantCount() {
|
||||||
fmt.Printf(" %d", nvariant)
|
fmt.Printf(" %d", nvariant)
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -429,7 +430,7 @@ func PCRSlice(sequences obiseq.BioSequenceSlice,
|
|||||||
|
|
||||||
// PCRSliceWorker is a worker function builder which produces a
|
// PCRSliceWorker is a worker function builder which produces a
|
||||||
// job function usable by the obiseq.MakeISliceWorker function.
|
// job function usable by the obiseq.MakeISliceWorker function.
|
||||||
func PCRSliceWorker(options ...WithOption) obiseq.SeqSliceWorker {
|
func PCRSliceWorker(options ...WithOption) obiiter.SeqSliceWorker {
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
worker := func(sequences obiseq.BioSequenceSlice) obiseq.BioSequenceSlice {
|
||||||
|
@ -8,6 +8,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -33,12 +34,12 @@ func find(root, ext string) []string {
|
|||||||
return a
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
|
func ISequenceChunkOnDisk(iterator obiiter.IBioSequenceBatch,
|
||||||
classifier *obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier,
|
||||||
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
sizes ...int) (obiiter.IBioSequenceBatch, error) {
|
||||||
dir, err := tempDir()
|
dir, err := tempDir()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
bufferSize := iterator.BufferSize()
|
bufferSize := iterator.BufferSize()
|
||||||
@ -47,7 +48,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
|
|||||||
bufferSize = sizes[0]
|
bufferSize = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(bufferSize)
|
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
@ -86,7 +87,7 @@ func ISequenceChunkOnDisk(iterator obiseq.IBioSequenceBatch,
|
|||||||
b.Recycle()
|
b.Recycle()
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter.Push(obiseq.MakeBioSequenceBatch(order, chunck))
|
newIter.Push(obiiter.MakeBioSequenceBatch(order, chunck))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,12 +4,13 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
|
func ISequenceChunk(iterator obiiter.IBioSequenceBatch,
|
||||||
classifier *obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier,
|
||||||
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
sizes ...int) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
bufferSize := iterator.BufferSize()
|
bufferSize := iterator.BufferSize()
|
||||||
|
|
||||||
@ -17,7 +18,7 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
|
|||||||
bufferSize = sizes[0]
|
bufferSize = sizes[0]
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(bufferSize)
|
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
@ -64,7 +65,7 @@ func ISequenceChunk(iterator obiseq.IBioSequenceBatch,
|
|||||||
for _, chunck := range chunks {
|
for _, chunck := range chunks {
|
||||||
|
|
||||||
if len(*chunck) > 0 {
|
if len(*chunck) > 0 {
|
||||||
newIter.Push(obiseq.MakeBioSequenceBatch(order, *chunck))
|
newIter.Push(obiiter.MakeBioSequenceBatch(order, *chunck))
|
||||||
order++
|
order++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ import (
|
|||||||
"sort"
|
"sort"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -54,9 +55,9 @@ func (by _By) Sort(seqs []sSS) {
|
|||||||
// End of the sort interface
|
// End of the sort interface
|
||||||
//
|
//
|
||||||
|
|
||||||
func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
func ISequenceSubChunk(iterator obiiter.IBioSequenceBatch,
|
||||||
classifier *obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier,
|
||||||
sizes ...int) (obiseq.IBioSequenceBatch, error) {
|
sizes ...int) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
bufferSize := iterator.BufferSize()
|
bufferSize := iterator.BufferSize()
|
||||||
nworkers := 4
|
nworkers := 4
|
||||||
@ -69,7 +70,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
|||||||
bufferSize = sizes[1]
|
bufferSize = sizes[1]
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(bufferSize)
|
newIter := obiiter.MakeIBioSequenceBatch(bufferSize)
|
||||||
|
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
|
|
||||||
@ -86,7 +87,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
|||||||
return neworder
|
return neworder
|
||||||
}
|
}
|
||||||
|
|
||||||
ff := func(iterator obiseq.IBioSequenceBatch,
|
ff := func(iterator obiiter.IBioSequenceBatch,
|
||||||
classifier *obiseq.BioSequenceClassifier) {
|
classifier *obiseq.BioSequenceClassifier) {
|
||||||
|
|
||||||
ordered := make([]sSS, 100)
|
ordered := make([]sSS, 100)
|
||||||
@ -121,7 +122,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
|||||||
ss := obiseq.MakeBioSequenceSlice()
|
ss := obiseq.MakeBioSequenceSlice()
|
||||||
for i, v := range ordered {
|
for i, v := range ordered {
|
||||||
if v.code != last {
|
if v.code != last {
|
||||||
newIter.Push(obiseq.MakeBioSequenceBatch(nextOrder(), ss))
|
newIter.Push(obiiter.MakeBioSequenceBatch(nextOrder(), ss))
|
||||||
ss = obiseq.MakeBioSequenceSlice()
|
ss = obiseq.MakeBioSequenceSlice()
|
||||||
last = v.code
|
last = v.code
|
||||||
}
|
}
|
||||||
@ -131,7 +132,7 @@ func ISequenceSubChunk(iterator obiseq.IBioSequenceBatch,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(ss) > 0 {
|
if len(ss) > 0 {
|
||||||
newIter.Push(obiseq.MakeBioSequenceBatch(nextOrder(), ss))
|
newIter.Push(obiiter.MakeBioSequenceBatch(nextOrder(), ss))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
newIter.Push(batch.Reorder(nextOrder()))
|
newIter.Push(batch.Reorder(nextOrder()))
|
||||||
|
@ -3,26 +3,27 @@ package obichunk
|
|||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
func IUniqueSequence(iterator obiiter.IBioSequenceBatch,
|
||||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
opts := MakeOptions(options)
|
opts := MakeOptions(options)
|
||||||
nworkers := opts.ParallelWorkers()
|
nworkers := opts.ParallelWorkers()
|
||||||
|
|
||||||
iUnique := obiseq.MakeIBioSequenceBatch(opts.BufferSize())
|
iUnique := obiiter.MakeIBioSequenceBatch(opts.BufferSize())
|
||||||
|
|
||||||
if opts.SortOnDisk() {
|
if opts.SortOnDisk() {
|
||||||
nworkers = 1
|
nworkers = 1
|
||||||
iterator, err = ISequenceChunkOnDisk(iterator,
|
iterator, err = ISequenceChunkOnDisk(iterator,
|
||||||
obiseq.HashClassifier(opts.BatchCount()),
|
obiseq.HashClassifier(opts.BatchCount()),
|
||||||
opts.BufferSize())
|
0)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@ -31,7 +32,7 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
|||||||
opts.BufferSize())
|
opts.BufferSize())
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,12 +54,12 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
|||||||
return neworder
|
return neworder
|
||||||
}
|
}
|
||||||
|
|
||||||
var ff func(obiseq.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int)
|
var ff func(obiiter.IBioSequenceBatch, *obiseq.BioSequenceClassifier, int)
|
||||||
|
|
||||||
cat := opts.Categories()
|
cat := opts.Categories()
|
||||||
na := opts.NAValue()
|
na := opts.NAValue()
|
||||||
|
|
||||||
ff = func(input obiseq.IBioSequenceBatch,
|
ff = func(input obiiter.IBioSequenceBatch,
|
||||||
classifier *obiseq.BioSequenceClassifier,
|
classifier *obiseq.BioSequenceClassifier,
|
||||||
icat int) {
|
icat int) {
|
||||||
icat--
|
icat--
|
||||||
@ -67,9 +68,9 @@ func IUniqueSequence(iterator obiseq.IBioSequenceBatch,
|
|||||||
1,
|
1,
|
||||||
opts.BufferSize())
|
opts.BufferSize())
|
||||||
|
|
||||||
var next obiseq.IBioSequenceBatch
|
var next obiiter.IBioSequenceBatch
|
||||||
if icat >= 0 {
|
if icat >= 0 {
|
||||||
next = obiseq.MakeIBioSequenceBatch(opts.BufferSize())
|
next = obiiter.MakeIBioSequenceBatch(opts.BufferSize())
|
||||||
|
|
||||||
iUnique.Add(1)
|
iUnique.Add(1)
|
||||||
go ff(next,
|
go ff(next,
|
||||||
|
@ -5,15 +5,15 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
)
|
)
|
||||||
|
|
||||||
type SequenceBatchWriterToFile func(iterator obiseq.IBioSequenceBatch,
|
type SequenceBatchWriterToFile func(iterator obiiter.IBioSequenceBatch,
|
||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) (obiseq.IBioSequenceBatch, error)
|
options ...WithOption) (obiiter.IBioSequenceBatch, error)
|
||||||
|
|
||||||
func WriterDispatcher(prototypename string,
|
func WriterDispatcher(prototypename string,
|
||||||
dispatcher obiseq.IDistribute,
|
dispatcher obiiter.IDistribute,
|
||||||
formater SequenceBatchWriterToFile,
|
formater SequenceBatchWriterToFile,
|
||||||
options ...WithOption) {
|
options ...WithOption) {
|
||||||
|
|
||||||
|
@ -10,6 +10,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -117,7 +118,7 @@ func __read_ecopcr_bioseq__(file *__ecopcr_file__) (*obiseq.BioSequence, error)
|
|||||||
return bseq, nil
|
return bseq, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceBatch {
|
func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
|
||||||
tag := make([]byte, 11)
|
tag := make([]byte, 11)
|
||||||
n, _ := reader.Read(tag)
|
n, _ := reader.Read(tag)
|
||||||
|
|
||||||
@ -163,7 +164,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
|
|||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
@ -181,7 +182,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
|
|||||||
slice = append(slice, seq)
|
slice = append(slice, seq)
|
||||||
ii++
|
ii++
|
||||||
if ii >= opt.BatchSize() {
|
if ii >= opt.BatchSize() {
|
||||||
newIter.Push(obiseq.MakeBioSequenceBatch(i, slice))
|
newIter.Push(obiiter.MakeBioSequenceBatch(i, slice))
|
||||||
slice = obiseq.MakeBioSequenceSlice()
|
slice = obiseq.MakeBioSequenceSlice()
|
||||||
i++
|
i++
|
||||||
ii = 0
|
ii = 0
|
||||||
@ -191,7 +192,7 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(slice) > 0 {
|
if len(slice) > 0 {
|
||||||
newIter.Push(obiseq.MakeBioSequenceBatch(i, slice))
|
newIter.Push(obiiter.MakeBioSequenceBatch(i, slice))
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter.Done()
|
newIter.Done()
|
||||||
@ -205,12 +206,12 @@ func ReadEcoPCRBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenc
|
|||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiseq.IBioSequence {
|
func ReadEcoPCR(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||||
ib := ReadEcoPCRBatch(reader, options...)
|
ib := ReadEcoPCRBatch(reader, options...)
|
||||||
return ib.SortBatches().IBioSequence()
|
return ib.SortBatches().IBioSequence()
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
var reader io.Reader
|
var reader io.Reader
|
||||||
var greader io.Reader
|
var greader io.Reader
|
||||||
var err error
|
var err error
|
||||||
@ -218,7 +219,7 @@ func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiseq.IBi
|
|||||||
reader, err = os.Open(filename)
|
reader, err = os.Open(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("open file error: %+v", err)
|
log.Printf("open file error: %+v", err)
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test if the flux is compressed by gzip
|
// Test if the flux is compressed by gzip
|
||||||
@ -230,7 +231,7 @@ func ReadEcoPCRBatchFromFile(filename string, options ...WithOption) (obiseq.IBi
|
|||||||
return ReadEcoPCRBatch(reader, options...), nil
|
return ReadEcoPCRBatch(reader, options...), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
|
func ReadEcoPCRFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
ib, err := ReadEcoPCRBatchFromFile(filename, options...)
|
ib, err := ReadEcoPCRBatchFromFile(filename, options...)
|
||||||
return ib.SortBatches().IBioSequence(), err
|
return ib.SortBatches().IBioSequence(), err
|
||||||
|
|
||||||
|
@ -10,6 +10,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -80,7 +81,7 @@ func _EndOfLastEntry(buff []byte) int {
|
|||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
|
func _ParseEmblFile(input <-chan _FileChunk, out obiiter.IBioSequenceBatch) {
|
||||||
|
|
||||||
for chunks := range input {
|
for chunks := range input {
|
||||||
scanner := bufio.NewScanner(chunks.raw)
|
scanner := bufio.NewScanner(chunks.raw)
|
||||||
@ -139,7 +140,7 @@ func _ParseEmblFile(input <-chan _FileChunk, out obiseq.IBioSequenceBatch) {
|
|||||||
seqBytes = new(bytes.Buffer)
|
seqBytes = new(bytes.Buffer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out.Push(obiseq.MakeBioSequenceBatch(order, sequences))
|
out.Push(obiiter.MakeBioSequenceBatch(order, sequences))
|
||||||
}
|
}
|
||||||
|
|
||||||
out.Done()
|
out.Done()
|
||||||
@ -176,11 +177,11 @@ func _ReadFlatFileChunk(reader io.Reader, readers chan _FileChunk) {
|
|||||||
|
|
||||||
// 6 5 43 2 1
|
// 6 5 43 2 1
|
||||||
// <CR>?<LF>//<CR>?<LF>
|
// <CR>?<LF>//<CR>?<LF>
|
||||||
func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceBatch {
|
func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiiter.IBioSequenceBatch {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
entry_channel := make(chan _FileChunk, opt.BufferSize())
|
entry_channel := make(chan _FileChunk, opt.BufferSize())
|
||||||
|
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
|
||||||
|
|
||||||
nworkers := opt.ParallelWorkers()
|
nworkers := opt.ParallelWorkers()
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
@ -199,12 +200,12 @@ func ReadEMBLBatch(reader io.Reader, options ...WithOption) obiseq.IBioSequenceB
|
|||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadEMBL(reader io.Reader, options ...WithOption) obiseq.IBioSequence {
|
func ReadEMBL(reader io.Reader, options ...WithOption) obiiter.IBioSequence {
|
||||||
ib := ReadEMBLBatch(reader, options...)
|
ib := ReadEMBLBatch(reader, options...)
|
||||||
return ib.SortBatches().IBioSequence()
|
return ib.SortBatches().IBioSequence()
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
var reader io.Reader
|
var reader io.Reader
|
||||||
var greader io.Reader
|
var greader io.Reader
|
||||||
var err error
|
var err error
|
||||||
@ -212,7 +213,7 @@ func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiseq.IBioS
|
|||||||
reader, err = os.Open(filename)
|
reader, err = os.Open(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("open file error: %+v", err)
|
log.Printf("open file error: %+v", err)
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test if the flux is compressed by gzip
|
// Test if the flux is compressed by gzip
|
||||||
@ -224,7 +225,7 @@ func ReadEMBLBatchFromFile(filename string, options ...WithOption) (obiseq.IBioS
|
|||||||
return ReadEMBLBatch(reader, options...), nil
|
return ReadEMBLBatch(reader, options...), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadEMBLFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
|
func ReadEMBLFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
ib, err := ReadEMBLBatchFromFile(filename, options...)
|
ib, err := ReadEMBLBatchFromFile(filename, options...)
|
||||||
return ib.SortBatches().IBioSequence(), err
|
return ib.SortBatches().IBioSequence(), err
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ package obiformats
|
|||||||
import (
|
import (
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -14,14 +15,16 @@ func ParseGuessedFastSeqHeader(sequence *obiseq.BioSequence) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func IParseFastSeqHeaderBatch(iterator obiseq.IBioSequenceBatch, options ...WithOption) obiseq.IBioSequenceBatch {
|
func IParseFastSeqHeaderBatch(iterator obiiter.IBioSequenceBatch,
|
||||||
|
options ...WithOption) obiiter.IBioSequenceBatch {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
return iterator.MakeIWorker(obiseq.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
|
return iterator.MakeIWorker(obiiter.AnnotatorToSeqWorker(opt.ParseFastSeqHeader()),
|
||||||
opt.ParallelWorkers(),
|
opt.ParallelWorkers(),
|
||||||
opt.BufferSize())
|
opt.BufferSize())
|
||||||
}
|
}
|
||||||
|
|
||||||
func IParseFastSeqHeader(iterator obiseq.IBioSequence, options ...WithOption) obiseq.IBioSequence {
|
func IParseFastSeqHeader(iterator obiiter.IBioSequence,
|
||||||
|
options ...WithOption) obiiter.IBioSequence {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
return IParseFastSeqHeaderBatch(iterator.IBioSequenceBatch(opt.BatchSize(),
|
return IParseFastSeqHeaderBatch(iterator.IBioSequenceBatch(opt.BatchSize(),
|
||||||
|
@ -13,11 +13,12 @@ import (
|
|||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/cutils"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/cutils"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
func _FastseqReader(seqfile C.fast_kseq_p,
|
func _FastseqReader(seqfile C.fast_kseq_p,
|
||||||
iterator obiseq.IBioSequenceBatch,
|
iterator obiiter.IBioSequenceBatch,
|
||||||
batch_size int) {
|
batch_size int) {
|
||||||
var comment string
|
var comment string
|
||||||
i := 0
|
i := 0
|
||||||
@ -63,7 +64,7 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
|||||||
// log.Printf("\n==> Pushing sequence batch\n")
|
// log.Printf("\n==> Pushing sequence batch\n")
|
||||||
// start := time.Now()
|
// start := time.Now()
|
||||||
|
|
||||||
iterator.Push(obiseq.MakeBioSequenceBatch(i, slice))
|
iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
|
||||||
// elapsed := time.Since(start)
|
// elapsed := time.Since(start)
|
||||||
// log.Printf("\n==>sequences pushed after %s\n", elapsed)
|
// log.Printf("\n==>sequences pushed after %s\n", elapsed)
|
||||||
|
|
||||||
@ -73,13 +74,13 @@ func _FastseqReader(seqfile C.fast_kseq_p,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(slice) > 0 {
|
if len(slice) > 0 {
|
||||||
iterator.Push(obiseq.MakeBioSequenceBatch(i, slice))
|
iterator.Push(obiiter.MakeBioSequenceBatch(i, slice))
|
||||||
}
|
}
|
||||||
iterator.Done()
|
iterator.Done()
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
name := C.CString(filename)
|
name := C.CString(filename)
|
||||||
@ -92,7 +93,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
|
|||||||
|
|
||||||
if pointer == nil {
|
if pointer == nil {
|
||||||
err = fmt.Errorf("cannot open file %s", filename)
|
err = fmt.Errorf("cannot open file %s", filename)
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
size := int64(-1)
|
size := int64(-1)
|
||||||
@ -104,7 +105,7 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
|
|||||||
size = -1
|
size = -1
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
@ -124,14 +125,14 @@ func ReadFastSeqBatchFromFile(filename string, options ...WithOption) (obiseq.IB
|
|||||||
return newIter, err
|
return newIter, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
|
func ReadFastSeqFromFile(filename string, options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
ib, err := ReadFastSeqBatchFromFile(filename, options...)
|
ib, err := ReadFastSeqBatchFromFile(filename, options...)
|
||||||
return ib.SortBatches().IBioSequence(), err
|
return ib.SortBatches().IBioSequence(), err
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
|
func ReadFastSeqBatchFromStdin(options ...WithOption) obiiter.IBioSequenceBatch {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(opt.BufferSize())
|
newIter := obiiter.MakeIBioSequenceBatch(opt.BufferSize())
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
||||||
@ -139,12 +140,13 @@ func ReadFastSeqBatchFromStdin(options ...WithOption) obiseq.IBioSequenceBatch {
|
|||||||
newIter.WaitAndClose()
|
newIter.WaitAndClose()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())), newIter, opt.BatchSize())
|
go _FastseqReader(C.open_fast_sek_stdin(C.int32_t(opt.QualityShift())),
|
||||||
|
newIter, opt.BatchSize())
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadFastSeqFromStdin(options ...WithOption) obiseq.IBioSequence {
|
func ReadFastSeqFromStdin(options ...WithOption) obiiter.IBioSequence {
|
||||||
ib := ReadFastSeqBatchFromStdin(options...)
|
ib := ReadFastSeqBatchFromStdin(options...)
|
||||||
return ib.SortBatches().IBioSequence()
|
return ib.SortBatches().IBioSequence()
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -21,7 +22,7 @@ func min(x, y int) int {
|
|||||||
func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
|
func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
|
||||||
var fragments strings.Builder
|
var fragments strings.Builder
|
||||||
|
|
||||||
if seq==nil {
|
if seq == nil {
|
||||||
log.Panicln("try to format a nil BioSequence")
|
log.Panicln("try to format a nil BioSequence")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -44,7 +45,7 @@ func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
|
|||||||
folded)
|
folded)
|
||||||
}
|
}
|
||||||
|
|
||||||
func FormatFastaBatch(batch obiseq.BioSequenceBatch, formater FormatHeader) []byte {
|
func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []byte {
|
||||||
var bs bytes.Buffer
|
var bs bytes.Buffer
|
||||||
for _, seq := range batch.Slice() {
|
for _, seq := range batch.Slice() {
|
||||||
bs.WriteString(FormatFasta(seq, formater))
|
bs.WriteString(FormatFasta(seq, formater))
|
||||||
@ -53,7 +54,7 @@ func FormatFastaBatch(batch obiseq.BioSequenceBatch, formater FormatHeader) []by
|
|||||||
return bs.Bytes()
|
return bs.Bytes()
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFasta(iterator obiseq.IBioSequence, file io.Writer, options ...WithOption) error {
|
func WriteFasta(iterator obiiter.IBioSequence, file io.Writer, options ...WithOption) error {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
header_format := opt.FormatFastSeqHeader()
|
header_format := opt.FormatFastSeqHeader()
|
||||||
@ -73,7 +74,7 @@ func WriteFasta(iterator obiseq.IBioSequence, file io.Writer, options ...WithOpt
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastaToFile(iterator obiseq.IBioSequence,
|
func WriteFastaToFile(iterator obiiter.IBioSequence,
|
||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) error {
|
options ...WithOption) error {
|
||||||
|
|
||||||
@ -89,16 +90,18 @@ func WriteFastaToFile(iterator obiseq.IBioSequence,
|
|||||||
return WriteFasta(iterator, file, options...)
|
return WriteFasta(iterator, file, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastaToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
|
func WriteFastaToStdout(iterator obiiter.IBioSequence, options ...WithOption) error {
|
||||||
options = append(options, OptionDontCloseFile())
|
options = append(options, OptionDontCloseFile())
|
||||||
return WriteFasta(iterator, os.Stdout, options...)
|
return WriteFasta(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
func WriteFastaBatch(iterator obiiter.IBioSequenceBatch,
|
||||||
|
file io.Writer,
|
||||||
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(buffsize)
|
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
nwriters := opt.ParallelWorkers()
|
nwriters := opt.ParallelWorkers()
|
||||||
|
|
||||||
@ -113,7 +116,7 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
|||||||
close(chunkchan)
|
close(chunkchan)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
ff := func(iterator obiseq.IBioSequenceBatch) {
|
ff := func(iterator obiiter.IBioSequenceBatch) {
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
chunkchan <- FileChunck{
|
chunkchan <- FileChunck{
|
||||||
@ -164,20 +167,21 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
|||||||
return newIter, nil
|
return newIter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastaBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
func WriteFastaBatchToStdout(iterator obiiter.IBioSequenceBatch,
|
||||||
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
options = append(options, OptionDontCloseFile())
|
options = append(options, OptionDontCloseFile())
|
||||||
return WriteFastaBatch(iterator, os.Stdout, options...)
|
return WriteFastaBatch(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastaBatchToFile(iterator obiseq.IBioSequenceBatch,
|
func WriteFastaBatchToFile(iterator obiiter.IBioSequenceBatch,
|
||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
file, err := os.Create(filename)
|
file, err := os.Create(filename)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
options = append(options, OptionCloseFile())
|
options = append(options, OptionCloseFile())
|
||||||
|
@ -8,6 +8,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -34,7 +35,7 @@ func FormatFastq(seq *obiseq.BioSequence, quality_shift int, formater FormatHead
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func FormatFastqBatch(batch obiseq.BioSequenceBatch, quality_shift int,
|
func FormatFastqBatch(batch obiiter.BioSequenceBatch, quality_shift int,
|
||||||
formater FormatHeader) []byte {
|
formater FormatHeader) []byte {
|
||||||
var bs bytes.Buffer
|
var bs bytes.Buffer
|
||||||
for _, seq := range batch.Slice() {
|
for _, seq := range batch.Slice() {
|
||||||
@ -44,7 +45,7 @@ func FormatFastqBatch(batch obiseq.BioSequenceBatch, quality_shift int,
|
|||||||
return bs.Bytes()
|
return bs.Bytes()
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastq(iterator obiseq.IBioSequence, file io.Writer, options ...WithOption) error {
|
func WriteFastq(iterator obiiter.IBioSequence, file io.Writer, options ...WithOption) error {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
header_format := opt.FormatFastSeqHeader()
|
header_format := opt.FormatFastSeqHeader()
|
||||||
@ -65,7 +66,7 @@ func WriteFastq(iterator obiseq.IBioSequence, file io.Writer, options ...WithOpt
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastqToFile(iterator obiseq.IBioSequence,
|
func WriteFastqToFile(iterator obiiter.IBioSequence,
|
||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) error {
|
options ...WithOption) error {
|
||||||
|
|
||||||
@ -80,7 +81,7 @@ func WriteFastqToFile(iterator obiseq.IBioSequence,
|
|||||||
return WriteFastq(iterator, file, options...)
|
return WriteFastq(iterator, file, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastqToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
|
func WriteFastqToStdout(iterator obiiter.IBioSequence, options ...WithOption) error {
|
||||||
options = append(options, OptionDontCloseFile())
|
options = append(options, OptionDontCloseFile())
|
||||||
return WriteFastq(iterator, os.Stdout, options...)
|
return WriteFastq(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
@ -90,11 +91,13 @@ type FileChunck struct {
|
|||||||
order int
|
order int
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
func WriteFastqBatch(iterator obiiter.IBioSequenceBatch,
|
||||||
|
file io.Writer,
|
||||||
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(buffsize)
|
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
nwriters := opt.ParallelWorkers()
|
nwriters := opt.ParallelWorkers()
|
||||||
|
|
||||||
@ -113,7 +116,7 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
|||||||
close(chunkchan)
|
close(chunkchan)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
ff := func(iterator obiseq.IBioSequenceBatch) {
|
ff := func(iterator obiiter.IBioSequenceBatch) {
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
chunk := FileChunck{
|
chunk := FileChunck{
|
||||||
@ -165,20 +168,21 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
|||||||
return newIter, nil
|
return newIter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastqBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
func WriteFastqBatchToStdout(iterator obiiter.IBioSequenceBatch,
|
||||||
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
options = append(options, OptionDontCloseFile())
|
options = append(options, OptionDontCloseFile())
|
||||||
return WriteFastqBatch(iterator, os.Stdout, options...)
|
return WriteFastqBatch(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastqBatchToFile(iterator obiseq.IBioSequenceBatch,
|
func WriteFastqBatchToFile(iterator obiiter.IBioSequenceBatch,
|
||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
file, err := os.Create(filename)
|
file, err := os.Create(filename)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
options = append(options, OptionCloseFile())
|
options = append(options, OptionCloseFile())
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
package obiformats
|
package obiformats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
type __options__ struct {
|
type __options__ struct {
|
||||||
fastseq_header_parser obiseq.SeqAnnotator
|
fastseq_header_parser obiiter.SeqAnnotator
|
||||||
fastseq_header_writer func(*obiseq.BioSequence) string
|
fastseq_header_writer func(*obiseq.BioSequence) string
|
||||||
with_progress_bar bool
|
with_progress_bar bool
|
||||||
buffer_size int
|
buffer_size int
|
||||||
@ -58,7 +59,7 @@ func (opt Options) ParallelWorkers() int {
|
|||||||
return opt.pointer.parallel_workers
|
return opt.pointer.parallel_workers
|
||||||
}
|
}
|
||||||
|
|
||||||
func (opt Options) ParseFastSeqHeader() obiseq.SeqAnnotator {
|
func (opt Options) ParseFastSeqHeader() obiiter.SeqAnnotator {
|
||||||
return opt.pointer.fastseq_header_parser
|
return opt.pointer.fastseq_header_parser
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -123,7 +124,7 @@ func OptionsQualitySolexa() WithOption {
|
|||||||
return OptionsQualityShift(64)
|
return OptionsQualityShift(64)
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionsFastSeqHeaderParser(parser obiseq.SeqAnnotator) WithOption {
|
func OptionsFastSeqHeaderParser(parser obiiter.SeqAnnotator) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.fastseq_header_parser = parser
|
opt.pointer.fastseq_header_parser = parser
|
||||||
})
|
})
|
||||||
|
@ -8,7 +8,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GuessSeqFileType(firstline string) string {
|
func GuessSeqFileType(firstline string) string {
|
||||||
@ -36,7 +36,8 @@ func GuessSeqFileType(firstline string) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
func ReadSequencesBatchFromFile(filename string,
|
||||||
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
var file *os.File
|
var file *os.File
|
||||||
var reader io.Reader
|
var reader io.Reader
|
||||||
var greader io.Reader
|
var greader io.Reader
|
||||||
@ -46,7 +47,7 @@ func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.
|
|||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
reader = file
|
reader = file
|
||||||
@ -65,7 +66,7 @@ func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.
|
|||||||
tag, _ := breader.Peek(30)
|
tag, _ := breader.Peek(30)
|
||||||
|
|
||||||
if len(tag) < 30 {
|
if len(tag) < 30 {
|
||||||
newIter := obiseq.MakeIBioSequenceBatch()
|
newIter := obiiter.MakeIBioSequenceBatch()
|
||||||
newIter.Close()
|
newIter.Close()
|
||||||
return newIter, nil
|
return newIter, nil
|
||||||
}
|
}
|
||||||
@ -89,10 +90,11 @@ func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.
|
|||||||
filename, filetype)
|
filename, filetype)
|
||||||
}
|
}
|
||||||
|
|
||||||
return obiseq.NilIBioSequenceBatch, nil
|
return obiiter.NilIBioSequenceBatch, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadSequencesFromFile(filename string, options ...WithOption) (obiseq.IBioSequence, error) {
|
func ReadSequencesFromFile(filename string,
|
||||||
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
ib, err := ReadSequencesBatchFromFile(filename, options...)
|
ib, err := ReadSequencesBatchFromFile(filename, options...)
|
||||||
return ib.SortBatches().IBioSequence(), err
|
return ib.SortBatches().IBioSequence(), err
|
||||||
|
|
||||||
|
@ -6,10 +6,10 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
)
|
)
|
||||||
|
|
||||||
func WriteSequences(iterator obiseq.IBioSequence,
|
func WriteSequences(iterator obiiter.IBioSequence,
|
||||||
file io.Writer,
|
file io.Writer,
|
||||||
options ...WithOption) error {
|
options ...WithOption) error {
|
||||||
|
|
||||||
@ -34,7 +34,7 @@ func WriteSequences(iterator obiseq.IBioSequence,
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteSequencesToFile(iterator obiseq.IBioSequence,
|
func WriteSequencesToFile(iterator obiiter.IBioSequence,
|
||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) error {
|
options ...WithOption) error {
|
||||||
|
|
||||||
@ -48,13 +48,13 @@ func WriteSequencesToFile(iterator obiseq.IBioSequence,
|
|||||||
return WriteSequences(iterator, file, options...)
|
return WriteSequences(iterator, file, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteSequencesToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
|
func WriteSequencesToStdout(iterator obiiter.IBioSequence, options ...WithOption) error {
|
||||||
return WriteSequences(iterator, os.Stdout, options...)
|
return WriteSequences(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
|
func WriteSequenceBatch(iterator obiiter.IBioSequenceBatch,
|
||||||
file io.Writer,
|
file io.Writer,
|
||||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
iterator = iterator.Rebatch(1000)
|
iterator = iterator.Rebatch(1000)
|
||||||
|
|
||||||
@ -64,7 +64,7 @@ func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
|
|||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
iterator.PushBack()
|
iterator.PushBack()
|
||||||
|
|
||||||
var newIter obiseq.IBioSequenceBatch
|
var newIter obiiter.IBioSequenceBatch
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
if len(batch.Slice()) > 0 {
|
if len(batch.Slice()) > 0 {
|
||||||
@ -84,24 +84,24 @@ func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
|
|||||||
return iterator, nil
|
return iterator, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return obiseq.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready")
|
return obiiter.NilIBioSequenceBatch, fmt.Errorf("input iterator not ready")
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteSequencesBatchToStdout(iterator obiseq.IBioSequenceBatch,
|
func WriteSequencesBatchToStdout(iterator obiiter.IBioSequenceBatch,
|
||||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
options = append(options, OptionDontCloseFile())
|
options = append(options, OptionDontCloseFile())
|
||||||
return WriteSequenceBatch(iterator, os.Stdout, options...)
|
return WriteSequenceBatch(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteSequencesBatchToFile(iterator obiseq.IBioSequenceBatch,
|
func WriteSequencesBatchToFile(iterator obiiter.IBioSequenceBatch,
|
||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
options ...WithOption) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
file, err := os.Create(filename)
|
file, err := os.Create(filename)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
options = append(options, OptionCloseFile())
|
options = append(options, OptionCloseFile())
|
||||||
|
53
pkg/obiiter/batch.go
Normal file
53
pkg/obiiter/batch.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
package obiiter
|
||||||
|
|
||||||
|
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
|
||||||
|
type BioSequenceBatch struct {
|
||||||
|
slice obiseq.BioSequenceSlice
|
||||||
|
order int
|
||||||
|
}
|
||||||
|
|
||||||
|
// NilBioSequenceBatch is the batch that carries no slice (nil) and the
// order value -1.
var NilBioSequenceBatch = BioSequenceBatch{slice: nil, order: -1}
|
||||||
|
|
||||||
|
func MakeBioSequenceBatch(order int,
|
||||||
|
sequences obiseq.BioSequenceSlice) BioSequenceBatch {
|
||||||
|
|
||||||
|
return BioSequenceBatch{
|
||||||
|
slice: sequences,
|
||||||
|
order: order,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch BioSequenceBatch) Order() int {
|
||||||
|
return batch.order
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch BioSequenceBatch) Reorder(newOrder int) BioSequenceBatch {
|
||||||
|
batch.order = newOrder
|
||||||
|
return batch
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch BioSequenceBatch) Slice() obiseq.BioSequenceSlice {
|
||||||
|
return batch.slice
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch BioSequenceBatch) Length() int {
|
||||||
|
return len(batch.slice)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch BioSequenceBatch) NotEmpty() bool {
|
||||||
|
return batch.slice.NotEmpty()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch BioSequenceBatch) Pop0() *obiseq.BioSequence {
|
||||||
|
return batch.slice.Pop0()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch BioSequenceBatch) IsNil() bool {
|
||||||
|
return batch.slice == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch BioSequenceBatch) Recycle() {
|
||||||
|
batch.slice.Recycle()
|
||||||
|
batch.slice = nil
|
||||||
|
}
|
560
pkg/obiiter/batchiterator.go
Normal file
560
pkg/obiiter/batchiterator.go
Normal file
@ -0,0 +1,560 @@
|
|||||||
|
package obiiter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
"github.com/tevino/abool/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Structure implementing an iterator over bioseq.BioSequenceBatch
|
||||||
|
// based on a channel.
|
||||||
|
type _IBioSequenceBatch struct {
|
||||||
|
channel chan BioSequenceBatch
|
||||||
|
current BioSequenceBatch
|
||||||
|
pushBack *abool.AtomicBool
|
||||||
|
all_done *sync.WaitGroup
|
||||||
|
lock *sync.RWMutex
|
||||||
|
buffer_size int32
|
||||||
|
batch_size int32
|
||||||
|
sequence_format string
|
||||||
|
finished *abool.AtomicBool
|
||||||
|
}
|
||||||
|
|
||||||
|
type IBioSequenceBatch struct {
|
||||||
|
pointer *_IBioSequenceBatch
|
||||||
|
}
|
||||||
|
|
||||||
|
// NilIBioSequenceBatch nil instance for IBioSequenceBatch
|
||||||
|
//
|
||||||
|
// NilIBioSequenceBatch is the nil instance for the
|
||||||
|
// IBioSequenceBatch type.
|
||||||
|
//
|
||||||
|
var NilIBioSequenceBatch = IBioSequenceBatch{pointer: nil}
|
||||||
|
|
||||||
|
func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch {
|
||||||
|
buffsize := int32(1)
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
buffsize = int32(sizes[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
i := _IBioSequenceBatch{
|
||||||
|
channel: make(chan BioSequenceBatch, buffsize),
|
||||||
|
current: NilBioSequenceBatch,
|
||||||
|
pushBack: abool.New(),
|
||||||
|
buffer_size: buffsize,
|
||||||
|
batch_size: -1,
|
||||||
|
sequence_format: "",
|
||||||
|
finished: abool.New(),
|
||||||
|
}
|
||||||
|
|
||||||
|
waiting := sync.WaitGroup{}
|
||||||
|
i.all_done = &waiting
|
||||||
|
lock := sync.RWMutex{}
|
||||||
|
i.lock = &lock
|
||||||
|
ii := IBioSequenceBatch{&i}
|
||||||
|
return ii
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Add(n int) {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.Add method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.all_done.Add(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Done() {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.Done method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.all_done.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Unlock() {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.Unlock method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.lock.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Lock() {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.Lock method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.lock.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) RLock() {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.RLock method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.lock.RLock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) RUnlock() {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.RUnlock method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.lock.RUnlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Wait() {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.Wait method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.all_done.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Channel() chan BioSequenceBatch {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.Channel method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
return iterator.pointer.channel
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) IsNil() bool {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.IsNil method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
return iterator.pointer == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) BufferSize() int {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.BufferSize method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
return int(atomic.LoadInt32(&iterator.pointer.buffer_size))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) BatchSize() int {
|
||||||
|
if iterator.pointer == nil {
|
||||||
|
log.Panic("call of IBioSequenceBatch.BatchSize method on NilIBioSequenceBatch")
|
||||||
|
}
|
||||||
|
|
||||||
|
return int(atomic.LoadInt32(&iterator.pointer.batch_size))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) SetBatchSize(size int) error {
|
||||||
|
if size >= 0 {
|
||||||
|
atomic.StoreInt32(&iterator.pointer.batch_size, int32(size))
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("size (%d) cannot be negative", size)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Split() IBioSequenceBatch {
|
||||||
|
iterator.pointer.lock.RLock()
|
||||||
|
defer iterator.pointer.lock.RUnlock()
|
||||||
|
i := _IBioSequenceBatch{
|
||||||
|
channel: iterator.pointer.channel,
|
||||||
|
current: NilBioSequenceBatch,
|
||||||
|
pushBack: abool.New(),
|
||||||
|
all_done: iterator.pointer.all_done,
|
||||||
|
buffer_size: iterator.pointer.buffer_size,
|
||||||
|
batch_size: iterator.pointer.batch_size,
|
||||||
|
sequence_format: iterator.pointer.sequence_format,
|
||||||
|
finished: iterator.pointer.finished}
|
||||||
|
lock := sync.RWMutex{}
|
||||||
|
i.lock = &lock
|
||||||
|
|
||||||
|
newIter := IBioSequenceBatch{&i}
|
||||||
|
return newIter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Next() bool {
|
||||||
|
if iterator.pointer.pushBack.IsSet() {
|
||||||
|
iterator.pointer.pushBack.UnSet()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
if iterator.pointer.finished.IsSet() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
next, ok := (<-iterator.pointer.channel)
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
iterator.pointer.current = next
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.current = NilBioSequenceBatch
|
||||||
|
iterator.pointer.finished.Set()
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) PushBack() {
|
||||||
|
if !iterator.pointer.current.IsNil() {
|
||||||
|
iterator.pointer.pushBack.Set()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The 'Get' method returns the instance of BioSequenceBatch
|
||||||
|
// currently pointed by the iterator. You have to use the
|
||||||
|
// 'Next' method to move to the next entry before calling
|
||||||
|
// 'Get' to retreive the following instance.
|
||||||
|
func (iterator IBioSequenceBatch) Get() BioSequenceBatch {
|
||||||
|
return iterator.pointer.current
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Push(batch BioSequenceBatch) {
|
||||||
|
if batch.IsNil() {
|
||||||
|
log.Panicln("An Nil batch is pushed on the channel")
|
||||||
|
}
|
||||||
|
if batch.Length() == 0 {
|
||||||
|
log.Panicln("An empty batch is pushed on the channel")
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.channel <- batch
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Close() {
|
||||||
|
close(iterator.pointer.channel)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) WaitAndClose() {
|
||||||
|
iterator.Wait()
|
||||||
|
|
||||||
|
for len(iterator.Channel()) > 0 {
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
}
|
||||||
|
iterator.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finished returns 'true' value if no more data is available
|
||||||
|
// from the iterator.
|
||||||
|
func (iterator IBioSequenceBatch) Finished() bool {
|
||||||
|
return iterator.pointer.finished.IsSet()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) IBioSequence(sizes ...int) IBioSequence {
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
buffsize = sizes[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter := MakeIBioSequence(buffsize)
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.Wait()
|
||||||
|
close(newIter.Channel())
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for iterator.Next() {
|
||||||
|
batch := iterator.Get()
|
||||||
|
|
||||||
|
for batch.NotEmpty() {
|
||||||
|
newIter.Channel() <- batch.Pop0()
|
||||||
|
}
|
||||||
|
batch.Recycle()
|
||||||
|
}
|
||||||
|
newIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return newIter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) SortBatches(sizes ...int) IBioSequenceBatch {
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
buffsize = sizes[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter := MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.Wait()
|
||||||
|
close(newIter.pointer.channel)
|
||||||
|
}()
|
||||||
|
|
||||||
|
next_to_send := 0
|
||||||
|
received := make(map[int]BioSequenceBatch)
|
||||||
|
go func() {
|
||||||
|
for iterator.Next() {
|
||||||
|
batch := iterator.Get()
|
||||||
|
if batch.order == next_to_send {
|
||||||
|
newIter.pointer.channel <- batch
|
||||||
|
next_to_send++
|
||||||
|
batch, ok := received[next_to_send]
|
||||||
|
for ok {
|
||||||
|
newIter.pointer.channel <- batch
|
||||||
|
delete(received, next_to_send)
|
||||||
|
next_to_send++
|
||||||
|
batch, ok = received[next_to_send]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
received[batch.order] = batch
|
||||||
|
}
|
||||||
|
}
|
||||||
|
newIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return newIter
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Concat(iterators ...IBioSequenceBatch) IBioSequenceBatch {
|
||||||
|
|
||||||
|
if len(iterators) == 0 {
|
||||||
|
return iterator
|
||||||
|
}
|
||||||
|
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
newIter := MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.Wait()
|
||||||
|
close(newIter.Channel())
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
previous_max := 0
|
||||||
|
max_order := 0
|
||||||
|
|
||||||
|
for iterator.Next() {
|
||||||
|
s := iterator.Get()
|
||||||
|
if s.order > max_order {
|
||||||
|
max_order = s.order
|
||||||
|
}
|
||||||
|
newIter.Push(s.Reorder(s.order + previous_max))
|
||||||
|
}
|
||||||
|
|
||||||
|
previous_max = max_order + 1
|
||||||
|
for _, iter := range iterators {
|
||||||
|
for iter.Next() {
|
||||||
|
s := iter.Get()
|
||||||
|
if (s.order + previous_max) > max_order {
|
||||||
|
max_order = s.order + previous_max
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter.Push(s.Reorder(s.order + previous_max))
|
||||||
|
}
|
||||||
|
previous_max = max_order + 1
|
||||||
|
}
|
||||||
|
newIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return newIter
|
||||||
|
}
|
||||||
|
|
||||||
|
// Redistributes sequences from a IBioSequenceBatch into a new
|
||||||
|
// IBioSequenceBatch with every batches having the same size
|
||||||
|
// indicated in parameter. Rebatching implies to sort the
|
||||||
|
// source IBioSequenceBatch.
|
||||||
|
func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBatch {
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
buffsize = sizes[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter := MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.Wait()
|
||||||
|
close(newIter.pointer.channel)
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
order := 0
|
||||||
|
iterator = iterator.SortBatches()
|
||||||
|
buffer := obiseq.MakeBioSequenceSlice()
|
||||||
|
|
||||||
|
for iterator.Next() {
|
||||||
|
seqs := iterator.Get()
|
||||||
|
for _, s := range seqs.slice {
|
||||||
|
buffer = append(buffer, s)
|
||||||
|
if len(buffer) == size {
|
||||||
|
newIter.Push(MakeBioSequenceBatch(order, buffer))
|
||||||
|
order++
|
||||||
|
buffer = obiseq.MakeBioSequenceSlice()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seqs.Recycle()
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(buffer) > 0 {
|
||||||
|
newIter.Push(MakeBioSequenceBatch(order, buffer))
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter.Done()
|
||||||
|
|
||||||
|
}()
|
||||||
|
|
||||||
|
return newIter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Recycle() {
|
||||||
|
|
||||||
|
log.Println("Start recycling of Bioseq objects")
|
||||||
|
recycled := 0
|
||||||
|
for iterator.Next() {
|
||||||
|
// iterator.Get()
|
||||||
|
batch := iterator.Get()
|
||||||
|
for _, seq := range batch.Slice() {
|
||||||
|
seq.Recycle()
|
||||||
|
recycled++
|
||||||
|
}
|
||||||
|
batch.Recycle()
|
||||||
|
}
|
||||||
|
log.Printf("End of the recycling of %d Bioseq objects", recycled)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) Count(recycle bool) (int, int, int) {
|
||||||
|
variants := 0
|
||||||
|
reads := 0
|
||||||
|
nucleotides := 0
|
||||||
|
|
||||||
|
log.Println("Start counting of Bioseq objects")
|
||||||
|
for iterator.Next() {
|
||||||
|
// iterator.Get()
|
||||||
|
batch := iterator.Get()
|
||||||
|
for _, seq := range batch.Slice() {
|
||||||
|
variants++
|
||||||
|
reads += seq.Count()
|
||||||
|
nucleotides += seq.Length()
|
||||||
|
|
||||||
|
if recycle {
|
||||||
|
seq.Recycle()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
batch.Recycle()
|
||||||
|
}
|
||||||
|
log.Printf("End of the counting of %d Bioseq objects", variants)
|
||||||
|
return variants, reads, nucleotides
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) PairWith(reverse IBioSequenceBatch,
|
||||||
|
sizes ...int) IPairedBioSequenceBatch {
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
batchsize := 5000
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
batchsize = sizes[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(sizes) > 1 {
|
||||||
|
buffsize = sizes[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator = iterator.Rebatch(batchsize)
|
||||||
|
reverse = reverse.Rebatch(batchsize)
|
||||||
|
|
||||||
|
newIter := MakeIPairedBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.Wait()
|
||||||
|
close(newIter.Channel())
|
||||||
|
log.Println("End of association of paired reads")
|
||||||
|
}()
|
||||||
|
|
||||||
|
log.Println("Start association of paired reads")
|
||||||
|
go func() {
|
||||||
|
for iterator.Next() {
|
||||||
|
if !reverse.Next() {
|
||||||
|
log.Panicln("Etrange reverse pas prêt")
|
||||||
|
}
|
||||||
|
newIter.Channel() <- MakePairedBioSequenceBatch(iterator.Get(),
|
||||||
|
reverse.Get())
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return newIter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) DivideOn(predicate obiseq.SequencePredicate,
|
||||||
|
size int, sizes ...int) (IBioSequenceBatch, IBioSequenceBatch) {
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
buffsize = sizes[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
trueIter := MakeIBioSequenceBatch(buffsize)
|
||||||
|
falseIter := MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
|
trueIter.Add(1)
|
||||||
|
falseIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
trueIter.WaitAndClose()
|
||||||
|
falseIter.WaitAndClose()
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
trueOrder := 0
|
||||||
|
falseOrder := 0
|
||||||
|
iterator = iterator.SortBatches()
|
||||||
|
|
||||||
|
trueSlice := obiseq.MakeBioSequenceSlice()
|
||||||
|
falseSlice := obiseq.MakeBioSequenceSlice()
|
||||||
|
|
||||||
|
for iterator.Next() {
|
||||||
|
seqs := iterator.Get()
|
||||||
|
for _, s := range seqs.slice {
|
||||||
|
if predicate(s) {
|
||||||
|
trueSlice = append(trueSlice, s)
|
||||||
|
} else {
|
||||||
|
falseSlice = append(falseSlice, s)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(trueSlice) == size {
|
||||||
|
trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
|
||||||
|
trueOrder++
|
||||||
|
trueSlice = obiseq.MakeBioSequenceSlice()
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(falseSlice) == size {
|
||||||
|
falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
|
||||||
|
falseOrder++
|
||||||
|
falseSlice = obiseq.MakeBioSequenceSlice()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seqs.Recycle()
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(trueSlice) > 0 {
|
||||||
|
trueIter.Push(MakeBioSequenceBatch(trueOrder, trueSlice))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(falseSlice) > 0 {
|
||||||
|
falseIter.Push(MakeBioSequenceBatch(falseOrder, falseSlice))
|
||||||
|
}
|
||||||
|
|
||||||
|
trueIter.Done()
|
||||||
|
falseIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return trueIter, falseIter
|
||||||
|
}
|
@ -1,14 +1,16 @@
|
|||||||
package obiseq
|
package obiiter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
type IDistribute struct {
|
type IDistribute struct {
|
||||||
outputs map[int]IBioSequenceBatch
|
outputs map[int]IBioSequenceBatch
|
||||||
news chan int
|
news chan int
|
||||||
classifier *BioSequenceClassifier
|
classifier *obiseq.BioSequenceClassifier
|
||||||
lock *sync.Mutex
|
lock *sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,16 +30,16 @@ func (dist *IDistribute) News() chan int {
|
|||||||
return dist.news
|
return dist.news
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dist *IDistribute) Classifier() *BioSequenceClassifier {
|
func (dist *IDistribute) Classifier() *obiseq.BioSequenceClassifier {
|
||||||
return dist.classifier
|
return dist.classifier
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes ...int) IDistribute {
|
func (iterator IBioSequenceBatch) Distribute(class *obiseq.BioSequenceClassifier, sizes ...int) IDistribute {
|
||||||
batchsize := 5000
|
batchsize := 5000
|
||||||
buffsize := 2
|
buffsize := 2
|
||||||
|
|
||||||
outputs := make(map[int]IBioSequenceBatch, 100)
|
outputs := make(map[int]IBioSequenceBatch, 100)
|
||||||
slices := make(map[int]*BioSequenceSlice, 100)
|
slices := make(map[int]*obiseq.BioSequenceSlice, 100)
|
||||||
orders := make(map[int]int, 100)
|
orders := make(map[int]int, 100)
|
||||||
news := make(chan int)
|
news := make(chan int)
|
||||||
|
|
||||||
@ -72,7 +74,7 @@ func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes
|
|||||||
slice, ok := slices[key]
|
slice, ok := slices[key]
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
s := MakeBioSequenceSlice()
|
s := obiseq.MakeBioSequenceSlice()
|
||||||
slice = &s
|
slice = &s
|
||||||
slices[key] = slice
|
slices[key] = slice
|
||||||
orders[key] = 0
|
orders[key] = 0
|
||||||
@ -89,7 +91,7 @@ func (iterator IBioSequenceBatch) Distribute(class *BioSequenceClassifier, sizes
|
|||||||
if len(*slice) == batchsize {
|
if len(*slice) == batchsize {
|
||||||
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
|
outputs[key].Push(MakeBioSequenceBatch(orders[key], *slice))
|
||||||
orders[key]++
|
orders[key]++
|
||||||
s := MakeBioSequenceSlice()
|
s := obiseq.MakeBioSequenceSlice()
|
||||||
slices[key] = &s
|
slices[key] = &s
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,14 +1,16 @@
|
|||||||
package obiseq
|
package obiiter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Private structure implementing an iterator over
|
// Private structure implementing an iterator over
|
||||||
// bioseq.BioSequence based on a channel.
|
// bioseq.BioSequence based on a channel.
|
||||||
type __ibiosequence__ struct {
|
type __ibiosequence__ struct {
|
||||||
channel chan *BioSequence
|
channel chan *obiseq.BioSequence
|
||||||
current *BioSequence
|
current *obiseq.BioSequence
|
||||||
pushBack bool
|
pushBack bool
|
||||||
all_done *sync.WaitGroup
|
all_done *sync.WaitGroup
|
||||||
buffer_size int
|
buffer_size int
|
||||||
@ -38,10 +40,10 @@ func (iterator IBioSequence) Wait() {
|
|||||||
iterator.pointer.all_done.Wait()
|
iterator.pointer.all_done.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequence) Channel() chan *BioSequence {
|
func (iterator IBioSequence) Channel() chan *obiseq.BioSequence {
|
||||||
return iterator.pointer.channel
|
return iterator.pointer.channel
|
||||||
}
|
}
|
||||||
func (iterator IBioSequence) PChannel() *chan *BioSequence {
|
func (iterator IBioSequence) PChannel() *chan *obiseq.BioSequence {
|
||||||
return &(iterator.pointer.channel)
|
return &(iterator.pointer.channel)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,7 +55,7 @@ func MakeIBioSequence(sizes ...int) IBioSequence {
|
|||||||
}
|
}
|
||||||
|
|
||||||
i := __ibiosequence__{
|
i := __ibiosequence__{
|
||||||
channel: make(chan *BioSequence, buffsize),
|
channel: make(chan *obiseq.BioSequence, buffsize),
|
||||||
current: nil,
|
current: nil,
|
||||||
pushBack: false,
|
pushBack: false,
|
||||||
buffer_size: buffsize,
|
buffer_size: buffsize,
|
||||||
@ -117,7 +119,7 @@ func (iterator IBioSequence) PushBack() {
|
|||||||
// currently pointed by the iterator. You have to use the
|
// currently pointed by the iterator. You have to use the
|
||||||
// 'Next' method to move to the next entry before calling
|
// 'Next' method to move to the next entry before calling
|
||||||
// 'Get' to retreive the following instance.
|
// 'Get' to retreive the following instance.
|
||||||
func (iterator IBioSequence) Get() *BioSequence {
|
func (iterator IBioSequence) Get() *obiseq.BioSequence {
|
||||||
return iterator.pointer.current
|
return iterator.pointer.current
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -161,7 +163,7 @@ func (iterator IBioSequence) IBioSequenceBatch(sizes ...int) IBioSequenceBatch {
|
|||||||
go func() {
|
go func() {
|
||||||
for j := 0; !iterator.Finished(); j++ {
|
for j := 0; !iterator.Finished(); j++ {
|
||||||
batch := BioSequenceBatch{
|
batch := BioSequenceBatch{
|
||||||
slice: MakeBioSequenceSlice(),
|
slice: obiseq.MakeBioSequenceSlice(),
|
||||||
order: j}
|
order: j}
|
||||||
for i := 0; i < batchsize && iterator.Next(); i++ {
|
for i := 0; i < batchsize && iterator.Next(); i++ {
|
||||||
seq := iterator.Get()
|
seq := iterator.Get()
|
||||||
@ -275,7 +277,7 @@ func (iterator IBioSequence) Tail(n int, sizes ...int) IBioSequence {
|
|||||||
}
|
}
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
newIter := MakeIBioSequence(buffsize)
|
||||||
buffseq := MakeBioSequenceSlice()
|
buffseq := obiseq.MakeBioSequenceSlice()
|
||||||
|
|
||||||
newIter.Add(1)
|
newIter.Add(1)
|
||||||
|
|
50
pkg/obiiter/merge.go
Normal file
50
pkg/obiiter/merge.go
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
package obiiter
|
||||||
|
|
||||||
|
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequenceBatch {
|
||||||
|
batchsize := 100
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
batchsize = sizes[0]
|
||||||
|
}
|
||||||
|
if len(sizes) > 1 {
|
||||||
|
buffsize = sizes[1]
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter := MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.WaitAndClose()
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for j := 0; !iterator.Finished(); j++ {
|
||||||
|
batch := BioSequenceBatch{
|
||||||
|
slice: obiseq.MakeBioSequenceSlice(),
|
||||||
|
order: j}
|
||||||
|
for i := 0; i < batchsize && iterator.Next(); i++ {
|
||||||
|
seqs := iterator.Get()
|
||||||
|
batch.slice = append(batch.slice, seqs.slice.Merge(na, statsOn))
|
||||||
|
}
|
||||||
|
if batch.Length() > 0 {
|
||||||
|
newIter.Push(batch)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
newIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return newIter
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func MergePipe(na string, statsOn []string, sizes ...int) Pipeable {
|
||||||
|
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
||||||
|
return iterator.IMergeSequenceBatch(na,statsOn,sizes...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
221
pkg/obiiter/pairedbatchiterator.go
Normal file
221
pkg/obiiter/pairedbatchiterator.go
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
package obiiter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
)
|
||||||
|
|
||||||
|
type PairedBioSequenceBatch struct {
|
||||||
|
forward obiseq.BioSequenceSlice
|
||||||
|
reverse obiseq.BioSequenceSlice
|
||||||
|
order int
|
||||||
|
}
|
||||||
|
|
||||||
|
// NilPairedBioSequenceBatch is the empty paired batch: both slices are nil
// and its order is -1.
var NilPairedBioSequenceBatch = PairedBioSequenceBatch{
	forward: nil,
	reverse: nil,
	order:   -1,
}
|
||||||
|
|
||||||
|
func MakePairedBioSequenceBatch(forward, reverse BioSequenceBatch) PairedBioSequenceBatch {
|
||||||
|
if forward.order != reverse.order {
|
||||||
|
log.Fatalf("Forward order : %d and reverse order : %d are not matching",
|
||||||
|
forward.order, reverse.order)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range reverse.slice {
|
||||||
|
reverse.slice[i].ReverseComplement(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
return PairedBioSequenceBatch{
|
||||||
|
forward: forward.slice,
|
||||||
|
reverse: reverse.slice,
|
||||||
|
order: forward.order,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch PairedBioSequenceBatch) Order() int {
|
||||||
|
return batch.order
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch PairedBioSequenceBatch) Reorder(newOrder int) PairedBioSequenceBatch {
|
||||||
|
batch.order = newOrder
|
||||||
|
return batch
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func (batch PairedBioSequenceBatch) Length() int {
|
||||||
|
return len(batch.forward)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch PairedBioSequenceBatch) Forward() obiseq.BioSequenceSlice {
|
||||||
|
return batch.forward
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch PairedBioSequenceBatch) Reverse() obiseq.BioSequenceSlice {
|
||||||
|
return batch.reverse
|
||||||
|
}
|
||||||
|
|
||||||
|
func (batch PairedBioSequenceBatch) IsNil() bool {
|
||||||
|
return batch.forward == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Structure implementing an iterator over bioseq.BioSequenceBatch
|
||||||
|
// based on a channel.
|
||||||
|
type __ipairedbiosequencebatch__ struct {
|
||||||
|
channel chan PairedBioSequenceBatch
|
||||||
|
current PairedBioSequenceBatch
|
||||||
|
pushBack bool
|
||||||
|
all_done *sync.WaitGroup
|
||||||
|
buffer_size int
|
||||||
|
finished bool
|
||||||
|
p_finished *bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type IPairedBioSequenceBatch struct {
|
||||||
|
pointer *__ipairedbiosequencebatch__
|
||||||
|
}
|
||||||
|
|
||||||
|
// NilIPairedBioSequenceBatch is the iterator value holding no state
// (nil pointer); IsNil returns true for it.
var NilIPairedBioSequenceBatch = IPairedBioSequenceBatch{
	pointer: nil,
}
|
||||||
|
|
||||||
|
func MakeIPairedBioSequenceBatch(sizes ...int) IPairedBioSequenceBatch {
|
||||||
|
buffsize := 1
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
buffsize = sizes[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
i := __ipairedbiosequencebatch__{
|
||||||
|
channel: make(chan PairedBioSequenceBatch, buffsize),
|
||||||
|
current: NilPairedBioSequenceBatch,
|
||||||
|
pushBack: false,
|
||||||
|
buffer_size: buffsize,
|
||||||
|
finished: false,
|
||||||
|
p_finished: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
i.p_finished = &i.finished
|
||||||
|
waiting := sync.WaitGroup{}
|
||||||
|
i.all_done = &waiting
|
||||||
|
ii := IPairedBioSequenceBatch{&i}
|
||||||
|
return ii
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) Add(n int) {
|
||||||
|
iterator.pointer.all_done.Add(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) Done() {
|
||||||
|
iterator.pointer.all_done.Done()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) Wait() {
|
||||||
|
iterator.pointer.all_done.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) Channel() chan PairedBioSequenceBatch {
|
||||||
|
return iterator.pointer.channel
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) IsNil() bool {
|
||||||
|
return iterator.pointer == nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) BufferSize() int {
|
||||||
|
return iterator.pointer.buffer_size
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) Split() IPairedBioSequenceBatch {
|
||||||
|
i := __ipairedbiosequencebatch__{
|
||||||
|
channel: iterator.pointer.channel,
|
||||||
|
current: NilPairedBioSequenceBatch,
|
||||||
|
pushBack: false,
|
||||||
|
all_done: iterator.pointer.all_done,
|
||||||
|
buffer_size: iterator.pointer.buffer_size,
|
||||||
|
finished: false,
|
||||||
|
p_finished: iterator.pointer.p_finished}
|
||||||
|
newIter := IPairedBioSequenceBatch{&i}
|
||||||
|
return newIter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) Next() bool {
|
||||||
|
if *(iterator.pointer.p_finished) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if iterator.pointer.pushBack {
|
||||||
|
iterator.pointer.pushBack = false
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
next, ok := (<-iterator.pointer.channel)
|
||||||
|
|
||||||
|
if ok {
|
||||||
|
iterator.pointer.current = next
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator.pointer.current = NilPairedBioSequenceBatch
|
||||||
|
*iterator.pointer.p_finished = true
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) PushBack() {
|
||||||
|
if !iterator.pointer.current.IsNil() {
|
||||||
|
iterator.pointer.pushBack = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The 'Get' method returns the instance of BioSequenceBatch
|
||||||
|
// currently pointed by the iterator. You have to use the
|
||||||
|
// 'Next' method to move to the next entry before calling
|
||||||
|
// 'Get' to retreive the following instance.
|
||||||
|
func (iterator IPairedBioSequenceBatch) Get() PairedBioSequenceBatch {
|
||||||
|
return iterator.pointer.current
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finished returns 'true' value if no more data is available
|
||||||
|
// from the iterator.
|
||||||
|
func (iterator IPairedBioSequenceBatch) Finished() bool {
|
||||||
|
return *iterator.pointer.p_finished
|
||||||
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) SortBatches(sizes ...int) IPairedBioSequenceBatch {
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
buffsize = sizes[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter := MakeIPairedBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.Wait()
|
||||||
|
close(newIter.pointer.channel)
|
||||||
|
}()
|
||||||
|
|
||||||
|
next_to_send := 0
|
||||||
|
received := make(map[int]PairedBioSequenceBatch)
|
||||||
|
go func() {
|
||||||
|
for iterator.Next() {
|
||||||
|
batch := iterator.Get()
|
||||||
|
if batch.order == next_to_send {
|
||||||
|
newIter.pointer.channel <- batch
|
||||||
|
next_to_send++
|
||||||
|
batch, ok := received[next_to_send]
|
||||||
|
for ok {
|
||||||
|
newIter.pointer.channel <- batch
|
||||||
|
delete(received, next_to_send)
|
||||||
|
next_to_send++
|
||||||
|
batch, ok = received[next_to_send]
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
received[batch.order] = batch
|
||||||
|
}
|
||||||
|
}
|
||||||
|
newIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return newIter
|
||||||
|
|
||||||
|
}
|
46
pkg/obiiter/pipe.go
Normal file
46
pkg/obiiter/pipe.go
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
package obiiter
|
||||||
|
|
||||||
|
|
||||||
|
// Pipeable is a processing step: a function taking an iterator over
// sequence batches and returning an iterator over sequence batches.
type Pipeable func(input IBioSequenceBatch) IBioSequenceBatch
|
||||||
|
|
||||||
|
func Pipeline(start Pipeable,parts ...Pipeable) Pipeable {
|
||||||
|
p := func (input IBioSequenceBatch) IBioSequenceBatch {
|
||||||
|
data := start(input)
|
||||||
|
for _,part := range parts {
|
||||||
|
data = part(data)
|
||||||
|
}
|
||||||
|
return data
|
||||||
|
}
|
||||||
|
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
func (input IBioSequenceBatch) Pipe(start Pipeable, parts ...Pipeable) IBioSequenceBatch {
|
||||||
|
p := Pipeline(start,parts...)
|
||||||
|
return p(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Teeable is a processing step with two outputs: a function taking an
// iterator over sequence batches and returning two such iterators.
type Teeable func(input IBioSequenceBatch) (IBioSequenceBatch, IBioSequenceBatch)
|
||||||
|
|
||||||
|
func (input IBioSequenceBatch) CopyTee() (IBioSequenceBatch,IBioSequenceBatch) {
|
||||||
|
first := MakeIBioSequenceBatch()
|
||||||
|
second:= MakeIBioSequenceBatch()
|
||||||
|
|
||||||
|
first.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
first.WaitAndClose()
|
||||||
|
second.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for input.Next() {
|
||||||
|
b:=input.Get()
|
||||||
|
first.Push(b)
|
||||||
|
second.Push(b)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return first,second
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
package obiseq
|
package obiiter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"os"
|
"os"
|
||||||
@ -37,3 +37,12 @@ func (iterator IBioSequenceBatch) Speed() IBioSequenceBatch {
|
|||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func SpeedPipe() Pipeable {
|
||||||
|
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
||||||
|
return iterator.Speed()
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
@ -1,49 +1,25 @@
|
|||||||
package obiseq
|
package obiiter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
"log"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
type SeqAnnotator func(*BioSequence)
|
type SeqAnnotator func(*obiseq.BioSequence)
|
||||||
|
|
||||||
type SeqWorker func(*BioSequence) *BioSequence
|
type SeqWorker func(*obiseq.BioSequence) *obiseq.BioSequence
|
||||||
type SeqSliceWorker func(BioSequenceSlice) BioSequenceSlice
|
type SeqSliceWorker func(obiseq.BioSequenceSlice) obiseq.BioSequenceSlice
|
||||||
|
|
||||||
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
func AnnotatorToSeqWorker(function SeqAnnotator) SeqWorker {
|
||||||
f := func(seq *BioSequence) *BioSequence {
|
f := func(seq *obiseq.BioSequence) *obiseq.BioSequence {
|
||||||
function(seq)
|
function(seq)
|
||||||
return seq
|
return seq
|
||||||
}
|
}
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
|
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
|
||||||
buffsize = sizes[0]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIBioSequence(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
newIter.Wait()
|
|
||||||
close(newIter.pointer.channel)
|
|
||||||
}()
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
for iterator.Next() {
|
|
||||||
seq := iterator.Get()
|
|
||||||
seq = worker(seq)
|
|
||||||
newIter.pointer.channel <- seq
|
|
||||||
}
|
|
||||||
newIter.Done()
|
|
||||||
}()
|
|
||||||
|
|
||||||
return newIter
|
|
||||||
}
|
|
||||||
|
|
||||||
func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch {
|
func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequenceBatch {
|
||||||
nworkers := 4
|
nworkers := 4
|
||||||
@ -125,3 +101,48 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
|
|||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func (iterator IBioSequence) MakeIWorker(worker SeqWorker, sizes ...int) IBioSequence {
|
||||||
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
|
if len(sizes) > 0 {
|
||||||
|
buffsize = sizes[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
newIter := MakeIBioSequence(buffsize)
|
||||||
|
|
||||||
|
newIter.Add(1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
newIter.Wait()
|
||||||
|
close(newIter.pointer.channel)
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for iterator.Next() {
|
||||||
|
seq := iterator.Get()
|
||||||
|
seq = worker(seq)
|
||||||
|
newIter.pointer.channel <- seq
|
||||||
|
}
|
||||||
|
newIter.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
return newIter
|
||||||
|
}
|
||||||
|
|
||||||
|
func WorkerPipe(worker SeqWorker, sizes ...int) Pipeable {
|
||||||
|
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
||||||
|
return iterator.MakeIWorker(worker,sizes...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
|
||||||
|
func SliceWorkerPipe(worker SeqSliceWorker, sizes ...int) Pipeable {
|
||||||
|
f := func(iterator IBioSequenceBatch) IBioSequenceBatch {
|
||||||
|
return iterator.MakeISliceWorker(worker,sizes...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
@ -1,6 +1,9 @@
|
|||||||
package obingslibrary
|
package obingslibrary
|
||||||
|
|
||||||
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
import (
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
|
)
|
||||||
|
|
||||||
type _Options struct {
|
type _Options struct {
|
||||||
discardErrors bool
|
discardErrors bool
|
||||||
@ -167,7 +170,7 @@ func ExtractBarcodeSlice(ngslibrary NGSLibrary,
|
|||||||
}
|
}
|
||||||
|
|
||||||
func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
|
func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
|
||||||
options ...WithOption) obiseq.SeqSliceWorker {
|
options ...WithOption) obiiter.SeqSliceWorker {
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
@ -179,3 +182,5 @@ func ExtractBarcodeSliceWorker(ngslibrary NGSLibrary,
|
|||||||
|
|
||||||
return worker
|
return worker
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -165,41 +165,3 @@ func (sequences BioSequenceSlice) Merge(na string, statsOn []string) *BioSequenc
|
|||||||
return seq
|
return seq
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequenceBatch) IMergeSequenceBatch(na string, statsOn []string, sizes ...int) IBioSequenceBatch {
|
|
||||||
batchsize := 100
|
|
||||||
buffsize := iterator.BufferSize()
|
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
|
||||||
batchsize = sizes[0]
|
|
||||||
}
|
|
||||||
if len(sizes) > 1 {
|
|
||||||
buffsize = sizes[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
newIter := MakeIBioSequenceBatch(buffsize)
|
|
||||||
|
|
||||||
newIter.Add(1)
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
newIter.WaitAndClose()
|
|
||||||
}()
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
for j := 0; !iterator.Finished(); j++ {
|
|
||||||
batch := BioSequenceBatch{
|
|
||||||
slice: MakeBioSequenceSlice(),
|
|
||||||
order: j}
|
|
||||||
for i := 0; i < batchsize && iterator.Next(); i++ {
|
|
||||||
seqs := iterator.Get()
|
|
||||||
batch.slice = append(batch.slice, seqs.slice.Merge(na, statsOn))
|
|
||||||
}
|
|
||||||
if batch.Length() > 0 {
|
|
||||||
newIter.Push(batch)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
newIter.Done()
|
|
||||||
}()
|
|
||||||
|
|
||||||
return newIter
|
|
||||||
}
|
|
||||||
|
@ -1,5 +1,12 @@
|
|||||||
package obiseq
|
package obiseq
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log"
|
||||||
|
|
||||||
|
"github.com/PaesslerAG/gval"
|
||||||
|
)
|
||||||
|
|
||||||
type SequencePredicate func(*BioSequence) bool
|
type SequencePredicate func(*BioSequence) bool
|
||||||
|
|
||||||
func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate {
|
func (predicate1 SequencePredicate) And(predicate2 SequencePredicate) SequencePredicate {
|
||||||
@ -73,3 +80,33 @@ func IsShorterOrEqualTo(length int) SequencePredicate {
|
|||||||
|
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ExrpessionPredicat(expression string) SequencePredicate {
|
||||||
|
|
||||||
|
exp, err := gval.Full().NewEvaluable(expression)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error in the expression : %s", expression)
|
||||||
|
}
|
||||||
|
|
||||||
|
f := func(sequence *BioSequence) bool {
|
||||||
|
value, err := exp.EvalBool(context.Background(),
|
||||||
|
map[string]interface{}{
|
||||||
|
"annot": sequence.Annotations(),
|
||||||
|
"count": sequence.Count(),
|
||||||
|
"length": sequence.Length(),
|
||||||
|
"sequence": sequence,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Expression '%s' cannot be evaluated on sequence %s",
|
||||||
|
expression,
|
||||||
|
sequence.Id())
|
||||||
|
}
|
||||||
|
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
return f
|
||||||
|
}
|
||||||
|
2
pkg/obiseq/worker.go
Normal file
2
pkg/obiseq/worker.go
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
package obiseq
|
||||||
|
|
@ -7,8 +7,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||||
@ -66,9 +66,9 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
|||||||
return list_of_files, nil
|
return list_of_files, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error) {
|
func ReadBioSequencesBatch(filenames ...string) (obiiter.IBioSequenceBatch, error) {
|
||||||
var iterator obiseq.IBioSequenceBatch
|
var iterator obiiter.IBioSequenceBatch
|
||||||
var reader func(string, ...obiformats.WithOption) (obiseq.IBioSequenceBatch, error)
|
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequenceBatch, error)
|
||||||
|
|
||||||
opts := make([]obiformats.WithOption, 0, 10)
|
opts := make([]obiformats.WithOption, 0, 10)
|
||||||
|
|
||||||
@ -106,7 +106,7 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
|
|||||||
|
|
||||||
list_of_files, err := _ExpandListOfFiles(false, filenames...)
|
list_of_files, err := _ExpandListOfFiles(false, filenames...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
switch InputFormat() {
|
switch InputFormat() {
|
||||||
@ -121,16 +121,16 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
|
|||||||
iterator, err = reader(list_of_files[0], opts...)
|
iterator, err = reader(list_of_files[0], opts...)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
list_of_files = list_of_files[1:]
|
list_of_files = list_of_files[1:]
|
||||||
others := make([]obiseq.IBioSequenceBatch, 0, len(list_of_files))
|
others := make([]obiiter.IBioSequenceBatch, 0, len(list_of_files))
|
||||||
|
|
||||||
for _, fn := range list_of_files {
|
for _, fn := range list_of_files {
|
||||||
r, err := reader(fn, opts...)
|
r, err := reader(fn, opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
others = append(others, r)
|
others = append(others, r)
|
||||||
}
|
}
|
||||||
@ -152,7 +152,7 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
|
|||||||
return iterator, nil
|
return iterator, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadBioSequences(filenames ...string) (obiseq.IBioSequence, error) {
|
func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
|
||||||
ib, err := ReadBioSequencesBatch(filenames...)
|
ib, err := ReadBioSequencesBatch(filenames...)
|
||||||
return ib.SortBatches().IBioSequence(), err
|
return ib.SortBatches().IBioSequence(), err
|
||||||
|
|
||||||
|
@ -4,11 +4,11 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error {
|
func WriteBioSequences(iterator obiiter.IBioSequence, filenames ...string) error {
|
||||||
|
|
||||||
opts := make([]obiformats.WithOption, 0, 10)
|
opts := make([]obiformats.WithOption, 0, 10)
|
||||||
|
|
||||||
@ -65,10 +65,10 @@ func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteBioSequencesBatch(iterator obiseq.IBioSequenceBatch,
|
func WriteBioSequencesBatch(iterator obiiter.IBioSequenceBatch,
|
||||||
terminalAction bool, filenames ...string) (obiseq.IBioSequenceBatch, error) {
|
terminalAction bool, filenames ...string) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
var newIter obiseq.IBioSequenceBatch
|
var newIter obiiter.IBioSequenceBatch
|
||||||
|
|
||||||
opts := make([]obiformats.WithOption, 0, 10)
|
opts := make([]obiformats.WithOption, 0, 10)
|
||||||
|
|
||||||
@ -119,12 +119,12 @@ func WriteBioSequencesBatch(iterator obiseq.IBioSequenceBatch,
|
|||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Write file error: %v", err)
|
log.Fatalf("Write file error: %v", err)
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiiter.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if terminalAction {
|
if terminalAction {
|
||||||
newIter.Recycle()
|
newIter.Recycle()
|
||||||
return obiseq.NilIBioSequenceBatch, nil
|
return obiiter.NilIBioSequenceBatch, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return newIter, nil
|
return newIter, nil
|
||||||
|
@ -4,12 +4,12 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func DistributeSequence(sequences obiseq.IBioSequenceBatch) {
|
func DistributeSequence(sequences obiiter.IBioSequenceBatch) {
|
||||||
|
|
||||||
opts := make([]obiformats.WithOption, 0, 10)
|
opts := make([]obiformats.WithOption, 0, 10)
|
||||||
|
|
||||||
|
@ -3,13 +3,14 @@ package obimultiplex
|
|||||||
import (
|
import (
|
||||||
"log"
|
"log"
|
||||||
|
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obingslibrary"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
)
|
)
|
||||||
|
|
||||||
func IExtractBarcodeBatches(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) {
|
func IExtractBarcodeBatches(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
opts := make([]obingslibrary.WithOption, 0, 10)
|
opts := make([]obingslibrary.WithOption, 0, 10)
|
||||||
|
|
||||||
@ -36,7 +37,7 @@ func IExtractBarcodeBatches(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSeque
|
|||||||
newIter = newIter.Rebatch(obioptions.CLIBatchSize())
|
newIter = newIter.Rebatch(obioptions.CLIBatchSize())
|
||||||
}
|
}
|
||||||
|
|
||||||
var unidentified obiseq.IBioSequenceBatch
|
var unidentified obiiter.IBioSequenceBatch
|
||||||
if CLIUnidentifiedFileName() != "" {
|
if CLIUnidentifiedFileName() != "" {
|
||||||
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
|
log.Printf("Unassigned sequences saved in file: %s\n", CLIUnidentifiedFileName())
|
||||||
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),
|
unidentified, newIter = newIter.DivideOn(obiseq.HasAttribute("demultiplex_error"),
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
package obipairing
|
package obipairing
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
)
|
)
|
||||||
@ -47,15 +47,15 @@ func OptionSet(options *getoptions.GetOpt) {
|
|||||||
PairingOptionSet(options)
|
PairingOptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
func IBatchPairedSequence() (obiseq.IPairedBioSequenceBatch, error) {
|
func IBatchPairedSequence() (obiiter.IPairedBioSequenceBatch, error) {
|
||||||
forward, err := obiconvert.ReadBioSequencesBatch(_ForwardFiles...)
|
forward, err := obiconvert.ReadBioSequencesBatch(_ForwardFiles...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiseq.NilIPairedBioSequenceBatch, err
|
return obiiter.NilIPairedBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
reverse, err := obiconvert.ReadBioSequencesBatch(_ReverseFiles...)
|
reverse, err := obiconvert.ReadBioSequencesBatch(_ReverseFiles...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiseq.NilIPairedBioSequenceBatch, err
|
return obiiter.NilIPairedBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
paired := forward.PairWith(reverse)
|
paired := forward.PairWith(reverse)
|
||||||
|
@ -7,6 +7,7 @@ import (
|
|||||||
"runtime"
|
"runtime"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
)
|
)
|
||||||
@ -202,8 +203,10 @@ func AssemblePESequences(seqA, seqB *obiseq.BioSequence,
|
|||||||
// The function returns an iterator over batches of obiseq.BioSequence objects.
|
// The function returns an iterator over batches of obiseq.BioSequence objects.
|
||||||
// Each pair of processed sequences produces one sequence in the result iterator.
|
// Each pair of processed sequences produces one sequence in the result iterator.
|
||||||
//
|
//
|
||||||
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
func IAssemblePESequencesBatch(iterator obiiter.IPairedBioSequenceBatch,
|
||||||
gap float64, delta, minOverlap int, minIdentity float64, withStats bool, sizes ...int) obiseq.IBioSequenceBatch {
|
gap float64, delta, minOverlap int,
|
||||||
|
minIdentity float64,
|
||||||
|
withStats bool, sizes ...int) obiiter.IBioSequenceBatch {
|
||||||
|
|
||||||
nworkers := runtime.NumCPU() * 3 / 2
|
nworkers := runtime.NumCPU() * 3 / 2
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
@ -216,7 +219,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
|||||||
buffsize = sizes[1]
|
buffsize = sizes[1]
|
||||||
}
|
}
|
||||||
|
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(buffsize)
|
newIter := obiiter.MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
newIter.Add(nworkers)
|
newIter.Add(nworkers)
|
||||||
|
|
||||||
@ -233,7 +236,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
|||||||
progressbar.OptionShowIts(),
|
progressbar.OptionShowIts(),
|
||||||
progressbar.OptionSetDescription("[Sequence Pairing]"))
|
progressbar.OptionSetDescription("[Sequence Pairing]"))
|
||||||
|
|
||||||
f := func(iterator obiseq.IPairedBioSequenceBatch, wid int) {
|
f := func(iterator obiiter.IPairedBioSequenceBatch, wid int) {
|
||||||
arena := obialign.MakePEAlignArena(150, 150)
|
arena := obialign.MakePEAlignArena(150, 150)
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
@ -249,7 +252,7 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bar.Add(batch.Length() - processed)
|
bar.Add(batch.Length() - processed)
|
||||||
newIter.Push(obiseq.MakeBioSequenceBatch(
|
newIter.Push(obiiter.MakeBioSequenceBatch(
|
||||||
batch.Order(),
|
batch.Order(),
|
||||||
cons,
|
cons,
|
||||||
))
|
))
|
||||||
|
@ -2,13 +2,13 @@ package obipcr
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiapat"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
)
|
)
|
||||||
|
|
||||||
// PCR iterates over sequences provided by an obiseq.IBioSequenceBatch
|
// PCR iterates over sequences provided by an obiseq.IBioSequenceBatch
|
||||||
// and returns another obiseq.IBioSequenceBatch distributing
|
// and returns another obiseq.IBioSequenceBatch distributing
|
||||||
// obiseq.BioSequenceBatch containing the selected amplicon sequences.
|
// obiseq.BioSequenceBatch containing the selected amplicon sequences.
|
||||||
func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) {
|
func PCR(iterator obiiter.IBioSequenceBatch) (obiiter.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
opts := make([]obiapat.WithOption, 0, 10)
|
opts := make([]obiapat.WithOption, 0, 10)
|
||||||
|
|
||||||
|
@ -4,11 +4,11 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obichunk"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func Unique(sequences obiseq.IBioSequenceBatch) obiseq.IBioSequenceBatch {
|
func Unique(sequences obiiter.IBioSequenceBatch) obiiter.IBioSequenceBatch {
|
||||||
|
|
||||||
options := make([]obichunk.WithOption, 0, 30)
|
options := make([]obichunk.WithOption, 0, 30)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user