mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Work on iterators and recycling of biosequences
This commit is contained in:
@ -13,6 +13,6 @@ func main() {
|
|||||||
|
|
||||||
_, args, _ := optionParser(os.Args)
|
_, args, _ := optionParser(os.Args)
|
||||||
|
|
||||||
fs, _ := obiconvert.ReadBioSequences(args...)
|
fs, _ := obiconvert.ReadBioSequencesBatch(args...)
|
||||||
obiconvert.WriteBioSequences(fs)
|
obiconvert.WriteBioSequencesBatch(fs,true)
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"runtime/trace"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obicount"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obicount"
|
||||||
@ -21,12 +20,12 @@ func main() {
|
|||||||
// pprof.StartCPUProfile(f)
|
// pprof.StartCPUProfile(f)
|
||||||
// defer pprof.StopCPUProfile()
|
// defer pprof.StopCPUProfile()
|
||||||
|
|
||||||
ftrace, err := os.Create("cpu.trace")
|
// ftrace, err := os.Create("cpu.trace")
|
||||||
if err != nil {
|
// if err != nil {
|
||||||
log.Fatal(err)
|
// log.Fatal(err)
|
||||||
}
|
// }
|
||||||
trace.Start(ftrace)
|
// trace.Start(ftrace)
|
||||||
defer trace.Stop()
|
// defer trace.Stop()
|
||||||
|
|
||||||
optionParser := obioptions.GenerateOptionParser(
|
optionParser := obioptions.GenerateOptionParser(
|
||||||
obiconvert.InputOptionSet,
|
obiconvert.InputOptionSet,
|
||||||
@ -47,6 +46,7 @@ func main() {
|
|||||||
nread += s.Count()
|
nread += s.Count()
|
||||||
nvariant++
|
nvariant++
|
||||||
nsymbol += s.Length()
|
nsymbol += s.Length()
|
||||||
|
(&s).Recycle()
|
||||||
}
|
}
|
||||||
|
|
||||||
if obicount.IsPrintingVariantCount() {
|
if obicount.IsPrintingVariantCount() {
|
||||||
|
@ -1,24 +1,22 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
|
||||||
"os"
|
"os"
|
||||||
"runtime/pprof"
|
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obipairing"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obipairing"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|
||||||
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
// go tool pprof -http=":8000" ./obipairing ./cpu.pprof
|
||||||
f, err := os.Create("cpu.pprof")
|
// f, err := os.Create("cpu.pprof")
|
||||||
if err != nil {
|
// if err != nil {
|
||||||
log.Fatal(err)
|
// log.Fatal(err)
|
||||||
}
|
// }
|
||||||
pprof.StartCPUProfile(f)
|
// pprof.StartCPUProfile(f)
|
||||||
defer pprof.StopCPUProfile()
|
// defer pprof.StopCPUProfile()
|
||||||
|
|
||||||
// go tool trace cpu.trace
|
// go tool trace cpu.trace
|
||||||
// ftrace, err := os.Create("cpu.trace")
|
// ftrace, err := os.Create("cpu.trace")
|
||||||
@ -33,6 +31,5 @@ func main() {
|
|||||||
optionParser(os.Args)
|
optionParser(os.Args)
|
||||||
pairs, _ := obipairing.IBatchPairedSequence()
|
pairs, _ := obipairing.IBatchPairedSequence()
|
||||||
paired := obipairing.IAssemblePESequencesBatch(pairs, 2, 50, 20, true)
|
paired := obipairing.IAssemblePESequencesBatch(pairs, 2, 50, 20, true)
|
||||||
written, _ := obiformats.WriteFastqBatchToStdout(paired)
|
obiconvert.WriteBioSequencesBatch(paired, true)
|
||||||
written.Destroy()
|
|
||||||
}
|
}
|
||||||
|
@ -30,5 +30,5 @@ func main() {
|
|||||||
|
|
||||||
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
|
sequences, _ := obiconvert.ReadBioSequencesBatch(args...)
|
||||||
amplicons, _ := obipcr.PCR(sequences)
|
amplicons, _ := obipcr.PCR(sequences)
|
||||||
obiconvert.WriteBioSequences(amplicons)
|
obiconvert.WriteBioSequencesBatch(amplicons,true)
|
||||||
}
|
}
|
||||||
|
@ -59,4 +59,9 @@ func main() {
|
|||||||
sA.ReverseComplement(true)
|
sA.ReverseComplement(true)
|
||||||
fmt.Println(string(sA.Sequence()))
|
fmt.Println(string(sA.Sequence()))
|
||||||
fmt.Println(string(sA.Id()))
|
fmt.Println(string(sA.Id()))
|
||||||
|
|
||||||
|
sA.Reset()
|
||||||
|
fmt.Println(sA.Length())
|
||||||
|
fmt.Println(sA.String())
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -282,6 +282,11 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
|
|||||||
_InitDNAScoreMatrix()
|
_InitDNAScoreMatrix()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// log.Println("==============")
|
||||||
|
// log.Println(seqA.String())
|
||||||
|
// log.Println(seqB.String())
|
||||||
|
// log.Println("--------------")
|
||||||
|
|
||||||
index := obikmer.Index4mer(seqA,
|
index := obikmer.Index4mer(seqA,
|
||||||
&arena.pointer.fastIndex,
|
&arena.pointer.fastIndex,
|
||||||
&arena.pointer.fastBuffer)
|
&arena.pointer.fastBuffer)
|
||||||
@ -294,6 +299,10 @@ func PEAlign(seqA, seqB obiseq.BioSequence,
|
|||||||
over = seqB.Length() + shift
|
over = seqB.Length() + shift
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// log.Println(seqA.String())
|
||||||
|
// log.Println(seqB.String())
|
||||||
|
// log.Printf("Shift : %d Score : %d Over : %d La : %d:%d Lb: %d:%d\n", shift, fastScore, over, seqA.Length(), len(seqA.Qualities()), seqB.Length(), len(seqB.Qualities()))
|
||||||
|
|
||||||
if fastScore+3 < over {
|
if fastScore+3 < over {
|
||||||
if shift > 0 {
|
if shift > 0 {
|
||||||
startA = shift - delta
|
startA = shift - delta
|
||||||
|
@ -241,7 +241,7 @@ func _Pcr(seq ApatSequence, sequence obiseq.BioSequence,
|
|||||||
|
|
||||||
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
match, _ := sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||||
annot["forward_match"] = match.String()
|
annot["forward_match"] = match.String()
|
||||||
match.Destroy()
|
(&match).Recycle()
|
||||||
|
|
||||||
annot["forward_error"] = erri
|
annot["forward_error"] = erri
|
||||||
|
|
||||||
@ -249,7 +249,7 @@ func _Pcr(seq ApatSequence, sequence obiseq.BioSequence,
|
|||||||
match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
match, _ = sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||||
match = match.ReverseComplement(true)
|
match = match.ReverseComplement(true)
|
||||||
annot["reverse_match"] = match.String()
|
annot["reverse_match"] = match.String()
|
||||||
match.Destroy()
|
(&match).Recycle()
|
||||||
|
|
||||||
annot["reverse_error"] = errj
|
annot["reverse_error"] = errj
|
||||||
results = append(results, amplicon)
|
results = append(results, amplicon)
|
||||||
@ -315,14 +315,14 @@ func _Pcr(seq ApatSequence, sequence obiseq.BioSequence,
|
|||||||
match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
match, _ := sequence.Subsequence(rm[0], rm[1], opt.pointer.circular)
|
||||||
match.ReverseComplement(true)
|
match.ReverseComplement(true)
|
||||||
annot["forward_match"] = match.String()
|
annot["forward_match"] = match.String()
|
||||||
match.Destroy()
|
(&match).Recycle()
|
||||||
|
|
||||||
annot["forward_error"] = errj
|
annot["forward_error"] = errj
|
||||||
|
|
||||||
annot["reverse_primer"] = reverse.String()
|
annot["reverse_primer"] = reverse.String()
|
||||||
match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
match, _ = sequence.Subsequence(fm[0], fm[1], opt.pointer.circular)
|
||||||
annot["reverse_match"] = match.String()
|
annot["reverse_match"] = match.String()
|
||||||
match.Destroy()
|
(&match).Recycle()
|
||||||
|
|
||||||
annot["reverse_error"] = erri
|
annot["reverse_error"] = erri
|
||||||
results = append(results, amplicon)
|
results = append(results, amplicon)
|
||||||
|
@ -7,7 +7,6 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
@ -82,26 +81,30 @@ func WriteFastaToStdout(iterator obiseq.IBioSequence, options ...WithOption) err
|
|||||||
return WriteFasta(iterator, os.Stdout, options...)
|
return WriteFasta(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) error {
|
func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||||
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(buffsize)
|
newIter := obiseq.MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
nwriters := opt.ParallelWorkers()
|
||||||
nwriters := 4
|
|
||||||
|
|
||||||
chunkchan := make(chan FileChunck)
|
chunkchan := make(chan FileChunck)
|
||||||
chunkwait := sync.WaitGroup{}
|
|
||||||
|
|
||||||
header_format := opt.FormatFastSeqHeader()
|
header_format := opt.FormatFastSeqHeader()
|
||||||
|
|
||||||
chunkwait.Add(nwriters)
|
newIter.Add(nwriters)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
chunkwait.Wait()
|
newIter.Wait()
|
||||||
for len(chunkchan) > 0 {
|
for len(chunkchan) > 0 {
|
||||||
time.Sleep(time.Millisecond)
|
time.Sleep(time.Millisecond)
|
||||||
}
|
}
|
||||||
close(chunkchan)
|
close(chunkchan)
|
||||||
|
for len(newIter.Channel()) > 0 {
|
||||||
|
time.Sleep(time.Millisecond)
|
||||||
|
}
|
||||||
|
close(newIter.Channel())
|
||||||
}()
|
}()
|
||||||
|
|
||||||
ff := func(iterator obiseq.IBioSequenceBatch) {
|
ff := func(iterator obiseq.IBioSequenceBatch) {
|
||||||
@ -116,9 +119,11 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
|||||||
newIter.Done()
|
newIter.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := 0; i < nwriters; i++ {
|
log.Println("Start of the fasta file writing")
|
||||||
|
for i := 0; i < nwriters-1; i++ {
|
||||||
go ff(iterator.Split())
|
go ff(iterator.Split())
|
||||||
}
|
}
|
||||||
|
go ff(iterator)
|
||||||
|
|
||||||
next_to_send := 0
|
next_to_send := 0
|
||||||
received := make(map[int]FileChunck, 100)
|
received := make(map[int]FileChunck, 100)
|
||||||
@ -142,22 +147,22 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
return nil
|
return newIter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastaBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) error {
|
func WriteFastaBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||||
return WriteFastaBatch(iterator, os.Stdout, options...)
|
return WriteFastaBatch(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastaBatchToFile(iterator obiseq.IBioSequenceBatch,
|
func WriteFastaBatchToFile(iterator obiseq.IBioSequenceBatch,
|
||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) error {
|
options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
file, err := os.Create(filename)
|
file, err := os.Create(filename)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
return err
|
return obiseq.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return WriteFastaBatch(iterator, file, options...)
|
return WriteFastaBatch(iterator, file, options...)
|
||||||
|
@ -82,11 +82,12 @@ type FileChunck struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
|
||||||
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := obiseq.MakeIBioSequenceBatch(buffsize)
|
newIter := obiseq.MakeIBioSequenceBatch(buffsize)
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
nwriters := opt.ParallelWorkers()
|
||||||
nwriters := 4
|
|
||||||
|
|
||||||
chunkchan := make(chan FileChunck)
|
chunkchan := make(chan FileChunck)
|
||||||
|
|
||||||
@ -110,19 +111,21 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
|
|||||||
ff := func(iterator obiseq.IBioSequenceBatch) {
|
ff := func(iterator obiseq.IBioSequenceBatch) {
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
chunkchan <- FileChunck{
|
chunk := FileChunck{
|
||||||
FormatFastqBatch(batch, quality, header_format),
|
FormatFastqBatch(batch, quality, header_format),
|
||||||
batch.Order(),
|
batch.Order(),
|
||||||
}
|
}
|
||||||
|
chunkchan <- chunk
|
||||||
newIter.Channel() <- batch
|
newIter.Channel() <- batch
|
||||||
}
|
}
|
||||||
newIter.Done()
|
newIter.Done()
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("Start of the fastq file reading")
|
log.Println("Start of the fastq file writing")
|
||||||
for i := 0; i < nwriters; i++ {
|
for i := 0; i < nwriters-1; i++ {
|
||||||
go ff(iterator.Split())
|
go ff(iterator.Split())
|
||||||
}
|
}
|
||||||
|
go ff(iterator)
|
||||||
|
|
||||||
next_to_send := 0
|
next_to_send := 0
|
||||||
received := make(map[int]FileChunck, 100)
|
received := make(map[int]FileChunck, 100)
|
||||||
|
@ -52,28 +52,45 @@ func WriteSequencesToStdout(iterator obiseq.IBioSequence, options ...WithOption)
|
|||||||
return WriteSequences(iterator, os.Stdout, options...)
|
return WriteSequences(iterator, os.Stdout, options...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
|
func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
|
||||||
// file io.Writer,
|
file io.Writer,
|
||||||
// options ...WithOption) error {
|
options ...WithOption) (obiseq.IBioSequenceBatch,error) {
|
||||||
|
|
||||||
// opts := MakeOptions(options)
|
var newIter obiseq.IBioSequenceBatch
|
||||||
|
var err error
|
||||||
|
|
||||||
// header_format := opts.FormatFastSeqHeader()
|
ok := iterator.Next()
|
||||||
// quality := opts.QualityShift()
|
|
||||||
|
|
||||||
// ok := iterator.Next()
|
if ok {
|
||||||
|
iterator.PushBack()
|
||||||
|
batch := iterator.Get()
|
||||||
|
if batch.Slice()[0].HasQualities() {
|
||||||
|
newIter,err = WriteFastqBatch(iterator, file, options...)
|
||||||
|
} else {
|
||||||
|
newIter,err = WriteFastaBatch(iterator, file, options...)
|
||||||
|
}
|
||||||
|
|
||||||
// if ok {
|
return newIter,err
|
||||||
// batch := iterator.Get()
|
}
|
||||||
// if batch.Slice()[0].HasQualities() {
|
|
||||||
// file.Write()
|
|
||||||
// fmt.Fprintln(file, FormatFastq(seq, quality, header_format))
|
|
||||||
// WriteFastq(iterator, file, options...)
|
|
||||||
// } else {
|
|
||||||
// fmt.Fprintln(file, FormatFasta(seq, header_format))
|
|
||||||
// WriteFasta(iterator, file, options...)
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// return nil
|
return obiseq.NilIBioSequenceBatch,fmt.Errorf("input iterator not ready")
|
||||||
// }
|
}
|
||||||
|
|
||||||
|
func WriteSequencesBatchToStdout(iterator obiseq.IBioSequenceBatch,
|
||||||
|
options ...WithOption) (obiseq.IBioSequenceBatch,error) {
|
||||||
|
return WriteSequenceBatch(iterator, os.Stdout, options...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func WriteSequencesBatchToFile(iterator obiseq.IBioSequenceBatch,
|
||||||
|
filename string,
|
||||||
|
options ...WithOption) (obiseq.IBioSequenceBatch,error) {
|
||||||
|
|
||||||
|
file, err := os.Create(filename)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("open file error: %v", err)
|
||||||
|
return obiseq.NilIBioSequenceBatch, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return WriteSequenceBatch(iterator, file, options...)
|
||||||
|
}
|
||||||
|
@ -3,18 +3,26 @@ package obioptions
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
"github.com/DavidGamba/go-getoptions"
|
"github.com/DavidGamba/go-getoptions"
|
||||||
)
|
)
|
||||||
|
|
||||||
var __debug__ = false
|
var _Debug = false
|
||||||
|
var _ParallelWorkers = runtime.NumCPU() - 1
|
||||||
|
var _BufferSize = 1
|
||||||
|
var _BatchSize = 5000
|
||||||
|
|
||||||
type ArgumentParser func([]string) (*getoptions.GetOpt, []string, error)
|
type ArgumentParser func([]string) (*getoptions.GetOpt, []string, error)
|
||||||
|
|
||||||
func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser {
|
func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser {
|
||||||
options := getoptions.New()
|
options := getoptions.New()
|
||||||
options.Bool("help", false, options.Alias("h", "?"))
|
options.Bool("help", false, options.Alias("h", "?"))
|
||||||
options.BoolVar(&__debug__, "debug", false)
|
options.BoolVar(&_Debug, "debug", false)
|
||||||
|
|
||||||
|
options.IntVar(&_ParallelWorkers, "workers", runtime.NumCPU()-1,
|
||||||
|
options.Alias("w"),
|
||||||
|
options.Description("Number of parallele threads computing the result"))
|
||||||
|
|
||||||
for _, o := range optionset {
|
for _, o := range optionset {
|
||||||
o(options)
|
o(options)
|
||||||
@ -32,15 +40,33 @@ func GenerateOptionParser(optionset ...func(*getoptions.GetOpt)) ArgumentParser
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Predicate indicating if the debug mode is activated
|
// Predicate indicating if the debug mode is activated.
|
||||||
func IsDebugMode() bool {
|
func IsDebugMode() bool {
|
||||||
return __debug__
|
return _Debug
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ParallelWorkers returns the number of parallel workers requested by
|
||||||
|
// the command line option --workers|-w.
|
||||||
|
func ParallelWorkers() int {
|
||||||
|
return _ParallelWorkers
|
||||||
|
}
|
||||||
|
|
||||||
|
// BufferSize returns the expected channel buffer size for obitools.
|
||||||
|
func BufferSize() int {
|
||||||
|
return _BufferSize
|
||||||
|
}
|
||||||
|
|
||||||
|
// BatchSize returns the expected size of the sequence batches.
|
||||||
|
func BatchSize() int {
|
||||||
|
return _BatchSize
|
||||||
|
}
|
||||||
|
|
||||||
|
// DebugOn sets the debug mode on.
|
||||||
func DebugOn() {
|
func DebugOn() {
|
||||||
__debug__ = true
|
_Debug = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DebugOff sets the debug mode off.
|
||||||
func DebugOff() {
|
func DebugOff() {
|
||||||
__debug__ = false
|
_Debug = false
|
||||||
}
|
}
|
||||||
|
@ -39,6 +39,7 @@ func (batch BioSequenceBatch) IsNil() bool {
|
|||||||
type __ibiosequencebatch__ struct {
|
type __ibiosequencebatch__ struct {
|
||||||
channel chan BioSequenceBatch
|
channel chan BioSequenceBatch
|
||||||
current BioSequenceBatch
|
current BioSequenceBatch
|
||||||
|
pushBack bool
|
||||||
all_done *sync.WaitGroup
|
all_done *sync.WaitGroup
|
||||||
buffer_size int
|
buffer_size int
|
||||||
finished bool
|
finished bool
|
||||||
@ -61,9 +62,11 @@ func MakeIBioSequenceBatch(sizes ...int) IBioSequenceBatch {
|
|||||||
i := __ibiosequencebatch__{
|
i := __ibiosequencebatch__{
|
||||||
channel: make(chan BioSequenceBatch, buffsize),
|
channel: make(chan BioSequenceBatch, buffsize),
|
||||||
current: NilBioSequenceBatch,
|
current: NilBioSequenceBatch,
|
||||||
|
pushBack: false,
|
||||||
buffer_size: buffsize,
|
buffer_size: buffsize,
|
||||||
finished: false,
|
finished: false,
|
||||||
p_finished: nil}
|
p_finished: nil,
|
||||||
|
}
|
||||||
i.p_finished = &i.finished
|
i.p_finished = &i.finished
|
||||||
waiting := sync.WaitGroup{}
|
waiting := sync.WaitGroup{}
|
||||||
i.all_done = &waiting
|
i.all_done = &waiting
|
||||||
@ -99,6 +102,7 @@ func (iterator IBioSequenceBatch) Split() IBioSequenceBatch {
|
|||||||
i := __ibiosequencebatch__{
|
i := __ibiosequencebatch__{
|
||||||
channel: iterator.pointer.channel,
|
channel: iterator.pointer.channel,
|
||||||
current: NilBioSequenceBatch,
|
current: NilBioSequenceBatch,
|
||||||
|
pushBack: false,
|
||||||
all_done: iterator.pointer.all_done,
|
all_done: iterator.pointer.all_done,
|
||||||
buffer_size: iterator.pointer.buffer_size,
|
buffer_size: iterator.pointer.buffer_size,
|
||||||
finished: false,
|
finished: false,
|
||||||
@ -111,6 +115,12 @@ func (iterator IBioSequenceBatch) Next() bool {
|
|||||||
if *(iterator.pointer.p_finished) {
|
if *(iterator.pointer.p_finished) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if iterator.pointer.pushBack {
|
||||||
|
iterator.pointer.pushBack = false
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
next, ok := (<-iterator.pointer.channel)
|
next, ok := (<-iterator.pointer.channel)
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
@ -123,6 +133,12 @@ func (iterator IBioSequenceBatch) Next() bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequenceBatch) PushBack() {
|
||||||
|
if !iterator.pointer.current.IsNil() {
|
||||||
|
iterator.pointer.pushBack = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// The 'Get' method returns the instance of BioSequenceBatch
|
// The 'Get' method returns the instance of BioSequenceBatch
|
||||||
// currently pointed by the iterator. You have to use the
|
// currently pointed by the iterator. You have to use the
|
||||||
// 'Next' method to move to the next entry before calling
|
// 'Next' method to move to the next entry before calling
|
||||||
@ -303,14 +319,14 @@ func (iterator IBioSequenceBatch) Rebatch(size int, sizes ...int) IBioSequenceBa
|
|||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequenceBatch) Destroy() {
|
func (iterator IBioSequenceBatch) Recycle() {
|
||||||
|
|
||||||
log.Println("Start recycling of Bioseq objects")
|
log.Println("Start recycling of Bioseq objects")
|
||||||
|
|
||||||
for iterator.Next() {
|
for iterator.Next() {
|
||||||
batch := iterator.Get()
|
batch := iterator.Get()
|
||||||
for _, seq := range batch.Slice() {
|
for _, seq := range batch.Slice() {
|
||||||
(&seq).Destroy()
|
(&seq).Recycle()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
log.Println("End of the recycling of Bioseq objects")
|
log.Println("End of the recycling of Bioseq objects")
|
||||||
|
@ -44,7 +44,7 @@ func (s BioSequence) IsNil() bool {
|
|||||||
return s.sequence == nil
|
return s.sequence == nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s BioSequence) Reset() {
|
func (s *BioSequence) Reset() {
|
||||||
s.sequence.id.Reset()
|
s.sequence.id.Reset()
|
||||||
s.sequence.definition.Reset()
|
s.sequence.definition.Reset()
|
||||||
s.sequence.sequence.Reset()
|
s.sequence.sequence.Reset()
|
||||||
@ -168,6 +168,10 @@ func (s BioSequence) SetQualities(qualities Quality) {
|
|||||||
s.sequence.qualities.Write(qualities)
|
s.sequence.qualities.Write(qualities)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s BioSequence) WriteQualities(data []byte) (int, error) {
|
||||||
|
return s.sequence.qualities.Write(data)
|
||||||
|
}
|
||||||
|
|
||||||
func (s BioSequence) Write(data []byte) (int, error) {
|
func (s BioSequence) Write(data []byte) (int, error) {
|
||||||
return s.sequence.sequence.Write(data)
|
return s.sequence.sequence.Write(data)
|
||||||
}
|
}
|
||||||
|
@ -10,10 +10,11 @@ import (
|
|||||||
type __ibiosequence__ struct {
|
type __ibiosequence__ struct {
|
||||||
channel chan BioSequence
|
channel chan BioSequence
|
||||||
current BioSequence
|
current BioSequence
|
||||||
|
pushBack bool
|
||||||
all_done *sync.WaitGroup
|
all_done *sync.WaitGroup
|
||||||
buffer_size int
|
buffer_size int
|
||||||
finished bool
|
finished bool
|
||||||
p_finished *bool
|
pFinished *bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type IBioSequence struct {
|
type IBioSequence struct {
|
||||||
@ -55,10 +56,13 @@ func MakeIBioSequence(sizes ...int) IBioSequence {
|
|||||||
i := __ibiosequence__{
|
i := __ibiosequence__{
|
||||||
channel: make(chan BioSequence, buffsize),
|
channel: make(chan BioSequence, buffsize),
|
||||||
current: NilBioSequence,
|
current: NilBioSequence,
|
||||||
|
pushBack: false,
|
||||||
buffer_size: buffsize,
|
buffer_size: buffsize,
|
||||||
finished: false,
|
finished: false,
|
||||||
p_finished: nil}
|
pFinished: nil,
|
||||||
i.p_finished = &i.finished
|
}
|
||||||
|
|
||||||
|
i.pFinished = &i.finished
|
||||||
waiting := sync.WaitGroup{}
|
waiting := sync.WaitGroup{}
|
||||||
i.all_done = &waiting
|
i.all_done = &waiting
|
||||||
ii := IBioSequence{&i}
|
ii := IBioSequence{&i}
|
||||||
@ -66,23 +70,32 @@ func MakeIBioSequence(sizes ...int) IBioSequence {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequence) Split() IBioSequence {
|
func (iterator IBioSequence) Split() IBioSequence {
|
||||||
|
|
||||||
i := __ibiosequence__{
|
i := __ibiosequence__{
|
||||||
channel: iterator.pointer.channel,
|
channel: iterator.pointer.channel,
|
||||||
current: NilBioSequence,
|
current: NilBioSequence,
|
||||||
|
pushBack: false,
|
||||||
finished: false,
|
finished: false,
|
||||||
all_done: iterator.pointer.all_done,
|
all_done: iterator.pointer.all_done,
|
||||||
buffer_size: iterator.pointer.buffer_size,
|
buffer_size: iterator.pointer.buffer_size,
|
||||||
p_finished: iterator.pointer.p_finished}
|
pFinished: iterator.pointer.pFinished,
|
||||||
|
}
|
||||||
|
|
||||||
newIter := IBioSequence{&i}
|
newIter := IBioSequence{&i}
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequence) Next() bool {
|
func (iterator IBioSequence) Next() bool {
|
||||||
if iterator.IsNil() || *(iterator.pointer.p_finished) {
|
if iterator.IsNil() || *(iterator.pointer.pFinished) {
|
||||||
iterator.pointer.current = NilBioSequence
|
iterator.pointer.current = NilBioSequence
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if iterator.pointer.pushBack {
|
||||||
|
iterator.pointer.pushBack = false
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
next, ok := (<-iterator.pointer.channel)
|
next, ok := (<-iterator.pointer.channel)
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
@ -91,10 +104,16 @@ func (iterator IBioSequence) Next() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
iterator.pointer.current = NilBioSequence
|
iterator.pointer.current = NilBioSequence
|
||||||
*iterator.pointer.p_finished = true
|
*iterator.pointer.pFinished = true
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (iterator IBioSequence) PushBack() {
|
||||||
|
if !iterator.pointer.current.IsNil() {
|
||||||
|
iterator.pointer.pushBack = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// The 'Get' method returns the instance of BioSequence
|
// The 'Get' method returns the instance of BioSequence
|
||||||
// currently pointed by the iterator. You have to use the
|
// currently pointed by the iterator. You have to use the
|
||||||
// 'Next' method to move to the next entry before calling
|
// 'Next' method to move to the next entry before calling
|
||||||
@ -106,7 +125,7 @@ func (iterator IBioSequence) Get() BioSequence {
|
|||||||
// Finished returns 'true' value if no more data is available
|
// Finished returns 'true' value if no more data is available
|
||||||
// from the iterator.
|
// from the iterator.
|
||||||
func (iterator IBioSequence) Finished() bool {
|
func (iterator IBioSequence) Finished() bool {
|
||||||
return *iterator.pointer.p_finished
|
return *iterator.pointer.pFinished
|
||||||
}
|
}
|
||||||
|
|
||||||
func (iterator IBioSequence) BufferSize() int {
|
func (iterator IBioSequence) BufferSize() int {
|
||||||
|
@ -55,6 +55,7 @@ func (batch PairedBioSequenceBatch) IsNil() bool {
|
|||||||
type __ipairedbiosequencebatch__ struct {
|
type __ipairedbiosequencebatch__ struct {
|
||||||
channel chan PairedBioSequenceBatch
|
channel chan PairedBioSequenceBatch
|
||||||
current PairedBioSequenceBatch
|
current PairedBioSequenceBatch
|
||||||
|
pushBack bool
|
||||||
all_done *sync.WaitGroup
|
all_done *sync.WaitGroup
|
||||||
buffer_size int
|
buffer_size int
|
||||||
finished bool
|
finished bool
|
||||||
@ -77,9 +78,12 @@ func MakeIPairedBioSequenceBatch(sizes ...int) IPairedBioSequenceBatch {
|
|||||||
i := __ipairedbiosequencebatch__{
|
i := __ipairedbiosequencebatch__{
|
||||||
channel: make(chan PairedBioSequenceBatch, buffsize),
|
channel: make(chan PairedBioSequenceBatch, buffsize),
|
||||||
current: NilPairedBioSequenceBatch,
|
current: NilPairedBioSequenceBatch,
|
||||||
|
pushBack: false,
|
||||||
buffer_size: buffsize,
|
buffer_size: buffsize,
|
||||||
finished: false,
|
finished: false,
|
||||||
p_finished: nil}
|
p_finished: nil,
|
||||||
|
}
|
||||||
|
|
||||||
i.p_finished = &i.finished
|
i.p_finished = &i.finished
|
||||||
waiting := sync.WaitGroup{}
|
waiting := sync.WaitGroup{}
|
||||||
i.all_done = &waiting
|
i.all_done = &waiting
|
||||||
@ -115,6 +119,7 @@ func (iterator IPairedBioSequenceBatch) Split() IPairedBioSequenceBatch {
|
|||||||
i := __ipairedbiosequencebatch__{
|
i := __ipairedbiosequencebatch__{
|
||||||
channel: iterator.pointer.channel,
|
channel: iterator.pointer.channel,
|
||||||
current: NilPairedBioSequenceBatch,
|
current: NilPairedBioSequenceBatch,
|
||||||
|
pushBack: false,
|
||||||
all_done: iterator.pointer.all_done,
|
all_done: iterator.pointer.all_done,
|
||||||
buffer_size: iterator.pointer.buffer_size,
|
buffer_size: iterator.pointer.buffer_size,
|
||||||
finished: false,
|
finished: false,
|
||||||
@ -127,6 +132,12 @@ func (iterator IPairedBioSequenceBatch) Next() bool {
|
|||||||
if *(iterator.pointer.p_finished) {
|
if *(iterator.pointer.p_finished) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if iterator.pointer.pushBack {
|
||||||
|
iterator.pointer.pushBack = false
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
next, ok := (<-iterator.pointer.channel)
|
next, ok := (<-iterator.pointer.channel)
|
||||||
|
|
||||||
if ok {
|
if ok {
|
||||||
@ -139,6 +150,12 @@ func (iterator IPairedBioSequenceBatch) Next() bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (iterator IPairedBioSequenceBatch) PushBack() {
|
||||||
|
if !iterator.pointer.current.IsNil() {
|
||||||
|
iterator.pointer.pushBack = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// The 'Get' method returns the instance of BioSequenceBatch
|
// The 'Get' method returns the instance of BioSequenceBatch
|
||||||
// currently pointed by the iterator. You have to use the
|
// currently pointed by the iterator. You have to use the
|
||||||
// 'Next' method to move to the next entry before calling
|
// 'Next' method to move to the next entry before calling
|
||||||
|
@ -14,7 +14,6 @@ var __bioseq__pool__ = sync.Pool{
|
|||||||
|
|
||||||
func MakeEmptyBioSequence() BioSequence {
|
func MakeEmptyBioSequence() BioSequence {
|
||||||
bs := BioSequence{__bioseq__pool__.Get().(*__sequence__)}
|
bs := BioSequence{__bioseq__pool__.Get().(*__sequence__)}
|
||||||
bs.Reset()
|
|
||||||
return bs
|
return bs
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -23,12 +22,13 @@ func MakeBioSequence(id string,
|
|||||||
definition string) BioSequence {
|
definition string) BioSequence {
|
||||||
bs := MakeEmptyBioSequence()
|
bs := MakeEmptyBioSequence()
|
||||||
bs.SetId(id)
|
bs.SetId(id)
|
||||||
bs.SetSequence(sequence)
|
bs.Write(sequence)
|
||||||
bs.SetDefinition(definition)
|
bs.SetDefinition(definition)
|
||||||
return bs
|
return bs
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sequence *BioSequence) Destroy() {
|
func (sequence *BioSequence) Recycle() {
|
||||||
|
sequence.Reset()
|
||||||
__bioseq__pool__.Put(sequence.sequence)
|
__bioseq__pool__.Put(sequence.sequence)
|
||||||
sequence.sequence = nil
|
sequence.sequence = nil
|
||||||
}
|
}
|
||||||
|
@ -84,9 +84,10 @@ func (iterator IBioSequenceBatch) MakeIWorker(worker SeqWorker, sizes ...int) IB
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.Println("Start of the batch workers")
|
log.Println("Start of the batch workers")
|
||||||
for i := 0; i < nworkers; i++ {
|
for i := 0; i < nworkers-1; i++ {
|
||||||
go f(iterator.Split())
|
go f(iterator.Split())
|
||||||
}
|
}
|
||||||
|
go f(iterator)
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
@ -126,9 +127,10 @@ func (iterator IBioSequenceBatch) MakeISliceWorker(worker SeqSliceWorker, sizes
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.Println("Start of the batch slice workers")
|
log.Println("Start of the batch slice workers")
|
||||||
for i := 0; i < nworkers; i++ {
|
for i := 0; i < nworkers - 1; i++ {
|
||||||
go f(iterator.Split())
|
go f(iterator.Split())
|
||||||
}
|
}
|
||||||
|
go f(iterator)
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
}
|
}
|
||||||
|
@ -7,10 +7,11 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
func __expand_list_of_files__(check_ext bool, filenames ...string) ([]string, error) {
|
func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
|
||||||
var err error
|
var err error
|
||||||
list_of_files := make([]string, 0, 100)
|
list_of_files := make([]string, 0, 100)
|
||||||
for _, fn := range filenames {
|
for _, fn := range filenames {
|
||||||
@ -32,7 +33,7 @@ func __expand_list_of_files__(check_ext bool, filenames ...string) ([]string, er
|
|||||||
|
|
||||||
if info.IsDir() {
|
if info.IsDir() {
|
||||||
if path != fn {
|
if path != fn {
|
||||||
subdir, e := __expand_list_of_files__(true, path)
|
subdir, e := _ExpandListOfFiles(true, path)
|
||||||
if e != nil {
|
if e != nil {
|
||||||
return e
|
return e
|
||||||
}
|
}
|
||||||
@ -80,6 +81,15 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
|
|||||||
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader))
|
opts = append(opts, obiformats.OptionsFastSeqHeaderParser(obiformats.ParseGuessedFastSeqHeader))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nworkers := obioptions.ParallelWorkers() / 4
|
||||||
|
if nworkers < 2 {
|
||||||
|
nworkers = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
|
opts = append(opts, obiformats.OptionsBufferSize(obioptions.BufferSize()))
|
||||||
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.BatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(InputQualityShift()))
|
opts = append(opts, obiformats.OptionsQualityShift(InputQualityShift()))
|
||||||
|
|
||||||
if len(filenames) == 0 {
|
if len(filenames) == 0 {
|
||||||
@ -94,7 +104,7 @@ func ReadBioSequencesBatch(filenames ...string) (obiseq.IBioSequenceBatch, error
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
list_of_files, err := __expand_list_of_files__(false, filenames...)
|
list_of_files, err := _ExpandListOfFiles(false, filenames...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return obiseq.NilIBioSequenceBatch, err
|
return obiseq.NilIBioSequenceBatch, err
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
|
||||||
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -23,6 +24,15 @@ func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error
|
|||||||
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nworkers := obioptions.ParallelWorkers() / 4
|
||||||
|
if nworkers < 2 {
|
||||||
|
nworkers = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
|
opts = append(opts, obiformats.OptionsBufferSize(obioptions.BufferSize()))
|
||||||
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.BatchSize()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
|
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
@ -54,3 +64,68 @@ func WriteBioSequences(iterator obiseq.IBioSequence, filenames ...string) error
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func WriteBioSequencesBatch(iterator obiseq.IBioSequenceBatch,
|
||||||
|
terminalAction bool, filenames ...string) (obiseq.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
|
var newIter obiseq.IBioSequenceBatch
|
||||||
|
|
||||||
|
opts := make([]obiformats.WithOption, 0, 10)
|
||||||
|
|
||||||
|
switch OutputFastHeaderFormat() {
|
||||||
|
case "json":
|
||||||
|
log.Println("On output use JSON headers")
|
||||||
|
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||||
|
case "obi":
|
||||||
|
log.Println("On output use OBI headers")
|
||||||
|
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqOBIHeader))
|
||||||
|
default:
|
||||||
|
log.Println("On output use JSON headers")
|
||||||
|
opts = append(opts, obiformats.OptionsFastSeqHeaderFormat(obiformats.FormatFastSeqJsonHeader))
|
||||||
|
}
|
||||||
|
|
||||||
|
nworkers := obioptions.ParallelWorkers() / 4
|
||||||
|
if nworkers < 2 {
|
||||||
|
nworkers = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
||||||
|
opts = append(opts, obiformats.OptionsBufferSize(obioptions.BufferSize()))
|
||||||
|
opts = append(opts, obiformats.OptionsBatchSize(obioptions.BatchSize()))
|
||||||
|
|
||||||
|
opts = append(opts, obiformats.OptionsQualityShift(OutputQualityShift()))
|
||||||
|
|
||||||
|
var err error
|
||||||
|
|
||||||
|
if len(filenames) == 0 {
|
||||||
|
switch OutputFormat() {
|
||||||
|
case "fastq":
|
||||||
|
newIter, err = obiformats.WriteFastqBatchToStdout(iterator, opts...)
|
||||||
|
case "fasta":
|
||||||
|
newIter, err = obiformats.WriteFastaBatchToStdout(iterator, opts...)
|
||||||
|
default:
|
||||||
|
newIter, err = obiformats.WriteSequencesBatchToStdout(iterator, opts...)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
switch OutputFormat() {
|
||||||
|
case "fastq":
|
||||||
|
newIter, err = obiformats.WriteFastqBatchToFile(iterator, filenames[0], opts...)
|
||||||
|
case "fasta":
|
||||||
|
newIter, err = obiformats.WriteFastaBatchToFile(iterator, filenames[0], opts...)
|
||||||
|
default:
|
||||||
|
newIter, err = obiformats.WriteSequencesBatchToFile(iterator, filenames[0], opts...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Write file error: %v", err)
|
||||||
|
return obiseq.NilIBioSequenceBatch, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if terminalAction {
|
||||||
|
newIter.Recycle()
|
||||||
|
return obiseq.NilIBioSequenceBatch, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return newIter, nil
|
||||||
|
}
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obialign"
|
||||||
@ -11,34 +12,40 @@ import (
|
|||||||
"github.com/schollz/progressbar/v3"
|
"github.com/schollz/progressbar/v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
func __abs__(x int) int {
|
func _Abs(x int) int {
|
||||||
if x < 0 {
|
if x < 0 {
|
||||||
return -x
|
return -x
|
||||||
}
|
}
|
||||||
return x
|
return x
|
||||||
}
|
}
|
||||||
|
|
||||||
func JoinPairedSequence(seqA, seqB obiseq.BioSequence) obiseq.BioSequence {
|
func JoinPairedSequence(seqA, seqB obiseq.BioSequence, inplace bool) obiseq.BioSequence {
|
||||||
js := make([]byte, seqA.Length(), seqA.Length()+seqB.Length()+10)
|
|
||||||
jq := make([]byte, seqA.Length(), seqA.Length()+seqB.Length()+10)
|
|
||||||
|
|
||||||
copy(js, seqA.Sequence())
|
if !inplace {
|
||||||
copy(jq, seqA.Qualities())
|
seqA = seqA.Copy()
|
||||||
|
}
|
||||||
|
|
||||||
js = append(js, '.', '.', '.', '.', '.', '.', '.', '.', '.', '.')
|
seqA.WriteString("..........")
|
||||||
jq = append(jq, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
seqA.Write(seqB.Sequence())
|
||||||
|
|
||||||
js = append(js, seqB.Sequence()...)
|
seqA.WriteQualities(obiseq.Quality{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})
|
||||||
jq = append(jq, seqB.Qualities()...)
|
seqA.WriteQualities(seqB.Qualities())
|
||||||
|
|
||||||
rep := obiseq.MakeBioSequence(seqA.Id(), js, seqA.Definition())
|
return seqA
|
||||||
rep.SetQualities(jq)
|
|
||||||
|
|
||||||
return rep
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AssemblePESequences assembles two paired sequences following
|
||||||
|
// the obipairing strategy implemented in obialign.PEAlign using
|
||||||
|
// the gap and delta parameters.
|
||||||
|
// If the length of the overlap between both sequences is less than
|
||||||
|
// overlap_min, the alignment is substituted by a simple pasting
|
||||||
|
// of the sequences with a stretch of 10 dots in between them.
|
||||||
|
// the quality of the dots is set to 0.
|
||||||
|
// If the inplace parameter is set to true, the seqA and seqB are
|
||||||
|
// destroyed during the assembling process and cannot be reused later on.
|
||||||
func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
||||||
gap, delta, overlap_min int, with_stats bool,
|
gap, delta, overlap_min int, with_stats bool,
|
||||||
|
inplace bool,
|
||||||
arena_align obialign.PEAlignArena,
|
arena_align obialign.PEAlignArena,
|
||||||
arena_cons obialign.BuildAlignArena,
|
arena_cons obialign.BuildAlignArena,
|
||||||
arena_qual obialign.BuildAlignArena) obiseq.BioSequence {
|
arena_qual obialign.BuildAlignArena) obiseq.BioSequence {
|
||||||
@ -53,7 +60,7 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
|||||||
right = path[len(path)-2]
|
right = path[len(path)-2]
|
||||||
}
|
}
|
||||||
lcons := cons.Length()
|
lcons := cons.Length()
|
||||||
ali_length := lcons - __abs__(left) - __abs__(right)
|
ali_length := lcons - _Abs(left) - _Abs(right)
|
||||||
|
|
||||||
if ali_length >= overlap_min {
|
if ali_length >= overlap_min {
|
||||||
if with_stats {
|
if with_stats {
|
||||||
@ -85,14 +92,22 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
|||||||
annot["seq_ab_match"] = match
|
annot["seq_ab_match"] = match
|
||||||
annot["score_norm"] = score_norm
|
annot["score_norm"] = score_norm
|
||||||
|
|
||||||
|
if inplace {
|
||||||
|
(&seqA).Recycle()
|
||||||
|
(&seqB).Recycle()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
cons = JoinPairedSequence(seqA, seqB)
|
cons = JoinPairedSequence(seqA, seqB, inplace)
|
||||||
|
|
||||||
if with_stats {
|
if with_stats {
|
||||||
annot := cons.Annotations()
|
annot := cons.Annotations()
|
||||||
annot["mode"] = "join"
|
annot["mode"] = "join"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if inplace {
|
||||||
|
(&seqB).Recycle()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return cons
|
return cons
|
||||||
@ -101,7 +116,7 @@ func AssemblePESequences(seqA, seqB obiseq.BioSequence,
|
|||||||
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
||||||
gap, delta, overlap_min int, with_stats bool, sizes ...int) obiseq.IBioSequenceBatch {
|
gap, delta, overlap_min int, with_stats bool, sizes ...int) obiseq.IBioSequenceBatch {
|
||||||
|
|
||||||
nworkers := 7
|
nworkers := runtime.NumCPU() - 1
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
|
|
||||||
if len(sizes) > 0 {
|
if len(sizes) > 0 {
|
||||||
@ -148,13 +163,11 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
|||||||
processed := 0
|
processed := 0
|
||||||
for i, A := range batch.Forward() {
|
for i, A := range batch.Forward() {
|
||||||
B := batch.Reverse()[i]
|
B := batch.Reverse()[i]
|
||||||
cons[i] = AssemblePESequences(A, B, 2, 5, 20, true, arena, barena1, barena2)
|
cons[i] = AssemblePESequences(A, B, 2, 5, 20, true, true, arena, barena1, barena2)
|
||||||
if i%59 == 0 {
|
if i%59 == 0 {
|
||||||
bar.Add(59)
|
bar.Add(59)
|
||||||
processed += 59
|
processed += 59
|
||||||
}
|
}
|
||||||
A.Destroy()
|
|
||||||
B.Destroy()
|
|
||||||
}
|
}
|
||||||
bar.Add(batch.Length() - processed)
|
bar.Add(batch.Length() - processed)
|
||||||
newIter.Channel() <- obiseq.MakeBioSequenceBatch(
|
newIter.Channel() <- obiseq.MakeBioSequenceBatch(
|
||||||
@ -169,9 +182,10 @@ func IAssemblePESequencesBatch(iterator obiseq.IPairedBioSequenceBatch,
|
|||||||
|
|
||||||
log.Printf("Start of the sequence Pairing")
|
log.Printf("Start of the sequence Pairing")
|
||||||
|
|
||||||
for i := 0; i < nworkers; i++ {
|
for i := 0; i < nworkers-1; i++ {
|
||||||
go f(iterator.Split(), i)
|
go f(iterator.Split(), i)
|
||||||
}
|
}
|
||||||
|
go f(iterator, nworkers-1)
|
||||||
|
|
||||||
return newIter
|
return newIter
|
||||||
|
|
||||||
|
@ -15,6 +15,8 @@ var _AllowedMismatch = 0
|
|||||||
var _MinimumLength = 0
|
var _MinimumLength = 0
|
||||||
var _MaximumLength = -1
|
var _MaximumLength = -1
|
||||||
|
|
||||||
|
// PCROptionSet adds to a command line option set every option
|
||||||
|
// needed by the PCR algorithm.
|
||||||
func PCROptionSet(options *getoptions.GetOpt) {
|
func PCROptionSet(options *getoptions.GetOpt) {
|
||||||
options.BoolVar(&_Circular, "circular", false,
|
options.BoolVar(&_Circular, "circular", false,
|
||||||
options.Alias("c"),
|
options.Alias("c"),
|
||||||
@ -40,11 +42,15 @@ func PCROptionSet(options *getoptions.GetOpt) {
|
|||||||
options.Description("Maximum length of the barcode (primers excluded)."))
|
options.Description("Maximum length of the barcode (primers excluded)."))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OptionSet adds to the basic option set every option declared for
|
||||||
|
// the obipcr command
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
obiconvert.OptionSet(options)
|
obiconvert.OptionSet(options)
|
||||||
PCROptionSet(options)
|
PCROptionSet(options)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ForwardPrimer returns the sequence of the forward primer as indicated by the
|
||||||
|
// --forward command line option
|
||||||
func ForwardPrimer() string {
|
func ForwardPrimer() string {
|
||||||
pattern, err := obiapat.MakeApatPattern(_ForwardPrimer, _AllowedMismatch)
|
pattern, err := obiapat.MakeApatPattern(_ForwardPrimer, _AllowedMismatch)
|
||||||
|
|
||||||
@ -57,6 +63,8 @@ func ForwardPrimer() string {
|
|||||||
return _ForwardPrimer
|
return _ForwardPrimer
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReversePrimer returns the sequence of the reverse primer as indicated by the
|
||||||
|
// --reverse command line option
|
||||||
func ReversePrimer() string {
|
func ReversePrimer() string {
|
||||||
pattern, err := obiapat.MakeApatPattern(_ReversePrimer, _AllowedMismatch)
|
pattern, err := obiapat.MakeApatPattern(_ReversePrimer, _AllowedMismatch)
|
||||||
|
|
||||||
@ -69,18 +77,27 @@ func ReversePrimer() string {
|
|||||||
return _ReversePrimer
|
return _ReversePrimer
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AllowedMismatch returns the allowed mismatch count between each
|
||||||
|
// primer and the sequences as indicated by the
|
||||||
|
// --allowed-mismatches|-e command line option
|
||||||
func AllowedMismatch() int {
|
func AllowedMismatch() int {
|
||||||
return _AllowedMismatch
|
return _AllowedMismatch
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Circular returns the considered sequence topology as indicated by the
|
||||||
|
// --circular|-c command line option
|
||||||
func Circular() bool {
|
func Circular() bool {
|
||||||
return _Circular
|
return _Circular
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MinLength returns the amplicon minimum length as indicated by the
|
||||||
|
// --min-length|-l command line option
|
||||||
func MinLength() int {
|
func MinLength() int {
|
||||||
return _MinimumLength
|
return _MinimumLength
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MaxLength returns the amplicon maximum length as indicated by the
|
||||||
|
// --max-length|-L command line option
|
||||||
func MaxLength() int {
|
func MaxLength() int {
|
||||||
return _MaximumLength
|
return _MaximumLength
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,10 @@ import (
|
|||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
|
||||||
)
|
)
|
||||||
|
|
||||||
func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequence, error) {
|
// PCR iterates over sequences provided by an obiseq.IBioSequenceBatch
|
||||||
|
// and returns another obiseq.IBioSequenceBatch distributing
|
||||||
|
// obiseq.BioSequenceBatch containing the selected amplicon sequences.
|
||||||
|
func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequenceBatch, error) {
|
||||||
|
|
||||||
forward := ForwardPrimer()
|
forward := ForwardPrimer()
|
||||||
reverse := ReversePrimer()
|
reverse := ReversePrimer()
|
||||||
@ -28,5 +31,5 @@ func PCR(iterator obiseq.IBioSequenceBatch) (obiseq.IBioSequence, error) {
|
|||||||
|
|
||||||
worker := obiapat.PCRSliceWorker(forward, reverse, opts...)
|
worker := obiapat.PCRSliceWorker(forward, reverse, opts...)
|
||||||
|
|
||||||
return iterator.MakeISliceWorker(worker).IBioSequence(), nil
|
return iterator.MakeISliceWorker(worker), nil
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user