second version of obidistribute and a first buggy version of obiuniq

This commit is contained in:
2022-02-15 00:47:02 +01:00
parent b931321ba1
commit 3586ecc483
15 changed files with 402 additions and 21 deletions

View File

@@ -4,7 +4,6 @@ import (
"fmt"
"log"
"sync"
"sync/atomic"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
@@ -22,26 +21,28 @@ func WriterDispatcher(prototypename string,
jobDone.Add(1)
go func() {
n := int32(0)
for newflux := range dispatcher.News() {
jobDone.Add(1)
go func(newflux string) {
data, _ := dispatcher.Outputs(newflux)
data, err := dispatcher.Outputs(newflux)
if err != nil {
log.Fatalf("Cannot retreive the new chanel : %v", err)
}
out, err := formater(data,
fmt.Sprintf(prototypename, newflux),
options...)
if err != nil {
log.Fatalf("cannot open the output file for key %s", newflux)
}
atomic.AddInt32(&n, 1)
if atomic.LoadInt32(&n) > 1 {
jobDone.Add(1)
}
out.Recycle()
jobDone.Done()
}(newflux)
}
jobDone.Done()
}()
jobDone.Wait()

View File

@@ -60,6 +60,13 @@ func WriteFasta(iterator obiseq.IBioSequence, file io.Writer, options ...WithOpt
fmt.Fprintln(file, FormatFasta(seq, header_format))
}
if opt.CloseFile() {
switch file := file.(type) {
case *os.File:
file.Close()
}
}
return nil
}
@@ -74,10 +81,13 @@ func WriteFastaToFile(iterator obiseq.IBioSequence,
return err
}
options = append(options, OptionCloseFile())
return WriteFasta(iterator, file, options...)
}
// WriteFastaToStdout writes the sequences of iterator to os.Stdout through
// WriteFasta. OptionDontCloseFile is appended after the caller's options, so
// it is the last close-file option in the list handed to WriteFasta.
func WriteFastaToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
	stdoutOptions := append(options, OptionDontCloseFile())

	return WriteFasta(iterator, os.Stdout, stdoutOptions...)
}
@@ -105,6 +115,7 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
time.Sleep(time.Millisecond)
}
close(newIter.Channel())
}()
ff := func(iterator obiseq.IBioSequenceBatch) {
@@ -145,12 +156,21 @@ func WriteFastaBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
}
}
if opt.CloseFile() {
switch file := file.(type) {
case *os.File:
file.Close()
}
}
}()
return newIter, nil
}
// WriteFastaBatchToStdout forwards iterator to WriteFastaBatch with os.Stdout
// as the destination and returns whatever WriteFastaBatch returns.
// OptionDontCloseFile is appended after the caller's options.
func WriteFastaBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
	stdoutOptions := append(options, OptionDontCloseFile())

	return WriteFastaBatch(iterator, os.Stdout, stdoutOptions...)
}
@@ -165,5 +185,7 @@ func WriteFastaBatchToFile(iterator obiseq.IBioSequenceBatch,
return obiseq.NilIBioSequenceBatch, err
}
options = append(options, OptionCloseFile())
return WriteFastaBatch(iterator, file, options...)
}

View File

@@ -55,6 +55,13 @@ func WriteFastq(iterator obiseq.IBioSequence, file io.Writer, options ...WithOpt
fmt.Fprintln(file, FormatFastq(seq, quality, header_format))
}
if opt.CloseFile() {
switch file := file.(type) {
case *os.File:
file.Close()
}
}
return nil
}
@@ -69,10 +76,12 @@ func WriteFastqToFile(iterator obiseq.IBioSequence,
return err
}
options = append(options, OptionCloseFile())
return WriteFastq(iterator, file, options...)
}
// WriteFastqToStdout writes the sequences of iterator to os.Stdout through
// WriteFastq. OptionDontCloseFile is appended after the caller's options, so
// it is the last close-file option in the list handed to WriteFastq.
func WriteFastqToStdout(iterator obiseq.IBioSequence, options ...WithOption) error {
	stdoutOptions := append(options, OptionDontCloseFile())

	return WriteFastq(iterator, os.Stdout, stdoutOptions...)
}
@@ -122,10 +131,10 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
}
log.Println("Start of the fastq file writing")
go ff(iterator)
for i := 0; i < nwriters-1; i++ {
go ff(iterator.Split())
}
go ff(iterator)
next_to_send := 0
received := make(map[int]FileChunck, 100)
@@ -147,12 +156,21 @@ func WriteFastqBatch(iterator obiseq.IBioSequenceBatch, file io.Writer, options
}
}
if opt.CloseFile() {
switch file := file.(type) {
case *os.File:
file.Close()
}
}
}()
return newIter, nil
}
// WriteFastqBatchToStdout forwards iterator to WriteFastqBatch with os.Stdout
// as the destination and returns whatever WriteFastqBatch returns.
// OptionDontCloseFile is appended after the caller's options.
func WriteFastqBatchToStdout(iterator obiseq.IBioSequenceBatch, options ...WithOption) (obiseq.IBioSequenceBatch, error) {
	stdoutOptions := append(options, OptionDontCloseFile())

	return WriteFastqBatch(iterator, os.Stdout, stdoutOptions...)
}
@@ -167,5 +185,7 @@ func WriteFastqBatchToFile(iterator obiseq.IBioSequenceBatch,
return obiseq.NilIBioSequenceBatch, err
}
options = append(options, OptionCloseFile())
return WriteFastqBatch(iterator, file, options...)
}

View File

@@ -1,6 +1,8 @@
package obiformats
import "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
)
type __options__ struct {
fastseq_header_parser obiseq.SeqAnnotator
@@ -10,6 +12,7 @@ type __options__ struct {
batch_size int
quality_shift int
parallel_workers int
closefile bool
}
type Options struct {
@@ -27,6 +30,7 @@ func MakeOptions(setters []WithOption) Options {
quality_shift: 33,
parallel_workers: 4,
batch_size: 5000,
closefile: false,
}
opt := Options{&o}
@@ -66,6 +70,10 @@ func (opt Options) ProgressBar() bool {
return opt.pointer.with_progress_bar
}
// CloseFile reports the current value of the closefile flag stored in the
// option set.
func (opt Options) CloseFile() bool {
	shouldClose := opt.pointer.closefile
	return shouldClose
}
func OptionsBufferSize(size int) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.buffer_size = size
@@ -74,6 +82,22 @@ func OptionsBufferSize(size int) WithOption {
return f
}
// OptionCloseFile returns a WithOption that sets the closefile flag of the
// option set to true.
func OptionCloseFile() WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.closefile = true
	})
}
// OptionDontCloseFile returns a WithOption that sets the closefile flag of
// the option set to false.
func OptionDontCloseFile() WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.closefile = false
	})
}
// Allows to specify the ascii code corresponding to
// a quality of 0 in fastq encoded quality scores.
func OptionsQualityShift(shift int) WithOption {

View File

@@ -64,6 +64,12 @@ func ReadSequencesBatchFromFile(filename string, options ...WithOption) (obiseq.
tag, _ := breader.Peek(30)
if len(tag) < 30 {
newIter := obiseq.MakeIBioSequenceBatch()
close(newIter.Channel())
return newIter, nil
}
filetype := GuessSeqFileType(string(tag))
log.Printf("File guessed format : %s (tag: %s)",
filetype, (strings.Split(string(tag), "\n"))[0])

View File

@@ -89,6 +89,7 @@ func WriteSequenceBatch(iterator obiseq.IBioSequenceBatch,
// WriteSequencesBatchToStdout forwards iterator to WriteSequenceBatch with
// os.Stdout as the destination and returns whatever WriteSequenceBatch
// returns. OptionDontCloseFile is appended after the caller's options.
func WriteSequencesBatchToStdout(iterator obiseq.IBioSequenceBatch,
	options ...WithOption) (obiseq.IBioSequenceBatch, error) {
	stdoutOptions := append(options, OptionDontCloseFile())

	return WriteSequenceBatch(iterator, os.Stdout, stdoutOptions...)
}
@@ -103,5 +104,6 @@ func WriteSequencesBatchToFile(iterator obiseq.IBioSequenceBatch,
return obiseq.NilIBioSequenceBatch, err
}
options = append(options, OptionCloseFile())
return WriteSequenceBatch(iterator, file, options...)
}