Adds to every obitools the ability to save in gzip compressed files

This commit is contained in:
2023-02-18 12:06:52 +01:00
parent 1a08ad4254
commit 56722895e4
9 changed files with 85 additions and 66 deletions

View File

@ -3,12 +3,14 @@ package goutils
import (
"bufio"
"compress/gzip"
"io"
"os"
)
type Wfile struct {
compressed bool
f *os.File
close bool
out io.WriteCloser
gf *gzip.Writer
fw *bufio.Writer
}
@ -36,10 +38,33 @@ func OpenWritingFile(name string, compressed bool, append bool) (*Wfile, error)
fw = bufio.NewWriter(fi)
}
return &Wfile{compressed: compressed,
f: fi,
gf: gf,
fw: fw,
return &Wfile{
compressed: compressed,
close: true,
out: fi,
gf: gf,
fw: fw,
}, nil
}
func CompressStream(out io.WriteCloser, compressed bool, close bool) (*Wfile, error) {
var gf *gzip.Writer
var fw *bufio.Writer
if compressed {
gf = gzip.NewWriter(out)
fw = bufio.NewWriter(gf)
} else {
gf = nil
fw = bufio.NewWriter(out)
}
return &Wfile{
compressed: compressed,
close: close,
out: out,
gf: gf,
fw: fw,
}, nil
}
@ -56,12 +81,17 @@ func (w *Wfile) Close() error {
err = nil
w.fw.Flush()
if w.compressed {
err = w.gf.Close()
}
err2 := w.f.Close()
var err2 error
err2 = nil
if w.close {
err2 = w.out.Close()
}
if err == nil {
err = err2

View File

@ -65,10 +65,12 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []b
}
func WriteFasta(iterator obiiter.IBioSequence,
file io.Writer,
file io.WriteCloser,
options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options)
file,_ = goutils.CompressStream(file,opt.CompressedFile(),opt.CloseFile())
buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequence(buffsize)
@ -133,15 +135,8 @@ func WriteFasta(iterator obiiter.IBioSequence,
}
if opt.CloseFile() {
switch file := file.(type) {
case *os.File:
file.Close()
case *goutils.Wfile:
file.Close()
}
}
file.Close()
log.Debugln("End of the fasta file writing")
obiiter.UnregisterPipe()
waitWriter.Done()
@ -163,11 +158,13 @@ func WriteFastaToFile(iterator obiiter.IBioSequence,
opt := MakeOptions(options)
flags := os.O_WRONLY | os.O_CREATE
file,err := goutils.OpenWritingFile(filename,
opt.CompressedFile(),
opt.AppendFile(),
)
if opt.AppendFile() {
flags |= os.O_APPEND
}
file, err := os.OpenFile(filename, flags, 0660)
if err != nil {
log.Fatalf("open file error: %v", err)

View File

@ -54,10 +54,12 @@ type FileChunck struct {
}
func WriteFastq(iterator obiiter.IBioSequence,
file io.Writer,
file io.WriteCloser,
options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options)
file,_ = goutils.CompressStream(file,opt.CompressedFile(),opt.CloseFile())
buffsize := iterator.BufferSize()
newIter := obiiter.MakeIBioSequence(buffsize)
@ -123,14 +125,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
}
if opt.CloseFile() {
switch file := file.(type) {
case *os.File:
file.Close()
case *goutils.Wfile:
file.Close()
}
}
file.Close()
log.Debugln("End of the fastq file writing")
obiiter.UnregisterPipe()
@ -151,12 +146,13 @@ func WriteFastqToFile(iterator obiiter.IBioSequence,
options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options)
flags := os.O_WRONLY | os.O_CREATE
file, err := goutils.OpenWritingFile(filename,
opt.CompressedFile(),
opt.AppendFile(),
)
if opt.AppendFile() {
flags |= os.O_APPEND
}
file, err := os.OpenFile(filename, flags, 0660)
if err != nil {
log.Fatalf("open file error: %v", err)
return obiiter.NilIBioSequence, err

View File

@ -110,17 +110,17 @@ func OptionDontCloseFile() WithOption {
return f
}
func OptionsAppendFile() WithOption {
func OptionsAppendFile(append bool) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.appendfile = true
opt.pointer.appendfile = append
})
return f
}
func OptionsCompressed() WithOption {
func OptionsCompressed(compressed bool) WithOption {
f := WithOption(func(opt Options) {
opt.pointer.compressed = true
opt.pointer.compressed = compressed
})
return f

View File

@ -7,12 +7,11 @@ import (
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
)
func WriteSequence(iterator obiiter.IBioSequence,
file io.Writer,
file io.WriteCloser,
options ...WithOption) (obiiter.IBioSequence, error) {
iterator = iterator.Rebatch(1000)
@ -56,13 +55,13 @@ func WriteSequencesToFile(iterator obiiter.IBioSequence,
filename string,
options ...WithOption) (obiiter.IBioSequence, error) {
opt := MakeOptions(options)
flags := os.O_WRONLY | os.O_CREATE
file, err := goutils.OpenWritingFile(filename,
opt.CompressedFile(),
opt.AppendFile(),
)
if opt.AppendFile() {
flags |= os.O_APPEND
}
file, err := os.OpenFile(filename, flags, 0660)
if err != nil {
log.Fatalf("open file error: %v", err)

View File

@ -25,6 +25,7 @@ var __output_fastobi_format__ = false
var __output_solexa_quality__ = false
var __no_progress_bar__ = false
var __compressed__ = false
func InputOptionSet(options *getoptions.GetOpt) {
// options.IntVar(&__skipped_entries__, "skip", __skipped_entries__,
@ -72,6 +73,10 @@ func OutputOptionSet(options *getoptions.GetOpt) {
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
options.Description("Disable the progress bar printing"))
options.BoolVar(&__compressed__, "--compress", false,
options.Alias("Z"),
options.Description("Output is compressed"))
}
func OptionSet(options *getoptions.GetOpt) {
@ -110,6 +115,10 @@ func CLIOutputFormat() string {
}
}
func CLICompressed() bool {
return __compressed__
}
func CLIInputFastHeaderFormat() string {
switch {
case __input_fastjson_format__:

View File

@ -41,6 +41,8 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))
opts = append(opts, obiformats.OptionsCompressed(CLICompressed()))
var err error
if len(filenames) == 0 {

View File

@ -30,19 +30,13 @@ func DistributeSequence(sequences obiiter.IBioSequence) {
nworkers = 2
}
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
obiformats.OptionsBufferSize(obioptions.CLIBufferSize()),
obiformats.OptionsBatchSize(obioptions.CLIBatchSize()),
obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()),
obiformats.OptionsAppendFile(CLIAppendSequences()),
obiformats.OptionsCompressed(obiconvert.CLICompressed()))
opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()))
if CLIAppendSequences() {
opts = append(opts, obiformats.OptionsAppendFile())
}
if CLICompressed() {
opts = append(opts, obiformats.OptionsCompressed())
}
var formater obiformats.SequenceBatchWriterToFile
switch obiconvert.CLIOutputFormat() {

View File

@ -18,7 +18,6 @@ var _BatchCount = 0
var _HashSize = 0
var _NAValue = "NA"
var _append = false
var _compressed = false
func DistributeOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern,
@ -52,10 +51,6 @@ func DistributeOptionSet(options *getoptions.GetOpt) {
options.Alias("A"),
options.Description("Indicates to append sequence to files if they already exist."))
options.BoolVar(&_compressed, "--compress", false,
options.Alias("Z"),
options.Description("Output is compressed"))
options.IntVar(&_HashSize, "hash", 0,
options.Alias("H"),
options.Description("Indicates to split the input into at most <n> batch based on a hash code of the seequence."))
@ -71,9 +66,6 @@ func CLIAppendSequences() bool {
return _append
}
func CLICompressed() bool {
return _compressed
}
func CLISequenceClassifier() *obiseq.BioSequenceClassifier {
switch {