mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-06-29 16:20:46 +00:00
Adds to every obitools command the ability to save its output as gzip-compressed files
This commit is contained in:
@ -3,12 +3,14 @@ package goutils
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"compress/gzip"
|
"compress/gzip"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Wfile struct {
|
type Wfile struct {
|
||||||
compressed bool
|
compressed bool
|
||||||
f *os.File
|
close bool
|
||||||
|
out io.WriteCloser
|
||||||
gf *gzip.Writer
|
gf *gzip.Writer
|
||||||
fw *bufio.Writer
|
fw *bufio.Writer
|
||||||
}
|
}
|
||||||
@ -36,8 +38,31 @@ func OpenWritingFile(name string, compressed bool, append bool) (*Wfile, error)
|
|||||||
fw = bufio.NewWriter(fi)
|
fw = bufio.NewWriter(fi)
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Wfile{compressed: compressed,
|
return &Wfile{
|
||||||
f: fi,
|
compressed: compressed,
|
||||||
|
close: true,
|
||||||
|
out: fi,
|
||||||
|
gf: gf,
|
||||||
|
fw: fw,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func CompressStream(out io.WriteCloser, compressed bool, close bool) (*Wfile, error) {
|
||||||
|
var gf *gzip.Writer
|
||||||
|
var fw *bufio.Writer
|
||||||
|
|
||||||
|
if compressed {
|
||||||
|
gf = gzip.NewWriter(out)
|
||||||
|
fw = bufio.NewWriter(gf)
|
||||||
|
} else {
|
||||||
|
gf = nil
|
||||||
|
fw = bufio.NewWriter(out)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Wfile{
|
||||||
|
compressed: compressed,
|
||||||
|
close: close,
|
||||||
|
out: out,
|
||||||
gf: gf,
|
gf: gf,
|
||||||
fw: fw,
|
fw: fw,
|
||||||
}, nil
|
}, nil
|
||||||
@ -61,7 +86,12 @@ func (w *Wfile) Close() error {
|
|||||||
err = w.gf.Close()
|
err = w.gf.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
err2 := w.f.Close()
|
var err2 error
|
||||||
|
err2 = nil
|
||||||
|
|
||||||
|
if w.close {
|
||||||
|
err2 = w.out.Close()
|
||||||
|
}
|
||||||
|
|
||||||
if err == nil {
|
if err == nil {
|
||||||
err = err2
|
err = err2
|
||||||
|
@ -65,10 +65,12 @@ func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []b
|
|||||||
}
|
}
|
||||||
|
|
||||||
func WriteFasta(iterator obiiter.IBioSequence,
|
func WriteFasta(iterator obiiter.IBioSequence,
|
||||||
file io.Writer,
|
file io.WriteCloser,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
|
file,_ = goutils.CompressStream(file,opt.CompressedFile(),opt.CloseFile())
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
newIter := obiiter.MakeIBioSequence(buffsize)
|
||||||
|
|
||||||
@ -133,14 +135,7 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt.CloseFile() {
|
|
||||||
switch file := file.(type) {
|
|
||||||
case *os.File:
|
|
||||||
file.Close()
|
file.Close()
|
||||||
case *goutils.Wfile:
|
|
||||||
file.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugln("End of the fasta file writing")
|
log.Debugln("End of the fasta file writing")
|
||||||
obiiter.UnregisterPipe()
|
obiiter.UnregisterPipe()
|
||||||
@ -163,11 +158,13 @@ func WriteFastaToFile(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
flags := os.O_WRONLY | os.O_CREATE
|
||||||
|
|
||||||
|
if opt.AppendFile() {
|
||||||
|
flags |= os.O_APPEND
|
||||||
|
}
|
||||||
|
file, err := os.OpenFile(filename, flags, 0660)
|
||||||
|
|
||||||
file,err := goutils.OpenWritingFile(filename,
|
|
||||||
opt.CompressedFile(),
|
|
||||||
opt.AppendFile(),
|
|
||||||
)
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
|
@ -54,10 +54,12 @@ type FileChunck struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func WriteFastq(iterator obiiter.IBioSequence,
|
func WriteFastq(iterator obiiter.IBioSequence,
|
||||||
file io.Writer,
|
file io.WriteCloser,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
|
||||||
|
file,_ = goutils.CompressStream(file,opt.CompressedFile(),opt.CloseFile())
|
||||||
|
|
||||||
buffsize := iterator.BufferSize()
|
buffsize := iterator.BufferSize()
|
||||||
newIter := obiiter.MakeIBioSequence(buffsize)
|
newIter := obiiter.MakeIBioSequence(buffsize)
|
||||||
|
|
||||||
@ -123,14 +125,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if opt.CloseFile() {
|
|
||||||
switch file := file.(type) {
|
|
||||||
case *os.File:
|
|
||||||
file.Close()
|
file.Close()
|
||||||
case *goutils.Wfile:
|
|
||||||
file.Close()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugln("End of the fastq file writing")
|
log.Debugln("End of the fastq file writing")
|
||||||
obiiter.UnregisterPipe()
|
obiiter.UnregisterPipe()
|
||||||
@ -151,11 +146,12 @@ func WriteFastqToFile(iterator obiiter.IBioSequence,
|
|||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
flags := os.O_WRONLY | os.O_CREATE
|
||||||
|
|
||||||
file, err := goutils.OpenWritingFile(filename,
|
if opt.AppendFile() {
|
||||||
opt.CompressedFile(),
|
flags |= os.O_APPEND
|
||||||
opt.AppendFile(),
|
}
|
||||||
)
|
file, err := os.OpenFile(filename, flags, 0660)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
|
@ -110,17 +110,17 @@ func OptionDontCloseFile() WithOption {
|
|||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionsAppendFile() WithOption {
|
func OptionsAppendFile(append bool) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.appendfile = true
|
opt.pointer.appendfile = append
|
||||||
})
|
})
|
||||||
|
|
||||||
return f
|
return f
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionsCompressed() WithOption {
|
func OptionsCompressed(compressed bool) WithOption {
|
||||||
f := WithOption(func(opt Options) {
|
f := WithOption(func(opt Options) {
|
||||||
opt.pointer.compressed = true
|
opt.pointer.compressed = compressed
|
||||||
})
|
})
|
||||||
|
|
||||||
return f
|
return f
|
||||||
|
@ -7,12 +7,11 @@ import (
|
|||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/goutils"
|
|
||||||
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
|
||||||
)
|
)
|
||||||
|
|
||||||
func WriteSequence(iterator obiiter.IBioSequence,
|
func WriteSequence(iterator obiiter.IBioSequence,
|
||||||
file io.Writer,
|
file io.WriteCloser,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
iterator = iterator.Rebatch(1000)
|
iterator = iterator.Rebatch(1000)
|
||||||
@ -56,13 +55,13 @@ func WriteSequencesToFile(iterator obiiter.IBioSequence,
|
|||||||
filename string,
|
filename string,
|
||||||
options ...WithOption) (obiiter.IBioSequence, error) {
|
options ...WithOption) (obiiter.IBioSequence, error) {
|
||||||
|
|
||||||
|
|
||||||
opt := MakeOptions(options)
|
opt := MakeOptions(options)
|
||||||
|
flags := os.O_WRONLY | os.O_CREATE
|
||||||
|
|
||||||
file, err := goutils.OpenWritingFile(filename,
|
if opt.AppendFile() {
|
||||||
opt.CompressedFile(),
|
flags |= os.O_APPEND
|
||||||
opt.AppendFile(),
|
}
|
||||||
)
|
file, err := os.OpenFile(filename, flags, 0660)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("open file error: %v", err)
|
log.Fatalf("open file error: %v", err)
|
||||||
|
@ -25,6 +25,7 @@ var __output_fastobi_format__ = false
|
|||||||
var __output_solexa_quality__ = false
|
var __output_solexa_quality__ = false
|
||||||
|
|
||||||
var __no_progress_bar__ = false
|
var __no_progress_bar__ = false
|
||||||
|
var __compressed__ = false
|
||||||
|
|
||||||
func InputOptionSet(options *getoptions.GetOpt) {
|
func InputOptionSet(options *getoptions.GetOpt) {
|
||||||
// options.IntVar(&__skipped_entries__, "skip", __skipped_entries__,
|
// options.IntVar(&__skipped_entries__, "skip", __skipped_entries__,
|
||||||
@ -72,6 +73,10 @@ func OutputOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
|
||||||
options.Description("Disable the progress bar printing"))
|
options.Description("Disable the progress bar printing"))
|
||||||
|
|
||||||
|
options.BoolVar(&__compressed__, "--compress", false,
|
||||||
|
options.Alias("Z"),
|
||||||
|
options.Description("Output is compressed"))
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func OptionSet(options *getoptions.GetOpt) {
|
func OptionSet(options *getoptions.GetOpt) {
|
||||||
@ -110,6 +115,10 @@ func CLIOutputFormat() string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func CLICompressed() bool {
|
||||||
|
return __compressed__
|
||||||
|
}
|
||||||
|
|
||||||
func CLIInputFastHeaderFormat() string {
|
func CLIInputFastHeaderFormat() string {
|
||||||
switch {
|
switch {
|
||||||
case __input_fastjson_format__:
|
case __input_fastjson_format__:
|
||||||
|
@ -41,6 +41,8 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
|||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))
|
opts = append(opts, obiformats.OptionsQualityShift(CLIOutputQualityShift()))
|
||||||
|
|
||||||
|
opts = append(opts, obiformats.OptionsCompressed(CLICompressed()))
|
||||||
|
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
if len(filenames) == 0 {
|
if len(filenames) == 0 {
|
||||||
|
@ -30,19 +30,13 @@ func DistributeSequence(sequences obiiter.IBioSequence) {
|
|||||||
nworkers = 2
|
nworkers = 2
|
||||||
}
|
}
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers))
|
opts = append(opts, obiformats.OptionsParallelWorkers(nworkers),
|
||||||
opts = append(opts, obiformats.OptionsBufferSize(obioptions.CLIBufferSize()))
|
obiformats.OptionsBufferSize(obioptions.CLIBufferSize()),
|
||||||
opts = append(opts, obiformats.OptionsBatchSize(obioptions.CLIBatchSize()))
|
obiformats.OptionsBatchSize(obioptions.CLIBatchSize()),
|
||||||
|
obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()),
|
||||||
|
obiformats.OptionsAppendFile(CLIAppendSequences()),
|
||||||
|
obiformats.OptionsCompressed(obiconvert.CLICompressed()))
|
||||||
|
|
||||||
opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()))
|
|
||||||
|
|
||||||
if CLIAppendSequences() {
|
|
||||||
opts = append(opts, obiformats.OptionsAppendFile())
|
|
||||||
}
|
|
||||||
|
|
||||||
if CLICompressed() {
|
|
||||||
opts = append(opts, obiformats.OptionsCompressed())
|
|
||||||
}
|
|
||||||
var formater obiformats.SequenceBatchWriterToFile
|
var formater obiformats.SequenceBatchWriterToFile
|
||||||
|
|
||||||
switch obiconvert.CLIOutputFormat() {
|
switch obiconvert.CLIOutputFormat() {
|
||||||
|
@ -18,7 +18,6 @@ var _BatchCount = 0
|
|||||||
var _HashSize = 0
|
var _HashSize = 0
|
||||||
var _NAValue = "NA"
|
var _NAValue = "NA"
|
||||||
var _append = false
|
var _append = false
|
||||||
var _compressed = false
|
|
||||||
|
|
||||||
func DistributeOptionSet(options *getoptions.GetOpt) {
|
func DistributeOptionSet(options *getoptions.GetOpt) {
|
||||||
options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern,
|
options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern,
|
||||||
@ -52,10 +51,6 @@ func DistributeOptionSet(options *getoptions.GetOpt) {
|
|||||||
options.Alias("A"),
|
options.Alias("A"),
|
||||||
options.Description("Indicates to append sequence to files if they already exist."))
|
options.Description("Indicates to append sequence to files if they already exist."))
|
||||||
|
|
||||||
options.BoolVar(&_compressed, "--compress", false,
|
|
||||||
options.Alias("Z"),
|
|
||||||
options.Description("Output is compressed"))
|
|
||||||
|
|
||||||
options.IntVar(&_HashSize, "hash", 0,
|
options.IntVar(&_HashSize, "hash", 0,
|
||||||
options.Alias("H"),
|
options.Alias("H"),
|
||||||
options.Description("Indicates to split the input into at most <n> batch based on a hash code of the seequence."))
|
options.Description("Indicates to split the input into at most <n> batch based on a hash code of the seequence."))
|
||||||
@ -71,9 +66,6 @@ func CLIAppendSequences() bool {
|
|||||||
return _append
|
return _append
|
||||||
}
|
}
|
||||||
|
|
||||||
func CLICompressed() bool {
|
|
||||||
return _compressed
|
|
||||||
}
|
|
||||||
|
|
||||||
func CLISequenceClassifier() *obiseq.BioSequenceClassifier {
|
func CLISequenceClassifier() *obiseq.BioSequenceClassifier {
|
||||||
switch {
|
switch {
|
||||||
|
Reference in New Issue
Block a user