From 29750429826fb31155f4430e3ac7ab73bad63de9 Mon Sep 17 00:00:00 2001 From: Eric Coissac Date: Thu, 16 Feb 2023 16:13:13 +0100 Subject: [PATCH] Adds the possibility to append to files to obidistribute --- pkg/obiformats/fastseq_write_fasta.go | 14 +++++++++++++- pkg/obiformats/fastseq_write_fastq.go | 13 ++++++++++++- pkg/obiformats/options.go | 22 ++++++++++++++++++++++ pkg/obiformats/universal_write.go | 13 ++++++++++++- pkg/obitools/obidistribute/distribute.go | 4 ++++ pkg/obitools/obidistribute/options.go | 12 +++++++++++- 6 files changed, 74 insertions(+), 4 deletions(-) diff --git a/pkg/obiformats/fastseq_write_fasta.go b/pkg/obiformats/fastseq_write_fasta.go index 4db2ef7..c61714d 100644 --- a/pkg/obiformats/fastseq_write_fasta.go +++ b/pkg/obiformats/fastseq_write_fasta.go @@ -157,7 +157,19 @@ func WriteFastaToFile(iterator obiiter.IBioSequence, filename string, options ...WithOption) (obiiter.IBioSequence, error) { - file, err := os.Create(filename) + var file *os.File + var err error + + opt := MakeOptions(options) + + if opt.AppendFile() { + log.Debug("Open files in appending mode") + file, err = os.OpenFile(filename, + os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + } else { + file, err = os.Create(filename) + } + if err != nil { log.Fatalf("open file error: %v", err) diff --git a/pkg/obiformats/fastseq_write_fastq.go b/pkg/obiformats/fastseq_write_fastq.go index 403fe6a..273435a 100644 --- a/pkg/obiformats/fastseq_write_fastq.go +++ b/pkg/obiformats/fastseq_write_fastq.go @@ -147,7 +147,18 @@ func WriteFastqToFile(iterator obiiter.IBioSequence, filename string, options ...WithOption) (obiiter.IBioSequence, error) { - file, err := os.Create(filename) + var file *os.File + var err error + + opt := MakeOptions(options) + + if opt.AppendFile() { + log.Debug("Open files in appending mode") + file, err = os.OpenFile(filename, + os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + } else { + file, err = os.Create(filename) + } if err != nil { log.Fatalf("open file 
error: %v", err) diff --git a/pkg/obiformats/options.go b/pkg/obiformats/options.go index 9790a0c..a6e0346 100644 --- a/pkg/obiformats/options.go +++ b/pkg/obiformats/options.go @@ -13,6 +13,7 @@ type __options__ struct { quality_shift int parallel_workers int closefile bool + appendfile bool } type Options struct { @@ -31,6 +32,7 @@ func MakeOptions(setters []WithOption) Options { parallel_workers: 4, batch_size: 5000, closefile: false, + appendfile: false, } opt := Options{&o} @@ -74,6 +76,10 @@ func (opt Options) CloseFile() bool { return opt.pointer.closefile } +func (opt Options) AppendFile() bool { + return opt.pointer.appendfile +} + func OptionsBufferSize(size int) WithOption { f := WithOption(func(opt Options) { opt.pointer.buffer_size = size @@ -98,6 +104,22 @@ func OptionDontCloseFile() WithOption { return f } +func OptionsAppendFile() WithOption { + f := WithOption(func(opt Options) { + opt.pointer.appendfile = true + }) + + return f +} + +func OptionsNewFile() WithOption { + f := WithOption(func(opt Options) { + opt.pointer.appendfile = false + }) + + return f +} + // Allows to specify the ascii code corresponding to // a quality of 0 in fastq encoded quality scores. 
func OptionsQualityShift(shift int) WithOption { diff --git a/pkg/obiformats/universal_write.go b/pkg/obiformats/universal_write.go index 1f40083..d875ebf 100644 --- a/pkg/obiformats/universal_write.go +++ b/pkg/obiformats/universal_write.go @@ -55,7 +55,18 @@ func WriteSequencesToFile(iterator obiiter.IBioSequence, filename string, options ...WithOption) (obiiter.IBioSequence, error) { - file, err := os.Create(filename) + var file *os.File + var err error + + opt := MakeOptions(options) + + if opt.AppendFile() { + log.Debug("Open files in appending mode") + file, err = os.OpenFile(filename, + os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + } else { + file, err = os.Create(filename) + } if err != nil { log.Fatalf("open file error: %v", err) diff --git a/pkg/obitools/obidistribute/distribute.go b/pkg/obitools/obidistribute/distribute.go index 7b1d9cb..b56d21c 100644 --- a/pkg/obitools/obidistribute/distribute.go +++ b/pkg/obitools/obidistribute/distribute.go @@ -36,6 +36,10 @@ func DistributeSequence(sequences obiiter.IBioSequence) { opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift())) + if CLIAppendSequences() { + opts = append(opts, obiformats.OptionsAppendFile()) + } + var formater obiformats.SequenceBatchWriterToFile switch obiconvert.CLIOutputFormat() { diff --git a/pkg/obitools/obidistribute/options.go b/pkg/obitools/obidistribute/options.go index 3349428..8fd873e 100644 --- a/pkg/obitools/obidistribute/options.go +++ b/pkg/obitools/obidistribute/options.go @@ -2,9 +2,10 @@ package obidistribute import ( "fmt" - log "github.com/sirupsen/logrus" "strings" + log "github.com/sirupsen/logrus" + "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "github.com/DavidGamba/go-getoptions" @@ -15,6 +16,7 @@ var _SequenceClassifierTag = "" var _BatchCount = 0 var _HashSize = 0 var _NAValue = "NA" +var _append = false func DistributeOptionSet(options 
*getoptions.GetOpt) { options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern, @@ -37,6 +39,10 @@ func DistributeOptionSet(options *getoptions.GetOpt) { options.Alias("n"), options.Description("Indicates in how many batches the input file must bee splitted.")) + options.BoolVar(&_append, "append", _append, + options.Alias("A"), + options.Description("Indicates to append sequence to files if they already exist.")) + options.IntVar(&_HashSize, "hash", 0, options.Alias("H"), options.Description("Indicates to split the input into at most batch based on a hash code of the seequence.")) @@ -48,6 +54,10 @@ func OptionSet(options *getoptions.GetOpt) { DistributeOptionSet(options) } +func CLIAppendSequences() bool { + return _append +} + func CLISequenceClassifier() *obiseq.BioSequenceClassifier { switch { case _SequenceClassifierTag != "":