Adds the possibility for obidistribute to append to existing files

This commit is contained in:
2023-02-16 16:13:13 +01:00
parent 85349668d0
commit 2975042982
6 changed files with 74 additions and 4 deletions

View File

@ -157,7 +157,19 @@ func WriteFastaToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequence, error) { options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename) var file *os.File
var err error
opt := MakeOptions(options)
if opt.AppendFile() {
log.Debug("Open files in appending mode")
file, err = os.OpenFile(filename,
os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
} else {
file, err = os.Create(filename)
}
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)

View File

@ -147,7 +147,18 @@ func WriteFastqToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequence, error) { options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename) var file *os.File
var err error
opt := MakeOptions(options)
if opt.AppendFile() {
log.Debug("Open files in appending mode")
file, err = os.OpenFile(filename,
os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
} else {
file, err = os.Create(filename)
}
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)

View File

@ -13,6 +13,7 @@ type __options__ struct {
quality_shift int quality_shift int
parallel_workers int parallel_workers int
closefile bool closefile bool
appendfile bool
} }
type Options struct { type Options struct {
@ -31,6 +32,7 @@ func MakeOptions(setters []WithOption) Options {
parallel_workers: 4, parallel_workers: 4,
batch_size: 5000, batch_size: 5000,
closefile: false, closefile: false,
appendfile: false,
} }
opt := Options{&o} opt := Options{&o}
@ -74,6 +76,10 @@ func (opt Options) CloseFile() bool {
return opt.pointer.closefile return opt.pointer.closefile
} }
// AppendFile reports whether the appendfile flag is set on these options.
func (opt Options) AppendFile() bool {
	appending := opt.pointer.appendfile
	return appending
}
func OptionsBufferSize(size int) WithOption { func OptionsBufferSize(size int) WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.buffer_size = size opt.pointer.buffer_size = size
@ -98,6 +104,22 @@ func OptionDontCloseFile() WithOption {
return f return f
} }
// OptionsAppendFile returns a WithOption that sets the appendfile flag to
// true on the Options it is applied to. Writers that check AppendFile then
// open their output with os.O_APPEND|os.O_CREATE|os.O_WRONLY instead of
// calling os.Create.
func OptionsAppendFile() WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.appendfile = true
	})
}
// OptionsNewFile returns a WithOption that sets the appendfile flag to
// false on the Options it is applied to, which is also the value
// MakeOptions starts from.
func OptionsNewFile() WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.appendfile = false
	})
}
// Allows to specify the ascii code corresponding to // Allows to specify the ascii code corresponding to
// a quality of 0 in fastq encoded quality scores. // a quality of 0 in fastq encoded quality scores.
func OptionsQualityShift(shift int) WithOption { func OptionsQualityShift(shift int) WithOption {

View File

@ -55,7 +55,18 @@ func WriteSequencesToFile(iterator obiiter.IBioSequence,
filename string, filename string,
options ...WithOption) (obiiter.IBioSequence, error) { options ...WithOption) (obiiter.IBioSequence, error) {
file, err := os.Create(filename) var file *os.File
var err error
opt := MakeOptions(options)
if opt.AppendFile() {
log.Debug("Open files in appending mode")
file, err = os.OpenFile(filename,
os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
} else {
file, err = os.Create(filename)
}
if err != nil { if err != nil {
log.Fatalf("open file error: %v", err) log.Fatalf("open file error: %v", err)

View File

@ -36,6 +36,10 @@ func DistributeSequence(sequences obiiter.IBioSequence) {
opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift())) opts = append(opts, obiformats.OptionsQualityShift(obiconvert.CLIOutputQualityShift()))
if CLIAppendSequences() {
opts = append(opts, obiformats.OptionsAppendFile())
}
var formater obiformats.SequenceBatchWriterToFile var formater obiformats.SequenceBatchWriterToFile
switch obiconvert.CLIOutputFormat() { switch obiconvert.CLIOutputFormat() {

View File

@ -2,9 +2,10 @@ package obidistribute
import ( import (
"fmt" "fmt"
log "github.com/sirupsen/logrus"
"strings" "strings"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiseq"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obitools/obiconvert"
"github.com/DavidGamba/go-getoptions" "github.com/DavidGamba/go-getoptions"
@ -15,6 +16,7 @@ var _SequenceClassifierTag = ""
var _BatchCount = 0 var _BatchCount = 0
var _HashSize = 0 var _HashSize = 0
var _NAValue = "NA" var _NAValue = "NA"
var _append = false
func DistributeOptionSet(options *getoptions.GetOpt) { func DistributeOptionSet(options *getoptions.GetOpt) {
options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern, options.StringVar(&_FilenamePattern, "pattern", _FilenamePattern,
@ -37,6 +39,10 @@ func DistributeOptionSet(options *getoptions.GetOpt) {
options.Alias("n"), options.Alias("n"),
options.Description("Indicates in how many batches the input file must bee splitted.")) options.Description("Indicates in how many batches the input file must bee splitted."))
options.BoolVar(&_append, "append", _append,
options.Alias("A"),
options.Description("Indicates to append sequence to files if they already exist."))
options.IntVar(&_HashSize, "hash", 0, options.IntVar(&_HashSize, "hash", 0,
options.Alias("H"), options.Alias("H"),
options.Description("Indicates to split the input into at most <n> batch based on a hash code of the seequence.")) options.Description("Indicates to split the input into at most <n> batch based on a hash code of the seequence."))
@ -48,6 +54,10 @@ func OptionSet(options *getoptions.GetOpt) {
DistributeOptionSet(options) DistributeOptionSet(options)
} }
// CLIAppendSequences returns the current value of the package-level _append
// flag, which DistributeOptionSet binds to the "append" command-line option.
func CLIAppendSequences() bool {
	appendRequested := _append
	return appendRequested
}
func CLISequenceClassifier() *obiseq.BioSequenceClassifier { func CLISequenceClassifier() *obiseq.BioSequenceClassifier {
switch { switch {
case _SequenceClassifierTag != "": case _SequenceClassifierTag != "":