mirror of
https://github.com/metabarcoding/obitools4.git
synced 2025-12-08 16:50:27 +00:00
add the --skip-empty option
Former-commit-id: ec9cb0ecaf90a61bf9289cf4c089b5cc2fcb65a5
This commit is contained in:
@@ -55,11 +55,19 @@ func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
|
||||
folded)
|
||||
}
|
||||
|
||||
func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []byte {
|
||||
// FormatFastaBatch formats the sequences of batch with FormatFasta and
// returns the concatenated result as a byte slice, one "\n" after each
// formatted sequence.
//
// formater is passed unchanged to FormatFasta for every sequence.
//
// A sequence whose Len() is not greater than zero is never written: when
// skipEmpty is true a warning naming the sequence id is logged and the
// sequence is left out; when skipEmpty is false log.Fatalf is called with
// the sequence id.
// NOTE(review): log.Fatalf presumably aborts the program; confirm against
// the logging package imported by this file.
// NOTE(review): "skiped" in the warning text is a typo for "skipped".
func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, skipEmpty bool) []byte {
|
||||
var bs bytes.Buffer
|
||||
for _, seq := range batch.Slice() {
|
||||
// NOTE(review): the next two write statements look like the pre-change
// side of this diff hunk (unconditional write); the guarded version follows.
bs.WriteString(FormatFasta(seq, formater))
|
||||
bs.WriteString("\n")
|
||||
// Only non-empty sequences are written to the buffer.
if seq.Len() > 0 {
|
||||
bs.WriteString(FormatFasta(seq, formater))
|
||||
bs.WriteString("\n")
|
||||
} else {
|
||||
if skipEmpty {
|
||||
log.Warnf("Sequence %s is empty and skiped in output",seq.Id())
|
||||
} else {
|
||||
log.Fatalf("Sequence %s is empty",seq.Id())
|
||||
}
|
||||
}
|
||||
}
|
||||
return bs.Bytes()
|
||||
}
|
||||
@@ -99,7 +107,7 @@ func WriteFasta(iterator obiiter.IBioSequence,
|
||||
batch := iterator.Get()
|
||||
|
||||
chunkchan <- FileChunck{
|
||||
FormatFastaBatch(batch, header_format),
|
||||
FormatFastaBatch(batch, header_format, opt.SkipEmptySequence()),
|
||||
batch.Order(),
|
||||
}
|
||||
newIter.Push(batch)
|
||||
|
||||
@@ -39,11 +39,20 @@ func FormatFastq(seq *obiseq.BioSequence, quality_shift int, formater FormatHead
|
||||
}
|
||||
|
||||
// FormatFastqBatch formats the sequences of batch with FormatFastq and
// returns the concatenated result as a byte slice, one "\n" after each
// formatted sequence.
//
// quality_shift and formater are passed unchanged to FormatFastq for every
// sequence.
//
// A sequence whose Len() is not greater than zero is never written: when
// skipEmpty is true a warning naming the sequence id is logged and the
// sequence is left out; when skipEmpty is false log.Fatalf is called with
// the sequence id.
// NOTE(review): log.Fatalf presumably aborts the program; confirm against
// the logging package imported by this file.
// NOTE(review): "skiped" in the warning text is a typo for "skipped".
// NOTE(review): this span is a rendered diff; the parameter line without
// skipEmpty and the first unconditional pair of writes in the loop look
// like the pre-change side of the hunk.
func FormatFastqBatch(batch obiiter.BioSequenceBatch, quality_shift int,
|
||||
formater FormatHeader) []byte {
|
||||
formater FormatHeader, skipEmpty bool) []byte {
|
||||
var bs bytes.Buffer
|
||||
for _, seq := range batch.Slice() {
|
||||
bs.WriteString(FormatFastq(seq, quality_shift, formater))
|
||||
bs.WriteString("\n")
|
||||
// Only non-empty sequences are written to the buffer.
if seq.Len() > 0 {
|
||||
bs.WriteString(FormatFastq(seq, quality_shift, formater))
|
||||
bs.WriteString("\n")
|
||||
} else {
|
||||
if skipEmpty {
|
||||
log.Warnf("Sequence %s is empty and skiped in output", seq.Id())
|
||||
} else {
|
||||
log.Fatalf("Sequence %s is empty", seq.Id())
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return bs.Bytes()
|
||||
}
|
||||
@@ -90,7 +99,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
|
||||
for iterator.Next() {
|
||||
batch := iterator.Get()
|
||||
chunk := FileChunck{
|
||||
FormatFastqBatch(batch, quality, header_format),
|
||||
FormatFastqBatch(batch, quality, header_format, opt.SkipEmptySequence()),
|
||||
batch.Order(),
|
||||
}
|
||||
chunkchan <- chunk
|
||||
|
||||
@@ -16,6 +16,7 @@ type __options__ struct {
|
||||
closefile bool
|
||||
appendfile bool
|
||||
compressed bool
|
||||
skip_empty bool
|
||||
csv_id bool
|
||||
csv_sequence bool
|
||||
csv_quality bool
|
||||
@@ -48,6 +49,7 @@ func MakeOptions(setters []WithOption) Options {
|
||||
closefile: false,
|
||||
appendfile: false,
|
||||
compressed: false,
|
||||
skip_empty: false,
|
||||
csv_id: true,
|
||||
csv_definition: false,
|
||||
csv_count: false,
|
||||
@@ -110,6 +112,10 @@ func (opt Options) CompressedFile() bool {
|
||||
return opt.pointer.compressed
|
||||
}
|
||||
|
||||
// SkipEmptySequence returns the skip_empty flag stored in the options.
// The FASTA and FASTQ writers pass this value as the skipEmpty argument of
// their batch formatting functions.
func (opt Options) SkipEmptySequence() bool {
|
||||
return opt.pointer.skip_empty
|
||||
}
|
||||
|
||||
// CSVId returns the csv_id flag stored in the options.
// NOTE(review): presumably controls whether the sequence id column is
// emitted in CSV output — confirm against the CSV writer.
func (opt Options) CSVId() bool {
|
||||
return opt.pointer.csv_id
|
||||
}
|
||||
@@ -194,6 +200,14 @@ func OptionsCompressed(compressed bool) WithOption {
|
||||
return f
|
||||
}
|
||||
|
||||
// OptionsSkipEmptySequence returns a WithOption setter that stores skip in
// the skip_empty field of the options it is applied to. The stored value is
// the one later returned by Options.SkipEmptySequence.
func OptionsSkipEmptySequence(skip bool) WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
// Written through opt.pointer, so the change is visible to every copy
// of the Options value that holds the same pointer.
opt.pointer.skip_empty = skip
|
||||
})
|
||||
|
||||
return f
|
||||
}
|
||||
|
||||
func OptionsNewFile() WithOption {
|
||||
f := WithOption(func(opt Options) {
|
||||
opt.pointer.appendfile = false
|
||||
|
||||
@@ -285,7 +285,7 @@ func Weight(sequence *obiseq.BioSequence) map[string]int {
|
||||
return weight
|
||||
}
|
||||
|
||||
func IOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
func CLIOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
|
||||
|
||||
db := itertator.Load()
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ var __output_solexa_quality__ = false
|
||||
|
||||
var __no_progress_bar__ = false
|
||||
var __compressed__ = false
|
||||
// __skip_empty__ backs the --skip-empty command line flag; it defaults to
// false and is read through CLISkipEmpty.
var __skip_empty__ = false
|
||||
|
||||
var __output_file_name__ = "-"
|
||||
var __paired_file_name__ = ""
|
||||
@@ -70,6 +71,9 @@ func OutputModeOptionSet(options *getoptions.GetOpt) {
|
||||
options.Alias("Z"),
|
||||
options.Description("Output is compressed"))
|
||||
|
||||
options.BoolVar(&__skip_empty__, "skip-empty", __skip_empty__,
|
||||
options.Description("Sequences of length equal to zero are suppressed from the output"))
|
||||
|
||||
options.StringVar(&__output_file_name__, "out", __output_file_name__,
|
||||
options.Alias("o"),
|
||||
options.ArgName("FILENAME"),
|
||||
@@ -141,6 +145,10 @@ func CLICompressed() bool {
|
||||
return __compressed__
|
||||
}
|
||||
|
||||
// CLISkipEmpty returns the current value of the package-level
// __skip_empty__ variable, which the "skip-empty" command line option is
// bound to.
func CLISkipEmpty() bool {
|
||||
return __skip_empty__
|
||||
}
|
||||
|
||||
func CLIInputFastHeaderFormat() string {
|
||||
switch {
|
||||
case __input_fastjson_format__:
|
||||
|
||||
@@ -80,6 +80,8 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
||||
var reverse string
|
||||
fn, reverse = BuildPairedFileNames(fn)
|
||||
opts = append(opts, obiformats.WritePairedReadsTo(reverse))
|
||||
} else {
|
||||
opts = append(opts, obiformats.OptionsSkipEmptySequence(CLISkipEmpty()))
|
||||
}
|
||||
|
||||
switch CLIOutputFormat() {
|
||||
@@ -91,6 +93,7 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
||||
newIter, err = obiformats.WriteSequencesToFile(iterator, fn, opts...)
|
||||
}
|
||||
} else {
|
||||
opts = append(opts, obiformats.OptionsSkipEmptySequence(CLISkipEmpty()))
|
||||
switch CLIOutputFormat() {
|
||||
case "fastq":
|
||||
newIter, err = obiformats.WriteFastqToStdout(iterator, opts...)
|
||||
@@ -99,6 +102,7 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
|
||||
default:
|
||||
newIter, err = obiformats.WriteSequencesToStdout(iterator, opts...)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user