add the --skip-empty option

Former-commit-id: ec9cb0ecaf90a61bf9289cf4c089b5cc2fcb65a5
This commit is contained in:
2023-07-17 14:24:02 +02:00
parent b44fcfb2a0
commit be47ec909c
10 changed files with 65 additions and 12 deletions

View File

@ -4,7 +4,9 @@
### Bugs ### Bugs
- Patch a bug in the install script to correctly follow download redirection. - Patch a bug in the install-script to correctly follow download redirection.
- Patch a bug in `obitagpcr` to consider the renaming of the `forward_mismatch` and `reverse_mismatch` tags
to `forward_error` and `reverse_error`.
### Enhancement ### Enhancement
@ -14,6 +16,10 @@
### New feature ### New feature
- In every *OBITools*, writing an empty sequence (a sequence of length zero) throws an error and
stops the execution of the tool, unless the **--skip-empty** option is set. In that case, the empty
sequence is ignored and not printed to the output. When the output involves paired sequences, the **--skip-empty**
option is ignored.
- In `obiannotate` adds the **--set-identifier** option to edit the sequence identifier - In `obiannotate` adds the **--set-identifier** option to edit the sequence identifier
- In `obitag` adds the **--save-db** option allowing at the end of the run of `obitag` to save a - In `obitag` adds the **--save-db** option allowing at the end of the run of `obitag` to save a
modified version of the reference database containing the computed index. This allows next modified version of the reference database containing the computed index. This allows next

View File

@ -2,6 +2,7 @@ package main
import ( import (
"os" "os"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter" "git.metabarcoding.org/lecasofts/go/obitools/pkg/obiiter"
@ -23,7 +24,7 @@ func main() {
os.Exit(1) os.Exit(1)
} }
cleaned := obiclean.IOBIClean(fs) cleaned := obiclean.CLIOBIClean(fs)
obiconvert.CLIWriteBioSequences(cleaned, true) obiconvert.CLIWriteBioSequences(cleaned, true)

1
go.mod
View File

@ -19,6 +19,7 @@ require (
) )
require ( require (
github.com/chen3feng/stl4go v0.1.1 // indirect
github.com/klauspost/compress v1.16.3 // indirect github.com/klauspost/compress v1.16.3 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect

2
go.sum
View File

@ -6,6 +6,8 @@ github.com/PaesslerAG/jsonpath v0.1.0 h1:gADYeifvlqK3R3i2cR5B4DGgxLXIPb3TRTH1mGi
github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8= github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0= github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df h1:GSoSVRLoBaFpOOds6QyY1L8AX7uoY+Ln3BHc22W40X0=
github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM= github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df/go.mod h1:hiVxq5OP2bUGBRNS3Z/bt/reCLFNbdcST6gISi1fiOM=
github.com/chen3feng/stl4go v0.1.1 h1:0L1+mDw7pomftKDruM23f1mA7miavOj6C6MZeadzN2Q=
github.com/chen3feng/stl4go v0.1.1/go.mod h1:5ml3psLgETJjRJnMbPE+JiHLrCpt+Ajc2weeTECXzWU=
github.com/daichi-m/go18ds v1.12.1 h1:Pjc3IApmN4qtDiovGP9MvMpIzgZle3SHUcNaA5j46bg= github.com/daichi-m/go18ds v1.12.1 h1:Pjc3IApmN4qtDiovGP9MvMpIzgZle3SHUcNaA5j46bg=
github.com/daichi-m/go18ds v1.12.1/go.mod h1:wc2dURUr8aMxxC4Mn5ObJGVM7uIKU8JagY4nhtonXq8= github.com/daichi-m/go18ds v1.12.1/go.mod h1:wc2dURUr8aMxxC4Mn5ObJGVM7uIKU8JagY4nhtonXq8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

View File

@ -55,11 +55,19 @@ func FormatFasta(seq *obiseq.BioSequence, formater FormatHeader) string {
folded) folded)
} }
func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader) []byte { func FormatFastaBatch(batch obiiter.BioSequenceBatch, formater FormatHeader, skipEmpty bool) []byte {
var bs bytes.Buffer var bs bytes.Buffer
for _, seq := range batch.Slice() { for _, seq := range batch.Slice() {
if seq.Len() > 0 {
bs.WriteString(FormatFasta(seq, formater)) bs.WriteString(FormatFasta(seq, formater))
bs.WriteString("\n") bs.WriteString("\n")
} else {
if skipEmpty {
log.Warnf("Sequence %s is empty and skiped in output",seq.Id())
} else {
log.Fatalf("Sequence %s is empty",seq.Id())
}
}
} }
return bs.Bytes() return bs.Bytes()
} }
@ -99,7 +107,7 @@ func WriteFasta(iterator obiiter.IBioSequence,
batch := iterator.Get() batch := iterator.Get()
chunkchan <- FileChunck{ chunkchan <- FileChunck{
FormatFastaBatch(batch, header_format), FormatFastaBatch(batch, header_format, opt.SkipEmptySequence()),
batch.Order(), batch.Order(),
} }
newIter.Push(batch) newIter.Push(batch)

View File

@ -39,11 +39,20 @@ func FormatFastq(seq *obiseq.BioSequence, quality_shift int, formater FormatHead
} }
func FormatFastqBatch(batch obiiter.BioSequenceBatch, quality_shift int, func FormatFastqBatch(batch obiiter.BioSequenceBatch, quality_shift int,
formater FormatHeader) []byte { formater FormatHeader, skipEmpty bool) []byte {
var bs bytes.Buffer var bs bytes.Buffer
for _, seq := range batch.Slice() { for _, seq := range batch.Slice() {
if seq.Len() > 0 {
bs.WriteString(FormatFastq(seq, quality_shift, formater)) bs.WriteString(FormatFastq(seq, quality_shift, formater))
bs.WriteString("\n") bs.WriteString("\n")
} else {
if skipEmpty {
log.Warnf("Sequence %s is empty and skiped in output", seq.Id())
} else {
log.Fatalf("Sequence %s is empty", seq.Id())
}
}
} }
return bs.Bytes() return bs.Bytes()
} }
@ -90,7 +99,7 @@ func WriteFastq(iterator obiiter.IBioSequence,
for iterator.Next() { for iterator.Next() {
batch := iterator.Get() batch := iterator.Get()
chunk := FileChunck{ chunk := FileChunck{
FormatFastqBatch(batch, quality, header_format), FormatFastqBatch(batch, quality, header_format, opt.SkipEmptySequence()),
batch.Order(), batch.Order(),
} }
chunkchan <- chunk chunkchan <- chunk

View File

@ -16,6 +16,7 @@ type __options__ struct {
closefile bool closefile bool
appendfile bool appendfile bool
compressed bool compressed bool
skip_empty bool
csv_id bool csv_id bool
csv_sequence bool csv_sequence bool
csv_quality bool csv_quality bool
@ -48,6 +49,7 @@ func MakeOptions(setters []WithOption) Options {
closefile: false, closefile: false,
appendfile: false, appendfile: false,
compressed: false, compressed: false,
skip_empty: false,
csv_id: true, csv_id: true,
csv_definition: false, csv_definition: false,
csv_count: false, csv_count: false,
@ -110,6 +112,10 @@ func (opt Options) CompressedFile() bool {
return opt.pointer.compressed return opt.pointer.compressed
} }
// SkipEmptySequence returns the value of the skip_empty flag stored in the
// option set.
func (opt Options) SkipEmptySequence() bool {
	skip := opt.pointer.skip_empty
	return skip
}
func (opt Options) CSVId() bool { func (opt Options) CSVId() bool {
return opt.pointer.csv_id return opt.pointer.csv_id
} }
@ -194,6 +200,14 @@ func OptionsCompressed(compressed bool) WithOption {
return f return f
} }
// OptionsSkipEmptySequence builds a WithOption setter that stores skip in the
// skip_empty field of the option set it is applied to.
func OptionsSkipEmptySequence(skip bool) WithOption {
	return WithOption(func(opt Options) {
		opt.pointer.skip_empty = skip
	})
}
func OptionsNewFile() WithOption { func OptionsNewFile() WithOption {
f := WithOption(func(opt Options) { f := WithOption(func(opt Options) {
opt.pointer.appendfile = false opt.pointer.appendfile = false

View File

@ -285,7 +285,7 @@ func Weight(sequence *obiseq.BioSequence) map[string]int {
return weight return weight
} }
func IOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence { func CLIOBIClean(itertator obiiter.IBioSequence) obiiter.IBioSequence {
db := itertator.Load() db := itertator.Load()

View File

@ -26,6 +26,7 @@ var __output_solexa_quality__ = false
var __no_progress_bar__ = false var __no_progress_bar__ = false
var __compressed__ = false var __compressed__ = false
var __skip_empty__ = false
var __output_file_name__ = "-" var __output_file_name__ = "-"
var __paired_file_name__ = "" var __paired_file_name__ = ""
@ -70,6 +71,9 @@ func OutputModeOptionSet(options *getoptions.GetOpt) {
options.Alias("Z"), options.Alias("Z"),
options.Description("Output is compressed")) options.Description("Output is compressed"))
options.BoolVar(&__skip_empty__, "skip-empty", __skip_empty__,
options.Description("Sequences of length equal to zero are suppressed from the output"))
options.StringVar(&__output_file_name__, "out", __output_file_name__, options.StringVar(&__output_file_name__, "out", __output_file_name__,
options.Alias("o"), options.Alias("o"),
options.ArgName("FILENAME"), options.ArgName("FILENAME"),
@ -141,6 +145,10 @@ func CLICompressed() bool {
return __compressed__ return __compressed__
} }
// CLISkipEmpty returns the current value of the package-level __skip_empty__
// flag.
func CLISkipEmpty() bool {
	skip := __skip_empty__
	return skip
}
func CLIInputFastHeaderFormat() string { func CLIInputFastHeaderFormat() string {
switch { switch {
case __input_fastjson_format__: case __input_fastjson_format__:

View File

@ -80,6 +80,8 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
var reverse string var reverse string
fn, reverse = BuildPairedFileNames(fn) fn, reverse = BuildPairedFileNames(fn)
opts = append(opts, obiformats.WritePairedReadsTo(reverse)) opts = append(opts, obiformats.WritePairedReadsTo(reverse))
} else {
opts = append(opts, obiformats.OptionsSkipEmptySequence(CLISkipEmpty()))
} }
switch CLIOutputFormat() { switch CLIOutputFormat() {
@ -91,6 +93,7 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
newIter, err = obiformats.WriteSequencesToFile(iterator, fn, opts...) newIter, err = obiformats.WriteSequencesToFile(iterator, fn, opts...)
} }
} else { } else {
opts = append(opts, obiformats.OptionsSkipEmptySequence(CLISkipEmpty()))
switch CLIOutputFormat() { switch CLIOutputFormat() {
case "fastq": case "fastq":
newIter, err = obiformats.WriteFastqToStdout(iterator, opts...) newIter, err = obiformats.WriteFastqToStdout(iterator, opts...)
@ -99,6 +102,7 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
default: default:
newIter, err = obiformats.WriteSequencesToStdout(iterator, opts...) newIter, err = obiformats.WriteSequencesToStdout(iterator, opts...)
} }
} }
if err != nil { if err != nil {