change the model for representing paired reads and extend its usage to other commands

This commit is contained in:
2023-02-23 23:35:58 +01:00
parent ebb05fcdf7
commit 072b85e155
23 changed files with 598 additions and 338 deletions
+29 -1
View File
@@ -27,6 +27,9 @@ var __output_solexa_quality__ = false
// __no_progress_bar__ is bound to the "no-progressbar" option; when true the
// progress bar printing is disabled.
var __no_progress_bar__ = false
// __compressed__ is bound to the "compress" option (alias "Z") and requests a
// compressed output.
var __compressed__ = false
// __output_file_name__ is bound to the "out" option (alias "o") and holds the
// filename used for saving the output. It defaults to "-".
var __output_file_name__ = "-"
// __paired_file_name__ is bound to the "paired-with" option and holds the
// filename containing the paired reads. Empty means no paired file was given.
var __paired_file_name__ = ""
func InputOptionSet(options *getoptions.GetOpt) {
// options.IntVar(&__skipped_entries__, "skip", __skipped_entries__,
// options.Description("The N first sequence records of the file are discarded from the analysis and not reported to the output file."))
@@ -73,15 +76,29 @@ func OutputOptionSet(options *getoptions.GetOpt) {
options.BoolVar(&__no_progress_bar__, "no-progressbar", false,
options.Description("Disable the progress bar printing"))
options.BoolVar(&__compressed__, "--compress", false,
options.BoolVar(&__compressed__, "compress", false,
options.Alias("Z"),
options.Description("Output is compressed"))
options.StringVar(&__output_file_name__, "out", __output_file_name__,
options.Alias("o"),
options.ArgName("FILENAME"),
options.Description("Filename used for saving the output"),
)
}
// PairedFilesOptionSet registers the "paired-with" string option on the
// given option parser. The parsed value is stored in __paired_file_name__,
// whose current content is used as the default, and designates the file
// containing the paired reads.
func PairedFilesOptionSet(options *getoptions.GetOpt) {
	argName := options.ArgName("FILENAME")
	help := options.Description("Filename containing the paired reads")

	options.StringVar(
		&__paired_file_name__,
		"paired-with",
		__paired_file_name__,
		argName,
		help,
	)
}
// OptionSet registers, in order, the input, output and paired-file option
// groups on the given option parser.
func OptionSet(options *getoptions.GetOpt) {
	registrars := []func(*getoptions.GetOpt){
		InputOptionSet,
		OutputOptionSet,
		PairedFilesOptionSet,
	}

	for _, register := range registrars {
		register(options)
	}
}
// Returns true if the number of reads described in the
@@ -170,3 +187,14 @@ func CLIOutputQualityShift() int {
// CLIProgressBar reports whether the progress bar has to be displayed,
// that is, whether the "no-progressbar" flag is not set.
func CLIProgressBar() bool {
	disabled := __no_progress_bar__
	return !disabled
}
// CLIOutPutFileName returns the current value of __output_file_name__, the
// variable bound to the "out" command line option.
func CLIOutPutFileName() string {
return __output_file_name__
}
// CLIHasPairedFile reports whether a paired reads file name has been set,
// i.e. whether __paired_file_name__ is a non-empty string.
func CLIHasPairedFile() bool {
	return len(__paired_file_name__) > 0
}
// CLIPairedFileName returns the current value of __paired_file_name__, the
// variable bound to the "paired-with" command line option. The returned
// string is empty when no paired file name has been set.
func CLIPairedFileName() string {
return __paired_file_name__
}
+12 -1
View File
@@ -67,7 +67,7 @@ func _ExpandListOfFiles(check_ext bool, filenames ...string) ([]string, error) {
return list_of_files, nil
}
func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
func CLIReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
var iterator obiiter.IBioSequence
var reader func(string, ...obiformats.WithOption) (obiiter.IBioSequence, error)
@@ -142,6 +142,17 @@ func ReadBioSequences(filenames ...string) (obiiter.IBioSequence, error) {
if err != nil {
return obiiter.NilIBioSequence, err
}
if CLIPairedFileName() != "" {
ip, err := reader(CLIPairedFileName(), opts...)
if err != nil {
return obiiter.NilIBioSequence, err
}
iterator = iterator.PairTo(ip)
}
}
// list_of_files = list_of_files[1:]
+50 -10
View File
@@ -1,6 +1,9 @@
package obiconvert
import (
"path/filepath"
"strings"
log "github.com/sirupsen/logrus"
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obiformats"
@@ -8,6 +11,27 @@ import (
"git.metabarcoding.org/lecasofts/go/obitools/pkg/obioptions"
)
// BuildPairedFileNames derives the names of the forward and reverse files
// of a paired output from a single file name.
//
// The tags "_R1" (forward) and "_R2" (reverse) are inserted in the base name
// of filename just before its first dot, so that everything following that
// dot (e.g. "fastq.gz") is preserved as the suffix of both names. When the
// base name contains no dot, or nothing follows the first dot, the tags are
// simply appended to the part preceding the dot. The directory part of
// filename, if any, is kept in front of both results.
//
// It returns the forward file name followed by the reverse file name.
func BuildPairedFileNames(filename string) (string, string) {
	dir, name := filepath.Split(filename)

	// Split on the first dot only so that compound extensions such as
	// "fastq.gz" stay in one piece.
	parts := strings.SplitN(name, ".", 2)

	forward := parts[0] + "_R1"
	reverse := parts[0] + "_R2"

	// SplitN yields a single element when name contains no dot: check the
	// length before reading parts[1] so that such names do not panic.
	if len(parts) > 1 && parts[1] != "" {
		suffix := "." + parts[1]
		forward += suffix
		reverse += suffix
	}

	if dir != "" {
		forward = filepath.Join(dir, forward)
		reverse = filepath.Join(dir, reverse)
	}

	return forward, reverse
}
func CLIWriteBioSequences(iterator obiiter.IBioSequence,
terminalAction bool, filenames ...string) (obiiter.IBioSequence, error) {
@@ -45,7 +69,32 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
var err error
if len(filenames) == 0 {
// No file names are specified or it is "-" : the output is done on stdout
if CLIOutPutFileName() != "-" || (len(filenames) > 0 && filenames[0] != "-") {
var fn string
if len(filenames) == 0 {
fn = CLIOutPutFileName()
} else {
fn = filenames[0]
}
if iterator.IsPaired() {
var reverse string
fn, reverse = BuildPairedFileNames(fn)
opts = append(opts, obiformats.WritePairedReadsTo(reverse))
}
switch CLIOutputFormat() {
case "fastq":
newIter, err = obiformats.WriteFastqToFile(iterator, fn, opts...)
case "fasta":
newIter, err = obiformats.WriteFastaToFile(iterator, fn, opts...)
default:
newIter, err = obiformats.WriteSequencesToFile(iterator, fn, opts...)
}
} else {
switch CLIOutputFormat() {
case "fastq":
newIter, err = obiformats.WriteFastqToStdout(iterator, opts...)
@@ -54,15 +103,6 @@ func CLIWriteBioSequences(iterator obiiter.IBioSequence,
default:
newIter, err = obiformats.WriteSequencesToStdout(iterator, opts...)
}
} else {
switch CLIOutputFormat() {
case "fastq":
newIter, err = obiformats.WriteFastqToFile(iterator, filenames[0], opts...)
case "fasta":
newIter, err = obiformats.WriteFastaToFile(iterator, filenames[0], opts...)
default:
newIter, err = obiformats.WriteSequencesToFile(iterator, filenames[0], opts...)
}
}
if err != nil {