From 2b8c066f8e87c7a745a8274895b179b5a45523b0 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Sun, 31 Mar 2019 15:39:38 +0200 Subject: [PATCH] Cython: added possibility to output in tabular format --- .../obitools3/apps/optiongroups/__init__.py | 27 ++++++++++++++++--- python/obitools3/uri/decode.pyx | 15 ++++++++--- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/python/obitools3/apps/optiongroups/__init__.py b/python/obitools3/apps/optiongroups/__init__.py index bbde3cf..43391fb 100755 --- a/python/obitools3/apps/optiongroups/__init__.py +++ b/python/obitools3/apps/optiongroups/__init__.py @@ -115,9 +115,9 @@ def __addImportInputOption(optionManager): type=str, help="String associated with Non Available (NA) values in the input") - -def __addTabularInputOption(optionManager): - group = optionManager.add_argument_group("Input format options for tabular files") + +def __addTabularOption(optionManager): + group = optionManager.add_argument_group("Input and output format options for tabular files") group.add_argument('--header', action="store_true", dest="obi:header", @@ -126,9 +126,15 @@ def __addTabularInputOption(optionManager): group.add_argument('--sep', action="store", dest="obi:sep", - default=None, + default="\t", type=str, help="Column separator") + + +def __addTabularInputOption(optionManager): + group = optionManager.add_argument_group("Input format options for tabular files") + + __addTabularOption(optionManager) group.add_argument('--dec', action="store", dest="obi:dec", @@ -244,6 +250,12 @@ def __addExportOutputOption(optionManager): const=b'fastq', help="Output file is in fastq format") + group.add_argument('--tab-output', + action="store_const", dest="obi:outputformat", + default=None, + const=b'tabular', + help="Output file is in tabular format") + group.add_argument('--print-na', action="store_true", dest="obi:printna", default=False, @@ -260,13 +272,20 @@ def addMinimalOutputOption(optionManager): __addOutputOption(optionManager) __addDMSOutputOption(optionManager) + +def addTabularOutputOption(optionManager): + __addTabularOption(optionManager) + + def addExportOutputOption(optionManager): __addOutputOption(optionManager) __addExportOutputOption(optionManager) + __addTabularOption(optionManager) def addAllOutputOption(optionManager): __addOutputOption(optionManager) __addDMSOutputOption(optionManager) __addExportOutputOption(optionManager) + __addTabularOption(optionManager) diff --git a/python/obitools3/uri/decode.pyx b/python/obitools3/uri/decode.pyx index 14dba86..d7c15d2 100755 --- a/python/obitools3/uri/decode.pyx +++ b/python/obitools3/uri/decode.pyx @@ -15,14 +15,18 @@ from obitools3.parsers.universal import entryIteratorFactory from obitools3.writers.fasta import FastaNucWriter from obitools3.writers.fastq import FastqWriter +from obitools3.writers.tab import TabWriter from obitools3.format.fasta import FastaFormat from obitools3.format.fastq import FastqFormat +from obitools3.format.tab import TabFormat from obitools3.dms.obiseq import Nuc_Seq from obitools3.apps.config import getConfiguration,logger from obitools3.apps.temp import get_temp_dms from obitools3.utils cimport tobytes, count_entries # TODO tobytes because can't read options as bytes +from obitools3.files.universalopener cimport uopen + from obitools3.dms.capi.obierrno cimport obi_errno, \ OBIVIEW_ALREADY_EXISTS_ERROR @@ -283,17 +287,18 @@ def open_uri(uri, format=config["obi"][formatkey] except KeyError: format=None - + if b'seqtype' in qualifiers: seqtype=qualifiers[b'seqtype'][0] else: - if format == b"ngsfilter": # TODO discuss + if format == b"ngsfilter" or format == b"tabular": # TODO discuss seqtype=None else: try: seqtype=config["obi"]["seqtype"] except KeyError: seqtype=b"nuc" + config["obi"]["seqtype"] = seqtype if b'skip' in qualifiers: skip=int(qualifiers[b"skip"][0]) @@ -504,7 +509,11 @@ def open_uri(uri, skip = skip, only = only) else: - raise NotImplementedError('Output sequence file format not implemented') + iseq = TabWriter(TabFormat(header=header, NAString=nastring, sep=sep), + file, + skip=skip, + only=only, + header=header) elif format==b"ngsfilter": objclass = dict if input: