From f6353fbf289eef7714c6e5727d891f2edabb4e82 Mon Sep 17 00:00:00 2001 From: mercierc Date: Thu, 11 Nov 2021 15:24:12 +1300 Subject: [PATCH] obi export: added options to export to metabaR compatible format --- .../obitools3/apps/optiongroups/__init__.py | 23 +++++ python/obitools3/commands/export.pyx | 85 +++++++++++++++++++ python/obitools3/format/tab.pxd | 4 +- python/obitools3/format/tab.pyx | 16 +++- python/obitools3/parsers/ngsfilter.pyx | 6 +- 5 files changed, 127 insertions(+), 7 deletions(-) diff --git a/python/obitools3/apps/optiongroups/__init__.py b/python/obitools3/apps/optiongroups/__init__.py index 79ccbe0..bc0aab5 100755 --- a/python/obitools3/apps/optiongroups/__init__.py +++ b/python/obitools3/apps/optiongroups/__init__.py @@ -297,6 +297,29 @@ def __addExportOutputOption(optionManager): const=b'tabular', help="Output file is in tabular format") + group.add_argument('--metabaR-output', + action="store_const", dest="obi:outputformat", + default=None, + const=b'metabaR', + help="Export the files needed by the obifiles_to_metabarlist function of the metabaR package") + + group.add_argument('--metabaR-prefix', + action="store", dest="obi:metabarprefix", + type=str, + help="Prefix for the files when using --metabaR-output option") + + group.add_argument('--metabaR-ngsfilter', + action="store", dest="obi:metabarngsfilter", + type=str, + default=None, + help="URI to the ngsfilter view when using --metabaR-output option (if not provided, it is not exported)") + + group.add_argument('--metabaR-samples', + action="store", dest="obi:metabarsamples", + type=str, + default=None, + help="URI to the sample metadata view when using --metabaR-output option (if not provided, it is built as just a list of the sample names)") + group.add_argument('--only-keys', action="append", dest="obi:only_keys", type=str, diff --git a/python/obitools3/commands/export.pyx b/python/obitools3/commands/export.pyx index aacf425..842dd74 100755 --- a/python/obitools3/commands/export.pyx +++ b/python/obitools3/commands/export.pyx @@ -6,6 +6,9 @@ from obitools3.apps.config import logger from obitools3.dms import DMS from obitools3.dms.obiseq import Nuc_Seq from obitools3.dms.capi.obiview cimport QUALITY_COLUMN +from obitools3.writers.tab import TabWriter +from obitools3.format.tab import TabFormat +from obitools3.utils cimport tobytes, tostr from obitools3.apps.optiongroups import addMinimalInputOption, \ addExportOutputOption, \ @@ -76,6 +79,13 @@ def run(config): else: pb = ProgressBar(withoutskip - skip, config) + if config['obi']['outputformat'] == b'metabaR': + # Check prefix + if "metabarprefix" not in config["obi"]: + raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)") + else: + metabaRprefix = config["obi"]["metabarprefix"] + i=0 for seq in iview : PyErr_CheckSignals() @@ -91,6 +101,81 @@ def run(config): pb(i, force=True) print("", file=sys.stderr) + if config['obi']['outputformat'] == b'metabaR': + + # Export ngsfilter file if view provided + if 'metabarngsfilter' in config['obi']: + ngsfilter_input = open_uri(config['obi']['metabarngsfilter']) + if ngsfilter_input is None: + raise Exception("Could not read ngsfilter view for metabaR output") + ngsfilter_view = ngsfilter_input[1] + + ngsfilter_output = open(config['obi']['metabarprefix']+'.ngsfilter', 'w') + + for line in ngsfilter_view: + + line_to_print = b"" + line_to_print += line[b'experiment'] + line_to_print += b"\t" + line_to_print += line[b'sample'] + line_to_print += b"\t" + line_to_print += line[b'forward_tag'] + line_to_print += b":" + line_to_print += line[b'reverse_tag'] + line_to_print += b"\t" + line_to_print += line[b'forward_primer'] + line_to_print += b"\t" + line_to_print += line[b'reverse_primer'] + line_to_print += b"\t" + line_to_print += line[b'additional_info'] + + print(tostr(line_to_print), file=ngsfilter_output) + + if ngsfilter_input[0] != input[0]: + ngsfilter_input[0].close() + ngsfilter_output.close() + + # Export sample metadata + samples_output = open(config['obi']['metabarprefix']+'_samples.csv', 'w') + + # Export sample metadata file if view provided + if 'metabarsamples' in config['obi']: + samples_input = open_uri(config['obi']['metabarsamples']) + if samples_input is None: + raise Exception("Could not read sample view for metabaR output") + samples_view = samples_input[1] + + # Export with tab formatter + TabWriter(TabFormat(header=True, sep='\t',), + samples_output, + header=True) + + if samples_input[0] != input[0]: + samples_input[0].close() + + # Else export just sample names from main view + else: + + sample_list = [] + if 'MERGED_sample' in iview: + sample_list = iview['MERGED_sample'].keys() + elif 'sample' not in iview: + for seq in iview: + sample = seq['sample'] + if sample not in sample_list: + sample_list.append(sample) + else: + logger("warning", "Can not read sample list from main view for metabaR sample list export") + + print("sample_id", file=samples_output) + for sample in sample_list: + line_to_print = b"" + line_to_print += sample + line_to_print += b"\t" + print(tostr(line_to_print), file=samples_output) + + samples_output.close() + # TODO save command in input dms? if not BrokenPipeError and not IOError: diff --git a/python/obitools3/format/tab.pxd b/python/obitools3/format/tab.pxd index 9574fd2..cb20f00 100755 --- a/python/obitools3/format/tab.pxd +++ b/python/obitools3/format/tab.pxd @@ -6,4 +6,6 @@ cdef class TabFormat: cdef bytes NAString cdef set tags cdef bytes sep - cdef bint NAIntTo0 \ No newline at end of file + cdef bint NAIntTo0 + cdef bint metabaR + cdef bint ngsfilter diff --git a/python/obitools3/format/tab.pyx b/python/obitools3/format/tab.pyx index cabdcd2..d05b4c9 100755 --- a/python/obitools3/format/tab.pyx +++ b/python/obitools3/format/tab.pyx @@ -10,13 +10,15 @@ import sys cdef class TabFormat: - def __init__(self, list tags=[], header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True): + def __init__(self, list tags=[], header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True, metabaR=False, ngsfilter=False): self.tags = set(tags) self.header = header self.first_line = True self.NAString = NAString self.sep = sep self.NAIntTo0 = NAIntTo0 + self.metabaR = metabaR + self.ngsfilter = ngsfilter @cython.boundscheck(False) def __call__(self, object data): @@ -34,13 +36,21 @@ cdef class TabFormat: if self.header and self.first_line: for k in ktags: if k in tags: + if self.metabaR: + if k == b'NUC_SEQ': + ktoprint = b'sequence' + else: + ktoprint = k.lower() + ktoprint = ktoprint.replace(b'merged_', b'') + else: + ktoprint = k if isinstance(data.view[k], Column_multi_elts): keys = data.view[k].keys() keys.sort() for k2 in keys: - line.append(tobytes(k)+b':'+tobytes(k2)) + line.append(tobytes(ktoprint)+b':'+tobytes(k2)) else: - line.append(tobytes(k)) + line.append(tobytes(ktoprint)) r = self.sep.join(value for value in line) r += b'\n' line = [] diff --git a/python/obitools3/parsers/ngsfilter.pyx b/python/obitools3/parsers/ngsfilter.pyx index 0761294..1077e11 100644 --- a/python/obitools3/parsers/ngsfilter.pyx +++ b/python/obitools3/parsers/ngsfilter.pyx @@ -48,13 +48,13 @@ def ngsfilterIterator(lineiterator, all_lines.insert(0, firstline) # Insert header for column names - column_names = [b"experiment", b"sample", b"forward_tag", b"reverse_tag", b"forward_primer", b"reverse_primer"] + column_names = [b"experiment", b"sample", b"forward_tag", b"reverse_tag", b"forward_primer", b"reverse_primer",b"additional_info"] header = out_sep.join(column_names) new_lines.append(header) for line in all_lines: - split_line = line.split() + split_line = line.split(maxsplit=5) tags = split_line.pop(2) tags = tags.split(b":") for t_idx in range(len(tags)): @@ -64,7 +64,7 @@ def ngsfilterIterator(lineiterator, tags.append(tags[0]) split_line.insert(2, tags[0]) split_line.insert(3, tags[1]) - new_lines.append(out_sep.join(split_line[0:6])) + new_lines.append(out_sep.join(split_line[0:7])) return tabIterator(iter(new_lines), header = True,