obi export: added options to export to a metabaR-compatible format
This commit is contained in:
@ -297,6 +297,29 @@ def __addExportOutputOption(optionManager):
|
|||||||
const=b'tabular',
|
const=b'tabular',
|
||||||
help="Output file is in tabular format")
|
help="Output file is in tabular format")
|
||||||
|
|
||||||
|
group.add_argument('--metabaR-output',
|
||||||
|
action="store_const", dest="obi:outputformat",
|
||||||
|
default=None,
|
||||||
|
const=b'metabaR',
|
||||||
|
help="Export the files needed by the obifiles_to_metabarlist function of the metabaR package")
|
||||||
|
|
||||||
|
group.add_argument('--metabaR-prefix',
|
||||||
|
action="store", dest="obi:metabarprefix",
|
||||||
|
type=str,
|
||||||
|
help="Prefix for the files when using --metabaR-output option")
|
||||||
|
|
||||||
|
group.add_argument('--metabaR-ngsfilter',
|
||||||
|
action="store", dest="obi:metabarngsfilter",
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help="URI to the ngsfilter view when using --metabaR-output option (if not provided, it is not exported)")
|
||||||
|
|
||||||
|
group.add_argument('--metabaR-samples',
|
||||||
|
action="store", dest="obi:metabarsamples",
|
||||||
|
type=str,
|
||||||
|
default=None,
|
||||||
|
help="URI to the sample metadata view when using --metabaR-output option (if not provided, it is built as just a list of the sample names)")
|
||||||
|
|
||||||
group.add_argument('--only-keys',
|
group.add_argument('--only-keys',
|
||||||
action="append", dest="obi:only_keys",
|
action="append", dest="obi:only_keys",
|
||||||
type=str,
|
type=str,
|
||||||
|
@ -6,6 +6,9 @@ from obitools3.apps.config import logger
|
|||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.obiseq import Nuc_Seq
|
from obitools3.dms.obiseq import Nuc_Seq
|
||||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
||||||
|
from obitools3.writers.tab import TabWriter
|
||||||
|
from obitools3.format.tab import TabFormat
|
||||||
|
from obitools3.utils cimport tobytes, tostr
|
||||||
|
|
||||||
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
||||||
addExportOutputOption, \
|
addExportOutputOption, \
|
||||||
@ -76,6 +79,13 @@ def run(config):
|
|||||||
else:
|
else:
|
||||||
pb = ProgressBar(withoutskip - skip, config)
|
pb = ProgressBar(withoutskip - skip, config)
|
||||||
|
|
||||||
|
if config['obi']['outputformat'] == b'metabaR':
|
||||||
|
# Check prefix
|
||||||
|
if "metabarprefix" not in config["obi"]:
|
||||||
|
raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)")
|
||||||
|
else:
|
||||||
|
metabaRprefix = config["obi"]["metabarprefix"]
|
||||||
|
|
||||||
i=0
|
i=0
|
||||||
for seq in iview :
|
for seq in iview :
|
||||||
PyErr_CheckSignals()
|
PyErr_CheckSignals()
|
||||||
@ -91,6 +101,81 @@ def run(config):
|
|||||||
pb(i, force=True)
|
pb(i, force=True)
|
||||||
print("", file=sys.stderr)
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
|
if config['obi']['outputformat'] == b'metabaR':
|
||||||
|
|
||||||
|
# Export ngsfilter file if view provided
|
||||||
|
if 'metabarngsfilter' in config['obi']:
|
||||||
|
ngsfilter_input = open_uri(config['obi']['metabarngsfilter'])
|
||||||
|
if ngsfilter_input is None:
|
||||||
|
raise Exception("Could not read ngsfilter view for metabaR output")
|
||||||
|
ngsfilter_view = ngsfilter_input[1]
|
||||||
|
|
||||||
|
ngsfilter_output = open(config['obi']['metabarprefix']+'.ngsfilter', 'w')
|
||||||
|
|
||||||
|
for line in ngsfilter_view:
|
||||||
|
|
||||||
|
line_to_print = b""
|
||||||
|
line_to_print += line[b'experiment']
|
||||||
|
line_to_print += b"\t"
|
||||||
|
line_to_print += line[b'sample']
|
||||||
|
line_to_print += b"\t"
|
||||||
|
line_to_print += line[b'forward_tag']
|
||||||
|
line_to_print += b":"
|
||||||
|
line_to_print += line[b'reverse_tag']
|
||||||
|
line_to_print += b"\t"
|
||||||
|
line_to_print += line[b'forward_primer']
|
||||||
|
line_to_print += b"\t"
|
||||||
|
line_to_print += line[b'reverse_primer']
|
||||||
|
line_to_print += b"\t"
|
||||||
|
line_to_print += line[b'additional_info']
|
||||||
|
|
||||||
|
print(tostr(line_to_print), file=ngsfilter_output)
|
||||||
|
|
||||||
|
if ngsfilter_input[0] != input[0]:
|
||||||
|
ngsfilter_input[0].close()
|
||||||
|
ngsfilter_output.close()
|
||||||
|
|
||||||
|
# Export sample metadata
|
||||||
|
samples_output = open(config['obi']['metabarprefix']+'_samples.csv', 'w')
|
||||||
|
|
||||||
|
# Export sample metadata file if view provided
|
||||||
|
if 'metabarsamples' in config['obi']:
|
||||||
|
samples_input = open_uri(config['obi']['metabarsamples'])
|
||||||
|
if samples_input is None:
|
||||||
|
raise Exception("Could not read sample view for metabaR output")
|
||||||
|
samples_view = samples_input[1]
|
||||||
|
|
||||||
|
# Export with tab formatter
|
||||||
|
TabWriter(TabFormat(header=True, sep='\t',),
|
||||||
|
samples_output,
|
||||||
|
header=True)
|
||||||
|
|
||||||
|
if samples_input[0] != input[0]:
|
||||||
|
samples_input[0].close()
|
||||||
|
|
||||||
|
# Else export just sample names from main view
|
||||||
|
else:
|
||||||
|
|
||||||
|
sample_list = []
|
||||||
|
if 'MERGED_sample' in iview:
|
||||||
|
sample_list = iview['MERGED_sample'].keys()
|
||||||
|
elif 'sample' not in iview:
|
||||||
|
for seq in iview:
|
||||||
|
sample = seq['sample']
|
||||||
|
if sample not in sample_list:
|
||||||
|
sample_list.append(sample)
|
||||||
|
else:
|
||||||
|
logger("warning", "Can not read sample list from main view for metabaR sample list export")
|
||||||
|
|
||||||
|
print("sample_id", file=samples_output)
|
||||||
|
for sample in sample_list:
|
||||||
|
line_to_print = b""
|
||||||
|
line_to_print += sample
|
||||||
|
line_to_print += b"\t"
|
||||||
|
print(tostr(line_to_print), file=samples_output)
|
||||||
|
|
||||||
|
samples_output.close()
|
||||||
|
|
||||||
# TODO save command in input dms?
|
# TODO save command in input dms?
|
||||||
|
|
||||||
if not BrokenPipeError and not IOError:
|
if not BrokenPipeError and not IOError:
|
||||||
|
@ -6,4 +6,6 @@ cdef class TabFormat:
|
|||||||
cdef bytes NAString
|
cdef bytes NAString
|
||||||
cdef set tags
|
cdef set tags
|
||||||
cdef bytes sep
|
cdef bytes sep
|
||||||
cdef bint NAIntTo0
|
cdef bint NAIntTo0
|
||||||
|
cdef bint metabaR
|
||||||
|
cdef bint ngsfilter
|
||||||
|
@ -10,13 +10,15 @@ import sys
|
|||||||
|
|
||||||
cdef class TabFormat:
|
cdef class TabFormat:
|
||||||
|
|
||||||
def __init__(self, list tags=[], header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True):
|
def __init__(self, list tags=[], header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True, metabaR=False, ngsfilter=False):
|
||||||
self.tags = set(tags)
|
self.tags = set(tags)
|
||||||
self.header = header
|
self.header = header
|
||||||
self.first_line = True
|
self.first_line = True
|
||||||
self.NAString = NAString
|
self.NAString = NAString
|
||||||
self.sep = sep
|
self.sep = sep
|
||||||
self.NAIntTo0 = NAIntTo0
|
self.NAIntTo0 = NAIntTo0
|
||||||
|
self.metabaR = metabaR
|
||||||
|
self.ngsfilter = ngsfilter
|
||||||
|
|
||||||
@cython.boundscheck(False)
|
@cython.boundscheck(False)
|
||||||
def __call__(self, object data):
|
def __call__(self, object data):
|
||||||
@ -34,13 +36,21 @@ cdef class TabFormat:
|
|||||||
if self.header and self.first_line:
|
if self.header and self.first_line:
|
||||||
for k in ktags:
|
for k in ktags:
|
||||||
if k in tags:
|
if k in tags:
|
||||||
|
if self.metabaR:
|
||||||
|
if k == b'NUC_SEQ':
|
||||||
|
ktoprint = b'sequence'
|
||||||
|
else:
|
||||||
|
ktoprint = k.lower()
|
||||||
|
ktoprint = ktoprint.replace(b'merged_', b'')
|
||||||
|
else:
|
||||||
|
ktoprint = k
|
||||||
if isinstance(data.view[k], Column_multi_elts):
|
if isinstance(data.view[k], Column_multi_elts):
|
||||||
keys = data.view[k].keys()
|
keys = data.view[k].keys()
|
||||||
keys.sort()
|
keys.sort()
|
||||||
for k2 in keys:
|
for k2 in keys:
|
||||||
line.append(tobytes(k)+b':'+tobytes(k2))
|
line.append(tobytes(ktoprint)+b':'+tobytes(k2))
|
||||||
else:
|
else:
|
||||||
line.append(tobytes(k))
|
line.append(tobytes(ktoprint))
|
||||||
r = self.sep.join(value for value in line)
|
r = self.sep.join(value for value in line)
|
||||||
r += b'\n'
|
r += b'\n'
|
||||||
line = []
|
line = []
|
||||||
|
@ -48,13 +48,13 @@ def ngsfilterIterator(lineiterator,
|
|||||||
all_lines.insert(0, firstline)
|
all_lines.insert(0, firstline)
|
||||||
|
|
||||||
# Insert header for column names
|
# Insert header for column names
|
||||||
column_names = [b"experiment", b"sample", b"forward_tag", b"reverse_tag", b"forward_primer", b"reverse_primer"]
|
column_names = [b"experiment", b"sample", b"forward_tag", b"reverse_tag", b"forward_primer", b"reverse_primer",b"additional_info"]
|
||||||
header = out_sep.join(column_names)
|
header = out_sep.join(column_names)
|
||||||
|
|
||||||
new_lines.append(header)
|
new_lines.append(header)
|
||||||
|
|
||||||
for line in all_lines:
|
for line in all_lines:
|
||||||
split_line = line.split()
|
split_line = line.split(maxsplit=5)
|
||||||
tags = split_line.pop(2)
|
tags = split_line.pop(2)
|
||||||
tags = tags.split(b":")
|
tags = tags.split(b":")
|
||||||
for t_idx in range(len(tags)):
|
for t_idx in range(len(tags)):
|
||||||
@ -64,7 +64,7 @@ def ngsfilterIterator(lineiterator,
|
|||||||
tags.append(tags[0])
|
tags.append(tags[0])
|
||||||
split_line.insert(2, tags[0])
|
split_line.insert(2, tags[0])
|
||||||
split_line.insert(3, tags[1])
|
split_line.insert(3, tags[1])
|
||||||
new_lines.append(out_sep.join(split_line[0:6]))
|
new_lines.append(out_sep.join(split_line[0:7]))
|
||||||
|
|
||||||
return tabIterator(iter(new_lines),
|
return tabIterator(iter(new_lines),
|
||||||
header = True,
|
header = True,
|
||||||
|
Reference in New Issue
Block a user