obi export: added options to export to metabaR compatible format
This commit is contained in:
@ -297,6 +297,29 @@ def __addExportOutputOption(optionManager):
|
||||
const=b'tabular',
|
||||
help="Output file is in tabular format")
|
||||
|
||||
group.add_argument('--metabaR-output',
|
||||
action="store_const", dest="obi:outputformat",
|
||||
default=None,
|
||||
const=b'metabaR',
|
||||
help="Export the files needed by the obifiles_to_metabarlist function of the metabaR package")
|
||||
|
||||
group.add_argument('--metabaR-prefix',
|
||||
action="store", dest="obi:metabarprefix",
|
||||
type=str,
|
||||
help="Prefix for the files when using --metabaR-output option")
|
||||
|
||||
group.add_argument('--metabaR-ngsfilter',
|
||||
action="store", dest="obi:metabarngsfilter",
|
||||
type=str,
|
||||
default=None,
|
||||
help="URI to the ngsfilter view when using --metabaR-output option (if not provided, it is not exported)")
|
||||
|
||||
group.add_argument('--metabaR-samples',
|
||||
action="store", dest="obi:metabarsamples",
|
||||
type=str,
|
||||
default=None,
|
||||
help="URI to the sample metadata view when using --metabaR-output option (if not provided, it is built as just a list of the sample names)")
|
||||
|
||||
group.add_argument('--only-keys',
|
||||
action="append", dest="obi:only_keys",
|
||||
type=str,
|
||||
|
@ -6,6 +6,9 @@ from obitools3.apps.config import logger
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.obiseq import Nuc_Seq
|
||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
|
||||
from obitools3.writers.tab import TabWriter
|
||||
from obitools3.format.tab import TabFormat
|
||||
from obitools3.utils cimport tobytes, tostr
|
||||
|
||||
from obitools3.apps.optiongroups import addMinimalInputOption, \
|
||||
addExportOutputOption, \
|
||||
@ -76,6 +79,13 @@ def run(config):
|
||||
else:
|
||||
pb = ProgressBar(withoutskip - skip, config)
|
||||
|
||||
if config['obi']['outputformat'] == b'metabaR':
|
||||
# Check prefix
|
||||
if "metabarprefix" not in config["obi"]:
|
||||
raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)")
|
||||
else:
|
||||
metabaRprefix = config["obi"]["metabarprefix"]
|
||||
|
||||
i=0
|
||||
for seq in iview :
|
||||
PyErr_CheckSignals()
|
||||
@ -91,6 +101,81 @@ def run(config):
|
||||
pb(i, force=True)
|
||||
print("", file=sys.stderr)
|
||||
|
||||
if config['obi']['outputformat'] == b'metabaR':
|
||||
|
||||
# Export ngsfilter file if view provided
|
||||
if 'metabarngsfilter' in config['obi']:
|
||||
ngsfilter_input = open_uri(config['obi']['metabarngsfilter'])
|
||||
if ngsfilter_input is None:
|
||||
raise Exception("Could not read ngsfilter view for metabaR output")
|
||||
ngsfilter_view = ngsfilter_input[1]
|
||||
|
||||
ngsfilter_output = open(config['obi']['metabarprefix']+'.ngsfilter', 'w')
|
||||
|
||||
for line in ngsfilter_view:
|
||||
|
||||
line_to_print = b""
|
||||
line_to_print += line[b'experiment']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'sample']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'forward_tag']
|
||||
line_to_print += b":"
|
||||
line_to_print += line[b'reverse_tag']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'forward_primer']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'reverse_primer']
|
||||
line_to_print += b"\t"
|
||||
line_to_print += line[b'additional_info']
|
||||
|
||||
print(tostr(line_to_print), file=ngsfilter_output)
|
||||
|
||||
if ngsfilter_input[0] != input[0]:
|
||||
ngsfilter_input[0].close()
|
||||
ngsfilter_output.close()
|
||||
|
||||
# Export sample metadata
|
||||
samples_output = open(config['obi']['metabarprefix']+'_samples.csv', 'w')
|
||||
|
||||
# Export sample metadata file if view provided
|
||||
if 'metabarsamples' in config['obi']:
|
||||
samples_input = open_uri(config['obi']['metabarsamples'])
|
||||
if samples_input is None:
|
||||
raise Exception("Could not read sample view for metabaR output")
|
||||
samples_view = samples_input[1]
|
||||
|
||||
# Export with tab formatter
|
||||
TabWriter(TabFormat(header=True, sep='\t',),
|
||||
samples_output,
|
||||
header=True)
|
||||
|
||||
if samples_input[0] != input[0]:
|
||||
samples_input[0].close()
|
||||
|
||||
# Else export just sample names from main view
|
||||
else:
|
||||
|
||||
sample_list = []
|
||||
if 'MERGED_sample' in iview:
|
||||
sample_list = iview['MERGED_sample'].keys()
|
||||
elif 'sample' not in iview:
|
||||
for seq in iview:
|
||||
sample = seq['sample']
|
||||
if sample not in sample_list:
|
||||
sample_list.append(sample)
|
||||
else:
|
||||
logger("warning", "Can not read sample list from main view for metabaR sample list export")
|
||||
|
||||
print("sample_id", file=samples_output)
|
||||
for sample in sample_list:
|
||||
line_to_print = b""
|
||||
line_to_print += sample
|
||||
line_to_print += b"\t"
|
||||
print(tostr(line_to_print), file=samples_output)
|
||||
|
||||
samples_output.close()
|
||||
|
||||
# TODO save command in input dms?
|
||||
|
||||
if not BrokenPipeError and not IOError:
|
||||
|
@ -7,3 +7,5 @@ cdef class TabFormat:
|
||||
cdef set tags
|
||||
cdef bytes sep
|
||||
cdef bint NAIntTo0
|
||||
cdef bint metabaR
|
||||
cdef bint ngsfilter
|
||||
|
@ -10,13 +10,15 @@ import sys
|
||||
|
||||
cdef class TabFormat:
|
||||
|
||||
def __init__(self, list tags=[], header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True):
|
||||
def __init__(self, list tags=[], header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True, metabaR=False, ngsfilter=False):
|
||||
self.tags = set(tags)
|
||||
self.header = header
|
||||
self.first_line = True
|
||||
self.NAString = NAString
|
||||
self.sep = sep
|
||||
self.NAIntTo0 = NAIntTo0
|
||||
self.metabaR = metabaR
|
||||
self.ngsfilter = ngsfilter
|
||||
|
||||
@cython.boundscheck(False)
|
||||
def __call__(self, object data):
|
||||
@ -34,13 +36,21 @@ cdef class TabFormat:
|
||||
if self.header and self.first_line:
|
||||
for k in ktags:
|
||||
if k in tags:
|
||||
if self.metabaR:
|
||||
if k == b'NUC_SEQ':
|
||||
ktoprint = b'sequence'
|
||||
else:
|
||||
ktoprint = k.lower()
|
||||
ktoprint = ktoprint.replace(b'merged_', b'')
|
||||
else:
|
||||
ktoprint = k
|
||||
if isinstance(data.view[k], Column_multi_elts):
|
||||
keys = data.view[k].keys()
|
||||
keys.sort()
|
||||
for k2 in keys:
|
||||
line.append(tobytes(k)+b':'+tobytes(k2))
|
||||
line.append(tobytes(ktoprint)+b':'+tobytes(k2))
|
||||
else:
|
||||
line.append(tobytes(k))
|
||||
line.append(tobytes(ktoprint))
|
||||
r = self.sep.join(value for value in line)
|
||||
r += b'\n'
|
||||
line = []
|
||||
|
@ -48,13 +48,13 @@ def ngsfilterIterator(lineiterator,
|
||||
all_lines.insert(0, firstline)
|
||||
|
||||
# Insert header for column names
|
||||
column_names = [b"experiment", b"sample", b"forward_tag", b"reverse_tag", b"forward_primer", b"reverse_primer"]
|
||||
column_names = [b"experiment", b"sample", b"forward_tag", b"reverse_tag", b"forward_primer", b"reverse_primer",b"additional_info"]
|
||||
header = out_sep.join(column_names)
|
||||
|
||||
new_lines.append(header)
|
||||
|
||||
for line in all_lines:
|
||||
split_line = line.split()
|
||||
split_line = line.split(maxsplit=5)
|
||||
tags = split_line.pop(2)
|
||||
tags = tags.split(b":")
|
||||
for t_idx in range(len(tags)):
|
||||
@ -64,7 +64,7 @@ def ngsfilterIterator(lineiterator,
|
||||
tags.append(tags[0])
|
||||
split_line.insert(2, tags[0])
|
||||
split_line.insert(3, tags[1])
|
||||
new_lines.append(out_sep.join(split_line[0:6]))
|
||||
new_lines.append(out_sep.join(split_line[0:7]))
|
||||
|
||||
return tabIterator(iter(new_lines),
|
||||
header = True,
|
||||
|
Reference in New Issue
Block a user