Compare commits
13 Commits
- ce2833c04b
- f64b3da30b
- 388b3e0410
- c9db990b83
- 3f253feb5e
- 85d2bab607
- 53b3d81137
- f6353fbf28
- 5a8b9dca5d
- 8bd6d6c8e9
- 405e6ef420
- fedacfafe7
- 2d66e0e965
@@ -297,6 +297,29 @@ def __addExportOutputOption(optionManager):
                        const=b'tabular',
                        help="Output file is in tabular format")
 
+    group.add_argument('--metabaR-output',
+                       action="store_const", dest="obi:outputformat",
+                       default=None,
+                       const=b'metabaR',
+                       help="Export the files needed by the obifiles_to_metabarlist function of the metabaR package")
+
+    group.add_argument('--metabaR-prefix',
+                       action="store", dest="obi:metabarprefix",
+                       type=str,
+                       help="Prefix for the files when using --metabaR-output option")
+
+    group.add_argument('--metabaR-ngsfilter',
+                       action="store", dest="obi:metabarngsfilter",
+                       type=str,
+                       default=None,
+                       help="URI to the ngsfilter view when using --metabaR-output option (if not provided, it is not exported)")
+
+    group.add_argument('--metabaR-samples',
+                       action="store", dest="obi:metabarsamples",
+                       type=str,
+                       default=None,
+                       help="URI to the sample metadata view when using --metabaR-output option (if not provided, it is built as just a list of the sample names)")
+
     group.add_argument('--only-keys',
                        action="append", dest="obi:only_keys",
                        type=str,
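Not part of the diff: a minimal, self-contained sketch of how `dest` strings such as `obi:outputformat` behave with plain argparse, since every option added above stores its value under such a key. The parser, group title, and sample values here are illustrative assumptions.

```python
# Illustrative sketch (assumed names/values): dest strings containing a colon
# are stored on the argparse namespace with setattr and read back with getattr.
import argparse

parser = argparse.ArgumentParser()
group = parser.add_argument_group("export output options")

group.add_argument('--metabaR-output',
                   action="store_const", dest="obi:outputformat",
                   default=None, const=b'metabaR',
                   help="Export files for the metabaR package")
group.add_argument('--metabaR-prefix',
                   action="store", dest="obi:metabarprefix", type=str,
                   help="Prefix for the exported files")

args = parser.parse_args(['--metabaR-output', '--metabaR-prefix', 'myrun'])
print(getattr(args, 'obi:outputformat'))   # b'metabaR'
print(getattr(args, 'obi:metabarprefix'))  # myrun
```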
@@ -6,6 +6,9 @@ from obitools3.apps.config import logger
 from obitools3.dms import DMS
 from obitools3.dms.obiseq import Nuc_Seq
 from obitools3.dms.capi.obiview cimport QUALITY_COLUMN
+from obitools3.writers.tab import TabWriter
+from obitools3.format.tab import TabFormat
+from obitools3.utils cimport tobytes, tostr
 
 from obitools3.apps.optiongroups import addMinimalInputOption, \
                                         addExportOutputOption, \
@@ -76,6 +79,13 @@ def run(config):
     else:
         pb = ProgressBar(withoutskip - skip, config)
 
+    if config['obi']['outputformat'] == b'metabaR':
+        # Check prefix
+        if "metabarprefix" not in config["obi"]:
+            raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)")
+        else:
+            metabaRprefix = config["obi"]["metabarprefix"]
+
     i=0
     for seq in iview :
         PyErr_CheckSignals()
@@ -91,6 +101,81 @@ def run(config):
         pb(i, force=True)
         print("", file=sys.stderr)
 
+    if config['obi']['outputformat'] == b'metabaR':
+
+        # Export ngsfilter file if view provided
+        if 'metabarngsfilter' in config['obi']:
+            ngsfilter_input = open_uri(config['obi']['metabarngsfilter'])
+            if ngsfilter_input is None:
+                raise Exception("Could not read ngsfilter view for metabaR output")
+            ngsfilter_view = ngsfilter_input[1]
+
+            ngsfilter_output = open(config['obi']['metabarprefix']+'.ngsfilter', 'w')
+
+            for line in ngsfilter_view:
+
+                line_to_print = b""
+                line_to_print += line[b'experiment']
+                line_to_print += b"\t"
+                line_to_print += line[b'sample']
+                line_to_print += b"\t"
+                line_to_print += line[b'forward_tag']
+                line_to_print += b":"
+                line_to_print += line[b'reverse_tag']
+                line_to_print += b"\t"
+                line_to_print += line[b'forward_primer']
+                line_to_print += b"\t"
+                line_to_print += line[b'reverse_primer']
+                line_to_print += b"\t"
+                line_to_print += line[b'additional_info']
+
+                print(tostr(line_to_print), file=ngsfilter_output)
+
+            if ngsfilter_input[0] != input[0]:
+                ngsfilter_input[0].close()
+            ngsfilter_output.close()
+
+        # Export sample metadata
+        samples_output = open(config['obi']['metabarprefix']+'_samples.csv', 'w')
+
+        # Export sample metadata file if view provided
+        if 'metabarsamples' in config['obi']:
+            samples_input = open_uri(config['obi']['metabarsamples'])
+            if samples_input is None:
+                raise Exception("Could not read sample view for metabaR output")
+            samples_view = samples_input[1]
+
+            # Export with tab formatter
+            TabWriter(TabFormat(header=True, sep='\t',),
+                      samples_output,
+                      header=True)
+
+            if samples_input[0] != input[0]:
+                samples_input[0].close()
+
+        # Else export just sample names from main view
+        else:
+
+            sample_list = []
+            if 'MERGED_sample' in iview:
+                sample_list = iview['MERGED_sample'].keys()
+            elif 'sample' not in iview:
+                for seq in iview:
+                    sample = seq['sample']
+                    if sample not in sample_list:
+                        sample_list.append(sample)
+            else:
+                logger("warning", "Can not read sample list from main view for metabaR sample list export")
+
+            print("sample_id", file=samples_output)
+            for sample in sample_list:
+                line_to_print = b""
+                line_to_print += sample
+                line_to_print += b"\t"
+                print(tostr(line_to_print), file=samples_output)
+
+        samples_output.close()
+
     # TODO save command in input dms?
 
     if not BrokenPipeError and not IOError:
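Not part of the diff: a small stand-alone sketch of the tab-separated ngsfilter line assembled in the loop above, using a plain dict in place of a view line. All sample values are invented for illustration.

```python
# Illustrative sketch (invented sample values): one ngsfilter line is the
# experiment, sample, forward:reverse tags, both primers and the free-text
# additional_info column, joined by tabs.
row = {
    b'experiment':      b'wolf_diet',
    b'sample':          b'13a_F730603',
    b'forward_tag':     b'aattaac',
    b'reverse_tag':     b'gaagtag',
    b'forward_primer':  b'TTAGATACCCCACTATGC',
    b'reverse_primer':  b'TAGAACAGGCTCCTCTAG',
    b'additional_info': b'F @ position:1..35',
}

line_to_print = b"\t".join([
    row[b'experiment'],
    row[b'sample'],
    row[b'forward_tag'] + b":" + row[b'reverse_tag'],
    row[b'forward_primer'],
    row[b'reverse_primer'],
    row[b'additional_info'],
])
print(line_to_print.decode('ascii'))  # six tab-separated ngsfilter columns
```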
@@ -91,7 +91,7 @@ def addOptions(parser):
                        metavar="<ATTRIBUTE_NAME>",
                        help="Select records with the attribute <ATTRIBUTE_NAME> "
                             "defined (not set to NA value). "
-                            "Several -a options can be used on the same "
+                            "Several -A options can be used on the same "
                             "command line.")
 
     group.add_argument("-L", "--lmax",
python/obitools3/commands/split.pyx (new file, 105 lines)
@@ -0,0 +1,105 @@
+#cython: language_level=3
+
+from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
+from obitools3.dms import DMS
+from obitools3.dms.view.view cimport View, Line_selection
+from obitools3.uri.decode import open_uri
+from obitools3.apps.optiongroups import addMinimalInputOption, addTaxonomyOption, addMinimalOutputOption, addNoProgressBarOption
+from obitools3.dms.view import RollbackException
+from obitools3.apps.config import logger
+from obitools3.utils cimport tobytes
+
+import sys
+from cpython.exc cimport PyErr_CheckSignals
+
+
+__title__="Split"
+
+
+def addOptions(parser):
+
+    addMinimalInputOption(parser)
+    addNoProgressBarOption(parser)
+
+    group=parser.add_argument_group("obi split specific options")
+
+    group.add_argument('-p','--prefix',
+                       action="store", dest="split:prefix",
+                       metavar="<PREFIX>",
+                       help="Prefix added to each subview name (included undefined)")
+
+    group.add_argument('-t','--tag-name',
+                       action="store", dest="split:tagname",
+                       metavar="<TAG_NAME>",
+                       help="Attribute/tag used to split the input")
+
+    group.add_argument('-u','--undefined',
+                       action="store", dest="split:undefined",
+                       default=b'UNDEFINED',
+                       metavar="<VIEW_NAME>",
+                       help="Name of the view where undefined sequenced are stored (will be PREFIX_VIEW_NAME)")
+
+
+def run(config):
+
+    DMS.obi_atexit()
+
+    logger("info", "obi split")
+
+    # Open the input
+    input = open_uri(config["obi"]["inputURI"])
+    if input is None:
+        raise Exception("Could not read input view")
+    i_dms = input[0]
+    i_view = input[1]
+
+    # Initialize the progress bar
+    if config['obi']['noprogressbar'] == False:
+        pb = ProgressBar(len(i_view), config)
+    else:
+        pb = None
+
+    tag_to_split = config["split"]["tagname"]
+    undefined = tobytes(config["split"]["undefined"])
+    selections = {}
+
+    # Go through input view and split
+    for i in range(len(i_view)):
+        PyErr_CheckSignals()
+        if pb is not None:
+            pb(i)
+        line = i_view[i]
+        if tag_to_split not in line or line[tag_to_split] is None or len(line[tag_to_split])==0:
+            value = undefined
+        else:
+            value = line[tag_to_split]
+        if value not in selections:
+            selections[value] = Line_selection(i_view)
+        selections[value].append(i)
+
+    if pb is not None:
+        pb(len(i_view), force=True)
+        print("", file=sys.stderr)
+
+    # Create output views with the line selection
+    try:
+        for cat in selections:
+            o_view_name = config["split"]["prefix"].encode()+cat
+            o_view = selections[cat].materialize(o_view_name)
+            # Save command config in View and DMS comments
+            command_line = " ".join(sys.argv[1:])
+            input_dms_name=[input[0].name]
+            input_view_name=[input[1].name]
+            if 'taxoURI' in config['obi'] and config['obi']['taxoURI'] is not None:
+                input_dms_name.append(config['obi']['taxoURI'].split("/")[-3])
+                input_view_name.append("taxonomy/"+config['obi']['taxoURI'].split("/")[-1])
+            o_view.write_config(config, "split", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
+            o_view.close()
+    except Exception, e:
+        raise RollbackException("obi split error, rollbacking view: "+str(e), o_view)
+
+    i_dms.record_command_line(command_line)
+    i_dms.close(force=True)
+
+    logger("info", "Done.")
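Not part of the diff: the core grouping idea of the new `obi split` command, shown as plain Python on dicts; `Line_selection` and view materialization are replaced here by simple lists of record indices, and all names are stand-ins.

```python
# Illustrative sketch (plain-Python stand-in for Line_selection): group record
# indices by the value of a tag, sending records without that tag to an
# "undefined" group, as obi split does before materializing each subview.
def split_by_tag(records, tag, undefined=b'UNDEFINED'):
    groups = {}
    for i, rec in enumerate(records):
        value = rec.get(tag)
        if value is None or len(value) == 0:
            value = undefined
        groups.setdefault(value, []).append(i)
    return groups

records = [{b'sample': b'A'}, {b'sample': b'B'}, {}, {b'sample': b'A'}]
print(split_by_tag(records, b'sample'))
# {b'A': [0, 3], b'B': [1], b'UNDEFINED': [2]}
```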
@@ -799,7 +799,8 @@ cdef class Line :
 
     def keys(self):
-        return self._view.keys()
+        cdef bytes key
+        return [key for key in self._view.keys()]
 
 
     def __contains__(self, object column_name):
@@ -6,4 +6,6 @@ cdef class TabFormat:
     cdef bytes NAString
     cdef set tags
     cdef bytes sep
     cdef bint NAIntTo0
+    cdef bint metabaR
+    cdef bint ngsfilter
@@ -10,13 +10,15 @@ import sys
 
 cdef class TabFormat:
 
-    def __init__(self, list tags=[], header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True):
+    def __init__(self, list tags=[], header=True, bytes NAString=b"NA", bytes sep=b"\t", bint NAIntTo0=True, metabaR=False, ngsfilter=False):
         self.tags = set(tags)
         self.header = header
         self.first_line = True
         self.NAString = NAString
         self.sep = sep
         self.NAIntTo0 = NAIntTo0
+        self.metabaR = metabaR
+        self.ngsfilter = ngsfilter
 
     @cython.boundscheck(False)
     def __call__(self, object data):
@@ -34,13 +36,21 @@ cdef class TabFormat:
         if self.header and self.first_line:
             for k in ktags:
                 if k in tags:
+                    if self.metabaR:
+                        if k == b'NUC_SEQ':
+                            ktoprint = b'sequence'
+                        else:
+                            ktoprint = k.lower()
+                            ktoprint = ktoprint.replace(b'merged_', b'')
+                    else:
+                        ktoprint = k
                     if isinstance(data.view[k], Column_multi_elts):
                         keys = data.view[k].keys()
                         keys.sort()
                         for k2 in keys:
-                            line.append(tobytes(k)+b':'+tobytes(k2))
+                            line.append(tobytes(ktoprint)+b':'+tobytes(k2))
                     else:
-                        line.append(tobytes(k))
+                        line.append(tobytes(ktoprint))
             r = self.sep.join(value for value in line)
             r += b'\n'
             line = []
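Not part of the diff: the header renaming rule applied when `metabaR=True` in the hunk above, extracted into a stand-alone helper for clarity; the function name is an assumption, and the intent (column names metabaR can consume) is inferred from the change itself.

```python
# Illustrative sketch (hypothetical helper name): NUC_SEQ becomes "sequence",
# every other column name is lowercased and loses any "merged_" prefix,
# presumably to match the column names expected on the metabaR side.
def metabar_header_name(key: bytes) -> bytes:
    if key == b'NUC_SEQ':
        return b'sequence'
    return key.lower().replace(b'merged_', b'')

for k in (b'NUC_SEQ', b'MERGED_sample', b'COUNT'):
    print(k.decode(), '->', metabar_header_name(k).decode())
# NUC_SEQ -> sequence
# MERGED_sample -> sample
# COUNT -> count
```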
@@ -22,11 +22,11 @@ from libc.stdlib cimport free, malloc, realloc
 from libc.string cimport strcpy, strlen
 
 
-_featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN )',re.DOTALL + re.M)
+_featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN(\s*))',re.DOTALL + re.M)
 
 _headerMatcher = re.compile(b'^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M)
-_seqMatcher = re.compile(b'^ORIGIN .+(?=//\n)', re.DOTALL + re.M)
+_seqMatcher = re.compile(b'^ORIGIN.+(?=//\n)', re.DOTALL + re.M)
-_cleanSeq1 = re.compile(b'ORIGIN.+\n')
+_cleanSeq1 = re.compile(b'ORIGIN(\s*)\n')
 _cleanSeq2 = re.compile(b'[ \n0-9]+')
 _acMatcher = re.compile(b'(?<=^ACCESSION ).+',re.M)
 _deMatcher = re.compile(b'(?<=^DEFINITION ).+\n( .+\n)*',re.M)
@@ -155,10 +155,10 @@ def genbankIterator_file(lineiterator,
         yield seq
         read+=1
 
-    # Last sequence
-    seq = genbankParser(entry)
-    yield seq
+    # Last sequence if not empty lines
+    if entry.strip():
+        seq = genbankParser(entry)
+        yield seq
 
     free(entry)
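Not part of the diff: a tiny stand-alone illustration of why the added `if entry.strip():` guard matters, using a simplified record splitter in place of the real buffer-based parser; the data and function name are invented.

```python
# Illustrative sketch (simplified splitter, not the real parser): a GenBank
# file ending in blank lines yields an empty final chunk, which the strip()
# guard skips instead of handing to the parser.
def genbank_entries(text: bytes):
    for entry in text.split(b"//\n"):
        if entry.strip():          # same idea as the guard added above
            yield entry

data = (b"LOCUS       AB000001\nORIGIN\n        1 acgtacgt\n//\n"
        b"LOCUS       AB000002\nORIGIN\n        1 ttaattaa\n//\n\n\n")
print(len(list(genbank_entries(data))))   # 2: the blank tail is ignored
```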
@@ -48,13 +48,13 @@ def ngsfilterIterator(lineiterator,
     all_lines.insert(0, firstline)
 
     # Insert header for column names
-    column_names = [b"experiment", b"sample", b"forward_tag", b"reverse_tag", b"forward_primer", b"reverse_primer"]
+    column_names = [b"experiment", b"sample", b"forward_tag", b"reverse_tag", b"forward_primer", b"reverse_primer", b"additional_info"]
     header = out_sep.join(column_names)
 
     new_lines.append(header)
 
     for line in all_lines:
-        split_line = line.split()
+        split_line = line.split(maxsplit=5)
         tags = split_line.pop(2)
         tags = tags.split(b":")
         for t_idx in range(len(tags)):
@@ -64,7 +64,7 @@ def ngsfilterIterator(lineiterator,
         tags.append(tags[0])
         split_line.insert(2, tags[0])
         split_line.insert(3, tags[1])
-        new_lines.append(out_sep.join(split_line[0:6]))
+        new_lines.append(out_sep.join(split_line[0:7]))
 
     return tabIterator(iter(new_lines),
                        header = True,
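Not part of the diff: a short stand-alone demonstration of the two parsing changes above (`maxsplit=5` and the seventh column), with an invented input line.

```python
# Illustrative sketch (invented input line): splitting with maxsplit=5 keeps
# the trailing free-text column intact even though it contains spaces, so the
# new additional_info field survives as a single value.
line = (b"wolf_diet 13a_F730603 aattaac:gaagtag "
        b"TTAGATACCCCACTATGC TAGAACAGGCTCCTCTAG F @ position:1..35")

split_line = line.split(maxsplit=5)
tags = split_line.pop(2).split(b":")
split_line.insert(2, tags[0])   # forward_tag
split_line.insert(3, tags[1])   # reverse_tag
print(len(split_line))          # 7 columns, matching the extended header
print(split_line[-1])           # b'F @ position:1..35'
```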
@@ -173,7 +173,10 @@ def open_uri(uri,
              type newviewtype=View,
              dms_only=False,
              force_file=False):
 
+    if type(uri) == str and not uri.isascii():
+        raise Exception("Paths must be ASCII characters only")
+
     cdef bytes urib = tobytes(uri)
     cdef bytes scheme
     cdef tuple dms
@@ -277,7 +280,12 @@ def open_uri(uri,
             iseq = urib
             objclass = bytes
         else: # TODO update uopen to be able to write?
-            if not urip.path or urip.path == b'-':
+            if config['obi']['outputformat'] == b'metabaR':
+                if 'metabarprefix' not in config['obi']:
+                    raise Exception("Prefix needed when exporting for metabaR (--metabaR-prefix option)")
+                else:
+                    file = open(config['obi']['metabarprefix']+'.tab', 'wb')
+            elif not urip.path or urip.path == b'-':
                 file = sys.stdout.buffer
             else:
                 file = open(urip.path, 'wb')
@@ -299,11 +307,11 @@ def open_uri(uri,
             format=config["obi"][formatkey]
         except KeyError:
             format=None
 
     if b'seqtype' in qualifiers:
         seqtype=qualifiers[b'seqtype'][0]
     else:
-        if format == b"ngsfilter" or format == b"tabular": # TODO discuss
+        if format == b"ngsfilter" or format == b"tabular" or format == b"metabaR": # TODO discuss
             seqtype=None
         else:
             try:
@@ -437,7 +445,7 @@ def open_uri(uri,
     try:
         na_int_to_0=config["obi"]["na_int_to_0"]
     except KeyError:
-        if format==b"tabular":
+        if format==b"tabular" or format==b"metabaR":
             na_int_to_0=True
         else:
             na_int_to_0=False
@@ -487,6 +495,13 @@ def open_uri(uri,
     except KeyError:
         only_keys=[]
 
+    if b"metabaR_prefix" in qualifiers:
+        metabaR_prefix = tobytes(qualifiers[b"metabaR_prefix"][0][0])
+    else:
+        try:
+            metabaR_prefix = tobytes(config["obi"]["metabarprefix"])
+        except KeyError:
+            metabaR_prefix=None
+
     if format is not None:
         if seqtype==b"nuc":
@@ -552,6 +567,16 @@ def open_uri(uri,
                              skip=skip,
                              only=only,
                              header=header)
+        elif format==b"metabaR":
+            objclass = dict
+            if input:
+                raise NotImplementedError('Input data file format not implemented')
+            else:
+                iseq = TabWriter(TabFormat(tags=only_keys, header=header, NAString=nastring, sep=sep, NAIntTo0=na_int_to_0, metabaR=True),
+                                 file,
+                                 skip=skip,
+                                 only=only,
+                                 header=header)
         elif format==b"ngsfilter":
             objclass = dict
             if input:
@@ -565,7 +590,7 @@ def open_uri(uri,
                              skip = skip,
                              only = only)
         else:
-            raise NotImplementedError('Output sequence file format not implemented')
+            raise NotImplementedError('Output data file format not implemented')
     else:
         if input:
             iseq, objclass, format = entryIteratorFactory(file,
@@ -264,7 +264,7 @@ cdef obitype_t update_obitype(obitype_t obitype, object new_value) :
     if new_value == None or new_type==list or new_type==dict or new_type==tuple:
         return obitype
 
-    # TODO BOOL vers INT/FLOAT
+    # TODO BOOL to INT/FLOAT
     if new_type == str or new_type == bytes :
         if obitype == OBI_SEQ and is_a_DNA_seq(tobytes(new_value)) :
             pass
@@ -1,5 +1,5 @@
 major = 3
 minor = 0
-serial= '1b12'
+serial= '1b15'
 
 version ="%d.%d.%s" % (major,minor,serial)
@@ -77,6 +77,7 @@ static inline ecotx_t* get_lca_from_merged_taxids(Obiview_p view, OBIDMS_column_
 {
     ecotx_t* taxon = NULL;
     ecotx_t* lca = NULL;
+    ecotx_t* lca1 = NULL;
     int32_t taxid;
     index_t taxid_idx;
     int64_t taxid_str_idx;
@@ -108,10 +109,11 @@ static inline ecotx_t* get_lca_from_merged_taxids(Obiview_p view, OBIDMS_column_
         else
         {
             // Compute LCA
+            lca1 = lca;
             lca = obi_taxo_get_lca(taxon, lca);
             if (lca == NULL)
             {
-                obidebug(1, "\nError getting the last common ancestor of two taxa when building a reference database");
+                obidebug(1, "\nError getting the last common ancestor of two taxa when building a reference database, %d %d", taxid, lca1->taxid);
                 return NULL;
             }
         }
@@ -185,7 +187,7 @@ int build_reference_db(const char* dms_name,
     matrix_view_name = strcpy(matrix_view_name, o_view_name);
     strcat(matrix_view_name, "_matrix");
 
-    fprintf(stderr, "Aligning queries with reference database...\n");
+    fprintf(stderr, "Aligning sequences...\n");
     if (obi_lcs_align_one_column(dms_name,
                                  refs_view_name,
                                  "",