obi import: make skip-on-error handling more robust
This commit is contained in:
@ -270,124 +270,133 @@ def run(config):
|
|||||||
elif not i%50000:
|
elif not i%50000:
|
||||||
logger("info", "Imported %d entries", i)
|
logger("info", "Imported %d entries", i)
|
||||||
|
|
||||||
if NUC_SEQS_view:
|
try:
|
||||||
id_col[i] = entry.id
|
|
||||||
def_col[i] = entry.definition
|
if NUC_SEQS_view:
|
||||||
seq_col[i] = entry.seq
|
id_col[i] = entry.id
|
||||||
# Check if there is a sequencing quality associated by checking the first entry # TODO haven't found a more robust solution yet
|
def_col[i] = entry.definition
|
||||||
if i == 0:
|
seq_col[i] = entry.seq
|
||||||
get_quality = QUALITY_COLUMN in entry
|
# Check if there is a sequencing quality associated by checking the first entry # TODO haven't found a more robust solution yet
|
||||||
|
if i == 0:
|
||||||
|
get_quality = QUALITY_COLUMN in entry
|
||||||
|
if get_quality:
|
||||||
|
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL)
|
||||||
|
qual_col = view[QUALITY_COLUMN]
|
||||||
if get_quality:
|
if get_quality:
|
||||||
Column.new_column(view, QUALITY_COLUMN, OBI_QUAL)
|
qual_col[i] = entry.quality
|
||||||
qual_col = view[QUALITY_COLUMN]
|
|
||||||
if get_quality:
|
|
||||||
qual_col[i] = entry.quality
|
|
||||||
|
|
||||||
for tag in entry :
|
for tag in entry :
|
||||||
|
|
||||||
if tag != ID_COLUMN and tag != DEFINITION_COLUMN and tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN : # TODO dirty
|
if tag != ID_COLUMN and tag != DEFINITION_COLUMN and tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN : # TODO dirty
|
||||||
|
|
||||||
value = entry[tag]
|
value = entry[tag]
|
||||||
if tag == b"taxid":
|
if tag == b"taxid":
|
||||||
tag = TAXID_COLUMN
|
tag = TAXID_COLUMN
|
||||||
if tag == b"count":
|
if tag == b"count":
|
||||||
tag = COUNT_COLUMN
|
tag = COUNT_COLUMN
|
||||||
if tag[:7] == b"merged_":
|
if tag[:7] == b"merged_":
|
||||||
tag = MERGED_PREFIX+tag[7:]
|
tag = MERGED_PREFIX+tag[7:]
|
||||||
|
|
||||||
if tag not in dcols :
|
if tag not in dcols :
|
||||||
|
|
||||||
value_type = type(value)
|
|
||||||
nb_elts = 1
|
|
||||||
value_obitype = OBI_VOID
|
|
||||||
|
|
||||||
if value_type == dict or value_type == list :
|
|
||||||
nb_elts = len(value)
|
|
||||||
elt_names = list(value)
|
|
||||||
else :
|
|
||||||
nb_elts = 1
|
|
||||||
elt_names = None
|
|
||||||
|
|
||||||
value_obitype = get_obitype(value)
|
|
||||||
|
|
||||||
if value_obitype != OBI_VOID :
|
|
||||||
dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
|
|
||||||
|
|
||||||
# Fill value
|
|
||||||
if value_type == dict and nb_elts == 1: # special case that makes the OBI3 create a 1 elt/line column which won't read a dict value
|
|
||||||
value = value[list(value.keys())[0]] # The solution is to transform the value in a simple atomic one acceptable by the column
|
|
||||||
dcols[tag][0][i] = value
|
|
||||||
|
|
||||||
# TODO else log error?
|
|
||||||
|
|
||||||
else :
|
|
||||||
|
|
||||||
rewrite = False
|
|
||||||
|
|
||||||
# Check type adequation
|
|
||||||
old_type = dcols[tag][1]
|
|
||||||
new_type = OBI_VOID
|
|
||||||
new_type = update_obitype(old_type, value)
|
|
||||||
if old_type != new_type :
|
|
||||||
rewrite = True
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Check that it's not the case where the first entry contained a dict of length 1 and now there is a new key
|
|
||||||
if type(value) == dict and \
|
|
||||||
dcols[tag][0].nb_elements_per_line == 1 \
|
|
||||||
and set(dcols[tag][0].elements_names) != set(value.keys()) :
|
|
||||||
raise IndexError # trigger column rewrite
|
|
||||||
|
|
||||||
# Fill value
|
|
||||||
dcols[tag][0][i] = value
|
|
||||||
|
|
||||||
except IndexError :
|
|
||||||
|
|
||||||
value_type = type(value)
|
value_type = type(value)
|
||||||
old_column = dcols[tag][0]
|
nb_elts = 1
|
||||||
old_nb_elements_per_line = old_column.nb_elements_per_line
|
value_obitype = OBI_VOID
|
||||||
new_nb_elements_per_line = 0
|
|
||||||
old_elements_names = old_column.elements_names
|
|
||||||
new_elements_names = None
|
|
||||||
|
|
||||||
#####################################################################
|
if value_type == dict or value_type == list :
|
||||||
|
nb_elts = len(value)
|
||||||
|
elt_names = list(value)
|
||||||
|
else :
|
||||||
|
nb_elts = 1
|
||||||
|
elt_names = None
|
||||||
|
|
||||||
# Check the length and keys of column lines if needed
|
value_obitype = get_obitype(value)
|
||||||
if value_type == dict : # Check dictionary keys
|
|
||||||
for k in value :
|
|
||||||
if k not in old_elements_names :
|
|
||||||
new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
|
|
||||||
rewrite = True
|
|
||||||
break
|
|
||||||
|
|
||||||
elif value_type == list or value_type == tuple : # Check vector length
|
if value_obitype != OBI_VOID :
|
||||||
if old_nb_elements_per_line < len(value) :
|
dcols[tag] = (Column.new_column(view, tag, value_obitype, nb_elements_per_line=nb_elts, elements_names=elt_names), value_obitype)
|
||||||
new_nb_elements_per_line = len(value)
|
|
||||||
rewrite = True
|
|
||||||
|
|
||||||
#####################################################################
|
# Fill value
|
||||||
|
if value_type == dict and nb_elts == 1: # special case that makes the OBI3 create a 1 elt/line column which won't read a dict value
|
||||||
|
value = value[list(value.keys())[0]] # The solution is to transform the value in a simple atomic one acceptable by the column
|
||||||
|
dcols[tag][0][i] = value
|
||||||
|
|
||||||
if rewrite :
|
# TODO else log error?
|
||||||
if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
|
||||||
new_nb_elements_per_line = len(new_elements_names)
|
|
||||||
|
|
||||||
# Reset obierrno
|
else :
|
||||||
obi_errno = 0
|
|
||||||
|
|
||||||
dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
rewrite = False
|
||||||
new_data_type=new_type,
|
|
||||||
new_nb_elements_per_line=new_nb_elements_per_line,
|
|
||||||
new_elements_names=new_elements_names,
|
|
||||||
rewrite_last_line=False),
|
|
||||||
new_type)
|
|
||||||
|
|
||||||
# Update the dictionary:
|
# Check type adequation
|
||||||
for t in dcols :
|
old_type = dcols[tag][1]
|
||||||
dcols[t] = (view[t], dcols[t][1])
|
new_type = OBI_VOID
|
||||||
|
new_type = update_obitype(old_type, value)
|
||||||
|
if old_type != new_type :
|
||||||
|
rewrite = True
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Check that it's not the case where the first entry contained a dict of length 1 and now there is a new key
|
||||||
|
if type(value) == dict and \
|
||||||
|
dcols[tag][0].nb_elements_per_line == 1 \
|
||||||
|
and set(dcols[tag][0].elements_names) != set(value.keys()) :
|
||||||
|
raise IndexError # trigger column rewrite
|
||||||
|
|
||||||
# Fill value
|
# Fill value
|
||||||
dcols[tag][0][i] = value
|
dcols[tag][0][i] = value
|
||||||
|
|
||||||
|
except IndexError :
|
||||||
|
|
||||||
|
value_type = type(value)
|
||||||
|
old_column = dcols[tag][0]
|
||||||
|
old_nb_elements_per_line = old_column.nb_elements_per_line
|
||||||
|
new_nb_elements_per_line = 0
|
||||||
|
old_elements_names = old_column.elements_names
|
||||||
|
new_elements_names = None
|
||||||
|
|
||||||
|
#####################################################################
|
||||||
|
|
||||||
|
# Check the length and keys of column lines if needed
|
||||||
|
if value_type == dict : # Check dictionary keys
|
||||||
|
for k in value :
|
||||||
|
if k not in old_elements_names :
|
||||||
|
new_elements_names = list(set(old_elements_names+[tobytes(k) for k in value]))
|
||||||
|
rewrite = True
|
||||||
|
break
|
||||||
|
|
||||||
|
elif value_type == list or value_type == tuple : # Check vector length
|
||||||
|
if old_nb_elements_per_line < len(value) :
|
||||||
|
new_nb_elements_per_line = len(value)
|
||||||
|
rewrite = True
|
||||||
|
|
||||||
|
#####################################################################
|
||||||
|
|
||||||
|
if rewrite :
|
||||||
|
if new_nb_elements_per_line == 0 and new_elements_names is not None :
|
||||||
|
new_nb_elements_per_line = len(new_elements_names)
|
||||||
|
|
||||||
|
# Reset obierrno
|
||||||
|
obi_errno = 0
|
||||||
|
|
||||||
|
dcols[tag] = (view.rewrite_column_with_diff_attributes(old_column.name,
|
||||||
|
new_data_type=new_type,
|
||||||
|
new_nb_elements_per_line=new_nb_elements_per_line,
|
||||||
|
new_elements_names=new_elements_names,
|
||||||
|
rewrite_last_line=False),
|
||||||
|
new_type)
|
||||||
|
|
||||||
|
# Update the dictionary:
|
||||||
|
for t in dcols :
|
||||||
|
dcols[t] = (view[t], dcols[t][1])
|
||||||
|
|
||||||
|
# Fill value
|
||||||
|
dcols[tag][0][i] = value
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print("\nCould not import sequence id:", entry.id, "(error raised:", e, ")")
|
||||||
|
if 'skiperror' in config['obi'] and not config['obi']['skiperror']:
|
||||||
|
raise e
|
||||||
|
else:
|
||||||
|
pass
|
||||||
|
|
||||||
i+=1
|
i+=1
|
||||||
|
|
||||||
if pb is not None:
|
if pb is not None:
|
||||||
|
Reference in New Issue
Block a user