obi uniq: fixed a bug where dictionary indexes were not read properly, and
added a view rollback in case of an exception.
This commit is contained in:
Celine Mercier
2017-10-26 19:00:05 +02:00
parent dfd51939a0
commit 43f65e7fd0

View File

@ -3,6 +3,7 @@
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
from obitools3.dms import DMS from obitools3.dms import DMS
from obitools3.dms.view.view cimport View, Line from obitools3.dms.view.view cimport View, Line
from obitools3.dms.view import RollbackException
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
from obitools3.dms.column.column cimport Column, Column_line from obitools3.dms.column.column cimport Column, Column_line
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN
@ -44,16 +45,6 @@ def addOptions(parser):
"used to group sequences before dereplication " "used to group sequences before dereplication "
"(option can be used several times).") "(option can be used several times).")
# TODO discuss
# group.add_argument('--prefix', '-p',
# action="store_true", dest="uniq:prefix",
# default=False,
# help="Dereplication is done based on prefix matching: "
# "(i) The shortest sequence of each group is a prefix "
# "of any sequence of its group (ii) Two shortest "
# "sequences of any couple of groups are not the"
# "prefix of the other one.")
cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) : cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
@ -300,7 +291,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
mkey = "merged_%s" % key mkey = "merged_%s" % key
#cas ou on met a jour les merged_keys mais il n'y a pas de merged_keys dans la sequence qui arrive #cas ou on met a jour les merged_keys mais il n'y a pas de merged_keys dans la sequence qui arrive
if key in i_seq: if key in i_seq:
to_merge = str(i_seq[key]) to_merge = i_seq[key]
if type(to_merge) != bytes :
to_merge = str(to_merge)
mcol = o_seq[mkey] mcol = o_seq[mkey]
if mcol[to_merge] is None: if mcol[to_merge] is None:
mcol[to_merge] = i_count mcol[to_merge] = i_count
@ -325,7 +318,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
if mergeIds : if mergeIds :
merged_ids_dict[o_seq.id].append(i_seq.id) merged_ids_dict[o_seq.id].append(i_seq.id)
#o_seq['merged'].append(i_seq.id)
else: else:
o_view[o_idx] = i_seq o_view[o_idx] = i_seq
@ -344,7 +336,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
taxid_dist_dict[i_seq.id][o_seq.id] = o_seq['taxid'] taxid_dist_dict[i_seq.id][o_seq.id] = o_seq['taxid']
mkey = "merged_%s" % key mkey = "merged_%s" % key
if key in o_seq: if key in o_seq:
to_merge = str(o_seq[key]) to_merge = o_seq[key]
if type(to_merge) != bytes :
to_merge = str(to_merge)
mcol = o_seq[mkey] mcol = o_seq[mkey]
if to_merge in mcol and mcol[to_merge] is not None: if to_merge in mcol and mcol[to_merge] is not None:
mcol[to_merge] = mcol[to_merge] + o_seq[COUNT_COLUMN] mcol[to_merge] = mcol[to_merge] + o_seq[COUNT_COLUMN]
@ -354,7 +348,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
if mergeIds: if mergeIds:
merged_ids_dict[o_seq.id] = [o_seq.id] merged_ids_dict[o_seq.id] = [o_seq.id]
#o_seq['merged']=[o_seq.id]
i+=1 i+=1
@ -436,9 +429,12 @@ def run(config):
# Initialize the progress bar # Initialize the progress bar
pb = ProgressBar(len(entries), config, seconde=5) pb = ProgressBar(len(entries), config, seconde=5)
try:
uniq_sequences(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'])
except:
raise RollbackException("obi uniq error, rollbacking view", o_view)
uniq_sequences(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'])
print("\n") print("\n")
print(repr(o_view)) print(repr(o_view))