obi uniq: fixed bug where dictionary indexes were not read properly, and
added view rollback in case of an exception.
This commit is contained in:
@ -3,6 +3,7 @@
|
|||||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||||
from obitools3.dms import DMS
|
from obitools3.dms import DMS
|
||||||
from obitools3.dms.view.view cimport View, Line
|
from obitools3.dms.view.view cimport View, Line
|
||||||
|
from obitools3.dms.view import RollbackException
|
||||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||||
from obitools3.dms.column.column cimport Column, Column_line
|
from obitools3.dms.column.column cimport Column, Column_line
|
||||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN
|
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN
|
||||||
@ -44,16 +45,6 @@ def addOptions(parser):
|
|||||||
"used to group sequences before dereplication "
|
"used to group sequences before dereplication "
|
||||||
"(option can be used several times).")
|
"(option can be used several times).")
|
||||||
|
|
||||||
# TODO discuss
|
|
||||||
# group.add_argument('--prefix', '-p',
|
|
||||||
# action="store_true", dest="uniq:prefix",
|
|
||||||
# default=False,
|
|
||||||
# help="Dereplication is done based on prefix matching: "
|
|
||||||
# "(i) The shortest sequence of each group is a prefix "
|
|
||||||
# "of any sequence of its group (ii) Two shortest "
|
|
||||||
# "sequences of any couple of groups are not the"
|
|
||||||
# "prefix of the other one.")
|
|
||||||
|
|
||||||
|
|
||||||
cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||||
|
|
||||||
@ -300,7 +291,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
|||||||
mkey = "merged_%s" % key
|
mkey = "merged_%s" % key
|
||||||
#cas ou on met a jour les merged_keys mais il n'y a pas de merged_keys dans la sequence qui arrive
|
#cas ou on met a jour les merged_keys mais il n'y a pas de merged_keys dans la sequence qui arrive
|
||||||
if key in i_seq:
|
if key in i_seq:
|
||||||
to_merge = str(i_seq[key])
|
to_merge = i_seq[key]
|
||||||
|
if type(to_merge) != bytes :
|
||||||
|
to_merge = str(to_merge)
|
||||||
mcol = o_seq[mkey]
|
mcol = o_seq[mkey]
|
||||||
if mcol[to_merge] is None:
|
if mcol[to_merge] is None:
|
||||||
mcol[to_merge] = i_count
|
mcol[to_merge] = i_count
|
||||||
@ -325,7 +318,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
|||||||
|
|
||||||
if mergeIds :
|
if mergeIds :
|
||||||
merged_ids_dict[o_seq.id].append(i_seq.id)
|
merged_ids_dict[o_seq.id].append(i_seq.id)
|
||||||
#o_seq['merged'].append(i_seq.id)
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
o_view[o_idx] = i_seq
|
o_view[o_idx] = i_seq
|
||||||
@ -344,7 +336,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
|||||||
taxid_dist_dict[i_seq.id][o_seq.id] = o_seq['taxid']
|
taxid_dist_dict[i_seq.id][o_seq.id] = o_seq['taxid']
|
||||||
mkey = "merged_%s" % key
|
mkey = "merged_%s" % key
|
||||||
if key in o_seq:
|
if key in o_seq:
|
||||||
to_merge = str(o_seq[key])
|
to_merge = o_seq[key]
|
||||||
|
if type(to_merge) != bytes :
|
||||||
|
to_merge = str(to_merge)
|
||||||
mcol = o_seq[mkey]
|
mcol = o_seq[mkey]
|
||||||
if to_merge in mcol and mcol[to_merge] is not None:
|
if to_merge in mcol and mcol[to_merge] is not None:
|
||||||
mcol[to_merge] = mcol[to_merge] + o_seq[COUNT_COLUMN]
|
mcol[to_merge] = mcol[to_merge] + o_seq[COUNT_COLUMN]
|
||||||
@ -354,7 +348,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
|||||||
|
|
||||||
if mergeIds:
|
if mergeIds:
|
||||||
merged_ids_dict[o_seq.id] = [o_seq.id]
|
merged_ids_dict[o_seq.id] = [o_seq.id]
|
||||||
#o_seq['merged']=[o_seq.id]
|
|
||||||
|
|
||||||
i+=1
|
i+=1
|
||||||
|
|
||||||
@ -437,7 +430,10 @@ def run(config):
|
|||||||
# Initialize the progress bar
|
# Initialize the progress bar
|
||||||
pb = ProgressBar(len(entries), config, seconde=5)
|
pb = ProgressBar(len(entries), config, seconde=5)
|
||||||
|
|
||||||
|
try:
|
||||||
uniq_sequences(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'])
|
uniq_sequences(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'])
|
||||||
|
except:
|
||||||
|
raise RollbackException("obi uniq error, rollbacking view", o_view)
|
||||||
|
|
||||||
print("\n")
|
print("\n")
|
||||||
print(repr(o_view))
|
print(repr(o_view))
|
||||||
|
Reference in New Issue
Block a user