obi uniq: fixed a bug where dictionary indexes were not read properly, and
added a view rollback in case of an exception.
This commit is contained in:
@ -3,6 +3,7 @@
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.dms import DMS
|
||||
from obitools3.dms.view.view cimport View, Line
|
||||
from obitools3.dms.view import RollbackException
|
||||
from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
|
||||
from obitools3.dms.column.column cimport Column, Column_line
|
||||
from obitools3.dms.capi.obiview cimport QUALITY_COLUMN, COUNT_COLUMN, NUC_SEQUENCE_COLUMN, ID_COLUMN
|
||||
@ -44,16 +45,6 @@ def addOptions(parser):
|
||||
"used to group sequences before dereplication "
|
||||
"(option can be used several times).")
|
||||
|
||||
# TODO discuss
|
||||
# group.add_argument('--prefix', '-p',
|
||||
# action="store_true", dest="uniq:prefix",
|
||||
# default=False,
|
||||
# help="Dereplication is done based on prefix matching: "
|
||||
# "(i) The shortest sequence of each group is a prefix "
|
||||
# "of any sequence of its group (ii) Two shortest "
|
||||
# "sequences of any couple of groups are not the"
|
||||
# "prefix of the other one.")
|
||||
|
||||
|
||||
cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
|
||||
|
||||
@ -300,7 +291,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
mkey = "merged_%s" % key
|
||||
# case where the merged_keys are being updated but the incoming sequence has no merged_keys
|
||||
if key in i_seq:
|
||||
to_merge = str(i_seq[key])
|
||||
to_merge = i_seq[key]
|
||||
if type(to_merge) != bytes :
|
||||
to_merge = str(to_merge)
|
||||
mcol = o_seq[mkey]
|
||||
if mcol[to_merge] is None:
|
||||
mcol[to_merge] = i_count
|
||||
@ -325,7 +318,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
|
||||
if mergeIds :
|
||||
merged_ids_dict[o_seq.id].append(i_seq.id)
|
||||
#o_seq['merged'].append(i_seq.id)
|
||||
|
||||
else:
|
||||
o_view[o_idx] = i_seq
|
||||
@ -344,7 +336,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
taxid_dist_dict[i_seq.id][o_seq.id] = o_seq['taxid']
|
||||
mkey = "merged_%s" % key
|
||||
if key in o_seq:
|
||||
to_merge = str(o_seq[key])
|
||||
to_merge = o_seq[key]
|
||||
if type(to_merge) != bytes :
|
||||
to_merge = str(to_merge)
|
||||
mcol = o_seq[mkey]
|
||||
if to_merge in mcol and mcol[to_merge] is not None:
|
||||
mcol[to_merge] = mcol[to_merge] + o_seq[COUNT_COLUMN]
|
||||
@ -354,7 +348,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
||||
|
||||
if mergeIds:
|
||||
merged_ids_dict[o_seq.id] = [o_seq.id]
|
||||
#o_seq['merged']=[o_seq.id]
|
||||
|
||||
i+=1
|
||||
|
||||
@ -437,7 +430,10 @@ def run(config):
|
||||
# Initialize the progress bar
|
||||
pb = ProgressBar(len(entries), config, seconde=5)
|
||||
|
||||
try:
|
||||
uniq_sequences(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'])
|
||||
except:
|
||||
raise RollbackException("obi uniq error, rollbacking view", o_view)
|
||||
|
||||
print("\n")
|
||||
print(repr(o_view))
|
||||
|
Reference in New Issue
Block a user