obi uniq: fixed a bug where merged values were wrongly reinitialized
This commit is contained in:
@ -15,7 +15,7 @@ from obitools3.apps.config import logger
|
|||||||
from obitools3.utils cimport tobytes
|
from obitools3.utils cimport tobytes
|
||||||
|
|
||||||
|
|
||||||
__title__="Groups sequence records together"
|
__title__="Group sequence records together"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -278,7 +278,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
|||||||
for k in range(k_count):
|
for k in range(k_count):
|
||||||
key = mergedKeys[k]
|
key = mergedKeys[k]
|
||||||
mkey = mergedKeys_m[k]
|
mkey = mergedKeys_m[k]
|
||||||
if key in i_seq: # TODO what if mkey already in i_seq?
|
if key in i_seq: # TODO what if mkey already in i_seq? --> should update
|
||||||
if mkey not in merged_infos:
|
if mkey not in merged_infos:
|
||||||
merged_infos[mkey] = {}
|
merged_infos[mkey] = {}
|
||||||
mkey_infos = merged_infos[mkey]
|
mkey_infos = merged_infos[mkey]
|
||||||
@ -388,8 +388,12 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
|||||||
else:
|
else:
|
||||||
taxid_dist_dict = {}
|
taxid_dist_dict = {}
|
||||||
|
|
||||||
|
merged_dict = {}
|
||||||
|
for mkey in mergedKeys_m:
|
||||||
|
merged_dict[mkey] = {}
|
||||||
|
|
||||||
for i_idx in merged_sequences:
|
for i_idx in merged_sequences:
|
||||||
|
|
||||||
i_id = i_id_col[i_idx]
|
i_id = i_id_col[i_idx]
|
||||||
i_seq = view[i_idx]
|
i_seq = view[i_idx]
|
||||||
|
|
||||||
@ -399,13 +403,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
|||||||
i_count = i_seq[COUNT_COLUMN]
|
i_count = i_seq[COUNT_COLUMN]
|
||||||
|
|
||||||
o_seq[COUNT_COLUMN] += i_count
|
o_seq[COUNT_COLUMN] += i_count
|
||||||
|
|
||||||
merged_dict = {}
|
|
||||||
for mkey in mergedKeys_m:
|
|
||||||
merged_dict[mkey] = {}
|
|
||||||
|
|
||||||
for k in range(k_count):
|
for k in range(k_count):
|
||||||
|
|
||||||
key = mergedKeys[k]
|
key = mergedKeys[k]
|
||||||
mkey = mergedKeys_m[k]
|
mkey = mergedKeys_m[k]
|
||||||
|
|
||||||
@ -419,10 +419,10 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
|||||||
if key in i_seq:
|
if key in i_seq:
|
||||||
to_merge = i_seq[key]
|
to_merge = i_seq[key]
|
||||||
if to_merge is not None:
|
if to_merge is not None:
|
||||||
if type(to_merge) != bytes :
|
if type(to_merge) != bytes:
|
||||||
to_merge = tobytes(str(to_merge))
|
to_merge = tobytes(str(to_merge))
|
||||||
mcol = merged_dict[mkey]
|
mcol = merged_dict[mkey]
|
||||||
if to_merge not in mcol or mcol[to_merge] is None:
|
if to_merge not in mcol or mcol[to_merge] is None:
|
||||||
mcol[to_merge] = i_count
|
mcol[to_merge] = i_count
|
||||||
else:
|
else:
|
||||||
mcol[to_merge] = mcol[to_merge] + i_count
|
mcol[to_merge] = mcol[to_merge] + i_count
|
||||||
@ -452,6 +452,10 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
|
|||||||
mkey_cols[mkey][o_idx] = str(merged_dict[mkey])
|
mkey_cols[mkey][o_idx] = str(merged_dict[mkey])
|
||||||
else:
|
else:
|
||||||
mkey_cols[mkey][o_idx] = merged_dict[mkey]
|
mkey_cols[mkey][o_idx] = merged_dict[mkey]
|
||||||
|
# Sets NA values to 0 # TODO discuss, maybe keep as None and test for None instead of testing for 0 in tools
|
||||||
|
#for key in mkey_cols[mkey][o_idx]:
|
||||||
|
# if mkey_cols[mkey][o_idx][key] is None:
|
||||||
|
# mkey_cols[mkey][o_idx][key] = 0
|
||||||
|
|
||||||
for key in i_seq.keys():
|
for key in i_seq.keys():
|
||||||
# Delete information that differs between the merged sequences
|
# Delete information that differs between the merged sequences
|
||||||
|
Reference in New Issue
Block a user