From b91b3176b08cac53f1a86b27c6ba62aa0b253817 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Thu, 17 May 2018 14:58:15 +0200 Subject: [PATCH] obi uniq: fixed a bug where merged values were wrongly reinitialized --- python/obitools3/commands/uniq.pyx | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/python/obitools3/commands/uniq.pyx b/python/obitools3/commands/uniq.pyx index 28c7a61..232628a 100644 --- a/python/obitools3/commands/uniq.pyx +++ b/python/obitools3/commands/uniq.pyx @@ -15,7 +15,7 @@ from obitools3.apps.config import logger from obitools3.utils cimport tobytes -__title__="Groups sequence records together" +__title__="Group sequence records together" @@ -278,7 +278,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li for k in range(k_count): key = mergedKeys[k] mkey = mergedKeys_m[k] - if key in i_seq: # TODO what if mkey already in i_seq? + if key in i_seq: # TODO what if mkey already in i_seq? --> should update if mkey not in merged_infos: merged_infos[mkey] = {} mkey_infos = merged_infos[mkey] @@ -388,8 +388,12 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li else: taxid_dist_dict = {} + merged_dict = {} + for mkey in mergedKeys_m: + merged_dict[mkey] = {} + for i_idx in merged_sequences: - + i_id = i_id_col[i_idx] i_seq = view[i_idx] @@ -399,13 +403,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li i_count = i_seq[COUNT_COLUMN] o_seq[COUNT_COLUMN] += i_count - - merged_dict = {} - for mkey in mergedKeys_m: - merged_dict[mkey] = {} - + for k in range(k_count): - + key = mergedKeys[k] mkey = mergedKeys_m[k] @@ -419,10 +419,10 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li if key in i_seq: to_merge = i_seq[key] if to_merge is not None: - if type(to_merge) != bytes : - to_merge = tobytes(str(to_merge)) + if type(to_merge) != bytes: + to_merge = tobytes(str(to_merge)) mcol = merged_dict[mkey] - if to_merge not in mcol or mcol[to_merge] is None: + if to_merge not in mcol or mcol[to_merge] is None: mcol[to_merge] = i_count else: mcol[to_merge] = mcol[to_merge] + i_count @@ -452,6 +452,10 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li mkey_cols[mkey][o_idx] = str(merged_dict[mkey]) else: mkey_cols[mkey][o_idx] = merged_dict[mkey] + # Sets NA values to 0 # TODO discuss, maybe keep as None and test for None instead of testing for 0 in tools + #for key in mkey_cols[mkey][o_idx]: + # if mkey_cols[mkey][o_idx][key] is None: + # mkey_cols[mkey][o_idx][key] = 0 for key in i_seq.keys(): # Delete informations that differ between the merged sequences