diff --git a/python/obitools3/commands/uniq.pxd b/python/obitools3/commands/uniq.pxd index 3452e17..7402339 100644 --- a/python/obitools3/commands/uniq.pxd +++ b/python/obitools3/commands/uniq.pxd @@ -5,4 +5,4 @@ from obitools3.dms.taxo.taxo cimport Taxonomy from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS -cdef uniqSequence(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, Taxonomy taxonomy=*, list mergedKeys_list=*, bint mergeIds=*, list categories=*) +cdef uniqSequence(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=*, Taxonomy taxonomy=*, bint mergeIds=*, list categories=*) diff --git a/python/obitools3/commands/uniq.pyx b/python/obitools3/commands/uniq.pyx index 37ff7a5..b7db87a 100644 --- a/python/obitools3/commands/uniq.pyx +++ b/python/obitools3/commands/uniq.pyx @@ -2,7 +2,6 @@ from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport from obitools3.dms.dms cimport DMS -from obitools3.dms.taxo.taxo cimport Taxonomy from obitools3.dms.view.view cimport View, Line from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS from obitools3.dms.column.column cimport Column, Column_line @@ -59,12 +58,12 @@ def addOptions(parser): # TODO taxonomy -cdef uniqSequence(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, Taxonomy taxonomy=None, list mergedKeys_list=None, bint mergeIds=False, list categories=None) : +cdef uniqSequence(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None) : cdef int i cdef int o_idx cdef int u_idx - cdef int u_id + cdef tuple u_id cdef int i_count cdef set mergedKeys cdef dict uniques @@ -80,23 +79,26 @@ cdef uniqSequence(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, Taxo cdef Column seq_col cdef object to_merge cdef Column_line mcol - cdef Column_line i_mcol - + cdef Column_line i_mcol + cdef list catl + + #print(categories) + uniques = {} - - if categories is None: - categories=[] if mergedKeys_list is not None: mergedKeys=set(mergedKeys_list) else: mergedKeys=set() -# if taxonomy is not None: -# mergedKeys.add('taxid') + if taxonomy is not None: + mergedKeys.add('taxid') + + if categories is None: + categories = [] # Going through columns to merge a first time to create merged columns with the good number of elements per line and elemnts names - #logger("info", "obi uniq", "First browsing through the input") + logger("info", "First browsing through the input") merged_infos = {} i = 0 iter_view = iter(view) @@ -131,20 +133,28 @@ cdef uniqSequence(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, Taxo del(merged_infos) - #logger("info", "obi uniq", "Second browsing through the input") + logger("info", "Second browsing through the input") i = 0 o_idx = 0 seq_col = view[NUC_SEQUENCE_COLUMN] - + iter_view = iter(view) for i_seq in iter_view : pb(i) - #u_id = tuple(i_seq[x] for x in categories) + (seq_col.get_line_idx(i),) - u_id = seq_col.get_line_idx(i) + # This can't be done in the same line as the u_id tuple creation because it generates a bug + # where Cython (version 0.25.2) does not detect the reference to the categs_list variable and deallocates + # it at the beginning of the function. + # (Only happens if categs_list is an optional parameter, which it is). + catl = [] + for x in categories : + catl.append(i_seq[x]) + + u_id = tuple(catl) + (seq_col.get_line_idx(i),) + #u_id = tuple(i_seq[x] for x in categories) + (seq_col.get_line_idx(i),) # The line that cython can't read properly if u_id in uniques: - + if COUNT_COLUMN not in i_seq or i_seq[COUNT_COLUMN] is None: i_count = 1 else: @@ -248,9 +258,11 @@ def run(config): input=False, newviewtype=View_NUC_SEQS) + # TODO exceptions not handled like they should be + entries = input[1] o_view = output[1] - + # Initialize the progress bar pb = ProgressBar(len(entries), config, seconde=5) @@ -259,9 +271,14 @@ def run(config): # usm = uniqPrefixSequence # else: usm = uniqSequence - - usm(entries, o_view, pb, taxonomy=None, mergedKeys_list=config['uniq']['merge'], mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories']) +# if 'taxoURI' in config['obi'] : # TODO default None problem +# taxo = open_uri(config['obi']['taxoURI']) +# else : + taxo = None + + usm(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories']) + # if 'merge' in config['uniq'] : # merged_keys=set(config['uniq']['merge']) # else: