Compare commits

...

46 Commits

SHA1 Message Date
ade1107b42 switch to version 3.0.0b26 2020-06-17 18:56:07 +02:00
9c7d24406f export: dictionaries are now formatted like in the original OBITools when exporting in tabular format, and tuple formatting is cleaner 2020-06-17 18:55:46 +02:00
03bc9915f2 Cython: utils: added handling of tuples to bytes2str_object function 2020-06-17 18:54:14 +02:00
24b1dab573 Cython: Columns: added a keys() method that returns all element names 2020-06-17 18:53:41 +02:00
7593673f3f ngsfilter: now setting 'reversed' tag to False instead of None when false 2020-06-17 18:52:35 +02:00
aa01236cae switch to version 3.0.0b25 2020-06-13 21:48:49 +02:00
49b8810a76 C: made indexer opening/closing cleaner 2020-06-13 21:47:03 +02:00
7a39df54c0 ls: fixed an issue where big DMS couldn't be read by ls 2020-06-13 21:45:22 +02:00
09e483b0d6 switch to temporary version 3.0.0b24a 2020-06-10 17:47:56 +02:00
14a2579173 uniq: now outputs an empty view if the input view is empty, instead of displaying an error 2020-06-10 17:47:26 +02:00
36a8aaa92e grep: now creating empty views instead of displaying an error when selecting on a nonexistent column/tag 2020-06-10 16:57:42 +02:00
a17eb445c2 ngsfilter: made one of the tag error messages more accurate 2020-06-10 16:27:36 +02:00
e4a32788c2 Switch to version 3.0.0b24 2020-06-09 14:36:58 +02:00
2442cc80bf Cython: View: fixed bash history display 2020-06-09 14:36:37 +02:00
aa836b2ace uniq: improved progress bar of second browsing 2020-06-09 14:36:02 +02:00
8776ce22e6 C: fixed a bug where indexers referring to tuples of certain types were not properly closed and imported 2020-06-09 14:34:43 +02:00
4aa772c405 ecotag: Added list of taxids for all best matches (closes #80) 2020-06-09 14:33:14 +02:00
b0b96ac37a version 3.0.0b23a 2020-06-05 16:10:24 +02:00
687e42ad22 C: kmer alignment: fixed a bug where scores of 0 were reported as (0+kmer_length-1), and now setting the alignment direction to None if the score is 0 2020-06-05 16:09:33 +02:00
5fbbb6d304 alignpairedend: fixed a bug when rebuilding joined (unaligned) sequences where only the forward sequence was kept 2020-06-05 16:06:43 +02:00
359a9fe237 Switch to version 3.0.0b23 2020-06-04 15:35:03 +02:00
f9b6851f75 Python: correctly flagged some mandatory options as required 2020-06-04 15:34:24 +02:00
29a2652bbf Fixed installation on Ubuntu without pip 2020-06-04 15:06:35 +02:00
2a2c233936 obi import: fixed a bug when skipping an entry 2020-05-29 21:19:42 +02:00
faf8ea9d86 Switch to version 3.0.0b21 2020-05-28 20:42:09 +02:00
ffe2485e94 Genbank parser: now reading ORIGIN lines with comments without triggering an error 2020-05-28 20:41:34 +02:00
6094ce2bbc obi import: made skip-on-error more robust 2020-05-28 20:40:36 +02:00
a7dcf16c06 Minor changes for pip release 2020-05-20 15:59:04 +02:00
f13f8f6165 obi import: minor doc/display improvements 2020-05-20 11:46:29 +02:00
b5a29ac413 Switch to version 3.0.0b19 2020-05-20 10:29:36 +02:00
efd2b9d338 Cleaner installation 2020-05-20 10:29:12 +02:00
ca6e3e7aad obi import: fixed to work with seq genbank extension 2020-05-20 10:28:14 +02:00
76ed8e18e5 Switch to version 3.0.0b18 with version formatting that fits setuptools 2020-05-18 17:08:55 +02:00
1d17f28aec setup: now using setuptools instead of distutils to work with pip 2020-05-18 17:08:09 +02:00
fa834e4b8b obi import: small bug fix 2020-05-18 17:06:58 +02:00
a72fea3cc9 Python: fasta parser: fixed a bug stopping the program when the last line contained a single nucleotide 2020-05-12 11:24:12 +02:00
e9a37d8a6e Switch to version 3.0.0-beta16 2020-05-07 17:09:26 +02:00
ef074f8455 typo 2020-05-07 17:08:59 +02:00
aec5e69f2c C, views: no more automatic COUNT column if MERGED_sample column exists 2020-05-07 17:08:07 +02:00
170ef3f1ba Views: added obi prefix to commands in bash history 2020-05-07 17:07:01 +02:00
f999946582 obi uniq: fixed the re-merging of already merged information, and made efficiency improvements 2020-05-07 17:05:54 +02:00
773b36ec37 obi import: fixed the import of old obitools files with premerged information, and other minor improvements 2020-05-07 17:03:04 +02:00
69cb434a6c version 3.0.0-beta15c 2020-04-29 14:25:33 +02:00
55d4f98d60 obi annotate: fixed annotation at ranks 2020-04-29 14:24:40 +02:00
0bec2631e8 ecotag: fixed a bug where the full DMS paths weren't properly sent to the C layer 2020-04-29 10:35:55 +02:00
e6b6c6fa84 AVLs: Made an error message more informative 2020-04-29 10:14:04 +02:00
31 changed files with 474 additions and 274 deletions

View File

@@ -6,7 +6,7 @@ recursive-include doc/sphinx/source *.txt *.rst *.py
 recursive-include doc/sphinx/sphinxext *.py
 include doc/sphinx/Makefile
 include doc/sphinx/Doxyfile
-include README.txt
+include README.md
 include requirements.txt
 include scripts/obi

View File

@@ -13,7 +13,8 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
                                         ID_COLUMN, \
                                         DEFINITION_COLUMN, \
                                         QUALITY_COLUMN, \
-                                        COUNT_COLUMN
+                                        COUNT_COLUMN, \
+                                        TAXID_COLUMN

 import time
 import math

@@ -175,8 +176,8 @@ def sequenceTaggerGenerator(config, taxo=None):
         counter[0]+=1
         for rank in annoteRank:
-            if 'taxid' in seq:
-                taxid = seq['taxid']
+            if TAXID_COLUMN in seq:
+                taxid = seq[TAXID_COLUMN]
                 if taxid is not None:
                     rtaxid = taxo.get_taxon_at_rank(taxid, rank)
                     if rtaxid is not None:
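For context, the annotation logic this change touches can be sketched in plain Python; the seq mapping, rank list, and "TAXID" key below are illustrative stand-ins, with only get_taxon_at_rank taken from the diff:

def annotate_at_ranks(seq, taxo, ranks=("family", "genus", "species")):
    # Hypothetical seq dict and taxonomy object; mirrors the loop above.
    taxid = seq.get("TAXID")
    if taxid is None:
        return seq
    for rank in ranks:
        rtaxid = taxo.get_taxon_at_rank(taxid, rank)
        if rtaxid is not None:
            seq[rank] = rtaxid   # record the taxid found at that rank
    return seq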

View File

@@ -35,12 +35,14 @@ def addOptions(parser):
                        action="store", dest="ecopcr:primer1",
                        metavar='<PRIMER>',
                        type=str,
+                       required=True,
                        help="Forward primer, length must be less than or equal to 32")

     group.add_argument('--primer2', '-R',
                        action="store", dest="ecopcr:primer2",
                        metavar='<PRIMER>',
                        type=str,
+                       required=True,
                        help="Reverse primer, length must be less than or equal to 32")

     group.add_argument('--error', '-e',

View File

@@ -107,8 +107,8 @@ def run(config):
     comments = View.print_config(config, "ecotag", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)

     if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \
-                  tobytes(ref_dms_name), tobytes(ref_view_name), \
-                  tobytes(taxo_dms_name), tobytes(taxonomy_name), \
+                  ref_dms.name_with_full_path, tobytes(ref_view_name), \
+                  taxo_dms.name_with_full_path, tobytes(taxonomy_name), \
                   tobytes(o_view_name), comments,
                   config['ecotag']['threshold']) < 0:
         raise Exception("Error running ecotag")

View File

@@ -161,8 +161,7 @@ def obi_eval(compiled_expr, loc_env, line):
     return obi_eval_result

-def Filter_generator(options, tax_filter):
-    #taxfilter = taxonomyFilterGenerator(options)
+def Filter_generator(options, tax_filter, i_view):

     # Initialize conditions
     predicates = None

@@ -171,6 +170,9 @@ def Filter_generator(options, tax_filter):
     attributes = None
     if "attributes" in options and len(options["attributes"]) > 0:
         attributes = options["attributes"]
+        for attribute in attributes:
+            if attribute not in i_view:
+                return None

     lmax = None
     if "lmax" in options:
         lmax = options["lmax"]

@@ -196,6 +198,8 @@ def Filter_generator(options, tax_filter):
     if "attribute_patterns" in options and len(options["attribute_patterns"]) > 0:
         for p in options["attribute_patterns"]:
             attribute, pattern = p.split(":", 1)
+            if attribute not in i_view:
+                return None
             attribute_patterns[tobytes(attribute)] = re.compile(tobytes(pattern))

     def filter(line, loc_env):

@@ -324,8 +328,16 @@ def run(config):
     # Apply filter
     tax_filter = Taxonomy_filter_generator(taxo, config["grep"])
-    filter = Filter_generator(config["grep"], tax_filter)
+    filter = Filter_generator(config["grep"], tax_filter, i_view)
     selection = Line_selection(i_view)

-    for i in range(len(i_view)):
-        PyErr_CheckSignals()
-        pb(i)
+    if filter is None and config["grep"]["invert_selection"]: # all sequences are selected: filter is None if no line will be selected because some columns don't exist
+        for i in range(len(i_view)):
+            PyErr_CheckSignals()
+            pb(i)
+            selection.append(i)
+
+    elif filter is not None : # filter is None if no line will be selected because some columns don't exist
+        for i in range(len(i_view)):
+            PyErr_CheckSignals()
+            pb(i)

@@ -338,7 +350,7 @@ def run(config):
             if good :
                 selection.append(i)

-    pb(i, force=True)
+    pb(len(i_view), force=True)
     print("", file=sys.stderr)

     # Create output view with the line selection
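The pattern introduced here, sketched with plain dicts and sets (all names hypothetical): the generator returns None when a required column is missing, and the caller maps None to an empty selection, or to a full one when the selection is inverted:

def make_filter(required_attributes, view_columns):
    # Returns None when no line can possibly match, instead of raising.
    for attr in required_attributes:
        if attr not in view_columns:
            return None
    return lambda line: all(line.get(a) is not None for a in required_attributes)

view_columns = {"seq", "count"}            # toy view with two columns
flt = make_filter(["sample"], view_columns)
invert_selection = False
if flt is None and invert_selection:
    selection = list(range(10))            # every line is selected
elif flt is None:
    selection = []                         # empty view instead of an error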

View File

@@ -25,7 +25,8 @@ from obitools3.dms.capi.obiview cimport VIEW_TYPE_NUC_SEQS, \
                                         DEFINITION_COLUMN, \
                                         QUALITY_COLUMN, \
                                         COUNT_COLUMN, \
-                                        TAXID_COLUMN
+                                        TAXID_COLUMN, \
+                                        MERGED_PREFIX

 from obitools3.dms.capi.obidms cimport obi_import_view

@@ -72,7 +73,7 @@ def addOptions(parser):
                      action="store_true", dest="import:preread",
                      default=False,
                      help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
-                          "a much faster import.")
+                          "a much faster import. This option is not recommended and will slow down the import in any other case.")

 def run(config):

@@ -163,7 +164,7 @@ def run(config):
             taxo.write(taxo_name)
             taxo.close()
         o_dms.record_command_line(" ".join(sys.argv[1:]))
-        o_dms.close()
+        o_dms.close(force=True)
         logger("info", "Done.")
         return

@@ -217,11 +218,14 @@ def run(config):
                 logger("info", "Read %d entries", i)
             for tag in entry :
+                newtag = tag
+                if tag[:7] == b"merged_":
+                    newtag = MERGED_PREFIX+tag[7:]
                 if type(entry[tag]) == dict :
                     if tag in dict_dict:
-                        dict_dict[tag][0].update(entry[tag].keys())
+                        dict_dict[newtag][0].update(entry[tag].keys())
                     else:
-                        dict_dict[tag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
+                        dict_dict[newtag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
             i+=1

         if pb is not None:

@@ -232,7 +236,7 @@ def run(config):
             dcols[tag] = (Column.new_column(view, tag, dict_dict[tag][1], \
                                             nb_elements_per_line=len(dict_dict[tag][0]), \
                                             elements_names=list(dict_dict[tag][0])), \
-                          value_obitype)
+                          dict_dict[tag][1])

     # Reinitialize the input

@@ -256,7 +260,6 @@ def run(config):
         if entry is None:  # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
             if config['obi']['skiperror']:
-                i-=1
                 continue
             else:
                 raise RollbackException("obi import error, rollbacking view", view)

@@ -266,6 +269,8 @@ def run(config):
         elif not i%50000:
             logger("info", "Imported %d entries", i)

+        try:
+
             if NUC_SEQS_view:
                 id_col[i] = entry.id
                 def_col[i] = entry.definition

@@ -288,6 +293,8 @@ def run(config):
                 tag = TAXID_COLUMN
             if tag == b"count":
                 tag = COUNT_COLUMN
+            if tag[:7] == b"merged_":
+                tag = MERGED_PREFIX+tag[7:]

             if tag not in dcols :

@@ -328,8 +335,8 @@ def run(config):
                 try:
                     # Check that it's not the case where the first entry contained a dict of length 1 and now there is a new key
                     if type(value) == dict and \
-                       dcols[tag][0].nb_elements_per_line == 1 and len(value.keys()) == 1 \
-                       and dcols[tag][0].elements_names[0] != list(value.keys())[0] :
+                       dcols[tag][0].nb_elements_per_line == 1 \
+                       and set(dcols[tag][0].elements_names) != set(value.keys()) :
                         raise IndexError  # trigger column rewrite

                     # Fill value

@@ -382,6 +389,13 @@ def run(config):
                     # Fill value
                     dcols[tag][0][i] = value

+        except Exception as e:
+            print("\nCould not import sequence id:", entry.id, "(error raised:", e, ")")
+            if 'skiperror' in config['obi'] and not config['obi']['skiperror']:
+                raise e
+            else:
+                pass
+
         i+=1

         if pb is not None:

@@ -402,7 +416,7 @@ def run(config):
     except AttributeError:
         pass
     try:
-        output[0].close()
+        output[0].close(force=True)
     except AttributeError:
         pass
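The merged_ renaming above can be illustrated with a small stand-alone helper (MERGED_PREFIX is assumed to be b"MERGED_", matching its use in the diff):

MERGED_PREFIX = b"MERGED_"   # assumed value of the cimported constant

def normalize_tag(tag):
    # Old OBITools files use lowercase b"merged_xxx"; rename to the
    # OBITools3 convention so the columns line up on re-import.
    if tag[:7] == b"merged_":
        return MERGED_PREFIX + tag[7:]
    return tag

assert normalize_tag(b"merged_sample") == b"MERGED_sample"
assert normalize_tag(b"count") == b"count"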

View File

@@ -34,9 +34,10 @@ def run(config):
     if input[2] == DMS and not config['ls']['longformat']:
         dms = input[0]
         l = []
-        for view in input[0]:
-            l.append(tostr(view) + "\t(Date created: " + str(bytes2str_object(dms[view].comments["Date created"]))+")")
-            dms[view].close()
+        for viewname in input[0]:
+            view = dms[viewname]
+            l.append(tostr(viewname) + "\t(Date created: " + str(bytes2str_object(view.comments["Date created"]))+")")
+            view.close()
         l.sort()
         for v in l:
             print(v)

View File

@@ -42,8 +42,9 @@ def addOptions(parser):
                      metavar="<URI>",
                      type=str,
                      default=None,
-                     help="URI to the view containing the samples definition (with tags, primers, sample names,...)"
-                          "Warning: primer lengths must be less than or equal to 32")
+                     required=True,
+                     help="URI to the view containing the samples definition (with tags, primers, sample names,...).\n"
+                          "\nWarning: primer lengths must be less than or equal to 32")

     group.add_argument('-R', '--reverse-reads',
                        action="store", dest="ngsfilter:reverse",

@@ -478,6 +479,8 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
         if not directmatch[0].forward:
             sequences[0] = sequences[0].reverse_complement
             sequences[0][b'reversed'] = True  # used by the alignpairedend tool (in kmer_similarity.c)
+        else:
+            sequences[0][b'reversed'] = False  # used by the alignpairedend tool (in kmer_similarity.c)

     sample=None
     if not no_tags:

@@ -505,7 +508,7 @@ cdef tuple annotate(sequences, infos, no_tags, verbose=False):
                 sample=None

         if sample is None:
-            sequences[0][b'error']=b"No tags found"
+            sequences[0][b'error']=b"No sample with that tag combination"
             return False, sequences[0]

     sequences[0].update(sample)

View File

@@ -5,5 +5,5 @@ from obitools3.dms.taxo.taxo cimport Taxonomy
 from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS

-cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy)
-cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=*, Taxonomy taxonomy=*, bint mergeIds=*, list categories=*, int max_elts=*)
+cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict config)
+cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=*, Taxonomy taxonomy=*, bint mergeIds=*, list categories=*, int max_elts=*)

View File

@@ -56,7 +56,7 @@ def addOptions(parser):
                                "(option can be used several times).")

-cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
+cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict config) :
     cdef int taxid
     cdef Nuc_Seq_Stored seq

@@ -69,7 +69,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
     cdef object gn_sn
     cdef object fa_sn

-    # Create columns
+    # Create columns and save them for efficiency
     if b"species" in o_view and o_view[b"species"].data_type_int != OBI_INT :
         o_view.delete_column(b"species")
     if b"species" not in o_view:

@@ -77,6 +77,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                           b"species",
                           OBI_INT
                           )
+    species_column = o_view[b"species"]

     if b"genus" in o_view and o_view[b"genus"].data_type_int != OBI_INT :
         o_view.delete_column(b"genus")

@@ -85,6 +86,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                           b"genus",
                           OBI_INT
                           )
+    genus_column = o_view[b"genus"]

     if b"family" in o_view and o_view[b"family"].data_type_int != OBI_INT :
         o_view.delete_column(b"family")

@@ -93,6 +95,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                           b"family",
                           OBI_INT
                           )
+    family_column = o_view[b"family"]

     if b"species_name" in o_view and o_view[b"species_name"].data_type_int != OBI_STR :
         o_view.delete_column(b"species_name")

@@ -101,6 +104,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                           b"species_name",
                           OBI_STR
                           )
+    species_name_column = o_view[b"species_name"]

     if b"genus_name" in o_view and o_view[b"genus_name"].data_type_int != OBI_STR :
         o_view.delete_column(b"genus_name")

@@ -109,6 +113,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                           b"genus_name",
                           OBI_STR
                           )
+    genus_name_column = o_view[b"genus_name"]

     if b"family_name" in o_view and o_view[b"family_name"].data_type_int != OBI_STR :
         o_view.delete_column(b"family_name")

@@ -117,6 +122,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                           b"family_name",
                           OBI_STR
                           )
+    family_name_column = o_view[b"family_name"]

     if b"rank" in o_view and o_view[b"rank"].data_type_int != OBI_STR :
         o_view.delete_column(b"rank")

@@ -125,6 +131,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                           b"rank",
                           OBI_STR
                           )
+    rank_column = o_view[b"rank"]

     if b"scientific_name" in o_view and o_view[b"scientific_name"].data_type_int != OBI_STR :
         o_view.delete_column(b"scientific_name")

@@ -133,9 +140,15 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                           b"scientific_name",
                           OBI_STR
                           )
+    scientific_name_column = o_view[b"scientific_name"]
+
+    # Initialize the progress bar
+    pb = ProgressBar(len(o_view), config, seconde=5)

+    i=0
     for seq in o_view:
         PyErr_CheckSignals()
+        pb(i)
         if MERGED_TAXID_COLUMN in seq :
             m_taxids = []
             m_taxids_dict = seq[MERGED_TAXID_COLUMN]

@@ -166,19 +179,22 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                 fa_sn = None
                 tfa = None

-            seq[b"species"] = tsp
-            seq[b"genus"] = tgn
-            seq[b"family"] = tfa
-            seq[b"species_name"] = sp_sn
-            seq[b"genus_name"] = gn_sn
-            seq[b"family_name"] = fa_sn
-            seq[b"rank"] = taxonomy.get_rank(taxid)
-            seq[b"scientific_name"] = taxonomy.get_scientific_name(taxid)
+            species_column[i] = tsp
+            genus_column[i] = tgn
+            family_column[i] = tfa
+            species_name_column[i] = sp_sn
+            genus_name_column[i] = gn_sn
+            family_name_column[i] = fa_sn
+            rank_column[i] = taxonomy.get_rank(taxid)
+            scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
+        i+=1
+
+    pb(len(o_view), force=True)

-cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
+cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
     cdef int i
     cdef int k

@@ -187,6 +203,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
     cdef int u_idx
     cdef int i_idx
     cdef int i_count
+    cdef int o_count
     cdef str key_str
     cdef bytes key
     cdef bytes mkey

@@ -209,7 +226,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
     cdef Nuc_Seq_Stored i_seq
     cdef Nuc_Seq_Stored o_seq
     cdef Nuc_Seq_Stored u_seq
-    cdef Column i_col
     cdef Column i_seq_col
     cdef Column i_id_col
     cdef Column i_taxid_col

@@ -217,6 +233,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
     cdef Column o_id_col
     cdef Column o_taxid_dist_col
     cdef Column o_merged_col
+    cdef Column o_count_col
+    cdef Column i_count_col
     cdef Column_line i_mcol
     cdef object taxid_dist_dict
     cdef object iter_view

@@ -253,6 +271,11 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
     for k in range(k_count):
         mergedKeys_m.append(MERGED_PREFIX + mergedKeys[k])

+    # Check that not trying to remerge without total count information
+    for key in mergedKeys_m:
+        if key in view and COUNT_COLUMN not in view:
+            raise Exception("\n>>>>\nError: trying to re-merge tags without total count tag. Run obi annotate to add the count tag from the relevant merged tag, i.e.: \nobi annotate --set-tag COUNT:'sum([value for key,value in sequence['MERGED_sample'].items()])' dms/input dms/output\n")

     if categories is None:
         categories = []

@@ -320,6 +343,10 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
     for k in range(k_count):
         key = mergedKeys[k]
         merged_col_name = mergedKeys_m[k]
-        i_col = view[key]
+        if merged_col_name in view:
+            i_col = view[merged_col_name]
+        else:
+            i_col = view[key]

         if merged_infos[merged_col_name]['nb_elts'] > max_elts:

@@ -374,23 +401,30 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
                              alias=MERGED_COLUMN
                              )

-    # Keep columns that are going to be used a lot in variables
+    # Keep columns in variables for efficiency
     o_id_col = o_view[ID_COLUMN]
     if TAXID_DIST_COLUMN in o_view:
         o_taxid_dist_col = o_view[TAXID_DIST_COLUMN]
     if MERGED_COLUMN in o_view:
         o_merged_col = o_view[MERGED_COLUMN]

+    if COUNT_COLUMN not in o_view:
+        Column.new_column(o_view,
+                          COUNT_COLUMN,
+                          OBI_INT)
+    o_count_col = o_view[COUNT_COLUMN]
+
+    if COUNT_COLUMN in view:
+        i_count_col = view[COUNT_COLUMN]

     pb(len(view), force=True)
     print("")
     logger("info", "Second browsing through the input")
     # Initialize the progress bar
-    pb = ProgressBar(len(uniques), seconde=5)
+    pb = ProgressBar(len(view), seconde=5)

     o_idx = 0
+    total_treated = 0

     for unique_id in uniques :
         PyErr_CheckSignals()
-        pb(o_idx)

         merged_sequences = uniques[unique_id]

@@ -407,7 +441,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
         merged_list = list(set(merged_list)) # deduplicate the list
         o_merged_col[o_idx] = merged_list

-        o_seq[COUNT_COLUMN] = 0
+        o_count = 0

         if TAXID_DIST_COLUMN in u_seq and i_taxid_dist_col[u_idx] is not None:
             taxid_dist_dict = i_taxid_dist_col[u_idx]

@@ -419,16 +453,17 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
                 merged_dict[mkey] = {}

         for i_idx in merged_sequences:
+            pb(total_treated)
             i_id = i_id_col[i_idx]
             i_seq = view[i_idx]

-            if COUNT_COLUMN not in i_seq or i_seq[COUNT_COLUMN] is None:
+            if COUNT_COLUMN not in i_seq or i_count_col[i_idx] is None:
                 i_count = 1
             else:
-                i_count = i_seq[COUNT_COLUMN]
+                i_count = i_count_col[i_idx]

-            o_seq[COUNT_COLUMN] += i_count
+            o_count += i_count

             for k in range(k_count):

@@ -464,12 +499,14 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
                     else:
                         mcol[key2] = mcol[key2] + i_mcol[key2]

-        # Write taxid_dist
-        if mergeIds and TAXID_COLUMN in mergedKeys:
-            if TAXID_DIST_COLUMN in str_merged_cols:
-                o_taxid_dist_col[o_idx] = str(taxid_dist_dict)
-            else:
-                o_taxid_dist_col[o_idx] = taxid_dist_dict
+            for key in i_seq.keys():
+                # Delete informations that differ between the merged sequences
+                # TODO make special columns list? // could be more efficient
+                if key != COUNT_COLUMN and key != ID_COLUMN and key != NUC_SEQUENCE_COLUMN and key in o_seq and o_seq[key] != i_seq[key] \
+                   and key not in merged_dict :
+                    o_seq[key] = None
+
+            total_treated += 1

         # Write merged dicts
         for mkey in merged_dict:

@@ -482,25 +519,33 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
             #     if mkey_cols[mkey][o_idx][key] is None:
             #         mkey_cols[mkey][o_idx][key] = 0

-        for key in i_seq.keys():
-            # Delete informations that differ between the merged sequences
-            # TODO make special columns list?
-            if key != COUNT_COLUMN and key != ID_COLUMN and key != NUC_SEQUENCE_COLUMN and key in o_seq and o_seq[key] != i_seq[key] \
-               and key not in merged_dict :
-                o_seq[key] = None
+        # Write taxid_dist
+        if mergeIds and TAXID_COLUMN in mergedKeys:
+            if TAXID_DIST_COLUMN in str_merged_cols:
+                o_taxid_dist_col[o_idx] = str(taxid_dist_dict)
+            else:
+                o_taxid_dist_col[o_idx] = taxid_dist_dict

+        o_count_col[o_idx] = o_count

         o_idx += 1

+    pb(len(view), force=True)

     # Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
     if QUALITY_COLUMN in view:
         o_view.delete_column(QUALITY_COLUMN)
     if REVERSE_QUALITY_COLUMN in view:
         o_view.delete_column(REVERSE_QUALITY_COLUMN)

+    # Delete old columns that are now merged
+    for k in range(k_count):
+        if mergedKeys[k] in o_view:
+            o_view.delete_column(mergedKeys[k])

     if taxonomy is not None:
         print("") # TODO because in the middle of progress bar. Better solution?
         logger("info", "Merging taxonomy classification")
-        merge_taxonomy_classification(o_view, taxonomy)
+        merge_taxonomy_classification(o_view, taxonomy, config)

@@ -546,12 +591,12 @@ def run(config):
     # Initialize the progress bar
     pb = ProgressBar(len(entries), config, seconde=5)

-    try:
-        uniq_sequences(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])
-    except Exception, e:
-        raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
+    if len(entries) > 0:
+        try:
+            uniq_sequences(entries, o_view, pb, config, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])
+        except Exception, e:
+            raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)

-    pb(len(entries), force=True)
     print("", file=sys.stderr)

     # Save command config in View and DMS comments

@@ -567,8 +612,8 @@ def run(config):
     #print("\n\nOutput view:\n````````````", file=sys.stderr)
     #print(repr(o_view), file=sys.stderr)

-    input[0].close()
-    output[0].close()
+    input[0].close(force=True)
+    output[0].close(force=True)

     logger("info", "Done.")

View File

@@ -63,6 +63,8 @@ cdef extern from "obidmscolumn.h" nogil:
     char* obi_get_elements_names(OBIDMS_column_p column)

+    char* obi_column_formatted_infos(OBIDMS_column_p column)
+
     index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)

     int obi_column_write_comments(OBIDMS_column_p column, const char* comments)

View File

@@ -22,6 +22,7 @@ cdef class Column(OBIWrapper) :
     cdef inline OBIDMS_column_p pointer(self)

     cdef read_elements_names(self)
+    cpdef list keys(self)

     @staticmethod
     cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples)

View File

@@ -14,6 +14,7 @@ from ..capi.obidms cimport obi_import_column
 from ..capi.obidmscolumn cimport OBIDMS_column_header_p, \
                                  obi_close_column, \
                                  obi_get_elements_names, \
+                                 obi_column_formatted_infos, \
                                  obi_column_write_comments

 from ..capi.obiutils cimport obi_format_date

@@ -288,7 +289,13 @@ cdef class Column(OBIWrapper) :
     @OBIWrapper.checkIsActive
     def __repr__(self) :
         cdef bytes s
+        #cdef char* s_b
+        #cdef str s_str
+        #s_b = obi_column_formatted_infos(self.pointer())
+        #s_str = bytes2str(s_b)
+        #free(s_b)
         s = self._alias + b", data type: " + self.data_type
+        #return s_str
         return bytes2str(s)

@@ -316,6 +323,9 @@ cdef class Column(OBIWrapper) :
         free(elts_names_b)
         return elts_names_list

+    cpdef list keys(self):
+        return self._elements_names

     # Column alias property getter and setter
     @property
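A hypothetical use of the new keys() accessor, assuming a view holding a multi-element MERGED_sample column:

col = view[b"MERGED_sample"]     # assumed view and column names
for element_name in col.keys():  # returns the cached _elements_names list
    print(element_name)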

View File

@@ -227,7 +227,9 @@ cdef class DMS(OBIWrapper):
         cdef str s
         s=""
         for view_name in self.keys():
-            s = s + repr(self.get_view(view_name)) + "\n"
+            view = self.get_view(view_name)
+            s = s + repr(view) + "\n"
+            view.close()
         return s

View File

@@ -531,6 +531,7 @@ cdef class View(OBIWrapper) :
         for level in self.view_history:
             command_list = [level[input][b"command_line"] for input in level.keys()]
             for command in command_list:
+                s+=b"obi "
                 s+=command
                 s+=b"\n"
         return s

View File

@@ -3,7 +3,7 @@
 cimport cython
 from obitools3.dms.view.view cimport Line
 from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
-from obitools3.dms.column.column cimport Column_line
+from obitools3.dms.column.column cimport Column_line, Column_multi_elts

 cdef class TabFormat:

@@ -25,18 +25,28 @@ cdef class TabFormat:
         for k in self.tags:
             if self.header and self.first_line:
-                value = tobytes(k)
+                if isinstance(data.view[k], Column_multi_elts):
+                    for k2 in data.view[k].keys():
+                        line.append(tobytes(k)+b':'+tobytes(k2))
+                else:
+                    line.append(tobytes(k))
             else:
                 value = data[k]
-                if value is not None:
-                    if type(value) == Column_line:
-                        value = value.bytes()
-                    else:
-                        value = str2bytes(str(bytes2str_object(value))) # genius programming
-                if value is None:
-                    value = self.NAString
-
-                line.append(value)
+                if isinstance(data.view[k], Column_multi_elts):
+                    if value is None: # all keys at None
+                        for k2 in data.view[k].keys(): # TODO could be much more efficient
+                            line.append(self.NAString)
+                    else:
+                        for k2 in data.view[k].keys(): # TODO could be much more efficient
+                            if value[k2] is not None:
+                                line.append(str2bytes(str(bytes2str_object(value[k2])))) # genius programming
+                            else:
+                                line.append(self.NAString)
+                else:
+                    if value is not None:
+                        line.append(str2bytes(str(bytes2str_object(value))))
+                    else:
+                        line.append(self.NAString)

         if self.first_line:
             self.first_line = False
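The export rule above, restated as a small sketch: a multi-element (dict) column expands to one tag:key header per element, and a missing value prints as NA (the helper names are invented):

def header_fields(tag, element_names):
    return [b"%s:%s" % (tag, k) for k in element_names]

def row_fields(value, element_names, na=b"NA"):
    if value is None:                      # the whole dict is absent
        return [na] * len(element_names)
    return [str(value[k]).encode() if value.get(k) is not None else na
            for k in element_names]

print(header_fields(b"MERGED_sample", [b"A", b"B"]))  # [b'MERGED_sample:A', b'MERGED_sample:B']
print(row_fields({b"A": 3}, [b"A", b"B"]))            # [b'3', b'NA']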

View File

@@ -188,7 +188,7 @@ def buildConsensus(ali, seq, ref_tags=None):
         seq[b'shift']=ali.shift
     else:
         if len(ali[0])>999: # TODO why?
-            raise AssertionError,"Too long alignemnt"
+            raise AssertionError,"Too long alignment"

         ic=IterOnConsensus(ali)

@@ -250,11 +250,21 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
     quality.extend(reverse.quality)
     seq.set(forward.id +b"_PairedEnd", s, definition=forward.definition, quality=quality)
     seq[b"score"]=ali.score
-    seq[b"ali_direction"]=ali.direction
+    if len(ali.direction) > 0:
+        seq[b"ali_direction"]=ali.direction
+    else:
+        seq[b"ali_direction"]=None
     seq[b"mode"]=b"joined"
     seq[b"pairedend_limit"]=len(forward)
+    seq[b"ali_length"] = ali.consensus_len
+    if ali.consensus_len > 0:
+        seq[b"score_norm"]=float(ali.score)/ali.consensus_len
+    else:
+        seq[b"score_norm"]=0.0
     for tag in forward:
-        if tag != REVERSE_SEQUENCE_COLUMN and tag != REVERSE_QUALITY_COLUMN:
+        if tag != REVERSE_SEQUENCE_COLUMN and tag != REVERSE_QUALITY_COLUMN and \
+           tag != NUC_SEQUENCE_COLUMN and tag != QUALITY_COLUMN:
             seq[tag] = forward[tag]
     return seq
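The normalized score added above reduces to a one-line guard against empty alignments:

def score_norm(score, consensus_len):
    # ali_length may be 0 when the reads do not overlap at all.
    return float(score) / consensus_len if consensus_len > 0 else 0.0

assert score_norm(38, 40) == 0.95
assert score_norm(0, 0) == 0.0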

View File

@@ -177,7 +177,7 @@ def emblIterator_dir(dir_path,
     for filename in files:
         if read==only:
             return
-        print("Parsing file %s (%d/%d)" % (tostr(filename), read_files, len(files)))
+        print("Parsing file %s (%d/%d)" % (tostr(filename), read_files+1, len(files)))
         f = uopen(filename)
         if only is not None:
             only_f = only-read

View File

@@ -104,6 +104,7 @@ def fastaNucIterator(lineiterator,
     cdef bytes sequence
     cdef int skipped, ionly, read
     cdef Nuc_Seq seq
+    cdef bint stop

     if only is None:
         ionly = -1

@@ -130,7 +131,8 @@ def fastaNucIterator(lineiterator,
     else:
         line = firstline

-    while True:
+    stop=False
+    while not stop:
         if ionly >= 0 and read >= ionly:
             break

@@ -153,7 +155,7 @@ def fastaNucIterator(lineiterator,
                 s.append(line[0:-1])
                 line = next(iterator)
         except StopIteration:
-            pass
+            stop=True

         sequence = b"".join(s)
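The stop-flag fix follows a general pattern for consuming iterators that may end mid-record: catch StopIteration, finish the current record, then leave the loop. A minimal stand-alone sketch with a toy FASTA-like format:

def records(lines):
    it = iter(lines)
    try:
        line = next(it)
    except StopIteration:
        return                   # empty input
    stop = False
    while not stop:
        header = line
        body = []
        try:
            line = next(it)
            while not line.startswith(">"):
                body.append(line)
                line = next(it)
        except StopIteration:
            stop = True          # flush the final record before exiting
        yield header, "".join(body)

print(list(records([">a", "AC", "GT"])))   # [('>a', 'ACGT')]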

View File

@@ -25,8 +25,9 @@ from libc.string cimport strcpy, strlen
 _featureMatcher = re.compile(b'^FEATURES.+\n(?=ORIGIN)',re.DOTALL + re.M)

 _headerMatcher = re.compile(b'^LOCUS.+(?=\nFEATURES)', re.DOTALL + re.M)
-_seqMatcher    = re.compile(b'(?<=ORIGIN).+(?=//\n)', re.DOTALL + re.M)
-_cleanSeq      = re.compile(b'[ \n0-9]+')
+_seqMatcher    = re.compile(b'ORIGIN.+(?=//\n)', re.DOTALL + re.M)
+_cleanSeq1     = re.compile(b'ORIGIN.+\n')
+_cleanSeq2     = re.compile(b'[ \n0-9]+')
 _acMatcher     = re.compile(b'(?<=^ACCESSION ).+',re.M)
 _deMatcher     = re.compile(b'(?<=^DEFINITION ).+\n( .+\n)*',re.M)
 _cleanDe       = re.compile(b'\n *')

@@ -42,7 +43,8 @@ def genbankParser(bytes text):
         ft = _featureMatcher.search(text).group()
         s = _seqMatcher.search(text).group()
-        s = _cleanSeq.sub(b'', s).upper()
+        s = _cleanSeq1.sub(b'', s)
+        s = _cleanSeq2.sub(b'', s)

         acs = _acMatcher.search(text).group()
         acs = acs.split()

@@ -52,12 +54,6 @@ def genbankParser(bytes text):
         de = _deMatcher.search(header).group()
         de = _cleanDe.sub(b' ',de).strip().strip(b'.')

-    except Exception as e:
-        print("\nCould not import sequence id:", text.split()[1], "(error raised:", e, ")")
-        # Do not raise any Exception if you need the possibility to resume the generator
-        # (Python generators can't resume after any exception is raised)
-        return None

         tags = {}
         extractTaxon(ft, tags)

@@ -68,6 +64,12 @@ def genbankParser(bytes text):
                       offset=-1,
                       tags=tags)

+    except Exception as e:
+        print("\nCould not import sequence id:", text.split()[1], "(error raised:", e, ")")
+        # Do not raise any Exception if you need the possibility to resume the generator
+        # (Python generators can't resume after any exception is raised)
+        return None

     return seq

@@ -171,10 +173,12 @@ def genbankIterator_dir(dir_path,
     read = 0
     read_files = 0
     files = [filename for filename in glob.glob(os.path.join(path, b'*.gbff*'))]
+    files.extend([filename for filename in glob.glob(os.path.join(path, b'*.seq*'))]) # new genbank extension
+    files = list(set(files))
     for filename in files:
         if read==only:
             return
-        print("Parsing file %s (%d/%d)" % (tostr(filename), read_files, len(files)))
+        print("Parsing file %s (%d/%d)" % (tostr(filename), read_files+1, len(files)))
         f = uopen(filename)
         if only is not None:
             only_f = only-read
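The two-step cleanup introduced above can be checked in isolation: the first pattern drops the ORIGIN line (comments included), the second strips numbering and whitespace from the bases:

import re

_cleanSeq1 = re.compile(b'ORIGIN.+\n')
_cleanSeq2 = re.compile(b'[ \n0-9]+')

block = b"ORIGIN some comment\n        1 acgtacgtac\n       11 acgt\n"
s = _cleanSeq1.sub(b'', block)   # removes the ORIGIN line itself
s = _cleanSeq2.sub(b'', s)       # removes digits, spaces and newlines
print(s)                         # b'acgtacgtacacgt'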

View File

@@ -166,7 +166,9 @@ cdef object bytes2str_object(object value): # Only works if complex types are d
             value[k] = bytes2str(v)
             if type(k) == bytes:
                 value[bytes2str(k)] = value.pop(k)
-    elif isinstance(value, list):
+    elif isinstance(value, list) or isinstance(value, tuple):
+        if isinstance(value, tuple):
+            value = list(value)
         for i in range(len(value)):
             if isinstance(value[i], list) or isinstance(value[i], dict):
                 value[i] = bytes2str_object(value[i])
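A functional plain-Python sketch of the same conversion (the shipped version mutates in place; this copy-based variant is easier to test):

def bytes2str_object(value):
    # Recursively convert bytes to str inside dicts, lists and tuples;
    # tuples come back as lists, matching the list(value) copy above.
    if isinstance(value, dict):
        return {bytes2str_object(k): bytes2str_object(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [bytes2str_object(v) for v in value]
    if isinstance(value, bytes):
        return value.decode('ascii')
    return value

print(bytes2str_object((b'a', [b'b', {b'c': 1}])))   # ['a', ['b', {'c': 1}]]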

View File

@@ -1,5 +1,5 @@
 major = 3
 minor = 0
-serial= '0-beta15'
+serial= '0b26'

-version ="%d.%02d.%s" % (major,minor,serial)
+version ="%d.%d.%s" % (major,minor,serial)

requirements.txt (new executable file, 5 lines)
View File

@@ -0,0 +1,5 @@
+--extra-index-url https://pypi.python.org/simple/
+Cython>=0.24
+Sphinx>=1.2.0
+ipython>=3.0.0
+breathe>=4.0.0

View File

@@ -5,7 +5,8 @@ import re
 import subprocess

 from distutils import log
-from distutils.core import setup
+#from distutils.core import setup
+from setuptools import setup # to work with pip
 from distutils.core import Extension
 from distutils.sysconfig import get_python_lib

@@ -26,10 +27,11 @@ class Distribution(ori_Distribution):
         ori_Distribution.__init__(self, attrs)

-        self.global_options.insert(0,('cobitools3', None, "intall location of the C library"
+        self.global_options.insert(0,('cobitools3', None, "install location of the C library"
                                      ))

 from distutils.command.build import build as build_ori
+from setuptools.command.bdist_egg import bdist_egg as bdist_egg_ori
 from distutils.core import Command

@@ -70,6 +72,12 @@ class build(build_ori):
         build_ori.run(self)

+class bdist_egg(bdist_egg_ori):
+    def run(self):
+        self.run_command('build_clib')
+        bdist_egg_ori.run(self)

 sys.path.append(os.path.abspath("python"))

@@ -88,9 +96,10 @@ PACKAGE = "OBITools3"
 VERSION = version
 AUTHOR  = 'Celine Mercier'
 EMAIL   = 'celine.mercier@metabarcoding.org'
-URL     = "http://metabarcoding.org/obitools3"
+URL     = "https://metabarcoding.org/obitools3"
+PLATFORMS = "posix"
 LICENSE = "CeCILL-V2"
-DESCRIPTION = "Tools and library for DNA metabarcoding",
+DESCRIPTION = "A package for the management of analyses and data in DNA metabarcoding."
 PYTHONMIN = '3.5'

 SRC = 'python'

@@ -147,17 +156,24 @@ classifiers=['Development Status :: 4 - Beta',
              'Topic :: Utilities',
             ]

+with open("README.md", "r") as fh:
+    long_description = fh.read()

 setup(name=PACKAGE,
       description=DESCRIPTION,
+      long_description=long_description,
+      long_description_content_type="text/markdown",
       classifiers=classifiers,
       version=VERSION,
       author=AUTHOR,
       author_email=EMAIL,
+      platforms=PLATFORMS,
       license=LICENSE,
       url=URL,
       ext_modules=xx,
       distclass=Distribution,
       cmdclass={'build': build,
+                'bdist_egg': bdist_egg,
                 'build_clib': build_clib},
       cobitools3=get_python_lib(),
       packages = findPackage('python'),

View File

@@ -413,7 +413,10 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
         return NULL;
     }

-    score = max_common_kmers + kmer_size - 1; // aka the number of nucleotides in the longest stretch of kmers perfectly matching
+    if (max_common_kmers > 0)
+        score = max_common_kmers + kmer_size - 1; // aka the number of nucleotides in the longest stretch of kmers perfectly matching
+    else
+        score = 0;
     abs_shift = abs(best_shift);

     // Save result in Obi_ali structure

@@ -423,10 +426,15 @@ Obi_ali_p kmer_similarity(Obiview_p view1, OBIDMS_column_p column1, index_t idx1
     ali->shift = abs_shift;
     ali->consensus_seq = NULL;
     ali->consensus_qual = NULL;
-    if (((best_shift <= 0) && (!switched_seqs)) || ((best_shift > 0) && switched_seqs))
-        strcpy(ali->direction, "left");
-    else
-        strcpy(ali->direction, "right");
+    if (score == 0)
+        ali->direction[0] = '\0';
+    else
+    {
+        if (((best_shift <= 0) && (!switched_seqs)) || ((best_shift > 0) && switched_seqs))
+            strcpy(ali->direction, "left");
+        else
+            strcpy(ali->direction, "right");
+    }

     // Build the consensus sequence if asked
     if (build_consensus)
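A plain-Python restatement of the C guard, for readability (the function is an illustration, not part of the code base):

def kmer_score(max_common_kmers, kmer_size, best_shift, switched_seqs):
    # Score 0 means no kmer matched; in that case no direction is set.
    score = max_common_kmers + kmer_size - 1 if max_common_kmers > 0 else 0
    if score == 0:
        direction = ""
    elif (best_shift <= 0) != switched_seqs:
        direction = "left"
    else:
        direction = "right"
    return score, direction

print(kmer_score(0, 3, -2, False))    # (0, '')
print(kmer_score(10, 3, -2, False))   # (12, 'left')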

View File

@@ -71,9 +71,12 @@ static int create_output_columns(Obiview_p o_view);
  * @param name The assigned scientific name.
  * @param assigned_status_column A pointer on the column where the assigned status should be written.
  * @param assigned The assigned status (whether the sequence was assigned to a taxon or not).
- * @param best_match_column A pointer on the column where the list of ids of the best matches should be written.
+ * @param best_match_ids_column A pointer on the column where the list of ids of the best matches should be written.
  * @param best_match_ids The list of ids of the best matches as an array of the concatenated ids separated by '\0'.
  * @param best_match_ids_length The total length of the array of ids of best matches.
+ * @param best_match_taxids_column A pointer on the column where the list of taxids of the best matches should be written.
+ * @param best_match_taxids The list of taxids of the best matches as an array of the taxids.
+ * @param best_match_taxids_length The length of the array of taxids of best matches.
  * @param score_column A pointer on the column where the score should be written.
  * @param score The similarity score of the sequence with its best match(es).
  *

@@ -87,7 +90,8 @@ int print_assignment_result(Obiview_p output_view, index_t line,
                             OBIDMS_column_p assigned_taxid_column, int32_t taxid,
                             OBIDMS_column_p assigned_name_column, const char* name,
                             OBIDMS_column_p assigned_status_column, bool assigned,
-                            OBIDMS_column_p best_match_column, const char* best_match_ids, int best_match_ids_length,
+                            OBIDMS_column_p best_match_ids_column, const char* best_match_ids, int best_match_ids_length,
+                            OBIDMS_column_p best_match_taxids_column, const int32_t* best_match_taxids, int best_match_taxids_length,
                             OBIDMS_column_p score_column, double score);

@@ -130,7 +134,14 @@ static int create_output_columns(Obiview_p o_view)
     // Column for array of best match ids
     if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
     {
-        obidebug(1, "\nError creating the column for the array of ids of the best match in ecotag");
+        obidebug(1, "\nError creating the column for the array of ids of best matches in ecotag");
+        return -1;
+    }
+
+    // Column for array of best match taxids
+    if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
+    {
+        obidebug(1, "\nError creating the column for the array of taxids of best matches in ecotag");
         return -1;
     }

@@ -142,7 +153,8 @@ int print_assignment_result(Obiview_p output_view, index_t line,
                             OBIDMS_column_p assigned_taxid_column, int32_t taxid,
                             OBIDMS_column_p assigned_name_column, const char* name,
                             OBIDMS_column_p assigned_status_column, bool assigned,
-                            OBIDMS_column_p best_match_column, const char* best_match_ids, int best_match_ids_length,
+                            OBIDMS_column_p best_match_ids_column, const char* best_match_ids, int best_match_ids_length,
+                            OBIDMS_column_p best_match_taxids_column, const int32_t* best_match_taxids, int best_match_taxids_length,
                             OBIDMS_column_p score_column, double score)
 {
     // Write the assigned taxid

@@ -167,9 +179,16 @@ int print_assignment_result(Obiview_p output_view, index_t line,
     }

     // Write the best match ids
-    if (obi_set_array_with_col_p_in_view(output_view, best_match_column, line, best_match_ids, (uint8_t)(sizeof(char)*8), best_match_ids_length) < 0)
+    if (obi_set_array_with_col_p_in_view(output_view, best_match_ids_column, line, best_match_ids, (uint8_t)(sizeof(char)*8), best_match_ids_length) < 0)
     {
-        obidebug(1, "\nError writing a assignment status in a column when writing ecotag results");
+        obidebug(1, "\nError writing the array of best match ids in a column when writing ecotag results");
+        return -1;
+    }
+
+    // Write the best match taxids
+    if (obi_set_array_with_col_p_in_view(output_view, best_match_taxids_column, line, best_match_taxids, (uint8_t)(sizeof(OBI_INT)*8), best_match_taxids_length) < 0)
+    {
+        obidebug(1, "\nError writing the array of best match taxids in a column when writing ecotag results");
         return -1;
     }

@@ -235,6 +254,8 @@ int obi_ecotag(const char* dms_name,
     char*    best_match_ids;
     char*    best_match_ids_to_store;
     int32_t  best_match_ids_length;
+    int32_t* best_match_taxids;
+    int32_t* best_match_taxids_to_store;
     int      best_match_count;
     int      buffer_size;
     int      best_match_ids_buffer_size;

@@ -263,7 +284,8 @@ int obi_ecotag(const char* dms_name,
     OBIDMS_column_p assigned_taxid_column = NULL;
     OBIDMS_column_p assigned_name_column = NULL;
     OBIDMS_column_p assigned_status_column = NULL;
-    OBIDMS_column_p best_match_column = NULL;
+    OBIDMS_column_p best_match_ids_column = NULL;
+    OBIDMS_column_p best_match_taxids_column = NULL;
     OBIDMS_column_p lca_taxid_a_column = NULL;
     OBIDMS_column_p score_a_column = NULL;
     OBIDMS_column_p ref_taxid_column = NULL;

@@ -396,7 +418,8 @@ int obi_ecotag(const char* dms_name,
     assigned_taxid_column = obi_view_get_column(output_view, ECOTAG_TAXID_COLUMN_NAME);
     assigned_name_column = obi_view_get_column(output_view, ECOTAG_NAME_COLUMN_NAME);
     assigned_status_column = obi_view_get_column(output_view, ECOTAG_STATUS_COLUMN_NAME);
-    best_match_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME);
+    best_match_ids_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME);
+    best_match_taxids_column = obi_view_get_column(output_view, ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME);
     score_column = obi_view_get_column(output_view, ECOTAG_SCORE_COLUMN_NAME);

     // Open the used reference columns

@@ -453,6 +476,14 @@ int obi_ecotag(const char* dms_name,
         return -1;
     }

+    best_match_taxids = (int32_t*) malloc(buffer_size * sizeof(int32_t));
+    if (best_match_taxids == NULL)
+    {
+        obi_set_errno(OBI_MALLOC_ERROR);
+        obidebug(1, "\nError allocating memory for the best match taxid array in ecotag");
+        return -1;
+    }
+
     for (i=0; i < query_count; i++)
     {
         if (i%1000 == 0)

@@ -514,7 +545,7 @@ int obi_ecotag(const char* dms_name,
                 // Store in best match array

-                // Grow match array if needed
+                // Grow match and taxid array if needed
                 if (best_match_count == buffer_size)
                 {
                     buffer_size = buffer_size*2;

@@ -525,6 +556,13 @@ int obi_ecotag(const char* dms_name,
                         obidebug(1, "\nError reallocating match array when assigning");
                         return -1;
                     }
+                    best_match_taxids = (int32_t*) realloc(best_match_taxids, buffer_size*sizeof(int32_t));
+                    if (best_match_taxids == NULL)
+                    {
+                        obi_set_errno(OBI_MALLOC_ERROR);
+                        obidebug(1, "\nError reallocating match taxids array when assigning");
+                        return -1;
+                    }
                 }

                 id = obi_get_str_with_elt_idx_and_col_p_in_view(ref_view, ref_id_column, j, 0);

@@ -545,6 +583,7 @@ int obi_ecotag(const char* dms_name,
                 // Save match
                 best_match_array[best_match_count] = j;
+                best_match_taxids[best_match_count] = obi_get_int_with_elt_idx_and_col_p_in_view(ref_view, ref_taxid_column, j, 0);
                 best_match_count++;
                 strcpy(best_match_ids+best_match_ids_length, id);
                 best_match_ids_length = best_match_ids_length + id_len + 1;

@@ -629,6 +668,7 @@ int obi_ecotag(const char* dms_name,
             else
                 lca_name = lca->name;
             best_match_ids_to_store = best_match_ids;
+            best_match_taxids_to_store = best_match_taxids;
         }
         else
         {

@@ -636,6 +676,7 @@ int obi_ecotag(const char* dms_name,
             lca_name = OBIStr_NA;
             lca_taxid = OBIInt_NA;
             best_match_ids_to_store = OBITuple_NA;
+            best_match_taxids_to_store = OBITuple_NA;
             score = OBIFloat_NA;
         }

@@ -644,7 +685,8 @@ int obi_ecotag(const char* dms_name,
                                        assigned_taxid_column, lca_taxid,
                                        assigned_name_column, lca_name,
                                        assigned_status_column, assigned,
-                                       best_match_column, best_match_ids_to_store, best_match_ids_length,
+                                       best_match_ids_column, best_match_ids_to_store, best_match_ids_length,
+                                       best_match_taxids_column, best_match_taxids_to_store, best_match_count,
                                        score_column, best_score
                                        ) < 0)
             return -1;

@@ -652,6 +694,7 @@ int obi_ecotag(const char* dms_name,
     free(best_match_array);
     free(best_match_ids);
+    free(best_match_taxids);

     obi_close_taxonomy(taxonomy);
     obi_save_and_close_view(query_view);
View File

@@ -23,7 +23,8 @@
 #define ECOTAG_TAXID_COLUMN_NAME "TAXID"
 #define ECOTAG_NAME_COLUMN_NAME "SCIENTIFIC_NAME"
 #define ECOTAG_STATUS_COLUMN_NAME "ID_STATUS"
-#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME "BEST_MATCH"
+#define ECOTAG_BEST_MATCH_IDS_COLUMN_NAME "BEST_MATCH_IDS"
+#define ECOTAG_BEST_MATCH_TAXIDS_COLUMN_NAME "BEST_MATCH_TAXIDS"
 #define ECOTAG_SCORE_COLUMN_NAME "BEST_IDENTITY"

View File

@@ -648,7 +648,7 @@ int truncate_avl_data_to_size_used(OBIDMS_avl_data_p avl_data) // TODO is it nec
     new_data_size = ((index_t) multiple) * getpagesize();

     // Check that it is actually greater than the current size of the file, otherwise no need to truncate
-    if ((avl_data->header)->data_size_max == new_data_size)
+    if ((avl_data->header)->data_size_max >= new_data_size)
         return 0;

     // Get the file descriptor

@@ -667,7 +667,7 @@ int truncate_avl_data_to_size_used(OBIDMS_avl_data_p avl_data) // TODO is it nec
     if (ftruncate(file_descriptor, file_size) < 0)
     {
         obi_set_errno(OBI_AVL_ERROR);
-        obidebug(1, "\nError truncating an AVL data file");
+        obidebug(1, "\nError truncating an AVL data file, old data size = %lld, new data size = %lld", (avl_data->header)->data_size_max, new_data_size);
         return -1;
     }
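The size computation behind this check, sketched in Python (mmap.PAGESIZE stands in for getpagesize(); the helper names are invented):

import mmap

def target_size(bytes_used, page_size=mmap.PAGESIZE):
    multiple = (bytes_used + page_size - 1) // page_size   # ceil division
    return multiple * page_size

def needs_resize(current_size, bytes_used):
    # With the `>=` fix, a file already at least as large as the rounded
    # target is left untouched instead of being resized on exact match only.
    return target_size(bytes_used) > current_size

print(target_size(5000))           # 8192 with 4 KiB pages
print(needs_resize(8192, 5000))    # False
print(needs_resize(4096, 5000))    # True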

View File

@@ -1496,7 +1496,7 @@ obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, c
     memcpy(column_2->data, column_1->data, header_1->data_size);

     // Copy the AVL files if there are some (overwriting the automatically created files)
-    if ((header_1->returned_data_type == OBI_STR) || (header_1->returned_data_type == OBI_SEQ) || (header_1->returned_data_type == OBI_QUAL))
+    if ((header_1->tuples) || ((header_1->returned_data_type == OBI_STR) || (header_1->returned_data_type == OBI_SEQ) || (header_1->returned_data_type == OBI_QUAL)))
     {
         avl_name_1 = (char*) malloc((strlen(header_1->indexer_name) + 1) * sizeof(char));
         if (avl_name_1 == NULL)

View File

@@ -1350,6 +1350,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
         }
         strncpy(header->indexer_name, final_indexer_name, INDEXER_MAX_NAME);
     }
+    else
+        new_column->indexer = NULL;

     // Fill the data with NA values
     obi_ini_to_NA_values(new_column, 0, nb_lines);

@@ -1558,6 +1560,8 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
             return NULL;
         }
     }
+    else
+        column->indexer = NULL;

     if (close(column_file_descriptor) < 0)
     {

@@ -1693,8 +1697,8 @@ int obi_close_column(OBIDMS_column_p column)
     if (obi_dms_unlist_column(column->dms, column) < 0)
         ret_val = -1;

-    // If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is closed
-    if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ) || ((column->header)->returned_data_type == OBI_QUAL))
+    // If it's a tuple column or the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated indexer is closed
+    if ((column->indexer) != NULL)
         if (obi_close_indexer(column->indexer) < 0)
             ret_val = -1;
View File

@@ -1037,8 +1037,9 @@ static int finish_view(Obiview_p view)
         return -1;
     }

-    // Add count column if it's a NUC_SEQ_VIEW with no count column // TODO discuss
-    if ((!strcmp((view->infos)->view_type, VIEW_TYPE_NUC_SEQS)) && (!obi_view_column_exists(view, COUNT_COLUMN)))
+    // Add count column if it's a NUC_SEQ_VIEW with no count column (and there's no MERGED_sample column) // TODO discuss
+    if ((!strcmp((view->infos)->view_type, VIEW_TYPE_NUC_SEQS)) && (!obi_view_column_exists(view, COUNT_COLUMN))
+        && (!obi_view_column_exists(view, "MERGED_sample"))) // TODO should eventually compute from merged samples?
     {
         if (obi_create_auto_count_column(view) < 0)
         {