Switch to version 3.0.0-beta16

typo
C, views: no more automatic COUNT column if MERGED_sample column exists
2020-05-07 17:09:26 +02:00 · 2020-05-07 17:08:59 +02:00 · 2020-05-07 17:08:07 +02:00 · 2020-05-07 17:07:01 +02:00 · 2020-05-07 17:05:54 +02:00 · 2020-05-07 17:03:04 +02:00
40 changed files with 422 additions and 214 deletions
--- a/obi_completion_script.bash
+++ b/obi_completion_script.bash
@ -1,4 +1,3 @@
 #/usr/bin/env bash
 _obi_comp ()
 {
--- a/python/obitools3/commands/align.pyx
+++ b/python/obitools3/commands/align.pyx
@ -266,9 +266,9 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/alignpairedend.pyx
+++ b/python/obitools3/commands/alignpairedend.pyx
@ -247,10 +247,10 @@ def run(config):
    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(view), file=sys.stderr)
-    input[0].close()
+    input[0].close(force=True)
    if two_views:
-        rinput[0].close()
+        rinput[0].close(force=True)
-    output[0].close()
+    output[0].close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/annotate.pyx
+++ b/python/obitools3/commands/annotate.pyx
@ -13,7 +13,8 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
                                        ID_COLUMN, \
                                        DEFINITION_COLUMN, \
                                        QUALITY_COLUMN, \
-                                        COUNT_COLUMN
+                                        COUNT_COLUMN, \
                                        TAXID_COLUMN
 import time
 import math 
@ -175,8 +176,8 @@ def sequenceTaggerGenerator(config, taxo=None):
            counter[0]+=1
        for rank in annoteRank:
-            if 'taxid' in seq:
+            if TAXID_COLUMN in seq:
-                taxid = seq['taxid']
+                taxid = seq[TAXID_COLUMN]
                if taxid is not None:
                    rtaxid = taxo.get_taxon_at_rank(taxid, rank)
                    if rtaxid is not None:
@ -190,58 +191,50 @@ def sequenceTaggerGenerator(config, taxo=None):
            seq['seq_rank']=counter[0]
        for i,v in toSet:
-            #try:
+            try:
                if taxo is not None:
                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
                else:
                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}
                val = eval(v, environ, seq)
-            #except Exception,e:       # TODO discuss usefulness of this
+            except Exception:       # set string if not a valid expression
-            #    if options.onlyValid:
+                val = v
            #        raise e
            #    val = v
            seq[i]=val
        if length:
            seq['seq_length']=len(seq)
        if newId is not None:
-#            try:
+            try:
                if taxo is not None:
                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
                else:
                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
                val = eval(newId, environ, seq)
-#            except Exception,e:
+            except Exception:      # set string if not a valid expression
-#                if options.onlyValid:
+                val = newId
 #                    raise e
 #                val = newId
            seq.id=val
        if newDef is not None:
-#            try:
+            try:
                if taxo is not None:
                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
                else:
                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
                val = eval(newDef, environ, seq)
-#            except Exception,e:
+            except Exception:    # set string if not a valid expression
-#                if options.onlyValid:
+                val = newDef
 #                    raise e
 #                val = newDef
            seq.definition=val
-#             
+             
        if newSeq is not None:
-#            try:
+            try:
                if taxo is not None:
                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
                else:
                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
                val = eval(newSeq, environ, seq)
-#            except Exception,e:
+            except Exception:    # set string if not a valid expression
-#                if options.onlyValid:
+                val = newSeq
 #                    raise e
 #                val = newSeq
            seq.seq=val
            if 'seq_length' in seq:
                seq['seq_length']=len(seq)
@ -251,15 +244,14 @@ def sequenceTaggerGenerator(config, taxo=None):
                seq.view.delete_column(QUALITY_COLUMN)
        if run is not None:
-#            try:
+            try:
                if taxo is not None:
                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
                else:
                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
                eval(run, environ, seq)
-#            except Exception,e:
+            except Exception,e:
-#                if options.onlyValid:
+                raise e
 #                    raise e
    return sequenceTagger
@ -379,7 +371,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(o_dms, imported_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/build_ref_db.pyx
+++ b/python/obitools3/commands/build_ref_db.pyx
@ -97,9 +97,9 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/cat.pyx
+++ b/python/obitools3/commands/cat.pyx
@ -133,7 +133,7 @@ def run(config):
    #print(repr(view), file=sys.stderr)
    for d in idms_list:
-        d.close()
+        d.close(force=True)
-    o_dms.close()
+    o_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/clean.pyx
+++ b/python/obitools3/commands/clean.pyx
@ -124,8 +124,8 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/count.pyx
+++ b/python/obitools3/commands/count.pyx
@ -56,3 +56,5 @@ def run(config):
        print(count2)
    else:
        print(count1)
    input[0].close(force=True)
--- a/python/obitools3/commands/ecopcr.pyx
+++ b/python/obitools3/commands/ecopcr.pyx
@ -35,13 +35,13 @@ def addOptions(parser):
                       action="store", dest="ecopcr:primer1",
                       metavar='<PRIMER>',
                       type=str,
-                       help="Forward primer.")
+                       help="Forward primer, length must be less than or equal to 32")
    group.add_argument('--primer2', '-R',
                       action="store", dest="ecopcr:primer2",
                       metavar='<PRIMER>',
                       type=str,
-                       help="Reverse primer.")
+                       help="Reverse primer, length must be less than or equal to 32")
    group.add_argument('--error', '-e',
                       action="store", dest="ecopcr:error",
@ -203,6 +203,7 @@ def run(config):
    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[o_view_name]), file=sys.stderr)
-    o_dms.close()
+    i_dms.close(force=True)
    o_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/ecotag.pyx
+++ b/python/obitools3/commands/ecotag.pyx
@ -64,9 +64,9 @@ def run(config):
    ref_view_name = ref[1]
    # Check that the threshold demanded is greater than or equal to the threshold used to build the reference database
-    if config['ecotag']['threshold'] < eval(i_dms[ref_view_name].comments["ref_db_threshold"]) :
+    if config['ecotag']['threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
        print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).", 
-              config['ecotag']['threshold'], i_dms[ref_view_name].comments["ref_db_threshold"])
+              config['ecotag']['threshold'], ref_dms[ref_view_name].comments["ref_db_threshold"])
    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
@ -107,8 +107,8 @@ def run(config):
    comments = View.print_config(config, "ecotag", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)
    if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \
-                  tobytes(ref_dms_name), tobytes(ref_view_name), \
+                  ref_dms.name_with_full_path, tobytes(ref_view_name), \
-                  tobytes(taxo_dms_name), tobytes(taxonomy_name), \
+                  taxo_dms.name_with_full_path, tobytes(taxonomy_name), \
                  tobytes(o_view_name), comments, 
                  config['ecotag']['threshold']) < 0:
        raise Exception("Error running ecotag")
@ -126,9 +126,11 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    taxo_dms.close(force=True)
    ref_dms.close(force=True)
    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/export.pyx
+++ b/python/obitools3/commands/export.pyx
@ -60,11 +60,21 @@ def run(config):
    if (output[2] == Nuc_Seq) and (iview.type != b"NUC_SEQS_VIEW") :  # Nuc_Seq_Stored? TODO
        raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")
    if config['obi']['only'] is not None:
        withoutskip = min(input[4], config['obi']['only'])
    else:
        withoutskip = input[4]
    if config['obi']['skip'] is not None:
        skip = min(input[4], config['obi']['skip'])
    else:
        skip = 0
    # Initialize the progress bar
    if config['obi']['noprogressbar']:
        pb = None
    else:
-        pb = ProgressBar(len(iview), config, seconde=5)
+        pb = ProgressBar(withoutskip - skip, config, seconde=5)
    i=0
    for seq in iview :
@ -86,7 +96,7 @@ def run(config):
    if not BrokenPipeError and not IOError:
        output_object.close()
    iview.close()
-    input[0].close()
+    input[0].close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/grep.pyx
+++ b/python/obitools3/commands/grep.pyx
@ -370,7 +370,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/head.pyx
+++ b/python/obitools3/commands/head.pyx
@ -103,7 +103,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/history.pyx
+++ b/python/obitools3/commands/history.pyx
@ -55,3 +55,4 @@ def run(config):
        else:
            raise Exception("ASCII history only available for views")
    input[0].close(force=True)
--- a/python/obitools3/commands/import.pyx
+++ b/python/obitools3/commands/import.pyx
@ -25,7 +25,8 @@ from obitools3.dms.capi.obiview cimport VIEW_TYPE_NUC_SEQS, \
                                        DEFINITION_COLUMN, \
                                        QUALITY_COLUMN, \
                                        COUNT_COLUMN, \
-                                        TAXID_COLUMN
+                                        TAXID_COLUMN, \
                                        MERGED_PREFIX
 from obitools3.dms.capi.obidms cimport obi_import_view
@ -163,7 +164,7 @@ def run(config):
        taxo.write(taxo_name)
        taxo.close()
        o_dms.record_command_line(" ".join(sys.argv[1:]))
-        o_dms.close()
+        o_dms.close(force=True)
        logger("info", "Done.")
        return
@ -217,11 +218,14 @@ def run(config):
                logger("info", "Read %d entries", i)
            for tag in entry :
                newtag = tag
                if tag[:7] == b"merged_":
                    newtag = MERGED_PREFIX+tag[7:]
                if type(entry[tag]) == dict :
                    if tag in dict_dict:
-                        dict_dict[tag][0].update(entry[tag].keys())
+                        dict_dict[newtag][0].update(entry[tag].keys())
                    else:
-                        dict_dict[tag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
+                        dict_dict[newtag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
            i+=1
        if pb is not None:
@ -288,6 +292,8 @@ def run(config):
                    tag = TAXID_COLUMN
                if tag == b"count":
                    tag = COUNT_COLUMN
                if tag[:7] == b"merged_":
                    tag = MERGED_PREFIX+tag[7:]
                if tag not in dcols :
@ -328,8 +334,8 @@ def run(config):
                    try:
                        # Check that it's not the case where the first entry contained a dict of length 1 and now there is a new key                        
                        if type(value) == dict and \
-                            dcols[tag][0].nb_elements_per_line == 1 and len(value.keys()) == 1 \
+                            dcols[tag][0].nb_elements_per_line == 1 \
-                            and dcols[tag][0].elements_names[0] != list(value.keys())[0] :
+                            and set(dcols[tag][0].elements_names) != set(value.keys()) :
                            raise IndexError  # trigger column rewrite
                        # Fill value
@ -402,7 +408,7 @@ def run(config):
    except AttributeError:
        pass
    try:
-        output[0].close()
+        output[0].close(force=True)
    except AttributeError:
        pass
--- a/python/obitools3/commands/less.pyx
+++ b/python/obitools3/commands/less.pyx
@ -46,5 +46,5 @@ def run(config):
    process.wait()
    iview.close()
-    input[0].close()
+    input[0].close(force=True)
--- a/python/obitools3/commands/ls.pyx
+++ b/python/obitools3/commands/ls.pyx
@ -36,6 +36,7 @@ def run(config):
        l = []
        for view in input[0]:
            l.append(tostr(view) + "\t(Date created: " + str(bytes2str_object(dms[view].comments["Date created"]))+")")
            dms[view].close()
        l.sort()
        for v in l:
            print(v)
@ -52,3 +53,4 @@ def run(config):
        print("\n### Comments:")
        print(str(input[1].comments))
    input[0].close(force=True)
--- a/python/obitools3/commands/ngsfilter.pyx
+++ b/python/obitools3/commands/ngsfilter.pyx
@ -42,7 +42,8 @@ def addOptions(parser):
                     metavar="<URI>",
                     type=str,
                     default=None,
-                     help="URI to the view containing the samples definition (with tags, primers, sample names,...)")
+                     help="URI to the view containing the samples definition (with tags, primers, sample names,...).\n"
                          "\nWarning: primer lengths must be less than or equal to 32")
    group.add_argument('-R', '--reverse-reads',
                     action="store", dest="ngsfilter:reverse",
@ -172,6 +173,13 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
    primer_list = []
    i=0
    for p in info_view:
        # Check primer length: should not be longer than 32, the max allowed by the apat lib
        if len(p[b'forward_primer']) > 32:
            raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
        if len(p[b'reverse_primer']) > 32:
            raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
        forward=Primer(p[b'forward_primer'],
                       len(p[b'forward_tag']) if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
                       True,
@ -594,7 +602,13 @@ def run(config):
    pb = ProgressBar(entries_len, config, seconde=5)
    # Check and store primers and tags
    try:
        infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned)   # TODO obi verbose option
    except RollbackException, e:
        if unidentified is not None:
            raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
        else:
            raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
    aligner = Primer_search(primer_list, config['ngsfilter']['error'])
@ -652,11 +666,11 @@ def run(config):
    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)
-    input[0].close()
+    input[0].close(force=True)
-    output[0].close()
+    output[0].close(force=True)
-    info_input[0].close()
+    info_input[0].close(force=True)
    if unidentified is not None:
-        unidentified_input[0].close()
+        unidentified_input[0].close(force=True)
    aligner.free()
    logger("info", "Done.")
--- a/python/obitools3/commands/sort.pyx
+++ b/python/obitools3/commands/sort.pyx
@ -141,7 +141,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/stats.pyx
+++ b/python/obitools3/commands/stats.pyx
@ -251,7 +251,7 @@ def run(config):
    for i in range(len(sorted_stats)):
        c = sorted_stats[i][0]
        for v in c:
-            if v is not None:
+            if type(v) == bytes:
                print(pcat % tostr(v)+"\t", end="")
            else:
                print(pcat % str(v)+"\t", end="")
@ -268,6 +268,6 @@ def run(config):
        print("%7d" %catcount[c], end="")
        print("%9d" %totcount[c])
-    input[0].close()
+    input[0].close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/tail.pyx
+++ b/python/obitools3/commands/tail.pyx
@ -106,7 +106,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/test.pyx
+++ b/python/obitools3/commands/test.pyx
@ -529,7 +529,7 @@ def run(config):
        test_taxo(config, infos)
    infos['view'].close()
-    infos['dms'].close()
+    infos['dms'].close(force=True)
    shutil.rmtree(config['obi']['defaultdms']+'.obidms', ignore_errors=True)
    print("Done.")
--- a/python/obitools3/commands/uniq.pxd
+++ b/python/obitools3/commands/uniq.pxd
@ -5,5 +5,5 @@ from obitools3.dms.taxo.taxo cimport Taxonomy
 from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS
-cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy)
+cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict config)
-cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=*, Taxonomy taxonomy=*, bint mergeIds=*, list categories=*, int max_elts=*)
+cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=*, Taxonomy taxonomy=*, bint mergeIds=*, list categories=*, int max_elts=*)
--- a/python/obitools3/commands/uniq.pyx
+++ b/python/obitools3/commands/uniq.pyx
@ -56,7 +56,7 @@ def addOptions(parser):
                             "(option can be used several times).")
-cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
+cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict config) :
    cdef int             taxid
    cdef Nuc_Seq_Stored  seq
@ -69,7 +69,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
    cdef object          gn_sn
    cdef object          fa_sn
-    # Create columns
+    # Create columns and save them for efficiency
    if b"species" in o_view and o_view[b"species"].data_type_int != OBI_INT :
        o_view.delete_column(b"species")
    if b"species" not in o_view:
@ -77,6 +77,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"species", 
                          OBI_INT
                         )
    species_column = o_view[b"species"]
    if b"genus" in o_view and o_view[b"genus"].data_type_int != OBI_INT :
        o_view.delete_column(b"genus")
@ -85,6 +86,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"genus", 
                          OBI_INT
                         )
    genus_column = o_view[b"genus"]
    if b"family" in o_view and o_view[b"family"].data_type_int != OBI_INT :
        o_view.delete_column(b"family")
@ -93,6 +95,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"family", 
                          OBI_INT
                         )
    family_column = o_view[b"family"]
    if b"species_name" in o_view and o_view[b"species_name"].data_type_int != OBI_STR :
        o_view.delete_column(b"species_name")
@ -101,6 +104,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"species_name", 
                          OBI_STR
                         )
    species_name_column = o_view[b"species_name"]
    if b"genus_name" in o_view and o_view[b"genus_name"].data_type_int != OBI_STR :
        o_view.delete_column(b"genus_name")
@ -109,6 +113,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"genus_name", 
                          OBI_STR
                         )
    genus_name_column = o_view[b"genus_name"]
    if b"family_name" in o_view and o_view[b"family_name"].data_type_int != OBI_STR :
        o_view.delete_column(b"family_name")
@ -117,6 +122,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"family_name", 
                          OBI_STR
                         )
    family_name_column = o_view[b"family_name"]
    if b"rank" in o_view and o_view[b"rank"].data_type_int != OBI_STR :
        o_view.delete_column(b"rank")
@ -125,6 +131,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"rank", 
                          OBI_STR
                         )
    rank_column = o_view[b"rank"]
    if b"scientific_name" in o_view and o_view[b"scientific_name"].data_type_int != OBI_STR :
        o_view.delete_column(b"scientific_name")
@ -133,9 +140,15 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"scientific_name", 
                          OBI_STR
                         )
    scientific_name_column = o_view[b"scientific_name"]  
    # Initialize the progress bar
    pb = ProgressBar(len(o_view), config, seconde=5)
    i=0
    for seq in o_view:
        PyErr_CheckSignals()
        pb(i)
        if MERGED_TAXID_COLUMN in seq :
            m_taxids = []            
            m_taxids_dict = seq[MERGED_TAXID_COLUMN]
@ -166,19 +179,22 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                fa_sn = None
                tfa = None
-            seq[b"species"] = tsp
+            species_column[i] = tsp
-            seq[b"genus"] = tgn
+            genus_column[i] = tgn
-            seq[b"family"] = tfa
+            family_column[i] = tfa
-            seq[b"species_name"] = sp_sn
+            species_name_column[i] = sp_sn
-            seq[b"genus_name"] = gn_sn
+            genus_name_column[i] = gn_sn
-            seq[b"family_name"] = fa_sn
+            family_name_column[i] = fa_sn
-            seq[b"rank"] = taxonomy.get_rank(taxid)
+            rank_column[i] = taxonomy.get_rank(taxid)
-            seq[b"scientific_name"] = taxonomy.get_scientific_name(taxid)
+            scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
        i+=1
    pb(len(o_view), force=True)
-cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
+cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
    cdef int            i
    cdef int            k
@ -187,6 +203,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    cdef int            u_idx
    cdef int            i_idx
    cdef int            i_count
    cdef int            o_count
    cdef str            key_str
    cdef bytes          key
    cdef bytes          mkey
@ -209,7 +226,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    cdef Nuc_Seq_Stored i_seq
    cdef Nuc_Seq_Stored o_seq
    cdef Nuc_Seq_Stored u_seq
    cdef Column         i_col
    cdef Column         i_seq_col
    cdef Column         i_id_col
    cdef Column         i_taxid_col
@ -217,6 +233,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    cdef Column         o_id_col
    cdef Column         o_taxid_dist_col
    cdef Column         o_merged_col
    cdef Column         o_count_col
    cdef Column         i_count_col
    cdef Column_line    i_mcol  
    cdef object         taxid_dist_dict
    cdef object         iter_view
@ -253,6 +271,11 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    for k in range(k_count):
        mergedKeys_m.append(MERGED_PREFIX + mergedKeys[k])
    # Check that not trying to remerge without total count information
    for key in mergedKeys_m:
        if key in view and COUNT_COLUMN not in view:
            raise Exception("\n>>>>\nError: trying to re-merge tags without total count tag. Run obi annotate to add the count tag from the relevant merged tag, i.e.: \nobi annotate --set-tag COUNT:'sum([value for key,value in sequence['MERGED_sample'].items()])' dms/input dms/output\n")
    if categories is None:
        categories = []
@ -320,6 +343,10 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    for k in range(k_count):
        key = mergedKeys[k]
        merged_col_name = mergedKeys_m[k]
        if merged_col_name in view:
            i_col = view[merged_col_name]
        else:
            i_col = view[key]
        if merged_infos[merged_col_name]['nb_elts'] > max_elts:
@ -374,12 +401,19 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
                          alias=MERGED_COLUMN
                         )
-    # Keep columns that are going to be used a lot in variables 
+    # Keep columns in variables for efficiency
    o_id_col = o_view[ID_COLUMN]
    if TAXID_DIST_COLUMN in o_view:
        o_taxid_dist_col = o_view[TAXID_DIST_COLUMN]
    if MERGED_COLUMN in o_view:
        o_merged_col = o_view[MERGED_COLUMN]
    if COUNT_COLUMN not in o_view:
        Column.new_column(o_view,
                          COUNT_COLUMN,
                          OBI_INT)
    o_count_col = o_view[COUNT_COLUMN]
    if COUNT_COLUMN in view:
        i_count_col = view[COUNT_COLUMN]
    pb(len(view), force=True)
    print("")
@ -407,7 +441,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
            merged_list = list(set(merged_list)) # deduplicate the list
            o_merged_col[o_idx] = merged_list
-        o_seq[COUNT_COLUMN] = 0
+        o_count = 0
        if TAXID_DIST_COLUMN in u_seq and i_taxid_dist_col[u_idx] is not None:
            taxid_dist_dict = i_taxid_dist_col[u_idx]
@ -423,12 +457,12 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
            i_id = i_id_col[i_idx]
            i_seq = view[i_idx]
-            if COUNT_COLUMN not in i_seq or i_seq[COUNT_COLUMN] is None:
+            if COUNT_COLUMN not in i_seq or i_count_col[i_idx] is None:
                i_count = 1
            else:
-                i_count = i_seq[COUNT_COLUMN]
+                i_count = i_count_col[i_idx]
-            o_seq[COUNT_COLUMN] += i_count
+            o_count += i_count
            for k in range(k_count):
@ -464,12 +498,12 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
                            else:
                                mcol[key2] = mcol[key2] + i_mcol[key2]
-            # Write taxid_dist
+            for key in i_seq.keys():
-            if mergeIds and TAXID_COLUMN in mergedKeys:
+                # Delete informations that differ between the merged sequences
-                if TAXID_DIST_COLUMN in str_merged_cols:
+                # TODO make special columns list? // could be more efficient
-                    o_taxid_dist_col[o_idx] = str(taxid_dist_dict)
+                if key != COUNT_COLUMN and key != ID_COLUMN and key != NUC_SEQUENCE_COLUMN and key in o_seq and o_seq[key] != i_seq[key] \
-                else:
+                    and key not in merged_dict :
-                    o_taxid_dist_col[o_idx] = taxid_dist_dict
+                    o_seq[key] = None
        # Write merged dicts
        for mkey in merged_dict: 
@ -482,25 +516,33 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
                #    if mkey_cols[mkey][o_idx][key] is None:
                #        mkey_cols[mkey][o_idx][key] = 0
-            for key in i_seq.keys():
+        # Write taxid_dist
-                # Delete informations that differ between the merged sequences
+        if mergeIds and TAXID_COLUMN in mergedKeys:
-                # TODO make special columns list?
+            if TAXID_DIST_COLUMN in str_merged_cols:
-                if key != COUNT_COLUMN and key != ID_COLUMN and key != NUC_SEQUENCE_COLUMN and key in o_seq and o_seq[key] != i_seq[key] \
+                o_taxid_dist_col[o_idx] = str(taxid_dist_dict)
-                    and key not in merged_dict :
+            else:
-                    o_seq[key] = None
+                o_taxid_dist_col[o_idx] = taxid_dist_dict
        o_count_col[o_idx] = o_count
        o_idx += 1
    pb(len(uniques), force=True)
    # Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
    if QUALITY_COLUMN in view:
        o_view.delete_column(QUALITY_COLUMN)
    if REVERSE_QUALITY_COLUMN in view:
        o_view.delete_column(REVERSE_QUALITY_COLUMN)
    # Delete old columns that are now merged
    for k in range(k_count):
        if mergedKeys[k] in o_view:
            o_view.delete_column(mergedKeys[k])
    if taxonomy is not None:
        print("")  # TODO because in the middle of progress bar. Better solution?
        logger("info", "Merging taxonomy classification")
-        merge_taxonomy_classification(o_view, taxonomy)
+        merge_taxonomy_classification(o_view, taxonomy, config)
@ -547,11 +589,10 @@ def run(config):
    pb = ProgressBar(len(entries), config, seconde=5)
    try:
-        uniq_sequences(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])       
+        uniq_sequences(entries, o_view, pb, config, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])       
    except Exception, e:
        raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
    pb(len(entries), force=True)
    print("", file=sys.stderr)
    # Save command config in View and DMS comments
@ -567,8 +608,8 @@ def run(config):
    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)
-    input[0].close()
+    input[0].close(force=True)
-    output[0].close()
+    output[0].close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/dms/dms.pyx
+++ b/python/obitools3/dms/dms.pyx
@ -94,16 +94,16 @@ cdef class DMS(OBIWrapper):
        return dms
-    def close(self) :
+    def close(self, force=False) :
        '''
-        Closes the DMS instance and free the associated memory
+        Closes the DMS instance and free the associated memory (no counter, closing is final)
        The `close` method is automatically called by the object destructor.
        '''
        cdef OBIDMS_p pointer = self.pointer()
        if self.active() :
            OBIWrapper.close(self)
-            if (obi_close_dms(pointer, False)) < 0 :
+            if (obi_close_dms(pointer, force=force)) < 0 :
                raise Exception("Problem closing an OBIDMS")
@ -254,7 +254,8 @@ cdef class DMS(OBIWrapper):
    # bash command history property getter
    @property
    def bash_history(self):
-        s = b"#!/bin/bash\n\n"
+        #s = b"#!${bash}/bin/bash\n\n"
        s = b""
        first = True
        for command in self.command_line_history:
            s+=b"#"
--- a/python/obitools3/dms/view/view.pyx
+++ b/python/obitools3/dms/view/view.pyx
@ -526,13 +526,13 @@ cdef class View(OBIWrapper) :
    # bash command history property getter
    @property
    def bash_history(self):
-        s = b"#!/bin/bash\n\n"
+        s = b""
        first = True
        for level in self.view_history:
            command_list = [level[input][b"command_line"] for input in level.keys()]
            for command in command_list:
                s+=b"obi "
                s+=command
                s+=b"\n"
        return s
--- a/python/obitools3/parsers/embl.pyx
+++ b/python/obitools3/parsers/embl.pyx
@ -156,6 +156,9 @@ def emblIterator_file(lineiterator,
        yield seq
        read+=1
    # Last sequence
    seq = emblParser(entry)
    yield seq
    free(entry)
--- a/python/obitools3/parsers/genbank.pyx
+++ b/python/obitools3/parsers/genbank.pyx
@ -153,6 +153,9 @@ def genbankIterator_file(lineiterator,
        yield seq
        read+=1
    # Last sequence
    seq = genbankParser(entry)
    yield seq
    free(entry)
--- a/python/obitools3/utils.pyx
+++ b/python/obitools3/utils.pyx
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
                return -1
            mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            total_count += len(re.findall(sep, mmapped_file))
-            if format != b"ngsfilter" and format != b"tabular":
+            if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
                total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
    except:
--- a/python/obitools3/version.py
+++ b/python/obitools3/version.py
@ -1,5 +1,5 @@
 major = 3
 minor = 0
-serial= '0-beta11'
+serial= '0-beta16'
 version ="%d.%02d.%s" % (major,minor,serial)
--- a/src/build_reference_db.c
+++ b/src/build_reference_db.c
@ -157,7 +157,7 @@ int build_reference_db(const char* dms_name,
 	ecotx_t* lca_2 = NULL;
 	ecotx_t* lca = NULL;
 	index_t idx1, idx2;
-	index_t i, j, k;
+	index_t i, j, k, count;
 	int32_t taxid_array_length;
 	int32_t score_array_length;
 	int32_t taxid_array_writable_length;
@ -185,6 +185,7 @@ int build_reference_db(const char* dms_name,
 	matrix_view_name = strcpy(matrix_view_name, o_view_name);
 	strcat(matrix_view_name, "_matrix");
 	fprintf(stderr, "Aligning queries with reference database...\n");
 	if (obi_lcs_align_one_column(dms_name,
 								 refs_view_name,
 								 "",
@ -320,13 +321,19 @@ int build_reference_db(const char* dms_name,
 		return -1;
 	}
 	count = (matrix_with_lca_view->infos)->line_count;
 	fprintf(stderr, "Computing LCAs...\n");
 	// Compute all the LCAs
 		// For each pair
-	for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
+	for (i=0; i<count; i++)
 	{
 		if (! keep_running)
 			return -1;
 		if (i%1000 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) count)*100);
 		// Read all taxids associated with the first sequence and compute their LCA
 		// Read line index
 		idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
@ -363,6 +370,7 @@ int build_reference_db(const char* dms_name,
 			return -1;
 		}
 	}
 	fprintf(stderr,"\rDone : 100 %%           \n");
 	// Clone refs view, add 2 arrays columns for lca and score, compute and write them
@ -442,13 +450,18 @@ int build_reference_db(const char* dms_name,
 		return -1;
 	}
 	fprintf(stderr, "Building LCA arrays...\n");
 	// For each sequence, look for all its alignments in the matrix, and for each different LCA taxid/score, order them and write them
 	// Going through matrix once, filling refs arrays on the go for efficiency
-	for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
+	for (i=0; i<count; i++)
 	{
 		if (! keep_running)
 			return -1;
 		if (i%1000 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) count)*100);
 		// Read ref line indexes
 		idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
 		idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
@ -464,6 +477,8 @@ int build_reference_db(const char* dms_name,
 		// Read alignment score
 		score = obi_get_float_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_score_column, i, 0);
 		//fprintf(stderr, "\n\ntaxid_lca=%d, score=%f, idx1=%d, idx2=%d", taxid_lca, score, idx1, idx2);
 		///////////////// Compute for first sequence \\\\\\\\\\\\\\\\\\\\\\\     (TODO function)
 		// Read arrays
@ -480,9 +495,11 @@ int build_reference_db(const char* dms_name,
 //			return -1;
 //		}
 		//fprintf(stderr, "\n1st sequence");
 		// If empty, add values
 		if (taxid_array_length == 0)
 		{
 			//fprintf(stderr, "\nEmpty, add value");
 			if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
 			{
 				obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
@ -496,6 +513,8 @@ int build_reference_db(const char* dms_name,
 		}
 		else
 		{
 			//fprintf(stderr, "\nNot empty");
 			j = 0;
 			modified = false;
 			while (j < taxid_array_length)
@ -509,6 +528,9 @@ int build_reference_db(const char* dms_name,
 						memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 						modified = true;
 						//fprintf(stderr, "\nSame LCA, replace %d and %f with %d and %f", lca_taxid_array_writable[j],
 						//		score_array_writable[j], taxid_lca, score);
 						// Better score for the same LCA, replace this LCA/score pair
 						lca_taxid_array_writable[j] = taxid_lca;
 						score_array_writable[j] = score;
@ -535,6 +557,8 @@ int build_reference_db(const char* dms_name,
 				{
 					if (score > score_array[j])
 					{
 						//fprintf(stderr, "\nInsert new");
 						memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
 						memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 						modified = true;
@ -579,10 +603,15 @@ int build_reference_db(const char* dms_name,
 				memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 				modified = true;
 				//fprintf(stderr, "\nAppend at the end");
 				// Append LCA
 				lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
 				score_array_writable[score_array_writable_length] = score;
 				taxid_array_writable_length++;
 				score_array_writable_length++;
 				// Remove the previous (children) LCAs from the array if their score is equal or lower
 				while ((j>0) && (score_array_writable[j-1] <= score))
 				{
@ -603,6 +632,13 @@ int build_reference_db(const char* dms_name,
 			// Write new arrays
 			if (modified)
 			{
 //				fprintf(stderr, "\n\nnew array:");
 //				for (k=0;k<taxid_array_writable_length;k++)
 //				{
 //					lca = obi_taxo_get_taxon_with_taxid(tax, lca_taxid_array_writable[k]);
 //					fprintf(stderr, "\nLCA=%d, %s, score=%f", lca_taxid_array_writable[k], lca->name, score_array_writable[k]);
 //				}
 				if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, lca_taxid_array_writable, (uint8_t) (obi_sizeof(OBI_INT) * 8), taxid_array_writable_length) < 0)
 				{
 					obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
@ -632,9 +668,13 @@ int build_reference_db(const char* dms_name,
 //			return -1;
 //		}
 		//fprintf(stderr, "\n2nd sequence");
 		// If empty, add values
 		if (taxid_array_length == 0)
 		{
 			//fprintf(stderr, "\nEmpty, add value");
 			if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx2, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
 			{
 				obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
@ -648,6 +688,8 @@ int build_reference_db(const char* dms_name,
 		}
 		else
 		{
 			//fprintf(stderr, "\nNot empty");
 			j = 0;
 			modified = false;
 			while (j < taxid_array_length)
@ -661,6 +703,9 @@ int build_reference_db(const char* dms_name,
 						memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 						modified = true;
 						//fprintf(stderr, "\nSame LCA, replace %d and %f with %d and %f", lca_taxid_array_writable[j],
 						//		score_array_writable[j], taxid_lca, score);
 						// Better score for the same LCA, replace this LCA/score pair
 						lca_taxid_array_writable[j] = taxid_lca;
 						score_array_writable[j] = score;
@ -687,6 +732,8 @@ int build_reference_db(const char* dms_name,
 				{
 					if (score > score_array[j])
 					{
 						//fprintf(stderr, "\nInsert new");
 						memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
 						memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 						modified = true;
@ -727,6 +774,8 @@ int build_reference_db(const char* dms_name,
 			if (j == taxid_array_length) // same or parent LCA not found, need to be appended at the end
 			{
 				//fprintf(stderr, "\nAppend at the end");
 				memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
 				memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 				modified = true;
@ -735,6 +784,9 @@ int build_reference_db(const char* dms_name,
 				lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
 				score_array_writable[score_array_writable_length] = score;
 				taxid_array_writable_length++;
 				score_array_writable_length++;
 				// Remove the previous (children) LCAs from the array if their score is equal or lower
 				while ((j>0) && (score_array_writable[j-1] <= score))
 				{
@ -769,11 +821,17 @@ int build_reference_db(const char* dms_name,
 			}
 		}
 	}
 	fprintf(stderr,"\rDone : 100 %%           \n");
 	fprintf(stderr, "Writing results...\n");
 	count = (o_view->infos)->line_count;
 	// Fill empty LCA informations (because filling from potentially sparse alignment matrix) with the sequence taxid
 	score=1.0;  // technically getting LCA of identical sequences
-	for (i=0; i<(o_view->infos)->line_count; i++)
+	for (i=0; i<count; i++)
 	{
 		if (i%1000 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) count)*100);
 		obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, i, &taxid_array_length);
 		if (taxid_array_length == 0)  // no LCA set
 		{
@ -799,6 +857,7 @@ int build_reference_db(const char* dms_name,
 			}
 		}
 	}
 	fprintf(stderr,"\rDone : 100 %%           \n");
 	// Add information about the threshold used to build the DB
 	snprintf(threshold_str, 5, "%f", threshold);
@ -858,7 +917,6 @@ int build_reference_db(const char* dms_name,
 	free(matrix_view_name);
 	free(matrix_with_lca_view_name);
 	fprintf(stderr,"\rDone : 100 %%           \n");
 	return 0;
 }
--- a/src/obi_ecopcr.c
+++ b/src/obi_ecopcr.c
@ -105,7 +105,8 @@ static int create_output_columns(Obiview_p o_view, bool kingdom_mode);
 * @param o_temp1_column A pointer on the output column for the temperature for the first primer.
 * @param o_temp2_column A pointer on the output column for the temperature for the second primer.
 *
- * @retval 0 if the operation was successfully completed.
+ * @retval 0 if the sequence was skipped (taxid not found, warning printed).
 * @retval 1 if the sequence was successfully printed to the output.
 * @retval -1 if an error occurred.
 *
 * @since July 2018
@ -366,6 +367,17 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
 	// TODO add check for primer longer than MAX_PAT_LEN (32)
 	// Get sequence id
 	seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
 	// Get the taxon structure
 	main_taxon = obi_taxo_get_taxon_with_taxid(taxonomy, taxid);
 	if (main_taxon == NULL)
 	{
 		obidebug(1, "\nWarning: error reading the taxonomic information of a sequence. Seq id: %s, taxid: %d. Probably deprecated taxid. Skipping this sequence.", seq_id, taxid);
 		return 0;
 	}
 	ldelta = (pos1 <= keep_nucleotides)?pos1:keep_nucleotides;
 	rdelta = ((pos2+keep_nucleotides)>=seq_len)?seq_len-pos2:keep_nucleotides;
@ -431,16 +443,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
 	if (isnan(tm2))
 		tm2 = OBIFloat_NA;
 	// Get the taxon structure
 	main_taxon = obi_taxo_get_taxon_with_taxid(taxonomy, taxid);
 	if (main_taxon == NULL)
 	{
 		obidebug(1, "\nError reading the taxonomic information of a sequence");
 		return -1;
 	}
 	// Write sequence id
 	seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
 	if (obi_set_str_with_elt_idx_and_col_p_in_view(o_view, o_id_column, o_idx, 0, seq_id) < 0)
 	{
 		obidebug(1, "\nError writing the sequence id");
@ -629,7 +632,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
 		return -1;
 	}
-	return 0;
+	return 1;
 }
@ -698,6 +701,7 @@ int obi_ecopcr(const char* i_dms_name,
 	obiint_t      taxid;
 	char*         sequence;
 	int			  printed;
 	SeqPtr        apatseq=NULL;
 	int32_t       o1Hits;
@ -1057,14 +1061,14 @@ int obi_ecopcr(const char* i_dms_name,
 											length = 0;
 											if (posj > posi)
 												length = posj - posi - o1->patlen - o2->patlen;
-											if (posj < posi)
+											else if (circular > 0)
 												length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
 											if ((length>0) &&	// For when primers touch or overlap
 												(!min_len || (length >= min_len)) &&
 												(!max_len || (length <= max_len)))
 											{
 												// Print the found amplicon
-												if (print_seq(i_view, o_view,
+												printed = print_seq(i_view, o_view,
 														  	  i_idx, o_idx,
 															  taxonomy,
 															  sequence,
@ -1090,11 +1094,13 @@ int obi_ecopcr(const char* i_dms_name,
 															  o_strand_column,
 															  o_primer1_column, o_primer2_column,
 															  o_error1_column, o_error2_column,
-															  o_temp1_column,  o_temp2_column) < 0)
+															  o_temp1_column,  o_temp2_column);
 												if (printed < 0)
 												{
 													obidebug(1, "\nError writing the ecopcr result");
 													return -1;
 												}
 												else if (printed > 0)
 													o_idx++;
 											}
 										}
@ -1145,14 +1151,14 @@ int obi_ecopcr(const char* i_dms_name,
 											length = 0;
 											if (posj > posi)
 												length = posj - posi + 1  - o2->patlen - o1->patlen; /* - o1->patlen : deleted by <EC> (prior to the OBITools3) */
-											if (posj < posi)
+											else if (circular > 0)
 												length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
 											if ((length>0) &&	// For when primers touch or overlap
 												(!min_len || (length >= min_len)) &&
 												(!max_len || (length <= max_len)))
 											{
 												// Print the found amplicon
-												if (print_seq(i_view, o_view,
+												printed = print_seq(i_view, o_view,
 														  	  i_idx, o_idx,
 															  taxonomy,
 															  sequence,
@ -1178,11 +1184,13 @@ int obi_ecopcr(const char* i_dms_name,
 															  o_strand_column,
 															  o_primer1_column, o_primer2_column,
 															  o_error1_column, o_error2_column,
-															  o_temp1_column,  o_temp2_column) < 0)
+															  o_temp1_column,  o_temp2_column);
 												if (printed < 0)
 												{
 													obidebug(1, "\nError writing the ecopcr result");
 													return -1;
 												}
 												else if (printed > 0)
 													o_idx++;
 											}
 										}
@ -1224,7 +1232,7 @@ int obi_ecopcr(const char* i_dms_name,
 		return -1;
 	}
-	fprintf(stderr,"\rDone : 100 %%           ");
+	fprintf(stderr,"\rDone : 100 %%           \n");
 	return 0;
 	return 0;
--- a/src/obi_ecopcr.h
+++ b/src/obi_ecopcr.h
@ -81,8 +81,8 @@
 * @param o_dms_name The path to the output DMS.
 * @param o_view_name The name of the output view.
 * @param o_view_comments The comments to associate with the output view.
- * @param primer1 The first primer.
+ * @param primer1 The first primer, length must be less than or equal to 32 (because of apat lib limitation).
- * @param primer2 The second primer.
+ * @param primer2 The second primer, length must be less than or equal to 32 (because of apat lib limitation).
 * @param error_max The maximum number of errors allowed per primer for amplification.
 * @param min_len The minimum length of an amplicon.
 * @param max_len The maximum length of an amplicon.
--- a/src/obi_ecotag.c
+++ b/src/obi_ecotag.c
@ -455,7 +455,7 @@ int obi_ecotag(const char* dms_name,
 	for (i=0; i < query_count; i++)
 	{
-		if (i%100 == 0)
+		if (i%1000 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) query_count)*100);
 		best_match_count = 0;
@ -562,7 +562,7 @@ int obi_ecotag(const char* dms_name,
 			score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
 			k = 0;
-			while ((k < lca_array_length) && (score_array[k] >= ecotag_threshold))
+			while ((k < lca_array_length) && (score_array[k] >= best_score))
 				k++;
 			if (k>0)
@ -570,12 +570,12 @@ int obi_ecotag(const char* dms_name,
 				lca_array = obi_get_array_with_col_p_in_view(ref_view, lca_taxid_a_column, best_match_idx, &lca_array_length);
 				if (j>0)
 				{
-					lca = obi_taxo_get_taxon_with_taxid(taxonomy, lca_taxid);
+//					lca = obi_taxo_get_taxon_with_taxid(taxonomy, lca_taxid);
-					if (lca == NULL)
+//					if (lca == NULL)
-					{
+//					{
-						obidebug(1, "\nError getting a taxon from a taxid when doing taxonomic assignment");
+//						obidebug(1, "\nError getting a taxon from a taxid when doing taxonomic assignment");
-						return -1;
+//						return -1;
-					}
+//					}
 					lca_in_array = obi_taxo_get_taxon_with_taxid(taxonomy, lca_array[k-1]);
 					if (lca_in_array == NULL)
 					{
--- a/src/obiavl.c
+++ b/src/obiavl.c
@ -648,7 +648,7 @@ int truncate_avl_data_to_size_used(OBIDMS_avl_data_p avl_data)	// TODO is it nec
 	new_data_size = ((index_t) multiple) * getpagesize();
 	// Check that it is actually greater than the current size of the file, otherwise no need to truncate
-	if ((avl_data->header)->data_size_max == new_data_size)
+	if ((avl_data->header)->data_size_max >= new_data_size)
 		return 0;
 	// Get the file descriptor
@ -667,7 +667,7 @@ int truncate_avl_data_to_size_used(OBIDMS_avl_data_p avl_data)	// TODO is it nec
 	if (ftruncate(file_descriptor, file_size) < 0)
 	{
 		obi_set_errno(OBI_AVL_ERROR);
-		obidebug(1, "\nError truncating an AVL data file");
+		obidebug(1, "\nError truncating an AVL data file, old data size = %lld, new data size = %lld", (avl_data->header)->data_size_max, new_data_size);
 		return -1;
 	}
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@ -2376,9 +2376,10 @@ int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnode
 				// and the deleted taxids with no current reference. An element of the list is composed of the taxid, and the index
 				// of the taxon in the taxa structure, or -1 for deleted taxids.
 				// Creating the merged list requires to merge the 3 ordered lists into one.
-				while (((nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid)) || ((nD >= 0) && (delnodes[nD] < old_taxid)))
+				while (((nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid)) ||
 						((nD >= 0) && (delnodes[nD] < old_taxid)))
 				{
-					if ((tax->taxa)->taxon[nT].taxid < delnodes[nD])
+					if ((nT < (tax->taxa)->count) && (tax->taxa)->taxon[nT].taxid < delnodes[nD])
 					{ // Add element from taxa list
 						// Enlarge structure if needed
 						if (n == buffer_size)
@ -2401,7 +2402,7 @@ int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnode
 						nT++;
 						n++;
 					}
-					else if (delnodes[nD] < (tax->taxa)->taxon[nT].taxid)
+					else
 					{ // Add element from deleted taxids list
 						// Enlarge structure if needed
 						if (n == buffer_size)
@ -3036,12 +3037,12 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 	strcpy(tax->tax_name, taxonomy_name);
 	buffer_size = 2048;
 	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
 	if (taxonomy_path == NULL)
 		return NULL;
 	buffer_size = strlen(taxonomy_path) + strlen(taxonomy_name) + 6;
 	// Read ranks
 	ranks_file_name = (char*) malloc(buffer_size*sizeof(char));
 	if (ranks_file_name == NULL)
--- a/src/obidmscolumn.c
+++ b/src/obidmscolumn.c
@ -1973,7 +1973,11 @@ int obi_enlarge_column(OBIDMS_column_p column)
 	// Calculate the new file size
 	old_line_count = (column->header)->line_count;
-	new_line_count = old_line_count * COLUMN_GROWTH_FACTOR;
+	new_line_count = ceil((double) old_line_count * (double) COLUMN_GROWTH_FACTOR);
 	if (new_line_count > old_line_count+100000)
 		new_line_count = old_line_count+100000;
 	else if (new_line_count < old_line_count+1000)
 		new_line_count = old_line_count+1000;
 	if (new_line_count > MAXIMUM_LINE_COUNT)
 	{
@ -2381,6 +2385,54 @@ char* obi_get_elements_names(OBIDMS_column_p column)
 }
 // Builds a newly allocated, human-readable list of the element names of a column.
 // Each name is copied followed by "; "; the final space is then replaced by the
 // terminating '\0', so the result looks like "name1; name2;".
 // Returns NULL (with OBI_MALLOC_ERROR set) if the allocation fails.
 // The caller owns the returned string and must free() it.
 char* obi_get_formatted_elements_names(OBIDMS_column_p column)
 {
 	char* elements_names;
 	int   i, j;
 	int   elt_idx;
 	int   len;
 	// Each name needs its own length plus 2 characters ("; ").
 	// NOTE(review): this sizing presumably relies on elements_names_length already counting
 	// one separator/terminator per name, the other one being added through nb_elements_per_line - TODO confirm.
 	// The extra +1 guarantees room for the terminating '\0' even when the column has no element.
 	elements_names = (char*) malloc(((column->header)->elements_names_length + (column->header)->nb_elements_per_line + 1) * sizeof(char));
 	if (elements_names == NULL)
 	{
 		obi_set_errno(OBI_MALLOC_ERROR);
 		obidebug(1, "\nError allocating memory for elements names");
 		return NULL;
 	}
 	j = 0;
 	for (i=0; i < (column->header)->nb_elements_per_line; i++)
 	{
 		// elements_names_idx[i] is the offset of the i-th name inside the elements_names buffer
 		elt_idx = ((column->header)->elements_names_idx)[i];
 		len = strlen(((column->header)->elements_names)+elt_idx);
 		memcpy(elements_names+j, ((column->header)->elements_names)+elt_idx, len*sizeof(char));
 		j = j + len;
 		elements_names[j] = ';';
 		j++;
 		elements_names[j] = ' ';
 		j++;
 	}
 	// Terminate the string: overwrite the trailing space, or produce an empty string
 	// when nothing was written (writing at j-1 would otherwise land before the buffer)
 	if (j > 0)
 		elements_names[j - 1] = '\0';
 	else
 		elements_names[0] = '\0';
 	return elements_names;
 }
 // Returns a newly allocated buffer of 1024 characters meant to hold formatted
 // informations about a column. The formatting itself is not implemented yet (TODO):
 // the formatted elements names are computed but not used, and the returned string is empty.
 // Returns NULL if an allocation fails or if the elements names can not be formatted.
 // The caller owns the returned buffer and must free() it.
 char* obi_column_formatted_infos(OBIDMS_column_p column)
 {
 	char* column_infos;
 	char* elt_names;
 	column_infos = (char*) malloc(1024 * sizeof(char));
 	if (column_infos == NULL)
 	{
 		obi_set_errno(OBI_MALLOC_ERROR);
 		obidebug(1, "\nError allocating memory for the formatted informations of a column");
 		return NULL;
 	}
 	// Nothing is written yet: hand back a valid empty string rather than uninitialized memory
 	column_infos[0] = '\0';
 	elt_names = obi_get_formatted_elements_names(column);
 	if (elt_names == NULL)
 	{
 		// Do not leak the buffer when the helper fails
 		free(column_infos);
 		return NULL;
 	}
 	free(elt_names);
 	return column_infos;
 }
 int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb, index_t elt_idx)
 {
--- a/src/obidmscolumn.h
+++ b/src/obidmscolumn.h
@ -505,6 +505,14 @@ index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const cha
 char* obi_get_elements_names(OBIDMS_column_p column);
 // TODO
 //char* obi_get_formatted_elements_names(OBIDMS_column_p column);
 // TODO
 //char* obi_column_formatted_infos(OBIDMS_column_p column);
 /**
 * @brief Prepares a column to set a value.
 *
--- a/src/obiview.c
+++ b/src/obiview.c
@ -1037,8 +1037,9 @@ static int finish_view(Obiview_p view)
 		return -1;
 	}
-	// Add count column if it's a NUC_SEQ_VIEW with no count column // TODO discuss
+	// Add count column if it's a NUC_SEQ_VIEW with no count column (and there's no MERGED_sample column) // TODO discuss
-	if ((!strcmp((view->infos)->view_type, VIEW_TYPE_NUC_SEQS)) && (!obi_view_column_exists(view, COUNT_COLUMN)))
+	if ((!strcmp((view->infos)->view_type, VIEW_TYPE_NUC_SEQS)) && (!obi_view_column_exists(view, COUNT_COLUMN))
 			&& (!obi_view_column_exists(view, "MERGED_sample")))  // TODO should eventually compute from merged samples?
 	{
 		if (obi_create_auto_count_column(view) < 0)
 		{
@ -1407,7 +1408,7 @@ static char* view_check_qual_match_seqs(Obiview_p view)
 						// Test that the lengths of the quality and the sequence are equal
 						if ((size_t)qual_len != strlen(seq))
 						{
-							obidebug(1, "\nError checking the predicate for view %s: The sequences and sequence quality arrays match.", (view->infos)->name);
+							obidebug(1, "\nError checking the predicate for view %s: The sequences and sequence quality arrays match (index %lld, seq=%s, quality length = %d).", (view->infos)->name, j, seq, qual_len);
 							return NULL;
 						}
 					}
--- a/src/sse_banded_LCS_alignment.c
+++ b/src/sse_banded_LCS_alignment.c
@ -686,6 +686,9 @@ int calculateSizeToAllocate(int maxLen, int LCSmin)
 	size *=  3;
 	size +=  16;
 	size += 10;  // band-aid for memory bug I don't understand (triggered on specific db on ubuntu)
 				 // bug might have to do with the way different systems behave when aligning the address in obi_get_memory_aligned_on_16
 	return(size*sizeof(int16_t));
 }
Author	SHA1	Message	Date
Celine Mercier	e9a37d8a6e	Switch to version 3.0.0-beta16	2020-05-07 17:09:26 +02:00
Celine Mercier	ef074f8455	typo	2020-05-07 17:08:59 +02:00
Celine Mercier	aec5e69f2c	C, views: no more automatic COUNT column if MERGED_sample column exists	2020-05-07 17:08:07 +02:00
Celine Mercier	170ef3f1ba	Views: added obi prefix to commands in bash history	2020-05-07 17:07:01 +02:00
Celine Mercier	f999946582	obi uniq: fixed the remerging of already merged informations, and efficiency improvements	2020-05-07 17:05:54 +02:00
Celine Mercier	773b36ec37	obi import: fixed the import of old obitools files with premerged informations, and other minor improvements	2020-05-07 17:03:04 +02:00
Celine Mercier	69cb434a6c	version 3.0.0-beta15c	2020-04-29 14:25:33 +02:00
Celine Mercier	55d4f98d60	obi annotate: fixed annotation at ranks	2020-04-29 14:24:40 +02:00
Celine Mercier	0bec2631e8	ecotag: fixed a bug where all the full DMS path weren't properly sent to the C layer	2020-04-29 10:35:55 +02:00
Celine Mercier	e6b6c6fa84	AVLs: Made an error message more informative	2020-04-29 10:14:04 +02:00
Celine Mercier	974528b2e6	build_ref_db: fixed bug erasing some of the higher LCAs (i.e. lowest similarities)	2020-04-28 15:56:06 +02:00
Celine Mercier	1b346b54f9	ecotag: better specificity by now correctly looking for similarities within refs above best score instead of ecotag threshold	2020-04-28 15:10:07 +02:00
Celine Mercier	058f2ad8b3	ecopcr: fixed a bug where sequences were considered circular (generating false positives)	2020-04-27 14:44:35 +02:00
Celine Mercier	60bfd3ae8d	obi annotate: now defaults to setting str if expression is not valid	2020-04-24 11:35:20 +02:00
Celine Mercier	67bdee105a	C: build_ref_db: added progress display for each step	2020-04-18 14:24:08 +02:00
Celine Mercier	0f745e0113	C: Columns: optimizing column file growth	2020-04-18 13:55:47 +02:00
cmercier	da8de52ba4	export: fixed progress bar bug	2020-04-17 15:09:10 +02:00
cmercier	4d36538c6e	C: SSE lcs alignment: band-aid for memory bug I don't understand (triggered on specific db on ubuntu)	2020-04-17 15:07:52 +02:00
Celine Mercier	8d0b17d87d	Switch to version 3.0.0-beta14	2020-04-15 17:47:26 +02:00
Celine Mercier	343999a627	Taxonomy: fixed a critical memory bug when building the list of merged taxids	2020-04-15 17:46:13 +02:00
Celine Mercier	e9a40630e9	C: Columns: rounding column growth to ceil to avoid looping on small values	2020-04-13 19:02:10 +02:00
Celine Mercier	8dbcd3025a	C: Columns: reduced column growth factor from 2 to 1.3 to avoid errno28	2020-04-13 14:47:56 +02:00
Celine Mercier	4cf635d001	Switch to version 3.0.0-beta13	2020-04-12 17:42:58 +02:00
Celine Mercier	b7e7cc232a	Made completion script cleaner	2020-04-12 17:41:59 +02:00
Celine Mercier	b6ab792ceb	C: made error message more detailed when checking that sequences and qualities match	2020-04-12 17:40:24 +02:00
Celine Mercier	ddea5a2964	obi import: fixed inconsequential error when precomputing number of entries in some formats	2020-04-12 17:38:42 +02:00
Celine Mercier	30852ab7d5	View bash history: removed useless shebang	2020-04-12 17:36:04 +02:00
Celine Mercier	4d0299904e	all commands (almost): cleaner DMS closing at the end	2020-04-12 17:31:58 +02:00
Celine Mercier	eef5156d95	obi stats: fixed error when printing bool keys	2020-04-12 17:12:04 +02:00
Celine Mercier	e62c991bbc	goes with previous commit	2020-04-10 11:22:26 +02:00
Celine Mercier	1218eed7fd	ecopcr: now printing a warning instead of interrupting with an error when a taxid is not found	2020-04-10 11:22:04 +02:00
Celine Mercier	cd9cea8c97	obi import: fixed critical bug where the last entry of embl and genbank files was not imported	2020-04-09 19:26:27 +02:00
Celine Mercier	98cfb70d73	ecopcr: made some errors more informative	2020-04-09 09:15:28 +02:00
Celine Mercier	b9f68c76c8	ecopcr: added warnings and check of primer length (related to #75 )	2020-04-05 18:40:56 +02:00
Celine Mercier	0b98371688	ngsfilter: added warning about primer length in -h (#75 )	2020-04-05 18:39:20 +02:00
Celine Mercier	f0d152fcbd	ngsfilter: now checking primer length (fixes #75 )	2020-04-05 18:29:10 +02:00
Celine Mercier	8019dee68e	ecotag: now closing all DMS properly	2020-04-05 13:20:49 +02:00