build_ref_db: fixed bug erasing some of the higher LCAs (i.e. lowest similarities)
ecotag: better specificity by now correctly looking for similarities
2020-04-28 15:56:06 +02:00 · 2020-04-28 15:10:07 +02:00 · 2020-04-27 14:44:35 +02:00 · 2020-04-24 11:35:20 +02:00 · 2020-04-18 14:24:08 +02:00 · 2020-04-18 13:55:47 +02:00
38 changed files with 394 additions and 156 deletions
--- a/obi_completion_script.bash
+++ b/obi_completion_script.bash
@ -1,4 +1,3 @@
 #/usr/bin/env bash
 _obi_comp ()
 {
--- a/python/obitools3/commands/align.pyx
+++ b/python/obitools3/commands/align.pyx
@ -266,9 +266,9 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/alignpairedend.pyx
+++ b/python/obitools3/commands/alignpairedend.pyx
@ -247,10 +247,10 @@ def run(config):
    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(view), file=sys.stderr)
-    input[0].close()
+    input[0].close(force=True)
    if two_views:
-        rinput[0].close()
+        rinput[0].close(force=True)
-    output[0].close()
+    output[0].close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/annotate.pyx
+++ b/python/obitools3/commands/annotate.pyx
@ -190,58 +190,50 @@ def sequenceTaggerGenerator(config, taxo=None):
            seq['seq_rank']=counter[0]
        for i,v in toSet:
-            #try:
+            try:
-            if taxo is not None:
+                if taxo is not None:
-                environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
+                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
-            else:
+                else:
-                environ = {'sequence':seq, 'counter':counter[0], 'math':math}
+                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}
-            val = eval(v, environ, seq)
+                val = eval(v, environ, seq)
-            #except Exception,e:       # TODO discuss usefulness of this
+            except Exception:       # set string if not a valid expression
-            #    if options.onlyValid:
+                val = v
            #        raise e
            #    val = v
            seq[i]=val
        if length:
            seq['seq_length']=len(seq)
        if newId is not None:
-#            try:
+            try:
-            if taxo is not None:
+                if taxo is not None:
-                environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
+                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
-            else:
+                else:
-                environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
+                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
-            val = eval(newId, environ, seq)
+                val = eval(newId, environ, seq)
-#            except Exception,e:
+            except Exception:      # set string if not a valid expression
-#                if options.onlyValid:
+                val = newId
 #                    raise e
 #                val = newId
            seq.id=val
        if newDef is not None:
-#            try:
+            try:
-            if taxo is not None:
+                if taxo is not None:
-                environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
+                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
-            else:
+                else:
-                environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
+                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
-            val = eval(newDef, environ, seq)
+                val = eval(newDef, environ, seq)
-#            except Exception,e:
+            except Exception:    # set string if not a valid expression
-#                if options.onlyValid:
+                val = newDef
 #                    raise e
 #                val = newDef
            seq.definition=val
-#             
+             
        if newSeq is not None:
-#            try:
+            try:
-            if taxo is not None:
+                if taxo is not None:
-                environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
+                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
-            else:
+                else:
-                environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
+                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
-            val = eval(newSeq, environ, seq)
+                val = eval(newSeq, environ, seq)
-#            except Exception,e:
+            except Exception:    # set string if not a valid expression
-#                if options.onlyValid:
+                val = newSeq
 #                    raise e
 #                val = newSeq
            seq.seq=val
            if 'seq_length' in seq:
                seq['seq_length']=len(seq)
@ -251,15 +243,14 @@ def sequenceTaggerGenerator(config, taxo=None):
                seq.view.delete_column(QUALITY_COLUMN)
        if run is not None:
-#            try:
+            try:
-            if taxo is not None:
+                if taxo is not None:
-                environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
+                    environ = {'taxonomy' : taxo, 'sequence':seq, 'counter':counter[0], 'math':math}
-            else:
+                else:
-                environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
+                    environ = {'sequence':seq, 'counter':counter[0], 'math':math}     
-            eval(run, environ, seq)
+                eval(run, environ, seq)
-#            except Exception,e:
+            except Exception,e:
-#                if options.onlyValid:
+                raise e
 #                    raise e
    return sequenceTagger
@ -379,7 +370,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(o_dms, imported_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/build_ref_db.pyx
+++ b/python/obitools3/commands/build_ref_db.pyx
@ -97,9 +97,9 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/cat.pyx
+++ b/python/obitools3/commands/cat.pyx
@ -86,7 +86,24 @@ def run(config):
    if not remove_rev_qual:
        Column.new_column(o_view, REVERSE_SEQUENCE_COLUMN, OBI_SEQ)
        Column.new_column(o_view, REVERSE_QUALITY_COLUMN, OBI_QUAL, associated_column_name=REVERSE_SEQUENCE_COLUMN, associated_column_version=o_view[REVERSE_SEQUENCE_COLUMN].version)
-
+        
    # Initialize multiple elements columns
    dict_cols = {}
    for v in iview_list:
        for coln in v.keys():
            if v[coln].nb_elements_per_line > 1:
                if coln not in dict_cols:
                    dict_cols[coln] = {}
                    dict_cols[coln]['eltnames'] = set(v[coln].elements_names)
                    dict_cols[coln]['nbelts'] = v[coln].nb_elements_per_line
                    dict_cols[coln]['obitype'] = v[coln].data_type_int
                else:
                    dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
                    dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
    for coln in dict_cols:
        Column.new_column(o_view, coln, dict_cols[coln]['obitype'], 
                          nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
    # Initialize the progress bar
    pb = ProgressBar(total_len, config, seconde=5)
@ -116,7 +133,7 @@ def run(config):
    #print(repr(view), file=sys.stderr)
    for d in idms_list:
-        d.close()
+        d.close(force=True)
-    o_dms.close()
+    o_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/clean.pyx
+++ b/python/obitools3/commands/clean.pyx
@ -124,8 +124,8 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/count.pyx
+++ b/python/obitools3/commands/count.pyx
@ -56,3 +56,5 @@ def run(config):
        print(count2)
    else:
        print(count1)
    input[0].close(force=True)
--- a/python/obitools3/commands/ecopcr.pyx
+++ b/python/obitools3/commands/ecopcr.pyx
@ -35,13 +35,13 @@ def addOptions(parser):
                       action="store", dest="ecopcr:primer1",
                       metavar='<PRIMER>',
                       type=str,
-                       help="Forward primer.")
+                       help="Forward primer, length must be less than or equal to 32")
    group.add_argument('--primer2', '-R',
                       action="store", dest="ecopcr:primer2",
                       metavar='<PRIMER>',
                       type=str,
-                       help="Reverse primer.")
+                       help="Reverse primer, length must be less than or equal to 32")
    group.add_argument('--error', '-e',
                       action="store", dest="ecopcr:error",
@ -203,6 +203,7 @@ def run(config):
    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_dms[o_view_name]), file=sys.stderr)
-    o_dms.close()
+    i_dms.close(force=True)
    o_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/ecotag.pyx
+++ b/python/obitools3/commands/ecotag.pyx
@ -64,10 +64,10 @@ def run(config):
    ref_view_name = ref[1]
    # Check that the threshold demanded is greater than or equal to the threshold used to build the reference database
-    if config['ecotag']['threshold'] < eval(i_dms[ref_view_name].comments["ref_db_threshold"]) :
+    if config['ecotag']['threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
        print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).", 
-              config['ecotag']['threshold'], i_dms[ref_view_name].comments["ref_db_threshold"])
+              config['ecotag']['threshold'], ref_dms[ref_view_name].comments["ref_db_threshold"])
-
+    
    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
                      input=False,
@ -126,9 +126,11 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary result view in the input DMS
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    taxo_dms.close(force=True)
    ref_dms.close(force=True)
    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/export.pyx
+++ b/python/obitools3/commands/export.pyx
@ -59,13 +59,23 @@ def run(config):
     # Check that the input view has the type NUC_SEQS if needed    # TODO discuss, maybe bool property
    if (output[2] == Nuc_Seq) and (iview.type != b"NUC_SEQS_VIEW") :  # Nuc_Seq_Stored? TODO
        raise Exception("Error: the view to export in fasta or fastq format is not a NUC_SEQS view")
-   
+    
    if config['obi']['only'] is not None:
        withoutskip = min(input[4], config['obi']['only'])
    else:
        withoutskip = input[4]
    if config['obi']['skip'] is not None:
        skip = min(input[4], config['obi']['skip'])
    else:
        skip = 0
    # Initialize the progress bar
    if config['obi']['noprogressbar']:
        pb = None
    else:
-        pb = ProgressBar(len(iview), config, seconde=5)
+        pb = ProgressBar(withoutskip - skip, config, seconde=5)
-    
+
    i=0
    for seq in iview :
        PyErr_CheckSignals()
@ -86,7 +96,7 @@ def run(config):
    if not BrokenPipeError and not IOError:
        output_object.close()
    iview.close()
-    input[0].close()
+    input[0].close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/grep.pyx
+++ b/python/obitools3/commands/grep.pyx
@ -370,7 +370,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/head.pyx
+++ b/python/obitools3/commands/head.pyx
@ -103,7 +103,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/history.pyx
+++ b/python/obitools3/commands/history.pyx
@ -54,4 +54,5 @@ def run(config):
            print(bytes2str(entries.ascii_history))
        else:
            raise Exception("ASCII history only available for views")
-            
+    
    input[0].close(force=True)
--- a/python/obitools3/commands/import.pyx
+++ b/python/obitools3/commands/import.pyx
@ -11,6 +11,7 @@ from obitools3.dms.column.column cimport Column
 from obitools3.dms.obiseq cimport Nuc_Seq
 from obitools3.dms import DMS
 from obitools3.dms.taxo.taxo cimport Taxonomy
 from obitools3.files.uncompress cimport CompressedFile
 from obitools3.utils cimport tobytes, \
@ -65,6 +66,14 @@ def addOptions(parser):
    addTaxdumpInputOption(parser)
    addMinimalOutputOption(parser)
    group = parser.add_argument_group('obi import specific options')
    group.add_argument('--preread',
                     action="store_true", dest="import:preread",
                     default=False,
                     help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
                          "a much faster import.")
 def run(config):
@ -169,8 +178,6 @@ def run(config):
    if entry_count >= 0:
        pb = ProgressBar(entry_count, config, seconde=5)
    entries = input[1]
    NUC_SEQS_view = False
    if isinstance(output[1], View) :
@ -188,6 +195,60 @@ def run(config):
    dcols = {}
    # First read through the entries to prepare columns with dictionaries as they are very time-expensive to rewrite
    if config['import']['preread']:
        logger("info", "First readthrough...")
        entries = input[1]
        i = 0
        dict_dict = {}
        for entry in entries:
            PyErr_CheckSignals()
            if entry is None:  # error or exception handled at lower level, not raised because Python generators can't resume after any exception is raised
                if config['obi']['skiperror']:
                    i-=1
                    continue
                else:
                    raise Exception("obi import error in first readthrough")
            if pb is not None:
                pb(i)
            elif not i%50000:
                logger("info", "Read %d entries", i)
            for tag in entry :
                if type(entry[tag]) == dict :
                    if tag in dict_dict:
                        dict_dict[tag][0].update(entry[tag].keys())
                    else:
                        dict_dict[tag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
            i+=1
        if pb is not None:
            pb(i, force=True)
            print("", file=sys.stderr)
        for tag in dict_dict:
            dcols[tag] = (Column.new_column(view, tag, dict_dict[tag][1], \
                              nb_elements_per_line=len(dict_dict[tag][0]), \
                              elements_names=list(dict_dict[tag][0])), \
                          value_obitype)
        # Reinitialize the input
        if isinstance(input[0], CompressedFile):
            input_is_file = True
        if entry_count >= 0:
            pb = ProgressBar(entry_count, config, seconde=5)
        try:
            input[0].close()
        except AttributeError:
            pass
        input = open_uri(config['obi']['inputURI'], force_file=input_is_file)
        if input is None:
            raise Exception("Could not open input URI")
    entries = input[1]
    i = 0
    for entry in entries :
--- a/python/obitools3/commands/less.pyx
+++ b/python/obitools3/commands/less.pyx
@ -46,5 +46,5 @@ def run(config):
    process.wait()
    iview.close()
-    input[0].close()
+    input[0].close(force=True)
--- a/python/obitools3/commands/ls.pyx
+++ b/python/obitools3/commands/ls.pyx
@ -36,6 +36,7 @@ def run(config):
        l = []
        for view in input[0]:
            l.append(tostr(view) + "\t(Date created: " + str(bytes2str_object(dms[view].comments["Date created"]))+")")
            dms[view].close()
        l.sort()
        for v in l:
            print(v)
@ -51,4 +52,5 @@ def run(config):
    if config['ls']['longformat'] and len(input[1].comments) > 0:
        print("\n### Comments:")
        print(str(input[1].comments))
-     
+    
    input[0].close(force=True)
--- a/python/obitools3/commands/ngsfilter.pyx
+++ b/python/obitools3/commands/ngsfilter.pyx
@ -42,7 +42,8 @@ def addOptions(parser):
                     metavar="<URI>",
                     type=str,
                     default=None,
-                     help="URI to the view containing the samples definition (with tags, primers, sample names,...)")
+                     help="URI to the view containing the samples definition (with tags, primers, sample names,...)"
                          "Warning: primer lengths must be less than or equal to 32")
    group.add_argument('-R', '--reverse-reads',
                     action="store", dest="ngsfilter:reverse",
@ -172,6 +173,13 @@ cdef read_info_view(info_view, max_errors=2, verbose=False, not_aligned=False):
    primer_list = []
    i=0
    for p in info_view:
        # Check primer length: should not be longer than 32, the max allowed by the apat lib
        if len(p[b'forward_primer']) > 32:
            raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
        if len(p[b'reverse_primer']) > 32:
            raise RollbackException("Error: primers can not be longer than 32bp, rollbacking views")
        forward=Primer(p[b'forward_primer'],
                       len(p[b'forward_tag']) if (b'forward_tag' in p and p[b'forward_tag']!=None) else None,
                       True,
@ -594,7 +602,13 @@ def run(config):
    pb = ProgressBar(entries_len, config, seconde=5)
    # Check and store primers and tags
-    infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned)   # TODO obi verbose option
+    try:
        infos, primer_list = read_info_view(info_view, max_errors=config['ngsfilter']['error'], verbose=False, not_aligned=not_aligned)   # TODO obi verbose option
    except RollbackException, e:
        if unidentified is not None:
            raise RollbackException("obi ngsfilter error, rollbacking views: "+str(e), o_view, unidentified)
        else:
            raise RollbackException("obi ngsfilter error, rollbacking view: "+str(e), o_view)
    aligner = Primer_search(primer_list, config['ngsfilter']['error'])
@ -652,11 +666,11 @@ def run(config):
    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)
-    input[0].close()
+    input[0].close(force=True)
-    output[0].close()
+    output[0].close(force=True)
-    info_input[0].close()
+    info_input[0].close(force=True)
    if unidentified is not None:
-        unidentified_input[0].close()
+        unidentified_input[0].close(force=True)
    aligner.free()
    logger("info", "Done.")
--- a/python/obitools3/commands/sort.pyx
+++ b/python/obitools3/commands/sort.pyx
@ -141,7 +141,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/stats.pyx
+++ b/python/obitools3/commands/stats.pyx
@ -251,7 +251,7 @@ def run(config):
    for i in range(len(sorted_stats)):
        c = sorted_stats[i][0]
        for v in c:
-            if v is not None:
+            if type(v) == bytes:
                print(pcat % tostr(v)+"\t", end="")
            else:
                print(pcat % str(v)+"\t", end="")
@ -268,6 +268,6 @@ def run(config):
        print("%7d" %catcount[c], end="")
        print("%9d" %totcount[c])
-    input[0].close()
+    input[0].close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/tail.pyx
+++ b/python/obitools3/commands/tail.pyx
@ -106,7 +106,7 @@ def run(config):
    # If the input and the output DMS are different, delete the temporary imported view used to create the final view
    if i_dms != o_dms:
        View.delete_view(i_dms, o_view_name)
-        o_dms.close()
+        o_dms.close(force=True)
-    i_dms.close()
+    i_dms.close(force=True)
    logger("info", "Done.")
--- a/python/obitools3/commands/test.pyx
+++ b/python/obitools3/commands/test.pyx
@ -529,7 +529,7 @@ def run(config):
        test_taxo(config, infos)
    infos['view'].close()
-    infos['dms'].close()
+    infos['dms'].close(force=True)
    shutil.rmtree(config['obi']['defaultdms']+'.obidms', ignore_errors=True)
    print("Done.")
--- a/python/obitools3/dms/dms.pyx
+++ b/python/obitools3/dms/dms.pyx
@ -94,16 +94,16 @@ cdef class DMS(OBIWrapper):
        return dms
-    def close(self) :
+    def close(self, force=False) :
        '''
-        Closes the DMS instance and free the associated memory
+        Closes the DMS instance and free the associated memory (no counter, closing is final)
        The `close` method is automatically called by the object destructor.
        '''
        cdef OBIDMS_p pointer = self.pointer()
        if self.active() :
            OBIWrapper.close(self)
-            if (obi_close_dms(pointer, False)) < 0 :
+            if (obi_close_dms(pointer, force=force)) < 0 :
                raise Exception("Problem closing an OBIDMS")
@ -254,7 +254,8 @@ cdef class DMS(OBIWrapper):
    # bash command history property getter
    @property
    def bash_history(self):
-        s = b"#!/bin/bash\n\n"
+        #s = b"#!${bash}/bin/bash\n\n"
        s = b""
        first = True
        for command in self.command_line_history:
            s+=b"#"
--- a/python/obitools3/dms/view/view.pyx
+++ b/python/obitools3/dms/view/view.pyx
@ -526,7 +526,7 @@ cdef class View(OBIWrapper) :
    # bash command history property getter
    @property
    def bash_history(self):
-        s = b"#!/bin/bash\n\n"
+        s = b""
        first = True
        for level in self.view_history:
            command_list = [level[input][b"command_line"] for input in level.keys()]
--- a/python/obitools3/parsers/embl.pyx
+++ b/python/obitools3/parsers/embl.pyx
@ -156,6 +156,9 @@ def emblIterator_file(lineiterator,
        yield seq
        read+=1
    # Last sequence
    seq = emblParser(entry)
    yield seq
    free(entry)
--- a/python/obitools3/parsers/genbank.pyx
+++ b/python/obitools3/parsers/genbank.pyx
@ -153,6 +153,9 @@ def genbankIterator_file(lineiterator,
        yield seq
        read+=1
    # Last sequence
    seq = genbankParser(entry)
    yield seq
    free(entry)
--- a/python/obitools3/uri/decode.pyx
+++ b/python/obitools3/uri/decode.pyx
@ -171,7 +171,8 @@ Reads an URI and returns a tuple containing:
 def open_uri(uri,
             bint input=True,
             type newviewtype=View,
-             dms_only=False):
+             dms_only=False,
             force_file=False):
    cdef bytes urib = tobytes(uri)
    cdef bytes scheme
@ -195,9 +196,9 @@ def open_uri(uri,
    if 'obi' not in config:
        config['obi']={}
-    try:
+    if not force_file and "defaultdms" in config["obi"]:
        default_dms=config["obi"]["defaultdms"]
-    except KeyError:
+    else:
        default_dms=None
    try:
--- a/python/obitools3/utils.pyx
+++ b/python/obitools3/utils.pyx
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
                return -1
            mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            total_count += len(re.findall(sep, mmapped_file))
-            if format != b"ngsfilter" and format != b"tabular":
+            if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
                total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
    except:
--- a/python/obitools3/version.py
+++ b/python/obitools3/version.py
@ -1,5 +1,5 @@
 major = 3
 minor = 0
-serial= '0-beta9'
+serial= '0-beta15'
 version ="%d.%02d.%s" % (major,minor,serial)
--- a/src/build_reference_db.c
+++ b/src/build_reference_db.c
@ -157,7 +157,7 @@ int build_reference_db(const char* dms_name,
 	ecotx_t* lca_2 = NULL;
 	ecotx_t* lca = NULL;
 	index_t idx1, idx2;
-	index_t i, j, k;
+	index_t i, j, k, count;
 	int32_t taxid_array_length;
 	int32_t score_array_length;
 	int32_t taxid_array_writable_length;
@ -185,6 +185,7 @@ int build_reference_db(const char* dms_name,
 	matrix_view_name = strcpy(matrix_view_name, o_view_name);
 	strcat(matrix_view_name, "_matrix");
 	fprintf(stderr, "Aligning queries with reference database...\n");
 	if (obi_lcs_align_one_column(dms_name,
 								 refs_view_name,
 								 "",
@ -320,13 +321,19 @@ int build_reference_db(const char* dms_name,
 		return -1;
 	}
 	count = (matrix_with_lca_view->infos)->line_count;
 	fprintf(stderr, "Computing LCAs...\n");
 	// Compute all the LCAs
 		// For each pair
-	for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
+	for (i=0; i<count; i++)
 	{
 		if (! keep_running)
 			return -1;
 		if (i%1000 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) count)*100);
 		// Read all taxids associated with the first sequence and compute their LCA
 		// Read line index
 		idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
@ -363,6 +370,7 @@ int build_reference_db(const char* dms_name,
 			return -1;
 		}
 	}
 	fprintf(stderr,"\rDone : 100 %%           \n");
 	// Clone refs view, add 2 arrays columns for lca and score, compute and write them
@ -442,13 +450,18 @@ int build_reference_db(const char* dms_name,
 		return -1;
 	}
 	fprintf(stderr, "Building LCA arrays...\n");
 	// For each sequence, look for all its alignments in the matrix, and for each different LCA taxid/score, order them and write them
 	// Going through matrix once, filling refs arrays on the go for efficiency
-	for (i=0; i<(matrix_with_lca_view->infos)->line_count; i++)
+	for (i=0; i<count; i++)
 	{
 		if (! keep_running)
 			return -1;
 		if (i%1000 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) count)*100);
 		// Read ref line indexes
 		idx1 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx1_column, i, 0);
 		idx2 = obi_get_int_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_idx2_column, i, 0);
@ -464,6 +477,8 @@ int build_reference_db(const char* dms_name,
 		// Read alignment score
 		score = obi_get_float_with_elt_idx_and_col_p_in_view(matrix_with_lca_view, matrix_score_column, i, 0);
 		//fprintf(stderr, "\n\ntaxid_lca=%d, score=%f, idx1=%d, idx2=%d", taxid_lca, score, idx1, idx2);
 		///////////////// Compute for first sequence \\\\\\\\\\\\\\\\\\\\\\\     (TODO function)
 		// Read arrays
@ -480,9 +495,11 @@ int build_reference_db(const char* dms_name,
 //			return -1;
 //		}
 		//fprintf(stderr, "\n1st sequence");
 		// If empty, add values
 		if (taxid_array_length == 0)
 		{
 			//fprintf(stderr, "\nEmpty, add value");
 			if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
 			{
 				obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
@ -496,6 +513,8 @@ int build_reference_db(const char* dms_name,
 		}
 		else
 		{
 			//fprintf(stderr, "\nNot empty");
 			j = 0;
 			modified = false;
 			while (j < taxid_array_length)
@ -509,6 +528,9 @@ int build_reference_db(const char* dms_name,
 						memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 						modified = true;
 						//fprintf(stderr, "\nSame LCA, replace %d and %f with %d and %f", lca_taxid_array_writable[j],
 						//		score_array_writable[j], taxid_lca, score);
 						// Better score for the same LCA, replace this LCA/score pair
 						lca_taxid_array_writable[j] = taxid_lca;
 						score_array_writable[j] = score;
@ -535,6 +557,8 @@ int build_reference_db(const char* dms_name,
 				{
 					if (score > score_array[j])
 					{
 						//fprintf(stderr, "\nInsert new");
 						memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
 						memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 						modified = true;
@ -579,10 +603,15 @@ int build_reference_db(const char* dms_name,
 				memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 				modified = true;
 				//fprintf(stderr, "\nAppend at the end");
 				// Append LCA
 				lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
 				score_array_writable[score_array_writable_length] = score;
 				taxid_array_writable_length++;
 				score_array_writable_length++;
 				// Remove the previous (children) LCAs from the array if their score is equal or lower
 				while ((j>0) && (score_array_writable[j-1] <= score))
 				{
@ -603,6 +632,13 @@ int build_reference_db(const char* dms_name,
 			// Write new arrays
 			if (modified)
 			{
 //				fprintf(stderr, "\n\nnew array:");
 //				for (k=0;k<taxid_array_writable_length;k++)
 //				{
 //					lca = obi_taxo_get_taxon_with_taxid(tax, lca_taxid_array_writable[k]);
 //					fprintf(stderr, "\nLCA=%d, %s, score=%f", lca_taxid_array_writable[k], lca->name, score_array_writable[k]);
 //				}
 				if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx1, lca_taxid_array_writable, (uint8_t) (obi_sizeof(OBI_INT) * 8), taxid_array_writable_length) < 0)
 				{
 					obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
@ -632,9 +668,13 @@ int build_reference_db(const char* dms_name,
 //			return -1;
 //		}
 		//fprintf(stderr, "\n2nd sequence");
 		// If empty, add values
 		if (taxid_array_length == 0)
 		{
 			//fprintf(stderr, "\nEmpty, add value");
 			if (obi_set_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, idx2, &taxid_lca, (uint8_t) (obi_sizeof(OBI_INT) * 8), 1) < 0)
 			{
 				obidebug(1, "\nError setting a LCA taxid array in a column when building a reference database");
@ -648,6 +688,8 @@ int build_reference_db(const char* dms_name,
 		}
 		else
 		{
 			//fprintf(stderr, "\nNot empty");
 			j = 0;
 			modified = false;
 			while (j < taxid_array_length)
@ -661,6 +703,9 @@ int build_reference_db(const char* dms_name,
 						memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 						modified = true;
 						//fprintf(stderr, "\nSame LCA, replace %d and %f with %d and %f", lca_taxid_array_writable[j],
 						//		score_array_writable[j], taxid_lca, score);
 						// Better score for the same LCA, replace this LCA/score pair
 						lca_taxid_array_writable[j] = taxid_lca;
 						score_array_writable[j] = score;
@ -687,6 +732,8 @@ int build_reference_db(const char* dms_name,
 				{
 					if (score > score_array[j])
 					{
 						//fprintf(stderr, "\nInsert new");
 						memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
 						memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 						modified = true;
@ -727,6 +774,8 @@ int build_reference_db(const char* dms_name,
 			if (j == taxid_array_length) // same or parent LCA not found, need to be appended at the end
 			{
 				//fprintf(stderr, "\nAppend at the end");
 				memcpy(lca_taxid_array_writable, lca_taxid_array, taxid_array_length*sizeof(obiint_t));
 				memcpy(score_array_writable, score_array, score_array_length*sizeof(obifloat_t));
 				modified = true;
@ -735,6 +784,9 @@ int build_reference_db(const char* dms_name,
 				lca_taxid_array_writable[taxid_array_writable_length] = taxid_lca;
 				score_array_writable[score_array_writable_length] = score;
 				taxid_array_writable_length++;
 				score_array_writable_length++;
 				// Remove the previous (children) LCAs from the array if their score is equal or lower
 				while ((j>0) && (score_array_writable[j-1] <= score))
 				{
@ -769,11 +821,17 @@ int build_reference_db(const char* dms_name,
 			}
 		}
 	}
 	fprintf(stderr,"\rDone : 100 %%           \n");
 	fprintf(stderr, "Writing results...\n");
 	count = (o_view->infos)->line_count;
 	// Fill empty LCA informations (because filling from potentially sparse alignment matrix) with the sequence taxid
 	score=1.0;  // technically getting LCA of identical sequences
-	for (i=0; i<(o_view->infos)->line_count; i++)
+	for (i=0; i<count; i++)
 	{
 		if (i%1000 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) count)*100);
 		obi_get_array_with_col_p_in_view(o_view, final_lca_taxid_a_column, i, &taxid_array_length);
 		if (taxid_array_length == 0)  // no LCA set
 		{
@ -799,6 +857,7 @@ int build_reference_db(const char* dms_name,
 			}
 		}
 	}
 	fprintf(stderr,"\rDone : 100 %%           \n");
 	// Add information about the threshold used to build the DB
 	snprintf(threshold_str, 5, "%f", threshold);
@ -858,7 +917,6 @@ int build_reference_db(const char* dms_name,
 	free(matrix_view_name);
 	free(matrix_with_lca_view_name);
 	fprintf(stderr,"\rDone : 100 %%           \n");
 	return 0;
 }
--- a/src/obi_ecopcr.c
+++ b/src/obi_ecopcr.c
@ -105,7 +105,8 @@ static int create_output_columns(Obiview_p o_view, bool kingdom_mode);
 * @param o_temp1_column A pointer on the output column for the temperature for the first primer.
 * @param o_temp2_column A pointer on the output column for the temperature for the second primer.
 *
- * @retval 0 if the operation was successfully completed.
+ * @retval 0 if the sequence was skipped (taxid not found, warning printed).
 * @retval 1 if the sequence was successfully printed to the output.
 * @retval -1 if an error occurred.
 *
 * @since July 2018
@ -366,6 +367,17 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
 	// TODO add check for primer longer than MAX_PAT_LEN (32)
 	// Get sequence id
 	seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
 	// Get the taxon structure
 	main_taxon = obi_taxo_get_taxon_with_taxid(taxonomy, taxid);
 	if (main_taxon == NULL)
 	{
 		obidebug(1, "\nWarning: error reading the taxonomic information of a sequence. Seq id: %s, taxid: %d. Probably deprecated taxid. Skipping this sequence.", seq_id, taxid);
 		return 0;
 	}
 	ldelta = (pos1 <= keep_nucleotides)?pos1:keep_nucleotides;
 	rdelta = ((pos2+keep_nucleotides)>=seq_len)?seq_len-pos2:keep_nucleotides;
@ -431,16 +443,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
 	if (isnan(tm2))
 		tm2 = OBIFloat_NA;
 	// Get the taxon structure
 	main_taxon = obi_taxo_get_taxon_with_taxid(taxonomy, taxid);
 	if (main_taxon == NULL)
 	{
 		obidebug(1, "\nError reading the taxonomic information of a sequence");
 		return -1;
 	}
 	// Write sequence id
 	seq_id = obi_get_str_with_elt_idx_and_col_p_in_view(i_view, i_id_column, i_idx, 0);
 	if (obi_set_str_with_elt_idx_and_col_p_in_view(o_view, o_id_column, o_idx, 0, seq_id) < 0)
 	{
 		obidebug(1, "\nError writing the sequence id");
@ -629,7 +632,7 @@ static int print_seq(Obiview_p i_view, Obiview_p o_view,
 		return -1;
 	}
-	return 0;
+	return 1;
 }
@ -698,6 +701,7 @@ int obi_ecopcr(const char* i_dms_name,
 	obiint_t      taxid;
 	char*         sequence;
 	int			  printed;
 	SeqPtr        apatseq=NULL;
 	int32_t       o1Hits;
@ -1057,14 +1061,14 @@ int obi_ecopcr(const char* i_dms_name,
 											length = 0;
 											if (posj > posi)
 												length = posj - posi - o1->patlen - o2->patlen;
-											if (posj < posi)
+											else if (circular > 0)
 												length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
 											if ((length>0) &&	// For when primers touch or overlap
 												(!min_len || (length >= min_len)) &&
 												(!max_len || (length <= max_len)))
 											{
 												// Print the found amplicon
-												if (print_seq(i_view, o_view,
+												printed = print_seq(i_view, o_view,
 														  	  i_idx, o_idx,
 															  taxonomy,
 															  sequence,
@ -1090,12 +1094,14 @@ int obi_ecopcr(const char* i_dms_name,
 															  o_strand_column,
 															  o_primer1_column, o_primer2_column,
 															  o_error1_column, o_error2_column,
-															  o_temp1_column,  o_temp2_column) < 0)
+															  o_temp1_column,  o_temp2_column);
 												if (printed < 0)
 												{
 													obidebug(1, "\nError writing the ecopcr result");
 													return -1;
 												}
-												o_idx++;
+												else if (printed > 0)
 													o_idx++;
 											}
 										}
 									}
@ -1145,14 +1151,14 @@ int obi_ecopcr(const char* i_dms_name,
 											length = 0;
 											if (posj > posi)
 												length = posj - posi + 1  - o2->patlen - o1->patlen; /* - o1->patlen : deleted by <EC> (prior to the OBITools3) */
-											if (posj < posi)
+											else if (circular > 0)
 												length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
 											if ((length>0) &&	// For when primers touch or overlap
 												(!min_len || (length >= min_len)) &&
 												(!max_len || (length <= max_len)))
 											{
 												// Print the found amplicon
-												if (print_seq(i_view, o_view,
+												printed = print_seq(i_view, o_view,
 														  	  i_idx, o_idx,
 															  taxonomy,
 															  sequence,
@ -1178,12 +1184,14 @@ int obi_ecopcr(const char* i_dms_name,
 															  o_strand_column,
 															  o_primer1_column, o_primer2_column,
 															  o_error1_column, o_error2_column,
-															  o_temp1_column,  o_temp2_column) < 0)
+															  o_temp1_column,  o_temp2_column);
 												if (printed < 0)
 												{
 													obidebug(1, "\nError writing the ecopcr result");
 													return -1;
 												}
-												o_idx++;
+												else if (printed > 0)
 													o_idx++;
 											}
 										}
 									}
@ -1224,7 +1232,7 @@ int obi_ecopcr(const char* i_dms_name,
 		return -1;
 	}
-	fprintf(stderr,"\rDone : 100 %%           ");
+	fprintf(stderr,"\rDone : 100 %%           \n");
 	return 0;
 	return 0;
--- a/src/obi_ecopcr.h
+++ b/src/obi_ecopcr.h
@ -81,8 +81,8 @@
 * @param o_dms_name The path to the output DMS.
 * @param o_view_name The name of the output view.
 * @param o_view_comments The comments to associate with the output view.
- * @param primer1 The first primer.
+ * @param primer1 The first primer, length must be less than or equal to 32 (because of apat lib limitation).
- * @param primer2 The second primer.
+ * @param primer2 The second primer, length must be less than or equal to 32 (because of apat lib limitation).
 * @param error_max The maximum number of errors allowed per primer for amplification.
 * @param min_len The minimum length of an amplicon.
 * @param max_len The maximum length of an amplicon.
--- a/src/obi_ecotag.c
+++ b/src/obi_ecotag.c
@ -100,35 +100,35 @@ int print_assignment_result(Obiview_p output_view, index_t line,
 static int create_output_columns(Obiview_p o_view)
 {
 	// Score column
-	if (obi_view_add_column(o_view, ECOTAG_SCORE_COLUMN_NAME, -1, NULL, OBI_FLOAT, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_SCORE_COLUMN_NAME, true) < 0)
+	if (obi_view_add_column(o_view, ECOTAG_SCORE_COLUMN_NAME, -1, NULL, OBI_FLOAT, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
 	{
 		obidebug(1, "\nError creating the column for the score in ecotag");
 		return -1;
 	}
 	// Assigned taxid column
-	if (obi_view_add_column(o_view, ECOTAG_TAXID_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_TAXID_COLUMN_NAME, true) < 0)
+	if (obi_view_add_column(o_view, ECOTAG_TAXID_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
 	{
 		obidebug(1, "\nError creating the column for the assigned taxid in ecotag");
 		return -1;
 	}
 	// Assigned scientific name column
-	if (obi_view_add_column(o_view, ECOTAG_NAME_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_NAME_COLUMN_NAME, true) < 0)
+	if (obi_view_add_column(o_view, ECOTAG_NAME_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
 	{
 		obidebug(1, "\nError creating the column for the assigned scientific name in ecotag");
 		return -1;
 	}
 	// Assignement status column
-	if (obi_view_add_column(o_view, ECOTAG_STATUS_COLUMN_NAME, -1, NULL, OBI_BOOL, 0, 1, NULL, false, false, false, NULL, NULL, -1, ECOTAG_STATUS_COLUMN_NAME, true) < 0)
+	if (obi_view_add_column(o_view, ECOTAG_STATUS_COLUMN_NAME, -1, NULL, OBI_BOOL, 0, 1, NULL, false, false, false, NULL, NULL, -1, "{}", true) < 0)
 	{
 		obidebug(1, "\nError creating the column for the assignment status in ecotag");
 		return -1;
 	}
 	// Column for array of best match ids
-	if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, true) < 0)
+	if (obi_view_add_column(o_view, ECOTAG_BEST_MATCH_IDS_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, true, false, NULL, NULL, -1, "{}", true) < 0)
 	{
 		obidebug(1, "\nError creating the column for the array of ids of the best match in ecotag");
 		return -1;
@ -455,7 +455,7 @@ int obi_ecotag(const char* dms_name,
 	for (i=0; i < query_count; i++)
 	{
-		if (i%100 == 0)
+		if (i%1000 == 0)
 			fprintf(stderr,"\rDone : %f %%       ", (i / (float) query_count)*100);
 		best_match_count = 0;
@ -562,7 +562,7 @@ int obi_ecotag(const char* dms_name,
 			score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);
 			k = 0;
-			while ((k < lca_array_length) && (score_array[k] >= ecotag_threshold))
+			while ((k < lca_array_length) && (score_array[k] >= best_score))
 				k++;
 			if (k>0)
@ -570,12 +570,12 @@ int obi_ecotag(const char* dms_name,
 				lca_array = obi_get_array_with_col_p_in_view(ref_view, lca_taxid_a_column, best_match_idx, &lca_array_length);
 				if (j>0)
 				{
-					lca = obi_taxo_get_taxon_with_taxid(taxonomy, lca_taxid);
+//					lca = obi_taxo_get_taxon_with_taxid(taxonomy, lca_taxid);
-					if (lca == NULL)
+//					if (lca == NULL)
-					{
+//					{
-						obidebug(1, "\nError getting a taxon from a taxid when doing taxonomic assignment");
+//						obidebug(1, "\nError getting a taxon from a taxid when doing taxonomic assignment");
-						return -1;
+//						return -1;
-					}
+//					}
 					lca_in_array = obi_taxo_get_taxon_with_taxid(taxonomy, lca_array[k-1]);
 					if (lca_in_array == NULL)
 					{
--- a/src/obidms_taxonomy.c
+++ b/src/obidms_taxonomy.c
@ -2376,9 +2376,10 @@ int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnode
 				// and the deleted taxids with no current reference. An element of the list is composed of the taxid, and the index
 				// of the taxon in the taxa structure, or -1 for deleted taxids.
 				// Creating the merged list requires to merge the 3 ordered lists into one.
-				while (((nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid)) || ((nD >= 0) && (delnodes[nD] < old_taxid)))
+				while (((nT < (tax->taxa)->count) && ((tax->taxa)->taxon[nT].taxid < old_taxid)) ||
 						((nD >= 0) && (delnodes[nD] < old_taxid)))
 				{
-					if ((tax->taxa)->taxon[nT].taxid < delnodes[nD])
+					if ((nT < (tax->taxa)->count) && (tax->taxa)->taxon[nT].taxid < delnodes[nD])
 					{ // Add element from taxa list
 						// Enlarge structure if needed
 						if (n == buffer_size)
@ -2401,7 +2402,7 @@ int read_merged_dmp(const char* taxdump, OBIDMS_taxonomy_p tax, int32_t* delnode
 						nT++;
 						n++;
 					}
-					else if (delnodes[nD] < (tax->taxa)->taxon[nT].taxid)
+					else
 					{ // Add element from deleted taxids list
 						// Enlarge structure if needed
 						if (n == buffer_size)
@ -3036,12 +3037,12 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
 	strcpy(tax->tax_name, taxonomy_name);
 	buffer_size = 2048;
 	taxonomy_path = get_taxonomy_path(dms, taxonomy_name);
 	if (taxonomy_path == NULL)
 		return NULL;
 	buffer_size = strlen(taxonomy_path) + strlen(taxonomy_name) + 6;
 	// Read ranks
 	ranks_file_name = (char*) malloc(buffer_size*sizeof(char));
 	if (ranks_file_name == NULL)
--- a/src/obidmscolumn.c
+++ b/src/obidmscolumn.c
@ -1973,7 +1973,11 @@ int obi_enlarge_column(OBIDMS_column_p column)
 	// Calculate the new file size
 	old_line_count = (column->header)->line_count;
-	new_line_count = old_line_count * COLUMN_GROWTH_FACTOR;
+	new_line_count = ceil((double) old_line_count * (double) COLUMN_GROWTH_FACTOR);
 	if (new_line_count > old_line_count+100000)
 		new_line_count = old_line_count+100000;
 	else if (new_line_count < old_line_count+1000)
 		new_line_count = old_line_count+1000;
 	if (new_line_count > MAXIMUM_LINE_COUNT)
 	{
@ -2381,6 +2385,54 @@ char* obi_get_elements_names(OBIDMS_column_p column)
 }
 /*
  * Builds a human-readable list of the element names of a column.
  *
  * Each element name stored in the column header is copied in index order and
  * followed by "; ". The trailing space is then replaced by the string
  * terminator, so the result looks like "name1; name2;".
  *
  * @param column A pointer on the column whose header holds the element names.
  *
  * @returns A newly allocated, NUL-terminated string that the caller must free.
  * @retval NULL if the memory allocation failed (obi errno set to OBI_MALLOC_ERROR).
  */
 char* obi_get_formatted_elements_names(OBIDMS_column_p column)
 {
 	char*  elements_names;
 	size_t buffer_size;
 	int    i, j;
 	int    elt_idx;
 	int    len;
 	// Worst case: every name is followed by the 2 characters "; ", plus 1 byte so that
 	// an empty list can still hold its terminator.
 	// NOTE(review): assumes the names in the header do not overlap, i.e. their cumulated
 	// length is at most elements_names_length -- confirm against the header builder.
 	buffer_size = (size_t) (column->header)->elements_names_length
 				+ 2 * (size_t) (column->header)->nb_elements_per_line
 				+ 1;
 	elements_names = (char*) malloc(buffer_size * sizeof(char));
 	if (elements_names == NULL)
 	{
 		obi_set_errno(OBI_MALLOC_ERROR);
 		obidebug(1, "\nError allocating memory for elements names");
 		return NULL;
 	}
 	j = 0;
 	for (i=0; i < (column->header)->nb_elements_per_line; i++)
 	{
 		// elements_names_idx[i] is the offset of the i-th name inside the header's names buffer
 		elt_idx = ((column->header)->elements_names_idx)[i];
 		len = strlen(((column->header)->elements_names)+elt_idx);
 		memcpy(elements_names+j, ((column->header)->elements_names)+elt_idx, len*sizeof(char));
 		j = j + len;
 		elements_names[j] = ';';
 		j++;
 		elements_names[j] = ' ';
 		j++;
 	}
 	// Replace the trailing space with the terminator (same output as before);
 	// with no element at all, j is 0 and the result is simply the empty string
 	// instead of a write before the start of the buffer.
 	if (j > 0)
 		elements_names[j - 1] = '\0';
 	else
 		elements_names[0] = '\0';
 	return elements_names;
 }
 /*
  * Work-in-progress builder for a formatted description of a column.
  *
  * For now it only allocates the result buffer and builds (then discards) the
  * formatted element names; nothing is written into the result yet, so an
  * empty string is returned.
  *
  * @param column A pointer on the column to describe.
  *
  * @returns A newly allocated, NUL-terminated string that the caller must free.
  * @retval NULL if a memory allocation failed or the element names could not be formatted.
  */
 char* obi_column_formatted_infos(OBIDMS_column_p column)
 {
 	char* column_infos;
 	char* elt_names;
 	column_infos = malloc(1024 * sizeof(char));
 	if (column_infos == NULL)
 	{
 		obi_set_errno(OBI_MALLOC_ERROR);
 		obidebug(1, "\nError allocating memory for the formatted column infos");
 		return NULL;
 	}
 	// Nothing is formatted into the buffer yet: hand back a valid empty string
 	// rather than uninitialized memory.
 	column_infos[0] = '\0';
 	elt_names = obi_get_formatted_elements_names(column);
 	if (elt_names == NULL)
 	{
 		// The helper already reported the error; do not leak the result buffer
 		free(column_infos);
 		return NULL;
 	}
 	free(elt_names);
 	return column_infos;
 }
 int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb, index_t elt_idx)
 {
--- a/src/obidmscolumn.h
+++ b/src/obidmscolumn.h
@ -505,6 +505,14 @@ index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const cha
 char* obi_get_elements_names(OBIDMS_column_p column);
 // TODO
 //char* obi_get_formatted_elements_names(OBIDMS_column_p column);
 // TODO
 //char* obi_column_formatted_infos(OBIDMS_column_p column);
 /**
 * @brief Prepares a column to set a value.
 *
--- a/src/obiview.c
+++ b/src/obiview.c
@ -1407,7 +1407,7 @@ static char* view_check_qual_match_seqs(Obiview_p view)
 						// Test that the lengths of the quality and the sequence are equal
 						if ((size_t)qual_len != strlen(seq))
 						{
-							obidebug(1, "\nError checking the predicate for view %s: The sequences and sequence quality arrays match.", (view->infos)->name);
+							obidebug(1, "\nError checking the predicate for view %s: The sequences and sequence quality arrays match (index %lld, seq=%s, quality length = %d).", (view->infos)->name, j, seq, qual_len);
 							return NULL;
 						}
 					}
--- a/src/sse_banded_LCS_alignment.c
+++ b/src/sse_banded_LCS_alignment.c
@ -686,6 +686,9 @@ int calculateSizeToAllocate(int maxLen, int LCSmin)
 	size *=  3;
 	size +=  16;
 	size += 10;  // band-aid for memory bug I don't understand (triggered on specific db on ubuntu)
 				 // bug might have to do with the way different systems behave when aligning the address in obi_get_memory_aligned_on_16
 	return(size*sizeof(int16_t));
 }
Author	SHA1	Message	Date
Celine Mercier	974528b2e6	build_ref_db: fixed bug erasing some of the higher LCAs (i.e. lowest similarities)	2020-04-28 15:56:06 +02:00
Celine Mercier	1b346b54f9	ecotag: better specificity by now correctly looking for similarities within refs above best score instead of ecotag threshold	2020-04-28 15:10:07 +02:00
Celine Mercier	058f2ad8b3	ecopcr: fixed a bug where sequences were considered circular (generating false positives)	2020-04-27 14:44:35 +02:00
Celine Mercier	60bfd3ae8d	obi annotate: now defaults to setting str if expression is not valid	2020-04-24 11:35:20 +02:00
Celine Mercier	67bdee105a	C: build_ref_db: added progress display for each step	2020-04-18 14:24:08 +02:00
Celine Mercier	0f745e0113	C: Columns: optimizing column file growth	2020-04-18 13:55:47 +02:00
cmercier	da8de52ba4	export: fixed progress bar bug	2020-04-17 15:09:10 +02:00
cmercier	4d36538c6e	C: SSE lcs alignment: band-aid for memory bug I don't understand (triggered on specific db on ubuntu)	2020-04-17 15:07:52 +02:00
Celine Mercier	8d0b17d87d	Switch to version 3.0.0-beta14	2020-04-15 17:47:26 +02:00
Celine Mercier	343999a627	Taxonomy: fixed a critical memory bug when building the list of merged taxids	2020-04-15 17:46:13 +02:00
Celine Mercier	e9a40630e9	C: Columns: rounding column growth to ceil to avoid looping on small values	2020-04-13 19:02:10 +02:00
Celine Mercier	8dbcd3025a	C: Columns: reduced column growth factor from 2 to 1.3 to avoid errno28	2020-04-13 14:47:56 +02:00
Celine Mercier	4cf635d001	Switch to version 3.0.0-beta13	2020-04-12 17:42:58 +02:00
Celine Mercier	b7e7cc232a	Made completion script cleaner	2020-04-12 17:41:59 +02:00
Celine Mercier	b6ab792ceb	C: made error message more detailed when checking that sequences and qualities match	2020-04-12 17:40:24 +02:00
Celine Mercier	ddea5a2964	obi import: fixed inconsequential error when precomputing number of entries in some formats	2020-04-12 17:38:42 +02:00
Celine Mercier	30852ab7d5	View bash history: removed useless shebang	2020-04-12 17:36:04 +02:00
Celine Mercier	4d0299904e	all commands (almost): cleaner DMS closing at the end	2020-04-12 17:31:58 +02:00
Celine Mercier	eef5156d95	obi stats: fixed error when printing bool keys	2020-04-12 17:12:04 +02:00
Celine Mercier	e62c991bbc	goes with previous commit	2020-04-10 11:22:26 +02:00
Celine Mercier	1218eed7fd	ecopcr: now printing a warning instead of interrupting with an error when a taxid is not found	2020-04-10 11:22:04 +02:00
Celine Mercier	cd9cea8c97	obi import: fixed critical bug where the last entry of embl and genbank files was not imported	2020-04-09 19:26:27 +02:00
Celine Mercier	98cfb70d73	ecopcr: made some errors more informative	2020-04-09 09:15:28 +02:00
Celine Mercier	b9f68c76c8	ecopcr: added warnings and check of primer length (related to #75 )	2020-04-05 18:40:56 +02:00
Celine Mercier	0b98371688	ngsfilter: added warning about primer length in -h (#75 )	2020-04-05 18:39:20 +02:00
Celine Mercier	f0d152fcbd	ngsfilter: now checking primer length (fixes #75 )	2020-04-05 18:29:10 +02:00
Celine Mercier	8019dee68e	ecotag: now closing all DMS properly	2020-04-05 13:20:49 +02:00
Celine Mercier	0b4a234671	Swich to version 3.0.0-beta11	2020-02-12 14:23:42 +01:00
Celine Mercier	d32cfdcce5	ecotag: fixed the generated column comments formatting that would generate errors	2020-02-12 14:23:17 +01:00
Celine Mercier	219c0d6fdc	obi cat: Fixed the handling when concatenating views with dictionaries having different key sets	2020-02-12 14:21:39 +01:00
Celine Mercier	dc9f897917	switch to version 3.0.0-beta10	2020-02-02 21:15:27 +01:00
Celine Mercier	bb72682f7d	obi import: new option --preread to do a first readthrough of the dataset if it contains huge dictionaries for a much faster import.	2020-02-02 21:12:34 +01:00
Celine Mercier	52920c3c71	URI decoding: dirty temp fix for bug where default dms makes a mess when should guess file	2020-02-02 21:11:05 +01:00