Switch to version 3.0.0b19

Cleaner installation
obi import: fixed to work with `seq` genbank extension
2020-05-20 10:29:36 +02:00 · 2020-05-20 10:29:12 +02:00 · 2020-05-20 10:28:14 +02:00 · 2020-05-18 17:08:55 +02:00 · 2020-05-18 17:08:09 +02:00 · 2020-05-18 17:06:58 +02:00
15 changed files with 138 additions and 79 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -6,7 +6,7 @@ recursive-include doc/sphinx/source *.txt *.rst *.py
 recursive-include doc/sphinx/sphinxext *.py
 include doc/sphinx/Makefile
 include doc/sphinx/Doxyfile
-include README.txt
+include README.md
 include requirements.txt
 include scripts/obi

--- a/python/obitools3/commands/annotate.pyx
+++ b/python/obitools3/commands/annotate.pyx
@@ -13,7 +13,8 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
                                        ID_COLUMN, \
                                        DEFINITION_COLUMN, \
                                        QUALITY_COLUMN, \
-                                        COUNT_COLUMN
+                                        COUNT_COLUMN, \
+                                        TAXID_COLUMN

 import time
 import math 
@@ -175,8 +176,8 @@ def sequenceTaggerGenerator(config, taxo=None):
            counter[0]+=1
                            
        for rank in annoteRank:
-            if 'taxid' in seq:
-                taxid = seq['taxid']
+            if TAXID_COLUMN in seq:
+                taxid = seq[TAXID_COLUMN]
                if taxid is not None:
                    rtaxid = taxo.get_taxon_at_rank(taxid, rank)
                    if rtaxid is not None:
--- a/python/obitools3/commands/ecotag.pyx
+++ b/python/obitools3/commands/ecotag.pyx
@@ -107,8 +107,8 @@ def run(config):
    comments = View.print_config(config, "ecotag", command_line, input_dms_name=input_dms_name, input_view_name=input_view_name)

    if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \
-                  tobytes(ref_dms_name), tobytes(ref_view_name), \
-                  tobytes(taxo_dms_name), tobytes(taxonomy_name), \
+                  ref_dms.name_with_full_path, tobytes(ref_view_name), \
+                  taxo_dms.name_with_full_path, tobytes(taxonomy_name), \
                  tobytes(o_view_name), comments, 
                  config['ecotag']['threshold']) < 0:
        raise Exception("Error running ecotag")
--- a/python/obitools3/commands/import.pyx
+++ b/python/obitools3/commands/import.pyx
@@ -25,7 +25,8 @@ from obitools3.dms.capi.obiview cimport VIEW_TYPE_NUC_SEQS, \
                                        DEFINITION_COLUMN, \
                                        QUALITY_COLUMN, \
                                        COUNT_COLUMN, \
-                                        TAXID_COLUMN
+                                        TAXID_COLUMN, \
+                                        MERGED_PREFIX
                                        
 from obitools3.dms.capi.obidms cimport obi_import_view

@@ -163,7 +164,7 @@ def run(config):
        taxo.write(taxo_name)
        taxo.close()
        o_dms.record_command_line(" ".join(sys.argv[1:]))
-        o_dms.close()
+        o_dms.close(force=True)
        logger("info", "Done.")
        return

@@ -217,11 +218,14 @@ def run(config):
                logger("info", "Read %d entries", i)
    
            for tag in entry :
+                newtag = tag
+                if tag[:7] == b"merged_":
+                    newtag = MERGED_PREFIX+tag[7:]
                if type(entry[tag]) == dict :
                    if tag in dict_dict:
-                        dict_dict[tag][0].update(entry[tag].keys())
+                        dict_dict[newtag][0].update(entry[tag].keys())
                    else:
-                        dict_dict[tag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
+                        dict_dict[newtag] = [set(entry[tag].keys()), get_obitype(entry[tag])]
            i+=1
        
        if pb is not None:
@@ -232,7 +236,7 @@ def run(config):
            dcols[tag] = (Column.new_column(view, tag, dict_dict[tag][1], \
                              nb_elements_per_line=len(dict_dict[tag][0]), \
                              elements_names=list(dict_dict[tag][0])), \
-                          value_obitype)
+                          dict_dict[tag][1])
    
        
        # Reinitialize the input
@@ -288,6 +292,8 @@ def run(config):
                    tag = TAXID_COLUMN
                if tag == b"count":
                    tag = COUNT_COLUMN
+                if tag[:7] == b"merged_":
+                    tag = MERGED_PREFIX+tag[7:]
                    
                if tag not in dcols :
                     
@@ -328,8 +334,8 @@ def run(config):
                    try:
                        # Check that it's not the case where the first entry contained a dict of length 1 and now there is a new key                        
                        if type(value) == dict and \
-                            dcols[tag][0].nb_elements_per_line == 1 and len(value.keys()) == 1 \
-                            and dcols[tag][0].elements_names[0] != list(value.keys())[0] :
+                            dcols[tag][0].nb_elements_per_line == 1 \
+                            and set(dcols[tag][0].elements_names) != set(value.keys()) :
                            raise IndexError  # trigger column rewrite
                        
                        # Fill value
@@ -402,7 +408,7 @@ def run(config):
    except AttributeError:
        pass
    try:
-        output[0].close()
+        output[0].close(force=True)
    except AttributeError:
        pass

--- a/python/obitools3/commands/ngsfilter.pyx
+++ b/python/obitools3/commands/ngsfilter.pyx
@@ -42,8 +42,8 @@ def addOptions(parser):
                     metavar="<URI>",
                     type=str,
                     default=None,
-                     help="URI to the view containing the samples definition (with tags, primers, sample names,...)"
-                          "Warning: primer lengths must be less than or equal to 32")
+                     help="URI to the view containing the samples definition (with tags, primers, sample names,...).\n"
+                          "\nWarning: primer lengths must be less than or equal to 32")

    group.add_argument('-R', '--reverse-reads',
                     action="store", dest="ngsfilter:reverse",
--- a/python/obitools3/commands/uniq.pxd
+++ b/python/obitools3/commands/uniq.pxd
@@ -5,5 +5,5 @@ from obitools3.dms.taxo.taxo cimport Taxonomy
 from obitools3.dms.view.typed_view.view_NUC_SEQS cimport View_NUC_SEQS


-cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy)
-cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=*, Taxonomy taxonomy=*, bint mergeIds=*, list categories=*, int max_elts=*)
+cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict config)
+cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=*, Taxonomy taxonomy=*, bint mergeIds=*, list categories=*, int max_elts=*)
--- a/python/obitools3/commands/uniq.pyx
+++ b/python/obitools3/commands/uniq.pyx
@@ -56,7 +56,7 @@ def addOptions(parser):
                             "(option can be used several times).")


-cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
+cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy, dict config) :
    
    cdef int             taxid
    cdef Nuc_Seq_Stored  seq
@@ -69,7 +69,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
    cdef object          gn_sn
    cdef object          fa_sn

-    # Create columns
+    # Create columns and save them for efficiency
    if b"species" in o_view and o_view[b"species"].data_type_int != OBI_INT :
        o_view.delete_column(b"species")
    if b"species" not in o_view:
@@ -77,6 +77,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"species", 
                          OBI_INT
                         )
+    species_column = o_view[b"species"]

    if b"genus" in o_view and o_view[b"genus"].data_type_int != OBI_INT :
        o_view.delete_column(b"genus")
@@ -85,6 +86,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"genus", 
                          OBI_INT
                         )
+    genus_column = o_view[b"genus"]

    if b"family" in o_view and o_view[b"family"].data_type_int != OBI_INT :
        o_view.delete_column(b"family")
@@ -93,6 +95,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"family", 
                          OBI_INT
                         )
+    family_column = o_view[b"family"]

    if b"species_name" in o_view and o_view[b"species_name"].data_type_int != OBI_STR :
        o_view.delete_column(b"species_name")
@@ -101,6 +104,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"species_name", 
                          OBI_STR
                         )
+    species_name_column = o_view[b"species_name"]

    if b"genus_name" in o_view and o_view[b"genus_name"].data_type_int != OBI_STR :
        o_view.delete_column(b"genus_name")
@@ -109,6 +113,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"genus_name", 
                          OBI_STR
                         )
+    genus_name_column = o_view[b"genus_name"]

    if b"family_name" in o_view and o_view[b"family_name"].data_type_int != OBI_STR :
        o_view.delete_column(b"family_name")
@@ -117,6 +122,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"family_name", 
                          OBI_STR
                         )
+    family_name_column = o_view[b"family_name"]

    if b"rank" in o_view and o_view[b"rank"].data_type_int != OBI_STR :
        o_view.delete_column(b"rank")
@@ -125,6 +131,7 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"rank", 
                          OBI_STR
                         )
+    rank_column = o_view[b"rank"]

    if b"scientific_name" in o_view and o_view[b"scientific_name"].data_type_int != OBI_STR :
        o_view.delete_column(b"scientific_name")
@@ -133,9 +140,15 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
                          b"scientific_name", 
                          OBI_STR
                         )
-        
-    for seq in o_view:     
-        PyErr_CheckSignals()   
+    scientific_name_column = o_view[b"scientific_name"]  
+    
+    # Initialize the progress bar
+    pb = ProgressBar(len(o_view), config, seconde=5)
+    
+    i=0
+    for seq in o_view:
+        PyErr_CheckSignals()
+        pb(i)
        if MERGED_TAXID_COLUMN in seq :
            m_taxids = []            
            m_taxids_dict = seq[MERGED_TAXID_COLUMN]
@@ -165,20 +178,23 @@ cdef merge_taxonomy_classification(View_NUC_SEQS o_view, Taxonomy taxonomy) :
            else:
                fa_sn = None
                tfa = None
-                
-            seq[b"species"] = tsp
-            seq[b"genus"] = tgn
-            seq[b"family"] = tfa
-                
-            seq[b"species_name"] = sp_sn
-            seq[b"genus_name"] = gn_sn
-            seq[b"family_name"] = fa_sn
            
-            seq[b"rank"] = taxonomy.get_rank(taxid)
-            seq[b"scientific_name"] = taxonomy.get_scientific_name(taxid)
+            species_column[i] = tsp
+            genus_column[i] = tgn
+            family_column[i] = tfa
+                
+            species_name_column[i] = sp_sn
+            genus_name_column[i] = gn_sn
+            family_name_column[i] = fa_sn
+            
+            rank_column[i] = taxonomy.get_rank(taxid)
+            scientific_name_column[i] = taxonomy.get_scientific_name(taxid)
+        i+=1
+    
+    pb(len(o_view), force=True)
+    

-
-cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
+cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, dict config, list mergedKeys_list=None, Taxonomy taxonomy=None, bint mergeIds=False, list categories=None, int max_elts=1000000) :
     
    cdef int            i
    cdef int            k
@@ -187,6 +203,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    cdef int            u_idx
    cdef int            i_idx
    cdef int            i_count
+    cdef int            o_count
    cdef str            key_str
    cdef bytes          key
    cdef bytes          mkey
@@ -209,7 +226,6 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    cdef Nuc_Seq_Stored i_seq
    cdef Nuc_Seq_Stored o_seq
    cdef Nuc_Seq_Stored u_seq
-    cdef Column         i_col
    cdef Column         i_seq_col
    cdef Column         i_id_col
    cdef Column         i_taxid_col
@@ -217,6 +233,8 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    cdef Column         o_id_col
    cdef Column         o_taxid_dist_col
    cdef Column         o_merged_col
+    cdef Column         o_count_col
+    cdef Column         i_count_col
    cdef Column_line    i_mcol  
    cdef object         taxid_dist_dict
    cdef object         iter_view
@@ -252,7 +270,12 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    mergedKeys_m = []
    for k in range(k_count):
        mergedKeys_m.append(MERGED_PREFIX + mergedKeys[k])
-     
+    
+    # Check that not trying to remerge without total count information
+    for key in mergedKeys_m:
+        if key in view and COUNT_COLUMN not in view:
+            raise Exception("\n>>>>\nError: trying to re-merge tags without total count tag. Run obi annotate to add the count tag from the relevant merged tag, i.e.: \nobi annotate --set-tag COUNT:'sum([value for key,value in sequence['MERGED_sample'].items()])' dms/input dms/output\n")
+    
    if categories is None:
        categories = []
 
@@ -320,7 +343,11 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
    for k in range(k_count):
        key = mergedKeys[k]
        merged_col_name = mergedKeys_m[k]
-        i_col = view[key]
+        
+        if merged_col_name in view:
+            i_col = view[merged_col_name]
+        else:
+            i_col = view[key]
        
        if merged_infos[merged_col_name]['nb_elts'] > max_elts:
            str_merged_cols.append(merged_col_name)
@@ -374,12 +401,19 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
                          alias=MERGED_COLUMN
                         )

-    # Keep columns that are going to be used a lot in variables 
+    # Keep columns in variables for efficiency
    o_id_col = o_view[ID_COLUMN]
    if TAXID_DIST_COLUMN in o_view:
        o_taxid_dist_col = o_view[TAXID_DIST_COLUMN]
    if MERGED_COLUMN in o_view:
        o_merged_col = o_view[MERGED_COLUMN]
+    if COUNT_COLUMN not in o_view:
+        Column.new_column(o_view,
+                          COUNT_COLUMN,
+                          OBI_INT)
+    o_count_col = o_view[COUNT_COLUMN]
+    if COUNT_COLUMN in view:
+        i_count_col = view[COUNT_COLUMN]
        
    pb(len(view), force=True)
    print("")
@@ -407,7 +441,7 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
            merged_list = list(set(merged_list)) # deduplicate the list
            o_merged_col[o_idx] = merged_list

-        o_seq[COUNT_COLUMN] = 0
+        o_count = 0

        if TAXID_DIST_COLUMN in u_seq and i_taxid_dist_col[u_idx] is not None:
            taxid_dist_dict = i_taxid_dist_col[u_idx]
@@ -423,12 +457,12 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
            i_id = i_id_col[i_idx]
            i_seq = view[i_idx]

-            if COUNT_COLUMN not in i_seq or i_seq[COUNT_COLUMN] is None:
+            if COUNT_COLUMN not in i_seq or i_count_col[i_idx] is None:
                i_count = 1
            else:
-                i_count = i_seq[COUNT_COLUMN]
+                i_count = i_count_col[i_idx]
 
-            o_seq[COUNT_COLUMN] += i_count
+            o_count += i_count
        
            for k in range(k_count):
                                
@@ -463,44 +497,52 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, li
                                mcol[key2] = i_mcol[key2]
                            else:
                                mcol[key2] = mcol[key2] + i_mcol[key2]
-            
-            # Write taxid_dist
-            if mergeIds and TAXID_COLUMN in mergedKeys:
-                if TAXID_DIST_COLUMN in str_merged_cols:
-                    o_taxid_dist_col[o_idx] = str(taxid_dist_dict)
-                else:
-                    o_taxid_dist_col[o_idx] = taxid_dist_dict
-            
-            # Write merged dicts
-            for mkey in merged_dict: 
-                if mkey in str_merged_cols:
-                    mkey_cols[mkey][o_idx] = str(merged_dict[mkey])
-                else:
-                    mkey_cols[mkey][o_idx] = merged_dict[mkey]
-                    # Sets NA values to 0  # TODO discuss, for now keep as None and test for None instead of testing for 0 in tools
-                    #for key in mkey_cols[mkey][o_idx]:
-                    #    if mkey_cols[mkey][o_idx][key] is None:
-                    #        mkey_cols[mkey][o_idx][key] = 0
-                            
+                                        
            for key in i_seq.keys():
                # Delete informations that differ between the merged sequences
-                # TODO make special columns list?
+                # TODO make special columns list? // could be more efficient
                if key != COUNT_COLUMN and key != ID_COLUMN and key != NUC_SEQUENCE_COLUMN and key in o_seq and o_seq[key] != i_seq[key] \
                    and key not in merged_dict :
                    o_seq[key] = None

+        # Write merged dicts
+        for mkey in merged_dict: 
+            if mkey in str_merged_cols:
+                mkey_cols[mkey][o_idx] = str(merged_dict[mkey])
+            else:
+                mkey_cols[mkey][o_idx] = merged_dict[mkey]
+                # Sets NA values to 0  # TODO discuss, for now keep as None and test for None instead of testing for 0 in tools
+                #for key in mkey_cols[mkey][o_idx]:
+                #    if mkey_cols[mkey][o_idx][key] is None:
+                #        mkey_cols[mkey][o_idx][key] = 0
+
+        # Write taxid_dist
+        if mergeIds and TAXID_COLUMN in mergedKeys:
+            if TAXID_DIST_COLUMN in str_merged_cols:
+                o_taxid_dist_col[o_idx] = str(taxid_dist_dict)
+            else:
+                o_taxid_dist_col[o_idx] = taxid_dist_dict
+                
+        o_count_col[o_idx] = o_count
        o_idx += 1
    
+    pb(len(uniques), force=True)
+    
    # Deletes quality columns if there is one because the matching between sequence and quality will be broken (quality set to NA when sequence not)
    if QUALITY_COLUMN in view:
        o_view.delete_column(QUALITY_COLUMN)
    if REVERSE_QUALITY_COLUMN in view:
        o_view.delete_column(REVERSE_QUALITY_COLUMN)
    
+    # Delete old columns that are now merged
+    for k in range(k_count):
+        if mergedKeys[k] in o_view:
+            o_view.delete_column(mergedKeys[k])
+    
    if taxonomy is not None:
        print("")  # TODO because in the middle of progress bar. Better solution?
        logger("info", "Merging taxonomy classification")
-        merge_taxonomy_classification(o_view, taxonomy)
+        merge_taxonomy_classification(o_view, taxonomy, config)



@@ -547,11 +589,10 @@ def run(config):
    pb = ProgressBar(len(entries), config, seconde=5)
    
    try:
-        uniq_sequences(entries, o_view, pb, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])       
+        uniq_sequences(entries, o_view, pb, config, mergedKeys_list=config['uniq']['merge'], taxonomy=taxo, mergeIds=config['uniq']['mergeids'], categories=config['uniq']['categories'], max_elts=config['obi']['maxelts'])       
    except Exception, e:
        raise RollbackException("obi uniq error, rollbacking view: "+str(e), o_view)
    
-    pb(len(entries), force=True)
    print("", file=sys.stderr)

    # Save command config in View and DMS comments
@@ -567,8 +608,8 @@ def run(config):
    #print("\n\nOutput view:\n````````````", file=sys.stderr)
    #print(repr(o_view), file=sys.stderr)
    
-    input[0].close()
-    output[0].close()
+    input[0].close(force=True)
+    output[0].close(force=True)

    logger("info", "Done.")

--- a/python/obitools3/dms/view/view.pyx
+++ b/python/obitools3/dms/view/view.pyx
@@ -531,8 +531,8 @@ cdef class View(OBIWrapper) :
        for level in self.view_history:
            command_list = [level[input][b"command_line"] for input in level.keys()]
            for command in command_list:
+                s+=b"obi "
                s+=command
-                s+=b"\n"
        return s


--- a/python/obitools3/parsers/fasta.pyx
+++ b/python/obitools3/parsers/fasta.pyx
@@ -104,6 +104,7 @@ def fastaNucIterator(lineiterator,
    cdef bytes      sequence
    cdef int        skipped, ionly, read
    cdef Nuc_Seq    seq
+    cdef bint       stop
    
    if only is None:
        ionly = -1
@@ -130,7 +131,8 @@ def fastaNucIterator(lineiterator,
    else:
        line = firstline       
    
-    while True:
+    stop=False
+    while not stop:
                
        if ionly >= 0 and read >= ionly:
            break
@@ -153,7 +155,7 @@ def fastaNucIterator(lineiterator,
                s.append(line[0:-1])
                line = next(iterator)
        except StopIteration:
-            pass
+            stop=True
        
        sequence  = b"".join(s)        
        
--- a/python/obitools3/parsers/genbank.pyx
+++ b/python/obitools3/parsers/genbank.pyx
@@ -171,6 +171,8 @@ def genbankIterator_dir(dir_path,
    read = 0
    read_files = 0
    files = [filename for filename in glob.glob(os.path.join(path, b'*.gbff*'))]
+    files.extend([filename for filename in glob.glob(os.path.join(path, b'*.seq*'))])  # new genbank extension
+    files = list(set(files))
    for filename in files:
        if read==only:
            return
--- a/python/obitools3/version.py
+++ b/python/obitools3/version.py
@@ -1,5 +1,5 @@
 major = 3
 minor = 0
-serial= '0-beta15'
+serial= '0b19'

-version ="%d.%02d.%s" % (major,minor,serial)
+version ="%d.%d.%s" % (major,minor,serial)
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+--extra-index-url https://pypi.python.org/simple/
+Cython>=0.24
+Sphinx>=1.2.0
+ipython>=3.0.0
+breathe>=4.0.0
--- a/setup.py
+++ b/setup.py
@@ -5,8 +5,9 @@ import re
 import subprocess

 from distutils import log
-from distutils.core import setup
-    
+#from distutils.core import setup
+from setuptools import setup    # to work with pip
+
 from distutils.core import Extension
 from distutils.sysconfig import get_python_lib

--- a/src/obiavl.c
+++ b/src/obiavl.c
@@ -648,7 +648,7 @@ int truncate_avl_data_to_size_used(OBIDMS_avl_data_p avl_data)	// TODO is it nec
 	new_data_size = ((index_t) multiple) * getpagesize();

 	// Check that it is actually greater than the current size of the file, otherwise no need to truncate
-	if ((avl_data->header)->data_size_max == new_data_size)
+	if ((avl_data->header)->data_size_max >= new_data_size)
 		return 0;

 	// Get the file descriptor
@@ -667,7 +667,7 @@ int truncate_avl_data_to_size_used(OBIDMS_avl_data_p avl_data)	// TODO is it nec
 	if (ftruncate(file_descriptor, file_size) < 0)
 	{
 		obi_set_errno(OBI_AVL_ERROR);
-		obidebug(1, "\nError truncating an AVL data file");
+		obidebug(1, "\nError truncating an AVL data file, old data size = %lld, new data size = %lld", (avl_data->header)->data_size_max, new_data_size);
 		return -1;
 	}

--- a/src/obiview.c
+++ b/src/obiview.c
@@ -1037,8 +1037,9 @@ static int finish_view(Obiview_p view)
 		return -1;
 	}

-	// Add count column if it's a NUC_SEQ_VIEW with no count column // TODO discuss
-	if ((!strcmp((view->infos)->view_type, VIEW_TYPE_NUC_SEQS)) && (!obi_view_column_exists(view, COUNT_COLUMN)))
+	// Add count column if it's a NUC_SEQ_VIEW with no count column (and there's no MERGED_sample column) // TODO discuss
+	if ((!strcmp((view->infos)->view_type, VIEW_TYPE_NUC_SEQS)) && (!obi_view_column_exists(view, COUNT_COLUMN))
+			&& (!obi_view_column_exists(view, "MERGED_sample")))  // TODO should eventually compute from merged samples?
 	{
 		if (obi_create_auto_count_column(view) < 0)
 		{
Author	SHA1	Message	Date
Celine Mercier	b5a29ac413	Switch to version 3.0.0b19	2020-05-20 10:29:36 +02:00
Celine Mercier	efd2b9d338	Cleaner installation	2020-05-20 10:29:12 +02:00
Celine Mercier	ca6e3e7aad	obi import: fixed to work with `seq` genbank extension	2020-05-20 10:28:14 +02:00
Celine Mercier	76ed8e18e5	Switch to version 3.0.0b18 with version formatting that fits setuptools	2020-05-18 17:08:55 +02:00
Celine Mercier	1d17f28aec	setup: now using setuptools instead of distutils to work with pip	2020-05-18 17:08:09 +02:00
Celine Mercier	fa834e4b8b	obi import: small bug fix	2020-05-18 17:06:58 +02:00
Celine Mercier	a72fea3cc9	Python: fasta parser: fixed a bug stopping the program when the last line contained a single nucleotide	2020-05-12 11:24:12 +02:00
Celine Mercier	e9a37d8a6e	Switch to version 3.0.0-beta16	2020-05-07 17:09:26 +02:00
Celine Mercier	ef074f8455	typo	2020-05-07 17:08:59 +02:00
Celine Mercier	aec5e69f2c	C, views: no more automatic COUNT column if MERGED_sample column exists	2020-05-07 17:08:07 +02:00
Celine Mercier	170ef3f1ba	Views: added obi prefix to commands in bash history	2020-05-07 17:07:01 +02:00
Celine Mercier	f999946582	obi uniq: fixed the remerging of already merged informations, and efficiency improvements	2020-05-07 17:05:54 +02:00
Celine Mercier	773b36ec37	obi import: fixed the import of old obitools files with premerged informations, and other minor improvements	2020-05-07 17:03:04 +02:00
Celine Mercier	69cb434a6c	version 3.0.0-beta15c	2020-04-29 14:25:33 +02:00
Celine Mercier	55d4f98d60	obi annotate: fixed annotation at ranks	2020-04-29 14:24:40 +02:00
Celine Mercier	0bec2631e8	ecotag: fixed a bug where all the full DMS path weren't properly sent to the C layer	2020-04-29 10:35:55 +02:00
Celine Mercier	e6b6c6fa84	AVLs: Made an error message more informative	2020-04-29 10:14:04 +02:00