ecotag: added separate threshold for minimum circle identity (and switch to version 3.0.0b35)
obi cat: fixed open file descriptor leak and switch to version 3.0.0b34
2020-09-25 16:22:09 +02:00 · 2020-08-28 10:41:22 +02:00 · 2020-08-27 18:38:16 +02:00 · 2020-08-13 18:17:09 +02:00 · 2020-08-12 17:55:08 +02:00 · 2020-07-31 16:43:35 +02:00
21 changed files with 257 additions and 83 deletions
--- a/python/obitools3/commands/cat.pyx
+++ b/python/obitools3/commands/cat.pyx
@ -4,7 +4,7 @@ from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
 from obitools3.dms import DMS
 from obitools3.dms.view.view cimport View
 from obitools3.uri.decode import open_uri
-from obitools3.apps.optiongroups import addMinimalOutputOption
+from obitools3.apps.optiongroups import addMinimalOutputOption, addNoProgressBarOption
 from obitools3.dms.view import RollbackException
 from obitools3.apps.config import logger
 from obitools3.utils cimport str2bytes
@ -28,6 +28,7 @@ __title__="Concatenate views."
 def addOptions(parser):
    
    addMinimalOutputOption(parser)
+    addNoProgressBarOption(parser)

    group=parser.add_argument_group('obi cat specific options')

@ -47,9 +48,9 @@ def run(config):
    
    logger("info", "obi cat")

-    # Open the views to concatenate
-    iview_list = []
+    # Check the views to concatenate
    idms_list = []
+    iview_list = []
    total_len = 0
    remove_qual = False
    remove_rev_qual = False
@ -67,8 +68,9 @@ def run(config):
        if REVERSE_QUALITY_COLUMN not in i_view: # same as above for reverse quality
            remove_rev_qual = True
        total_len += len(i_view)
-        iview_list.append(i_view)
        idms_list.append(i_dms)
+        iview_list.append(i_view.name)
+        i_view.close()

    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
@ -97,8 +99,10 @@ def run(config):
    # Initialize multiple elements columns
    if type(output_0)==BufferedWriter: 
        dict_cols = {}
-        for v in iview_list:
+        for v_uri in config["cat"]["views_to_cat"]:
+            v = open_uri(v_uri)[1]
            for coln in v.keys():
+                col = v[coln]
                if v[coln].nb_elements_per_line > 1:
                    if coln not in dict_cols:
                        dict_cols[coln] = {}
@ -108,6 +112,7 @@ def run(config):
                    else:
                        dict_cols[coln]['eltnames'] = set(v[coln].elements_names + list(dict_cols[coln]['eltnames']))
                        dict_cols[coln]['nbelts'] = len(dict_cols[coln]['eltnames'])
+            v.close()            
        for coln in dict_cols:
            Column.new_column(o_view, coln, dict_cols[coln]['obitype'], 
                              nb_elements_per_line=dict_cols[coln]['nbelts'], elements_names=list(dict_cols[coln]['eltnames']))
@ -119,7 +124,8 @@ def run(config):
        pb = None
    
    i = 0
-    for v in iview_list:
+    for v_uri in config["cat"]["views_to_cat"]:
+        v = open_uri(v_uri)[1]
        for entry in v:
            PyErr_CheckSignals()
            if pb is not None:
@ -130,6 +136,7 @@ def run(config):
            else:
                o_view[i] = entry
            i+=1
+        v.close()

    # Deletes quality columns if needed
    if type(output_0)!=BufferedWriter: 
@ -144,7 +151,7 @@ def run(config):
    
    # Save command config in DMS comments
    command_line = " ".join(sys.argv[1:])
-    o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[v.name for v in iview_list])
+    o_view.write_config(config, "cat", command_line, input_dms_name=[d.name for d in idms_list], input_view_name=[vname for vname in iview_list])
    o_dms.record_command_line(command_line)

    #print("\n\nOutput view:\n````````````", file=sys.stderr)
--- a/python/obitools3/commands/ecotag.pyx
+++ b/python/obitools3/commands/ecotag.pyx
@ -41,6 +41,17 @@ def addOptions(parser):
                      help="Minimum identity to consider for assignment, as a normalized identity, e.g. 0.95 for an identity of 95%%. "
                           "Default: 0.00 (no threshold).")

+    group.add_argument('--minimum-circle','-c',
+                      action="store", dest="ecotag:bubble_threshold",
+                      metavar='<CIRCLE_THRESHOLD>',
+                      default=0.99,
+                      type=float,
+                      help="Minimum identity considered for the assignment circle "
+                           "(sequence is assigned to the LCA of all sequences within a similarity circle of the best matches; "
+                           "the threshold for this circle is the highest value between <CIRCLE_THRESHOLD> and the best assignment score found). "
+                           "Give value as a normalized identity, e.g. 0.95 for an identity of 95%%. "
+                           "Default: 0.99.")
+
 def run(config):
        
    DMS.obi_atexit()
@ -66,9 +77,8 @@ def run(config):
    ref_view_name = ref[1]

    # Check that the threshold demanded is greater than or equal to the threshold used to build the reference database
-    if config['ecotag']['threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
-        print("Error: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).", 
-              config['ecotag']['threshold'], ref_dms[ref_view_name].comments["ref_db_threshold"])
+    if config['ecotag']['bubble_threshold'] < eval(ref_dms[ref_view_name].comments["ref_db_threshold"]) :
+        raise Exception(f"Error: The threshold demanded ({config['ecotag']['bubble_threshold']}) is lower than the threshold used to build the reference database ({float(ref_dms[ref_view_name].comments['ref_db_threshold'])}).")
    
    # Open the output: only the DMS
    output = open_uri(config['obi']['outputURI'],
@ -113,8 +123,9 @@ def run(config):
    if obi_ecotag(i_dms.name_with_full_path, tobytes(i_view_name), \
                  ref_dms.name_with_full_path, tobytes(ref_view_name), \
                  taxo_dms.name_with_full_path, tobytes(taxonomy_name), \
-                  tobytes(o_view_name), comments, 
-                  config['ecotag']['threshold']) < 0:
+                  tobytes(o_view_name), comments, \
+                  config['ecotag']['threshold'], \
+                  config['ecotag']['bubble_threshold']) < 0:
        raise Exception("Error running ecotag")
    
    # If the input and output DMS are not the same, export result view to output DMS
--- a/python/obitools3/commands/export.pyx
+++ b/python/obitools3/commands/export.pyx
@ -89,7 +89,7 @@ def run(config):

    if pb is not None:
        pb(i, force=True)
-    print("", file=sys.stderr)
+        print("", file=sys.stderr)

    # TODO save command in input dms?
    
--- a/python/obitools3/commands/import.pyx
+++ b/python/obitools3/commands/import.pyx
@ -77,6 +77,11 @@ def addOptions(parser):
                     help="Do a first readthrough of the dataset if it contains huge dictionaries (more than 100 keys) for "
                          "a much faster import. This option is not recommended and will slow down the import in any other case.")

+    group.add_argument('--space-priority',
+                     action="store_true", dest="import:space_priority",
+                     default=False,
+                     help="If importing a view into another DMS, do it by importing each line, saving disk space if the original view "
+                          "has a line selection associated.")

 def run(config):
    
@ -142,7 +147,7 @@ def run(config):
    else:
        v = None
    
-    if config['obi']['taxdump'] or isinstance(input[1], View): 
+    if config['obi']['taxdump'] or (isinstance(input[1], View) and not config['import']['space_priority']): 
        dms_only=True
    else:
        dms_only=False
@ -170,12 +175,15 @@ def run(config):
        logger("info", "Done.")
        return

-    # If importing a view between two DMS, use C API
-    if isinstance(input[1], View):
+    # If importing a view between two DMS and not wanting to save space if line selection in original view, use C API
+    if isinstance(input[1], View) and not config['import']['space_priority']:
        if obi_import_view(input[0].name_with_full_path, o_dms.name_with_full_path, input[1].name, tobytes((config['obi']['outputURI'].split('/'))[-1])) < 0 :
+            input[0].close(force=True)
+            output[0].close(force=True)
            raise Exception("Error importing a view in a DMS")
        o_dms.record_command_line(" ".join(sys.argv[1:]))
-        o_dms.close()
+        input[0].close(force=True)
+        output[0].close(force=True)
        logger("info", "Done.")
        return

--- a/python/obitools3/commands/test.pyx
+++ b/python/obitools3/commands/test.pyx
@ -23,6 +23,7 @@ from obitools3.dms.capi.obiview cimport NUC_SEQUENCE_COLUMN, \
 import shutil
 import string
 import random
+import sys
 from cpython.exc cimport PyErr_CheckSignals


@ -366,7 +367,7 @@ def random_new_view(config, infos, first=False):
            infos['view'] = View_NUC_SEQS.new(infos['dms'], random_unique_name(infos), comments=random_comments(config))   # TODO quality column
        else :
            infos['view'] = View.new(infos['dms'], random_unique_name(infos), comments=random_comments(config))   # TODO quality column
-        
+        infos['view'].write_config(config, "test", infos["command_line"], input_dms_name=[infos['dms'].name], input_view_name=["random"])
    print_test(config, repr(infos['view']))
    if v_to_clone is not None :
        if line_selection is None:
@ -441,7 +442,7 @@ def addOptions(parser):
                       default=20,
                       type=int,
                       help="Maximum length of tuples. "
-                            "Default: 200")
+                            "Default: 50")
  
    group.add_argument('--max_ini_col_count','-o',
                       action="store", dest="test:maxinicolcount",
@ -457,7 +458,7 @@ def addOptions(parser):
                       default=10000,
                       type=int,
                       help="Maximum number of lines in a column. "
-                            "Default: 10000")
+                            "Default: 1000")
 
    group.add_argument('--max_elts_per_line','-e',
                       action="store", dest="test:maxelts",
@ -497,7 +498,8 @@ def run(config):
                                    (b"OBI_SEQ", False): random_seq, (b"OBI_SEQ", True): random_seq_tuples,
                                    (b"OBI_STR", False): random_bytes, (b"OBI_STR", True): random_bytes_tuples
                                  },
-             'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view]
+             'tests': [test_set_and_get, test_add_col, test_delete_col, test_col_alias, test_new_view],
+             'command_line': " ".join(sys.argv[1:])
            }
 
    # TODO ???
--- a/python/obitools3/commands/uniq.pyx
+++ b/python/obitools3/commands/uniq.pyx
@ -354,6 +354,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
        key = mergedKeys[k]
        merged_col_name = mergedKeys_m[k]
        
+        if merged_infos[merged_col_name]['nb_elts'] == 1:
+            raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
+        
        if merged_col_name in view:
            i_col = view[merged_col_name]
        else:
--- a/python/obitools3/dms/capi/obiecotag.pxd
+++ b/python/obitools3/dms/capi/obiecotag.pxd
@ -11,4 +11,5 @@ cdef extern from "obi_ecotag.h" nogil:
                   const char* taxonomy_name,
                   const char* output_view_name,
                   const char* output_view_comments,
-                   double ecotag_threshold)
+                   double ecotag_threshold,
+                   double bubble_threshold)
--- a/python/obitools3/dms/column/column.pyx
+++ b/python/obitools3/dms/column/column.pyx
@ -40,7 +40,8 @@ from obitools3.utils cimport tobytes, \
 from obitools3.dms.column import typed_column
 
 from libc.stdlib cimport free
- 
+from libc.string cimport strcpy
+  
 import importlib
 import inspect
 import pkgutil
@ -97,6 +98,7 @@ cdef class Column(OBIWrapper) :
                   object    alias=b""):
        # TODO indexer_name?
        
+        cdef Column column
        cdef bytes  column_name_b = tobytes(column_name)
        cdef bytes  alias_b = tobytes(alias)
        cdef bytes  comments_b = str2bytes(json.dumps(bytes2str_object(comments)))
@ -132,13 +134,14 @@ cdef class Column(OBIWrapper) :
                         raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
                                                                               bytes2str(view.name)))
                    associated_column_name_b = NUC_SEQUENCE_COLUMN
-                    associated_column_version = view[NUC_SEQUENCE_COLUMN].version
+                    associated_column_version = view[NUC_SEQUENCE_COLUMN].version                    
                elif column_name == REVERSE_QUALITY_COLUMN:
                    if REVERSE_SEQUENCE_COLUMN not in view:
                         raise RuntimeError("Cannot create column %s in view %s: trying to create reverse quality column but no REVERSE_SEQUENCE column to associate it with in the view" % (bytes2str(column_name_b),
                                                                               bytes2str(view.name)))
                    associated_column_name_b = REVERSE_SEQUENCE_COLUMN
                    associated_column_version = view[REVERSE_SEQUENCE_COLUMN].version
+                    
        
        if (obi_view_add_column(view                      = view.pointer(),
                                column_name               = column_name_b,
@ -158,8 +161,19 @@ cdef class Column(OBIWrapper) :
                                create                    = True)<0):
            raise RuntimeError("Cannot create column %s in view %s" % (bytes2str(column_name_b),
                                                                       bytes2str(view.name)))
-
-        return Column.open(view, alias_b)
+        
+        column = Column.open(view, alias_b)
+        
+        # Automatically associate nuc sequence column to quality column if necessary
+        if data_type == OBI_QUAL:
+            if column_name == QUALITY_COLUMN:
+                view[NUC_SEQUENCE_COLUMN].associated_column_name = column.name
+                view[NUC_SEQUENCE_COLUMN].associated_column_version = column.version
+            elif column_name == REVERSE_QUALITY_COLUMN:
+                view[REVERSE_SEQUENCE_COLUMN].associated_column_name = column.name
+                view[REVERSE_SEQUENCE_COLUMN].associated_column_version = column.version
+        
+        return column
 
 
    @staticmethod
@ -407,6 +421,31 @@ cdef class Column(OBIWrapper) :
            raise OBIDeactivatedInstanceError()
        return obi_format_date(self.pointer().header.creation_date)

+
+    # associated_column name property getter and setter
+    @property
+    def associated_column_name(self):
+        if not self.active() :
+            raise OBIDeactivatedInstanceError()
+        return self.pointer().header.associated_column.column_name
+
+    @associated_column_name.setter
+    def associated_column_name(self, object new_name):
+        strcpy(self.pointer().header.associated_column.column_name, tobytes(new_name))
+
+
+    # associated_column version property getter and setter
+    @property
+    def associated_column_version(self):
+        if not self.active() :
+            raise OBIDeactivatedInstanceError()
+        return self.pointer().header.associated_column.version
+
+    @associated_column_version.setter
+    def associated_column_version(self, int new_version):
+        self.pointer().header.associated_column.version = new_version
+
+
    # comments property getter
    @property
    def comments(self):
--- a/python/obitools3/dms/view/view.pyx
+++ b/python/obitools3/dms/view/view.pyx
@ -7,6 +7,7 @@ cdef dict __VIEW_CLASS__= {}
 from libc.stdlib  cimport malloc

 from obitools3.apps.progress cimport ProgressBar  # @UnresolvedImport
+from obitools3.version import version

 from ..capi.obiview cimport Alias_column_pair_p, \
                            obi_new_view, \
@ -183,9 +184,15 @@ cdef class View(OBIWrapper) :
    
    
    @OBIWrapper.checkIsActive
-    def __repr__(self) :         
-        cdef str s = "#View name:\n{name:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name       = bytes2str(self.name),
-                                                                                                                         line_count = self.line_count)
+    def __repr__(self) :
+        cdef str s
+        if self.read_only:   # can read date   
+            s = "#View name:\n{name:s}\n#Date created:\n{date:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name       = bytes2str(self.name),
+                                                                                                                    line_count = self.line_count,
+                                                                                                                    date       = str(bytes2str_object(self.comments["Date created"])))        
+        else:
+            s = "#View name:\n{name:s}\n#Line count:\n{line_count:d}\n#Columns:\n".format(name       = bytes2str(self.name),
+                                                                                          line_count = self.line_count)
        for column_name in self.keys() :
            s = s + repr(self[column_name]) + '\n'
        return s
@ -434,6 +441,7 @@ cdef class View(OBIWrapper) :
            for i in range(len(input_view_name)):
                input_str.append(tostr(input_dms_name[i])+"/"+tostr(input_view_name[i]))
        comments["input_str"] = input_str
+        comments["version"] = version
        return bytes2str_object(comments)

    
--- a/python/obitools3/format/tab.pyx
+++ b/python/obitools3/format/tab.pyx
@ -5,6 +5,7 @@ from obitools3.dms.view.view cimport Line
 from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
 from obitools3.dms.column.column cimport Column_line, Column_multi_elts

+import sys

 cdef class TabFormat:
    
@ -26,18 +27,22 @@ cdef class TabFormat:
            
            if self.header and self.first_line:
                if isinstance(data.view[k], Column_multi_elts):
-                    for k2 in data.view[k].keys():
+                    keys = data.view[k].keys()
+                    keys.sort()
+                    for k2 in keys:
                        line.append(tobytes(k)+b':'+tobytes(k2))
                else:
                    line.append(tobytes(k))
            else:
                value = data[k]
                if isinstance(data.view[k], Column_multi_elts):
+                    keys = data.view[k].keys()
+                    keys.sort()
                    if value is None:  # all keys at None
-                        for k2 in data.view[k].keys(): # TODO could be much more efficient
+                        for k2 in keys: # TODO could be much more efficient
                            line.append(self.NAString)
                    else:
-                        for k2 in data.view[k].keys(): # TODO could be much more efficient
+                        for k2 in keys: # TODO could be much more efficient
                            if value[k2] is not None:
                                line.append(str2bytes(str(bytes2str_object(value[k2]))))  # genius programming
                            else:
--- a/python/obitools3/libalign/_solexapairend.pyx
+++ b/python/obitools3/libalign/_solexapairend.pyx
@ -259,7 +259,7 @@ def buildJoinedSequence(ali, reverse, seq, forward=None):
    seq[b"pairedend_limit"]=len(forward)
    seq[b"seq_length"] = ali.consensus_len
    seq[b"overlap_length"] = ali.overlap_len
-    if ali.consensus_len > 0:
+    if ali.overlap_len > 0:
        seq[b'score_norm']=round(float(ali.score)/ali.overlap_len, 3)
    else:
        seq[b"score_norm"]=0.0
--- a/python/obitools3/uri/decode.pyx
+++ b/python/obitools3/uri/decode.pyx
@ -276,11 +276,11 @@ def open_uri(uri,
            iseq = urib
            objclass = bytes
    else:  # TODO update uopen to be able to write? 
-        if urip.path == b'-':
+        if not urip.path or urip.path == b'-':
            file = sys.stdout.buffer
-        elif urip.path :
+        else:
            file = open(urip.path, 'wb')
-        
+
    if file is not None:
        qualifiers=parse_qs(urip.query)
        
--- a/python/obitools3/utils.pxd
+++ b/python/obitools3/utils.pxd
@ -2,7 +2,7 @@

 from obitools3.dms.capi.obitypes cimport obitype_t, index_t

-cpdef bytes format_separator(bytes format)
+cpdef bytes format_uniq_pattern(bytes format)
 cpdef int count_entries(file, bytes format)

 cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)
--- a/python/obitools3/utils.pyx
+++ b/python/obitools3/utils.pyx
@ -24,11 +24,11 @@ import glob
 import gzip


-cpdef bytes format_separator(bytes format):
+cpdef bytes format_uniq_pattern(bytes format):
    if format == b"fasta":
        return b"\n>"
    elif format == b"fastq":
-        return b"\n@"
+        return b"\n\+\n"
    elif format == b"ngsfilter" or format == b"tabular":
        return b"\n"
    elif format == b"genbank" or format == b"embl":
@ -42,7 +42,7 @@ cpdef bytes format_separator(bytes format):
 cpdef int count_entries(file, bytes format):
    
    try:
-        sep = format_separator(format)
+        sep = format_uniq_pattern(format)
        if sep is None:
            return -1
        sep = re.compile(sep)
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
                return -1
            mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            total_count += len(re.findall(sep, mmapped_file))
-            if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
+            if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
                total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
            
    except:
--- a/python/obitools3/version.py
+++ b/python/obitools3/version.py
@ -1,5 +1,5 @@
 major = 3
 minor = 0
-serial= '0b28'
+serial= '0b35'

 version ="%d.%d.%s" % (major,minor,serial)
--- a/src/obi_ecotag.c
+++ b/src/obi_ecotag.c
@ -218,7 +218,8 @@ int obi_ecotag(const char* dms_name,
 			   const char* taxonomy_name,
 			   const char* output_view_name,
 			   const char* output_view_comments,
-			   double ecotag_threshold)    // TODO different threshold for the similarity sphere around ref seqs
+			   double ecotag_threshold,
+			   double bubble_threshold)
 {

 	// For each sequence
@ -239,6 +240,7 @@ int obi_ecotag(const char* dms_name,
 	index_t         query_seq_idx, ref_seq_idx;
 	double          score, best_score;
 	double			threshold;
+	double			lca_threshold;
 	int             lcs_length;
 	int             ali_length;
 	Kmer_table_p    ktable;
@ -389,10 +391,10 @@ int obi_ecotag(const char* dms_name,
 		return -1;
 	}
 	free(db_threshold_str);
-	if (ecotag_threshold < db_threshold)
+	if (bubble_threshold < db_threshold)
 	{
 		fprintf(stderr, "\nError: The threshold demanded (%f) is lower than the threshold used to build the reference database (%f).\n\n",
-				ecotag_threshold, db_threshold);
+				bubble_threshold, db_threshold);
 		return -1;
 	}

@ -597,11 +599,16 @@ int obi_ecotag(const char* dms_name,
 		{
 			best_match_idx = best_match_array[j];

-			// Find the LCA for the chosen threshold
+			// Find the LCA for the highest threshold between best_score and the chosen bubble threshold
 			score_array = obi_get_array_with_col_p_in_view(ref_view, score_a_column, best_match_idx, &lca_array_length);

+			if (bubble_threshold < best_score)
+				lca_threshold = best_score;
+			else
+				lca_threshold = bubble_threshold;
+
 			k = 0;
-			while ((k < lca_array_length) && (score_array[k] >= best_score))
+			while ((k < lca_array_length) && (score_array[k] >= lca_threshold))
 				k++;

 			if (k>0)
--- a/src/obi_ecotag.h
+++ b/src/obi_ecotag.h
@ -42,12 +42,14 @@
 * @param output_view_name The name to give to the output view.
 * @param output_view_comments The comments to associate to the output view.
 * @param ecotag_threshold The threshold at which to assign.
+ * @param bubble_threshold The threshold at which to look for an LCA (i.e. minimum identity considered for the assignment circle);
+ *                         the threshold actually used will be the highest between this value and the best assignment score found.
 *
 * 	The algorithm works like this:
 * 		For each query sequence:
 *			Align with reference database
 *			Keep the indices of all the best matches
- *			For each kept index, get the LCA at that threshold as stored in the reference database, then the LCA of those LCAs
+ *			For each kept index, get the LCA at the highest threshold between bubble_threshold and the best assignment score found (as stored in the reference database), then the LCA of those LCAs
 *			Write result (max score, threshold, taxid and scientific name of the LCA assigned, list of the ids of the best matches)
 *
 * @returns A value indicating the success of the operation.
@ -65,7 +67,8 @@ int obi_ecotag(const char* dms_name,
 			   const char* taxonomy_name,
 			   const char* output_view_name,
 			   const char* output_view_comments,
-			   double ecotag_threshold);
+			   double ecotag_threshold,
+			   double bubble_threshold);


 #endif /* OBI_ECOTAG_H_ */
--- a/src/obidms.c
+++ b/src/obidms.c
@ -1659,6 +1659,12 @@ int obi_import_view(const char* dms_path_1, const char* dms_path_2, const char*
 	else	// Non-typed view
 		view_2 = obi_new_view(dms_2, view_name_2, NULL, NULL, (view_1->infos)->comments);

+	if (view_2 == NULL)
+	{
+		obidebug(1, "\nError creating the new view to import a view in a DMS");
+		return -1;
+	}
+
 	// Import line count
 	view_2->infos->line_count = view_1->infos->line_count;

--- a/src/obidmscolumn.c
+++ b/src/obidmscolumn.c
@ -1312,19 +1312,10 @@ OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
 		return NULL;
 	}

-	// Store the associated column reference if needed // TODO discuss cases
-	if (data_type == OBI_QUAL)
+	// Store the associated column reference if needed
+	if ((associated_column_name != NULL) && (*associated_column_name != '\0'))
 	{
-		if ((associated_column_name == NULL) || (*associated_column_name == '\0'))
-		{
-			obidebug(1, "\nError: The name of the associated column when creating a new column is NULL");
-			munmap(new_column->header, header_size);
-			close(column_file_descriptor);
-			free(new_column);
-			return NULL;
-		}
 		strcpy((header->associated_column).column_name, associated_column_name);
-
 		if (associated_column_version == -1)
 		{
 			obidebug(1, "\nError: The version of the associated column when creating a new column is not defined");
@ -1336,6 +1327,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p     dms,
 		(header->associated_column).version = associated_column_version;
 	}

+
 	// If the data type is OBI_STR, OBI_SEQ or OBI_QUAL, the associated obi_indexer is opened or created
 	if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ) || (returned_data_type == OBI_QUAL) || tuples)
 	{
@ -1733,16 +1725,32 @@ int obi_close_column(OBIDMS_column_p column)
 int obi_clone_column_indexer(OBIDMS_column_p column)
 {
 	char* new_indexer_name;
+	int i;

-	new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
-	if (new_indexer_name == NULL)
-		return -1;
-
-	column->indexer = obi_clone_indexer(column->indexer, new_indexer_name);	// TODO Need to lock this somehow?
-	if (column->indexer == NULL)
+	i=0;
+	while (true) // find avl name not already used
 	{
-		obidebug(1, "\nError cloning a column's indexer to make it writable");
-		return -1;
+		new_indexer_name = obi_build_indexer_name((column->header)->name, ((column->header)->version)+i);
+		if (new_indexer_name == NULL)
+			return -1;
+
+		column->indexer = obi_clone_indexer(column->indexer, new_indexer_name);	// TODO Need to lock this somehow?
+		if (column->indexer == NULL)
+		{
+			if (errno == EEXIST)
+			{
+				free(new_indexer_name);
+				i++;
+			}
+			else
+			{
+				free(new_indexer_name);
+				obidebug(1, "\nError cloning a column's indexer to make it writable");
+				return -1;
+			}
+		}
+		else
+			break;
 	}

 	strcpy((column->header)->indexer_name, new_indexer_name);
@ -2423,16 +2431,20 @@ char* obi_get_formatted_elements_names(OBIDMS_column_p column)
 }


-char* obi_column_formatted_infos(OBIDMS_column_p column)
+char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed)
 {
-	char* column_infos;
-	char* elt_names;
-
-	column_infos = malloc(1024 * sizeof(char));
+	char* column_infos = NULL;
+	char* elt_names = NULL;
+	char* column_name = NULL;
+	 // should be in view.c because alias exists in the context of view
+	column_infos = malloc(2048 * sizeof(char)); // TODO

 	elt_names = obi_get_formatted_elements_names(column);


+//  "column_name, data type: OBI_TYPE, element names: [formatted element names](, all comments)"
+
+
 	free(elt_names);
 	return column_infos;
 }
--- a/src/obiview.c
+++ b/src/obiview.c
@ -254,11 +254,15 @@ static int update_lines(Obiview_p view, index_t line_count);
 /**
 * @brief Internal function to clone a column in the context of a view.
 *
+ * Used to edit a closed column.
+ *
 * Clones with the right line selection and replaces the cloned columns with the new ones in the view.
 * If there is a line selection, all columns have to be cloned, otherwise only the column of interest is cloned.
 *
 * @param view A pointer on the view.
 * @param column_name The name of the column in the view that should be cloned.
+ * @param clone_associated Whether to clone the associated column
+ *  	  (should always be true except when calling from the function itself to avoid infinite recursion).
 *
 * @returns A pointer on the new column.
 * @retval NULL if an error occurred.
@ -266,7 +270,7 @@ static int update_lines(Obiview_p view, index_t line_count);
 * @since February 2016
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
-static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name);
+static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated);


 /**
@ -845,7 +849,7 @@ static int update_lines(Obiview_p view, index_t line_count)
 		// Clone the column first if needed
 		if (!(column->writable))
 		{
-			column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias);
+			column = clone_column_in_view(view, (((view->infos)->column_references)[i]).alias, true);
 			if (column == NULL)
 			{
 				obidebug(1, "\nError cloning a column in a view when updating its line count");
@ -870,12 +874,14 @@ static int update_lines(Obiview_p view, index_t line_count)
 }


-static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name)
+static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_name, bool clone_associated)
 {
-	int i;
+	int i, j;
 	OBIDMS_column_p column = NULL;
 	OBIDMS_column_p new_column = NULL;
 	OBIDMS_column_p column_buffer;
+	OBIDMS_column_p associated_cloned_column = NULL;
+	char* associated_column_alias = NULL;

 	// Check that the view is not read-only
 	if (view->read_only)
@ -916,11 +922,62 @@ static OBIDMS_column_p clone_column_in_view(Obiview_p view, const char* column_n
 				return NULL;
 			}

+			// Look for associated column to clone and reassociate
+			if ((column_buffer->header->associated_column).column_name[0] != '\0')
+			{
+				// Get the associated column alias
+				j=0;
+				while (((strcmp((((view->infos)->column_references)[j]).column_refs.column_name, (column_buffer->header->associated_column).column_name)) ||
+						((((view->infos)->column_references)[j]).column_refs.version != (column_buffer->header->associated_column).version)) &&
+						j<(view->infos)->column_count)  // TODO function for that
+					j++;
+
+				if (j == (view->infos)->column_count) // not found
+				{
+					obi_set_errno(OBIVIEW_ERROR);
+					obidebug(1, "\nCould not find associated column when cloning a column for editing");
+					return NULL;
+				}
+
+				// No line selection: only this column is cloned, clone and reassociate the associated column
+				if ((view->line_selection == NULL) && clone_associated)
+				{
+					associated_column_alias = (((view->infos)->column_references)[j]).alias;
+					// Clone the associated column
+					associated_cloned_column = clone_column_in_view(view, associated_column_alias, false);
+					// Reassociate both ways
+					strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
+					(associated_cloned_column->header->associated_column).version = column->header->version;
+					strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
+					(column->header->associated_column).version = associated_cloned_column->header->version;
+				}
+				else
+				{
+					// Line selection: all columns are cloned, check if associated column has been cloned previously (it precedes this one in the list) to reassociate
+					if (j < i)
+					{
+						// Get pointer to associated column
+						associated_cloned_column = *((OBIDMS_column_p*)ll_get(view->columns, j));
+						if (associated_cloned_column == NULL)
+						{
+							obi_set_errno(OBIVIEW_ERROR);
+							obidebug(1, "\nError getting a column to clone from the linked list of column pointers of a view");
+							return NULL;
+						}
+						// Reassociate both ways
+						strcpy((associated_cloned_column->header->associated_column).column_name, column->header->name);
+						(associated_cloned_column->header->associated_column).version = column->header->version;
+						strcpy((column->header->associated_column).column_name, associated_cloned_column->header->name);
+						(column->header->associated_column).version = associated_cloned_column->header->version;
+					}
+				}
+			}
+
 			// Close old cloned column
 			obi_close_column(column_buffer);

 			if (!strcmp((((view->infos)->column_references)[i]).alias, column_name))
-				// Found the column to return
+				// Get the column to return
 				new_column = column;
 		}
 	}
@ -1193,7 +1250,7 @@ static int prepare_to_set_value_in_column(Obiview_p view, OBIDMS_column_p* colum
 			return -1;
 		}

-		(*column_pp) = clone_column_in_view(view, column_name);
+		(*column_pp) = clone_column_in_view(view, column_name, true);
 		if ((*column_pp) == NULL)
 		{
 			obidebug(1, "\nError trying to clone a column to modify it");
@ -1844,6 +1901,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
 {
 	Obiview_p 		view;
 	OBIDMS_column_p associated_nuc_column;
+	OBIDMS_column_p associated_qual_column;
 	int				nb_predicates;

 	if (view_to_clone != NULL)
@ -1896,6 +1954,10 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
 				obidebug(1, "Error adding an obligatory column in a nucleotide sequences view");
 				return NULL;
 			}
+			// Associating both ways: associating nuc sequences column to quality column
+			associated_qual_column = obi_view_get_column(view, QUALITY_COLUMN);
+			strcpy((associated_nuc_column->header->associated_column).column_name, associated_qual_column->header->name);
+			(associated_nuc_column->header->associated_column).version = associated_qual_column->header->version;
 		}
 	}

@ -1922,7 +1984,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
 	(view->predicate_functions)[(view->nb_predicates)] = view_has_nuc_sequence_column;
 	(view->predicate_functions)[(view->nb_predicates) + 1] = view_has_id_column;
 	(view->predicate_functions)[(view->nb_predicates) + 2] = view_has_definition_column;
-//	if (quality_column)   # TODO discuss. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
+//	if (quality_column)   # TODO fix by triggering predicate deleting if quality deleting. Commented bc for example with obi annotate, clone view so clone predicate, then modify seq, so quality is deleted, and predicate boom
 //		(view->predicate_functions)[(view->nb_predicates) + 3] = view_has_quality_column;

 	view->nb_predicates = nb_predicates;
@ -2212,7 +2274,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name)

 // TODO return a pointer on the column?
 int obi_view_add_column(Obiview_p    view,
-						char*  column_name,
+						char*        column_name,
 						obiversion_t version_number,
 						const char*  alias,
 						OBIType_t    data_type,
--- a/src/obiview.h
+++ b/src/obiview.h
@ -406,7 +406,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
 * @param associated_column_name The name of the associated column if there is one (otherwise NULL or ""), if the column is created.
 * @param associated_column_version The version of the associated column if there is one (otherwise -1), if the column is created.
 * @param comments Optional comments associated with the column if it is created (NULL or "" if no comments associated).
- * @param create Whether the column should be created (create == true) or opened (create == false).
+ * @param create Whether the column should be created (create == true) or already exists (create == false).
 *
 * @returns A value indicating the success of the operation.
 * @retval 0 if the operation was successfully completed.
@ -416,7 +416,7 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
 int obi_view_add_column(Obiview_p    view,
-						char*  column_name,
+						char*        column_name,
 						obiversion_t version_number,
 						const char*  alias,
 						OBIType_t    data_type,
Author	SHA1	Message	Date
Celine Mercier	c4696ac865	ecotag: added separate threshold for minimum circle identity (and switch to version 3.0.0b35)	2020-09-25 16:22:09 +02:00
Celine Mercier	11a0945a9b	obi cat: fixed open file descriptor leak and switch to version 3.0.0b34	2020-08-28 10:41:22 +02:00
Celine Mercier	f23c40c905	obi cat: fixed a bug introduced in 3.0.0b28 and switch to version 3.0.0b33	2020-08-27 18:38:16 +02:00
Celine Mercier	f99fc13b75	switch to version 3.0.0b32	2020-08-13 18:17:09 +02:00
Celine Mercier	1da6aac1b8	C: patch for failed creation of AVL with errno EEXIST	2020-08-12 17:55:08 +02:00
Celine Mercier	159803b40a	export: now automatically sorts dictionary keys alphabetically for tab/csv output	2020-07-31 16:43:35 +02:00
Celine Mercier	7dcbc34017	import: fixed entry count estimation when importing fastq files	2020-07-30 16:56:36 +02:00
Celine Mercier	db2202c8b4	uniq: added a check to make sure that there is more than one element for one tag when merging its information	2020-07-30 16:14:37 +02:00
Celine Mercier	d33ff97846	switch to version 3.0.0b31	2020-07-28 09:31:19 +02:00
Celine Mercier	1dcdf69f1f	export: fixed a bug introduced in version 3.0.0b28	2020-07-28 09:31:05 +02:00
Celine Mercier	dec114eed6	Python: added "date created" information in view representation	2020-07-27 17:38:45 +02:00
Celine Mercier	f36691053b	Python: added the OBITools3 version that generated the view in view comments	2020-07-27 16:50:00 +02:00
Celine Mercier	f2aa5fcf8b	alignpairedend: fixed division by 0 bug and switch to version 3.0.0b30	2020-07-27 10:15:59 +02:00
Celine Mercier	bccb3e6874	switch to version 3.0.0b29	2020-07-26 17:40:26 +02:00
Celine Mercier	f5a17bea68	C: added a missing error check	2020-07-26 17:39:55 +02:00
Celine Mercier	e28507639a	C and Cython: fixed and improved the associated columns system	2020-07-26 17:39:29 +02:00
Celine Mercier	e6feac93fe	obi test: made less heavy to be faster	2020-07-26 17:37:21 +02:00
Celine Mercier	50b292b489	obi import: added --space-priority option to import a view line by line	2020-07-26 17:36:52 +02:00