switch to version 3.0.0b32

C: patch for failed creation of AVL with errno EEXIST
export: now automatically sorts dictionary keys alphabetically for tab/csv output
2020-08-13 18:17:09 +02:00 · 2020-08-12 17:55:08 +02:00 · 2020-07-31 16:43:35 +02:00 · 2020-07-30 16:56:36 +02:00 · 2020-07-30 16:14:37 +02:00
7 changed files with 51 additions and 23 deletions
--- a/python/obitools3/commands/uniq.pyx
+++ b/python/obitools3/commands/uniq.pyx
@ -354,6 +354,9 @@ cdef uniq_sequences(View_NUC_SEQS view, View_NUC_SEQS o_view, ProgressBar pb, di
        key = mergedKeys[k]
        merged_col_name = mergedKeys_m[k]
        
+        if merged_infos[merged_col_name]['nb_elts'] == 1:
+            raise Exception("Can't merge information from a tag with only one element (e.g. one sample ; don't use -m option)")
+        
        if merged_col_name in view:
            i_col = view[merged_col_name]
        else:
--- a/python/obitools3/format/tab.pyx
+++ b/python/obitools3/format/tab.pyx
@ -5,6 +5,7 @@ from obitools3.dms.view.view cimport Line
 from obitools3.utils cimport bytes2str_object, str2bytes, tobytes
 from obitools3.dms.column.column cimport Column_line, Column_multi_elts

+import sys

 cdef class TabFormat:
    
@ -26,18 +27,22 @@ cdef class TabFormat:
            
            if self.header and self.first_line:
                if isinstance(data.view[k], Column_multi_elts):
-                    for k2 in data.view[k].keys():
+                    keys = data.view[k].keys()
+                    keys.sort()
+                    for k2 in keys:
                        line.append(tobytes(k)+b':'+tobytes(k2))
                else:
                    line.append(tobytes(k))
            else:
                value = data[k]
                if isinstance(data.view[k], Column_multi_elts):
+                    keys = data.view[k].keys()
+                    keys.sort()
                    if value is None:  # all keys at None
-                        for k2 in data.view[k].keys(): # TODO could be much more efficient
+                        for k2 in keys: # TODO could be much more efficient
                            line.append(self.NAString)
                    else:
-                        for k2 in data.view[k].keys(): # TODO could be much more efficient
+                        for k2 in keys: # TODO could be much more efficient
                            if value[k2] is not None:
                                line.append(str2bytes(str(bytes2str_object(value[k2]))))  # genius programming
                            else:
--- a/python/obitools3/utils.pxd
+++ b/python/obitools3/utils.pxd
@ -2,7 +2,7 @@

 from obitools3.dms.capi.obitypes cimport obitype_t, index_t

-cpdef bytes format_separator(bytes format)
+cpdef bytes format_uniq_pattern(bytes format)
 cpdef int count_entries(file, bytes format)

 cdef obi_errno_to_exception(index_t line_nb=*, object elt_id=*, str error_message=*)
--- a/python/obitools3/utils.pyx
+++ b/python/obitools3/utils.pyx
@ -24,11 +24,11 @@ import glob
 import gzip


-cpdef bytes format_separator(bytes format):
+cpdef bytes format_uniq_pattern(bytes format):
    if format == b"fasta":
        return b"\n>"
    elif format == b"fastq":
-        return b"\n@"
+        return b"\n\+\n"
    elif format == b"ngsfilter" or format == b"tabular":
        return b"\n"
    elif format == b"genbank" or format == b"embl":
@ -42,7 +42,7 @@ cpdef bytes format_separator(bytes format):
 cpdef int count_entries(file, bytes format):
    
    try:
-        sep = format_separator(format)
+        sep = format_uniq_pattern(format)
        if sep is None:
            return -1
        sep = re.compile(sep)
@ -72,7 +72,7 @@ cpdef int count_entries(file, bytes format):
                return -1
            mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            total_count += len(re.findall(sep, mmapped_file))
-            if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank":
+            if format != b"ngsfilter" and format != b"tabular" and format != b"embl" and format != b"genbank" and format != b"fastq":
                total_count += 1 # adding +1 for 1st entry because separators include \n (ngsfilter and tabular already count one more because of last \n)
            
    except:
--- a/python/obitools3/version.py
+++ b/python/obitools3/version.py
@ -1,5 +1,5 @@
 major = 3
 minor = 0
-serial= '0b31'
+serial= '0b32'

 version ="%d.%d.%s" % (major,minor,serial)
--- a/src/obidmscolumn.c
+++ b/src/obidmscolumn.c
@ -1725,17 +1725,33 @@ int obi_close_column(OBIDMS_column_p column)
 int obi_clone_column_indexer(OBIDMS_column_p column)
 {
 	char* new_indexer_name;
+	int i;

-	new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
+	i=0;
+	while (true) // find avl name not already used
+	{
+		new_indexer_name = obi_build_indexer_name((column->header)->name, ((column->header)->version)+i);
 		if (new_indexer_name == NULL)
 			return -1;

 		column->indexer = obi_clone_indexer(column->indexer, new_indexer_name);	// TODO Need to lock this somehow?
 		if (column->indexer == NULL)
 		{
+			if (errno == EEXIST)
+			{
+				free(new_indexer_name);
+				i++;
+			}
+			else
+			{
+				free(new_indexer_name);
 				obidebug(1, "\nError cloning a column's indexer to make it writable");
 				return -1;
 			}
+		}
+		else
+			break;
+	}

 	strcpy((column->header)->indexer_name, new_indexer_name);

@ -2415,16 +2431,20 @@ char* obi_get_formatted_elements_names(OBIDMS_column_p column)
 }


-char* obi_column_formatted_infos(OBIDMS_column_p column)
+char* obi_column_formatted_infos(OBIDMS_column_p column, bool detailed)
 {
-	char* column_infos;
-	char* elt_names;
-
-	column_infos = malloc(1024 * sizeof(char));
+	char* column_infos = NULL;
+	char* elt_names = NULL;
+	char* column_name = NULL;
+	 // should be in view.c because alias exists in the context of view
+	column_infos = malloc(2048 * sizeof(char)); // TODO

 	elt_names = obi_get_formatted_elements_names(column);


+//  "column_name, data type: OBI_TYPE, element names: [formatted element names](, all comments)"
+
+
 	free(elt_names);
 	return column_infos;
 }
Author	SHA1	Message	Date
Celine Mercier	f99fc13b75	switch to version 3.0.0b32	2020-08-13 18:17:09 +02:00
Celine Mercier	1da6aac1b8	C: patch for failed creation of AVL with errno EEXIST	2020-08-12 17:55:08 +02:00
Celine Mercier	159803b40a	export: now automatically sorts dictionary keys alphabetically for tab/csv output	2020-07-31 16:43:35 +02:00
Celine Mercier	7dcbc34017	import: fixed entry count estimation when importing fastq files	2020-07-30 16:56:36 +02:00
Celine Mercier	db2202c8b4	uniq: added a check to make sure that there is more than one element for one tag when merging its information	2020-07-30 16:14:37 +02:00