diff --git a/python/obitools3/dms/capi/obidmscolumn.pxd b/python/obitools3/dms/capi/obidmscolumn.pxd index 88bc563..900a0a1 100644 --- a/python/obitools3/dms/capi/obidmscolumn.pxd +++ b/python/obitools3/dms/capi/obidmscolumn.pxd @@ -32,6 +32,7 @@ cdef extern from "obidmscolumn.h" nogil: OBIType_t returned_data_type OBIType_t stored_data_type bint tuples + bint to_eval time_t creation_date obiversion_t version obiversion_t cloned_from diff --git a/python/obitools3/dms/capi/obiview.pxd b/python/obitools3/dms/capi/obiview.pxd index 9decb64..9272a60 100644 --- a/python/obitools3/dms/capi/obiview.pxd +++ b/python/obitools3/dms/capi/obiview.pxd @@ -86,7 +86,9 @@ cdef extern from "obiview.h" nogil: index_t nb_lines, index_t nb_elements_per_line, char* elements_names, + bint elt_names_formatted, bint tuples, + bint to_eval, const_char_p indexer_name, const_char_p associated_column_name, obiversion_t associated_column_version, diff --git a/python/obitools3/dms/column/typed_column/str.pyx b/python/obitools3/dms/column/typed_column/str.pyx index ba672f2..4fd9585 100644 --- a/python/obitools3/dms/column/typed_column/str.pyx +++ b/python/obitools3/dms/column/typed_column/str.pyx @@ -56,6 +56,9 @@ cdef class Column_str(Column_idx): obi_errno_to_exception(obi_errno, line_nb=line_nb, elt_id=None, error_message="Problem getting a value from a column") if value == OBIStr_NA : result = None + # For columns containing character strings that should be evaluated: + elif self.to_eval: + result = eval(value) else : result = value # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. 
return result diff --git a/python/obitools3/dms/view/view.pyx b/python/obitools3/dms/view/view.pyx index 1370d92..3495c7f 100644 --- a/python/obitools3/dms/view/view.pyx +++ b/python/obitools3/dms/view/view.pyx @@ -330,6 +330,7 @@ cdef class View(OBIWrapper) : line = self[idx] for k in item : # If setting from another View Line and the column doesn't exist, create it based on the informations from the other View + # TODO use clone_column if isinstance(item, Line) and tostr(k) not in self: col = item.view[k] Column.new_column(self, @@ -337,6 +338,8 @@ cdef class View(OBIWrapper) : col.data_type_int, nb_elements_per_line = col.nb_elements_per_line, elements_names = col.elements_names, + tuples = col.tuples, + to_eval = col.to_eval, comments = col.comments, alias=k ) diff --git a/src/obi_align.c b/src/obi_align.c index d43c55c..036d59b 100644 --- a/src/obi_align.c +++ b/src/obi_align.c @@ -154,35 +154,35 @@ static int create_alignment_output_columns(Obiview_p output_view, bool normalize, int reference, bool similarity_mode) { // Create the column for the ids of the 1st sequence aligned - if (obi_view_add_column(output_view, ID1_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, id1_indexer_name, NULL, -1, ID1_COLUMN_COMMENTS, true) < 0) + if (obi_view_add_column(output_view, ID1_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, id1_indexer_name, NULL, -1, ID1_COLUMN_COMMENTS, true) < 0) { obidebug(1, "\nError creating the first column for the sequence ids when aligning"); return -1; } // Create the column for the ids of the 2nd sequence aligned - if (obi_view_add_column(output_view, ID2_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, id2_indexer_name, NULL, -1, ID2_COLUMN_COMMENTS, true) < 0) + if (obi_view_add_column(output_view, ID2_COLUMN_NAME, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, id2_indexer_name, NULL, -1, ID2_COLUMN_COMMENTS, true) < 0) { obidebug(1, "\nError creating the second column for the sequence ids when aligning"); return 
-1; } // Create the column for the index (in the input view) of the first sequences aligned - if (obi_view_add_column(output_view, IDX1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, NULL, NULL, -1, IDX1_COLUMN_COMMENTS, true) < 0) + if (obi_view_add_column(output_view, IDX1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, IDX1_COLUMN_COMMENTS, true) < 0) { obidebug(1, "\nError creating the first column for the sequence indices when aligning"); return -1; } // Create the column for the index (in the input view) of the second sequences aligned - if (obi_view_add_column(output_view, IDX2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, NULL, NULL, -1, IDX2_COLUMN_COMMENTS, true) < 0) + if (obi_view_add_column(output_view, IDX2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, IDX2_COLUMN_COMMENTS, true) < 0) { obidebug(1, "\nError creating the second column for the sequence indices when aligning"); return -1; } // Create the column for the LCS length - if (obi_view_add_column(output_view, LCS_LENGTH_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, NULL, NULL, -1, LCS_LENGTH_COLUMN_COMMENTS, true) < 0) + if (obi_view_add_column(output_view, LCS_LENGTH_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, LCS_LENGTH_COLUMN_COMMENTS, true) < 0) { obidebug(1, "\nError creating the column for the LCS length when aligning"); return -1; @@ -191,7 +191,7 @@ static int create_alignment_output_columns(Obiview_p output_view, // Create the column for the alignment length if it is computed if ((reference == ALILEN) && (normalize || !similarity_mode)) { - if (obi_view_add_column(output_view, ALI_LENGTH_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, NULL, NULL, -1, ALI_LENGTH_COLUMN_COMMENTS, true) < 0) + if (obi_view_add_column(output_view, ALI_LENGTH_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, ALI_LENGTH_COLUMN_COMMENTS, true) < 0) { 
obidebug(1, "\nError creating the column for the alignment length when aligning"); return -1; @@ -200,7 +200,7 @@ static int create_alignment_output_columns(Obiview_p output_view, // Create the column for the alignment score if (normalize) { - if (obi_view_add_column(output_view, SCORE_COLUMN_NAME, -1, NULL, OBI_FLOAT, 0, 1, NULL, false, NULL, NULL, -1, SCORE_COLUMN_NAME, true) < 0) + if (obi_view_add_column(output_view, SCORE_COLUMN_NAME, -1, NULL, OBI_FLOAT, 0, 1, NULL, false, false, false, NULL, NULL, -1, SCORE_COLUMN_NAME, true) < 0) { obidebug(1, "\nError creating the column for the score when aligning"); return -1; @@ -208,7 +208,7 @@ static int create_alignment_output_columns(Obiview_p output_view, } else { - if (obi_view_add_column(output_view, SCORE_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, NULL, NULL, -1, SCORE_COLUMN_NAME, true) < 0) + if (obi_view_add_column(output_view, SCORE_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, SCORE_COLUMN_NAME, true) < 0) { obidebug(1, "\nError creating the column for the score when aligning"); return -1; @@ -218,14 +218,14 @@ static int create_alignment_output_columns(Obiview_p output_view, if (print_seq) { // Create the column for the first sequences aligned - if (obi_view_add_column(output_view, SEQ1_COLUMN_NAME, -1, NULL, OBI_SEQ, 0, 1, NULL, false, seq1_indexer_name, NULL, -1, SEQ1_COLUMN_COMMENTS, true) < 0) + if (obi_view_add_column(output_view, SEQ1_COLUMN_NAME, -1, NULL, OBI_SEQ, 0, 1, NULL, false, false, false, seq1_indexer_name, NULL, -1, SEQ1_COLUMN_COMMENTS, true) < 0) { obidebug(1, "\nError creating the first column for the sequences when aligning"); return -1; } // Create the column for the second sequences aligned - if (obi_view_add_column(output_view, SEQ2_COLUMN_NAME, -1, NULL, OBI_SEQ, 0, 1, NULL, false, seq2_indexer_name, NULL, -1, SEQ2_COLUMN_COMMENTS, true) < 0) + if (obi_view_add_column(output_view, SEQ2_COLUMN_NAME, -1, NULL, OBI_SEQ, 0, 1, NULL, false, 
false, false, seq2_indexer_name, NULL, -1, SEQ2_COLUMN_COMMENTS, true) < 0) { obidebug(1, "\nError creating the second column for the sequences when aligning"); return -1; @@ -234,14 +234,14 @@ static int create_alignment_output_columns(Obiview_p output_view, // if (print_count) // TODO count columns not implemented yet // { // // Create the column for the count of the first sequences aligned -// if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0) +// if (obi_view_add_column(output_view, COUNT1_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT1_COLUMN_COMMENTS, true) < 0) // { // obidebug(1, "\nError creating the first column for the sequence counts when aligning"); // return -1; // } // // // Create the column for the count of the second sequences aligned -// if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0) +// if (obi_view_add_column(output_view, COUNT2_COLUMN_NAME, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, COUNT2_COLUMN_COMMENTS, true) < 0) // { // obidebug(1, "\nError creating the second column for the sequence counts when aligning"); // return -1; diff --git a/src/obidms.c b/src/obidms.c index 540004f..e8f6eb7 100644 --- a/src/obidms.c +++ b/src/obidms.c @@ -1036,8 +1036,8 @@ obiversion_t obi_import_column(const char* dms_path_1, const char* dms_path_2, c // Create new column column_2 = obi_create_column(dms_2, column_name, header_1->returned_data_type, header_1->line_count, header_1->nb_elements_per_line, header_1->elements_names, true, header_1->tuples, - new_avl_name, (header_1->associated_column).column_name, (header_1->associated_column).version, - header_1->comments); + header_1->to_eval, new_avl_name, (header_1->associated_column).column_name, + (header_1->associated_column).version, header_1->comments); 
if (column_2 == NULL) { @@ -1254,6 +1254,8 @@ int obi_import_view(const char* dms_path_1, const char* dms_path_2, const char* 0, NULL, false, + false, + false, NULL, NULL, -1, diff --git a/src/obidmscolumn.c b/src/obidmscolumn.c index d0ee29b..eb146cd 100644 --- a/src/obidmscolumn.c +++ b/src/obidmscolumn.c @@ -139,12 +139,11 @@ static char* build_default_elements_names(index_t nb_elements_per_line); * column with '\0' as separator (e.g. "0\01\02\0...\0n\0"). * * @param elements_names The character string formatted with ';' as separator (e.g. "0;1;2;...;n\0"). - * @param elts_names_length A pointer on an integer where the function will store the length of the character string. * * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -static void format_elements_names(char* elements_names, int* elts_names_length); +static void format_elements_names(char* elements_names); /** @@ -188,7 +187,7 @@ static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n * * @param column A pointer as returned by obi_create_column(). * @param elements_names The names of the elements as formatted by format_elements_names(). - * @param elts_names_length The length of elements_names. + * @param elts_names_length The length of elements_names including the two terminal '\0's. * * @retval 0 if the operation was successfully completed. * @retval -1 if an error occurred. 
@@ -196,7 +195,19 @@ static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n * @since July 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length); +static int set_elements_names(OBIDMS_column_p column, char* elements_names, int64_t elts_names_length, index_t nb_elements_per_line); + + +/** + * @brief Internal function reading the information related to the elements names + * of the lines of a column in the header of the OBIDMS column structure. + * + * @param header A pointer on the header of the column. + * + * @since December 2017 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +static void read_elt_names_informations(OBIDMS_column_header_p header); /** @@ -224,7 +235,7 @@ static index_t check_elt_names_count(const char* elements_names, bool elt_names_ * @since January 2017 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -static int get_formatted_elt_names_length(const char* elements_names); +static int get_formatted_elt_names_length(const char* elements_names, index_t nb_elements); /** @@ -521,7 +532,15 @@ static char* build_default_elements_names(index_t nb_elements_per_line) int i; int len; - elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char)); +// if (nb_elements_per_line > NB_ELTS_MAX_IF_DEFAULT_NAME) +// { +// obi_set_errno(OBICOL_UNKNOWN_ERROR); +// obidebug(1, "\nError: too many elements per line to use the default names (max = %d elements)", NB_ELTS_MAX_IF_DEFAULT_NAME); +// return NULL; +// } + + // TODO compute the exact buffer size instead of assuming at most 10 characters per default element name + elements_names = (char*) malloc(nb_elements_per_line * 10 * sizeof(char)); if (elements_names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); @@ -529,13 +548,6 @@ static char* build_default_elements_names(index_t nb_elements_per_line) return NULL; } - if (nb_elements_per_line > NB_ELTS_MAX_IF_DEFAULT_NAME) - { - obi_set_errno(OBICOL_UNKNOWN_ERROR); - obidebug(1, 
"\nError: too many elements per line to use the default names (max = %d elements)", NB_ELTS_MAX_IF_DEFAULT_NAME); - return NULL; - } - len = 0; for (i = 0; i < nb_elements_per_line; i++) len += sprintf(elements_names+len, "%d;", i); @@ -549,14 +561,15 @@ static char* build_default_elements_names(index_t nb_elements_per_line) -static void format_elements_names(char* elements_names, int* elts_names_length) +static void format_elements_names(char* elements_names) { - int i; + int i; + int64_t elts_names_length; - *elts_names_length = strlen(elements_names); + elts_names_length = strlen(elements_names); // Replace the ';' with '\0' - for (i=0; i < *elts_names_length; i++) + for (i=0; i < elts_names_length; i++) { if (elements_names[i] == ';') elements_names[i] = '\0'; @@ -570,11 +583,11 @@ static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_s char* name1=NULL; char* name2=NULL; - int name1_idx; - int name2_idx; + index_t name1_idx; + index_t name2_idx; - int name1_sort_idx = *((int*)n1_sort_idx); - int name2_sort_idx = *((int*)n2_sort_idx); + index_t name1_sort_idx = *((index_t*)n1_sort_idx); + index_t name2_sort_idx = *((index_t*)n2_sort_idx); OBIDMS_column_header_p header = (OBIDMS_column_header_p) h; name1_idx = (header->elements_names_idx)[name1_sort_idx]; @@ -590,10 +603,10 @@ static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_s static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h) { - char* name2=NULL; - int name2_idx; + char* name2=NULL; + index_t name2_idx; - int name2_sort_idx = *((int*)n2_sort_idx); + index_t name2_sort_idx = *((index_t*)n2_sort_idx); OBIDMS_column_header_p header = (OBIDMS_column_header_p) h; name2_idx = (header->elements_names_idx)[name2_sort_idx]; @@ -603,56 +616,67 @@ static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n } - -static int set_elements_names(OBIDMS_column_p column, char* elements_names, int 
elts_names_length) +static int set_elements_names(OBIDMS_column_p column, char* elements_names, int64_t elts_names_length, index_t nb_elements) { - int i, j; + OBIDMS_column_header_p header; + index_t i, j; - // Check that the elements names are not too long - if (elts_names_length+2 > ELEMENTS_NAMES_MAX) - { - obi_set_errno(OBICOL_UNKNOWN_ERROR); - obidebug(1, "\nError: element names too long (%d, max: %d)", elts_names_length+2, ELEMENTS_NAMES_MAX); - return -1; - } - - // Copy the elements names in the header - memcpy((column->header)->elements_names, elements_names, elts_names_length*sizeof(char)); - - // Terminal characters - (column->header)->elements_names[elts_names_length] = '\0'; - (column->header)->elements_names[elts_names_length + 1] = '\0'; + header = column->header; // Store the length of the character array containing the elements names - (column->header)->elements_names_length = elts_names_length; + header->elements_names_length = elts_names_length; + // Store the pointers pointing to the different elements stored in the memory arena + header->elements_names = (char*)&(header->mem_arena)[0]; + header->elements_names_idx = (index_t*)&((char*)(header->mem_arena) + elts_names_length)[0]; + header->sorted_elements_idx = (header->elements_names_idx) + nb_elements; + + // Copy the elements names in the header + memcpy(header->elements_names, elements_names, (elts_names_length-2)*sizeof(char)); + + // Terminal characters + header->elements_names[elts_names_length - 2] = '\0'; + header->elements_names[elts_names_length - 1] = '\0'; // Build the elements names index i = 0; j = 0; // Index the first element name - ((column->header)->elements_names_idx)[j] = i; - ((column->header)->sorted_elements_idx)[j] = j; + (header->elements_names_idx)[j] = i; + (header->sorted_elements_idx)[j] = j; i++; j++; - while (i < elts_names_length) + + while (i < elts_names_length-2) { if (elements_names[i] == '\0') { // Index new element name - 
((column->header)->elements_names_idx)[j] = i+1; - ((column->header)->sorted_elements_idx)[j] = j; + (header->elements_names_idx)[j] = i+1; + (header->sorted_elements_idx)[j] = j; j++; } i++; } // Build the sorted index - qsort_user_data((column->header)->sorted_elements_idx, j, sizeof(int), column->header, cmp_elements_names_with_idx); + qsort_user_data(header->sorted_elements_idx, j, sizeof(index_t), column->header, cmp_elements_names_with_idx); return 0; } +static void read_elt_names_informations(OBIDMS_column_header_p header) +{ + int64_t elts_names_length; + + elts_names_length = header->elements_names_length; + header->elements_names = (char*)&(header->mem_arena)[0]; + header->elements_names_idx = (index_t*)&((char*)(header->mem_arena) + elts_names_length)[0]; + header->sorted_elements_idx = (index_t*)&((header->elements_names_idx) + (header->nb_elements_per_line))[0]; +} + + + static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted) { char sep; @@ -680,20 +704,19 @@ static index_t check_elt_names_count(const char* elements_names, bool elt_names_ -static int get_formatted_elt_names_length(const char* elements_names) +static int get_formatted_elt_names_length(const char* elements_names, index_t nb_elements) { int i = 0; - bool stop = false; + index_t n = 0; - while (! 
stop) + while (n < nb_elements) { - if ((elements_names[i] == '\0') && (elements_names[i+1] == '\0')) - stop = true; - else - i++; + if (elements_names[i] == '\0') + n++; + i++; } - return i; + return i+1; } @@ -866,14 +889,15 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c } - -size_t obi_get_platform_header_size() +// TODO make private +size_t obi_calculate_header_size(index_t nb_elements_per_line, int64_t elts_names_length) { size_t header_size; size_t rounded_header_size; double multiple; header_size = sizeof(OBIDMS_column_header_t); + header_size = header_size + (nb_elements_per_line*2)*sizeof(int64_t) + elts_names_length*sizeof(char); multiple = ceil((double) header_size / (double) getpagesize()); @@ -891,6 +915,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, char* elements_names, bool elt_names_formatted, bool tuples, + bool to_eval, const char* indexer_name, const char* associated_column_name, obiversion_t associated_column_version, @@ -911,7 +936,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, OBIType_t stored_data_type; char* final_indexer_name; char* built_elements_names = NULL; - int elts_names_length; + int64_t elts_names_length; new_column = NULL; @@ -1012,12 +1037,11 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, // Format the elements names string if (! 
elt_names_formatted) - format_elements_names(elements_names, &elts_names_length); - else - elts_names_length = get_formatted_elt_names_length(elements_names); + format_elements_names(elements_names); + elts_names_length = get_formatted_elt_names_length(elements_names, nb_elements_per_line); // Calculate the size needed - header_size = obi_get_platform_header_size(); + header_size = obi_calculate_header_size(nb_elements_per_line, elts_names_length); data_size = obi_array_sizeof(stored_data_type, nb_lines, nb_elements_per_line); file_size = header_size + data_size; @@ -1107,12 +1131,14 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, header->stored_data_type = stored_data_type; header->returned_data_type = returned_data_type; header->tuples = tuples; + header->to_eval = to_eval; header->creation_date = time(NULL); header->version = version_number; header->cloned_from = -1; header->finished = false; - set_elements_names(new_column, elements_names, elts_names_length); + set_elements_names(new_column, elements_names, elts_names_length, nb_elements_per_line); + read_elt_names_informations(header); // Free the element names if they were built if (built_elements_names != NULL) @@ -1237,7 +1263,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, } // Open the column file, ALWAYS READ-ONLY - column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDONLY); + column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDWR); if (column_file_descriptor < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); @@ -1273,7 +1299,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, column->column_directory = column_directory; column->header = mmap(NULL, header_size, - PROT_READ, + PROT_READ | PROT_WRITE, MAP_SHARED, column_file_descriptor, 0 @@ -1288,6 +1314,9 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, return NULL; } + // Set the pointers relative to elements names informations in the header + read_elt_names_informations(column->header); + // Map the 
data column->data = mmap(NULL, (column->header)->data_size, @@ -1380,6 +1409,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms, (column_to_clone->header)->elements_names, true, (column_to_clone->header)->tuples, + (column_to_clone->header)->to_eval, (column_to_clone->header)->indexer_name, ((column_to_clone->header)->associated_column).column_name, ((column_to_clone->header)->associated_column).version, @@ -1821,8 +1851,8 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* return NULL; } - // Open the column file (READ-ONLY) - column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDONLY); + // Open the column file + column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDWR); if (column_file_descriptor < 0) { obidebug(1, "\nError opening a column file"); @@ -1842,10 +1872,10 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* return NULL; } - // Fill the header structure + // Map the header structure header = mmap(NULL, header_size, - PROT_READ, + PROT_READ | PROT_WRITE, MAP_SHARED, column_file_descriptor, 0 @@ -1859,6 +1889,9 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* return NULL; } + // Read the element names informations (storing pointers on informations) + read_elt_names_informations(header); + if (close(column_file_descriptor) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); @@ -1888,7 +1921,7 @@ index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const cha { int* elt_names_idx; - elt_names_idx = bsearch_user_data(element_name, (column->header)->sorted_elements_idx, (column->header)->nb_elements_per_line, sizeof(int), column->header, cmp_elements_names_with_name_and_idx); + elt_names_idx = bsearch_user_data(element_name, (column->header)->sorted_elements_idx, (column->header)->nb_elements_per_line, sizeof(index_t), column->header, cmp_elements_names_with_name_and_idx); if (elt_names_idx != NULL) 
return (index_t)(*elt_names_idx); @@ -1906,7 +1939,7 @@ char* obi_get_elements_names(OBIDMS_column_p column) int elt_idx; int len; - elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char)); + elements_names = (char*) malloc((column->header)->elements_names_length * sizeof(char)); if (elements_names == NULL) { obi_set_errno(OBI_MALLOC_ERROR); @@ -2148,6 +2181,8 @@ int obi_clean_unfinished_columns(OBIDMS_p dms) // dir_to_delete[ddir] = column_dir_path; // ddir++; // } +// else +// free(column_dir_path); free(col_name); } diff --git a/src/obidmscolumn.h b/src/obidmscolumn.h index 34e27d4..6bddc93 100644 --- a/src/obidmscolumn.h +++ b/src/obidmscolumn.h @@ -28,9 +28,10 @@ #include "obiblob_indexer.h" -#define ELEMENTS_NAMES_MAX (20480) /**< The maximum length of the list of elements names. // TODO Discuss +// TODO delete useless ones, add default nb? +#define ELEMENTS_NAMES_MAX (1000000) /**< The maximum length of the list of elements names. // TODO Discuss */ -#define NB_ELTS_MAX_IF_DEFAULT_NAME (20480) /**< The maximum number of elements per line if the default element names +#define NB_ELTS_MAX_IF_DEFAULT_NAME (1000000) /**< The maximum number of elements per line if the default element names * are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX. // TODO not up to date */ #define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged. @@ -70,16 +71,6 @@ typedef struct OBIDMS_column_header { */ index_t nb_elements_per_line; /**< Number of elements per line. */ - char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with '\0' as separator - * and '\0\0' as terminal flag. - * (default are the indices: "0\01\02\0...\0n\0\0"). - */ - int elements_names_length; /**< Length of the character array where the elements names are stored. - */ - int elements_names_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the start of each element name in elements_names. 
- */ - int sorted_elements_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the sorted element names in elements_names_idx. - */ OBIType_t returned_data_type; /**< Type of the data that is returned when getting an * element from the column. */ @@ -88,6 +79,9 @@ typedef struct OBIDMS_column_header { */ bool tuples; /**< A boolean indicating whether the column contains indices referring to indexed tuples. */ + bool to_eval; /**< A boolean indicating whether the column contains expressions that should be evaluated + * (typically OBI_STR columns containing character strings to be evaluated by Python). + */ time_t creation_date; /**< Date of creation of the file. */ obiversion_t version; /**< Version of the column. @@ -106,6 +100,18 @@ typedef struct OBIDMS_column_header { */ char comments[COMMENTS_MAX_LENGTH+1]; /**< Comments stored as a classical zero end C string. */ + int64_t elements_names_length; /**< Length of the character array where the elements names are stored. + */ + char* elements_names; /**< Pointer in mem_arena on the names of the line elements with '\0' as separator + * and '\0\0' as terminal flag. + * (default are the indices: "0\01\02\0...\0n\0\0"). + */ + int64_t* elements_names_idx; /**< Pointer in mem_arena on the index for the start of each element name in elements_names. + */ + int64_t* sorted_elements_idx; /**< Pointer in mem_arena on the index for the sorted element names in elements_names_idx. + */ + byte_t mem_arena[]; /**< Memory array where the elements names, the elements names index and the sorted elements index are stored. + */ } OBIDMS_column_header_t, *OBIDMS_column_header_p; @@ -212,17 +218,19 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c /** - * @brief Returns the header size in bytes of a column on this platform. + * @brief Returns the header size in bytes of a column. * - * The header size is defined as a multiple of the memory page size. - * As of now the header size is defined as one time the page size. 
+ * The header size is rounded to a multiple of the memory page size. + * + * @param nb_elements_per_line The number of elements per line. + * @param elts_names_length The length of elements_names including the two terminal '\0's. * * @returns The header size in bytes. * * @since May 2015 - * @author Eric Coissac (eric.coissac@metabarcoding.org) + * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -size_t obi_get_platform_header_size(); +size_t obi_calculate_header_size(index_t nb_elements_per_line, int64_t elts_names_length); /** @@ -242,6 +250,8 @@ size_t obi_get_platform_header_size(); * NULL or "" if the default names are to be used ("0\01\02\0...\0n"). * @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()). * @param tuples A boolean indicating whether the column should contain indices referring to indexed tuples. + * @param to_eval A boolean indicating whether the column contains expressions that should be evaluated + * (typically OBI_STR columns containing character strings to be evaluated by Python). * @param indexer_name The name of the indexer if there is one associated with the column. * If NULL or "", the indexer name is set as the column name. * @param associated_column_name The name of the associated column if there is one (otherwise NULL or ""). 
@@ -262,6 +272,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, char* elements_names, bool elt_names_formatted, bool tuples, + bool to_eval, const char* indexer_name, const char* associated_column_name, obiversion_t associated_column_version, diff --git a/src/obiview.c b/src/obiview.c index 295f9cb..8a86154 100644 --- a/src/obiview.c +++ b/src/obiview.c @@ -1646,7 +1646,7 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl // If there is a new line selection, build it by combining it with the one from the view to clone if there is one else if (line_selection != NULL) { - view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, false, false, NULL, NULL, -1, NULL); + view->line_selection = obi_create_column(view->dms, LINES_COLUMN_NAME, OBI_IDX, 0, 1, NULL, false, false, false, NULL, NULL, -1, NULL); if ((view->line_selection) == NULL) { obidebug(1, "\nError creating a column corresponding to a line selection"); @@ -1799,6 +1799,8 @@ Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_cl 0, NULL, false, + false, + false, NULL, NULL, -1, @@ -1866,19 +1868,19 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v if (view_to_clone == NULL) { // Adding sequence column - if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, NULL, OBI_SEQ, 0, 1, NULL, false, NULL, NULL, -1, "Nucleotide sequences", true) < 0) // discuss using same indexer "NUC_SEQ_INDEXER" + if (obi_view_add_column(view, NUC_SEQUENCE_COLUMN, -1, NULL, OBI_SEQ, 0, 1, NULL, false, false, false, NULL, NULL, -1, "Nucleotide sequences", true) < 0) // discuss using same indexer "NUC_SEQ_INDEXER" { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; } // Adding id column - if (obi_view_add_column(view, ID_COLUMN, -1, NULL, OBI_STR, 0, 1, NULL, false, NULL, NULL, -1, "Sequence identifiers", true) < 0) + if (obi_view_add_column(view, ID_COLUMN, -1, 
NULL, OBI_STR, 0, 1, NULL, false, false, false, NULL, NULL, -1, "Sequence identifiers", true) < 0) { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; } // Adding definition column - if (obi_view_add_column(view, DEFINITION_COLUMN, -1, NULL, OBI_STR, 0, 1, NULL, false, NULL, NULL, -1, "Definitions", true) < 0) + if (obi_view_add_column(view, DEFINITION_COLUMN, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, NULL, NULL, -1, "Definitions", true) < 0) { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; @@ -1887,7 +1889,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v if (quality_column) { associated_nuc_column = obi_view_get_column(view, NUC_SEQUENCE_COLUMN); - if (obi_view_add_column(view, QUALITY_COLUMN, -1, NULL, OBI_QUAL, 0, 1, NULL, false, NULL, (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association + if (obi_view_add_column(view, QUALITY_COLUMN, -1, NULL, OBI_QUAL, 0, 1, NULL, false, false, false, NULL, (associated_nuc_column->header)->name, (associated_nuc_column->header)->version, "Sequence qualities", true) < 0) // TODO discuss automatic association { obidebug(1, "Error adding an obligatory column in a nucleotide sequences view"); return NULL; @@ -2215,7 +2217,9 @@ int obi_view_add_column(Obiview_p view, index_t nb_lines, index_t nb_elements_per_line, char* elements_names, + bool elt_names_formatted, bool tuples, + bool to_eval, const char* indexer_name, const char* associated_column_name, obiversion_t associated_column_version, @@ -2294,7 +2298,7 @@ int obi_view_add_column(Obiview_p view, // Open or create the column if (create) { // Create column - column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, false, tuples, indexer_name, associated_column_name, associated_column_version, 
comments); + column = obi_create_column(view->dms, column_name, data_type, nb_lines, nb_elements_per_line, elements_names, elt_names_formatted, tuples, to_eval, indexer_name, associated_column_name, associated_column_version, comments); if (column == NULL) { obidebug(1, "\nError creating a column to add to a view"); @@ -2801,7 +2805,7 @@ int obi_create_auto_count_column(Obiview_p view) return -1; } - if (obi_view_add_column(view, COUNT_COLUMN, -1, NULL, OBI_INT, 0, 1, NULL, false, NULL, NULL, -1, "Sequence counts", true) < 0) + if (obi_view_add_column(view, COUNT_COLUMN, -1, NULL, OBI_INT, 0, 1, NULL, false, false, false, NULL, NULL, -1, "Sequence counts", true) < 0) { obidebug(1, "Error adding an automatic count column in a view"); return -1; @@ -2853,7 +2857,7 @@ int obi_create_auto_id_column(Obiview_p view, const char* prefix) } // Create the new ID column - if (obi_view_add_column(view, ID_COLUMN, -1, NULL, OBI_STR, 0, 1, NULL, false, NULL, NULL, -1, "Sequence identifiers", true) < 0) + if (obi_view_add_column(view, ID_COLUMN, -1, NULL, OBI_STR, 0, 1, NULL, false, false, false, NULL, NULL, -1, "Sequence identifiers", true) < 0) { obidebug(1, "Error adding an automatic ID column in a view"); return -1; diff --git a/src/obiview.h b/src/obiview.h index aac9a58..605cde3 100644 --- a/src/obiview.h +++ b/src/obiview.h @@ -361,7 +361,10 @@ Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name); * @param nb_elements_per_line The number of elements per line, if the column is created. * @param elements_names The names of the elements with ';' as separator (no terminal ';'), * if the column is created; NULL or "" if the default names are to be used ("0\01\02\0...\0n"). + * @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()). * @param tuples A boolean indicating whether the column should contain indices referring to indexed tuples. 
+ * @param to_eval A boolean indicating whether the column contains expressions that should be evaluated + * (typically OBI_STR columns containing character strings to be evaluated by Python). * @param indexer_name The name of the indexer if there is one associated with the column, if the column is created. * If NULL or "", the indexer name is set as the column name. * @param associated_column_name The name of the associated column if there is one (otherwise NULL or ""), if the column is created. @@ -384,7 +387,9 @@ int obi_view_add_column(Obiview_p view, index_t nb_lines, index_t nb_elements_per_line, char* elements_names, + bool elt_names_formatted, bool tuples, + bool to_eval, const char* indexer_name, const char* associated_column_name, obiversion_t associated_column_version,