From 7b606c0477688297c7eb60c80eec7558605c3d63 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Mon, 21 Sep 2015 15:42:29 +0200 Subject: [PATCH] Column files now always have a size that is a multiple of the page size, and the function that enlarges mapped column files tries mapping on next byte before unmapping/mapping again --- src/obidmscolumn.c | 186 +++++++++++++++++++++++++++++++-------------- src/obidmscolumn.h | 1 - 2 files changed, 129 insertions(+), 58 deletions(-) diff --git a/src/obidmscolumn.c b/src/obidmscolumn.c index 53e2a65..23fbd52 100644 --- a/src/obidmscolumn.c +++ b/src/obidmscolumn.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* mmap() is defined in this header */ #include "obidmscolumn.h" @@ -135,6 +136,20 @@ static int create_version_file(OBIDMS_column_directory_p column_directory); int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names); +/** + * @brief Internal function computing how many lines of an OBIDMS column fit in a memory page. 
+ * + * @param data_type the data OBIType + * @param nb_elements_per_line the number of elements per line + * + * @return the line count for one memory page + * + * @since September 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +size_t get_line_count_per_page(OBIType_t data_type, size_t nb_elements_per_line); + + /************************************************************************ * * D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S @@ -452,6 +467,11 @@ int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_n return 0; } +size_t get_line_count_per_page(OBIType_t data_type, size_t nb_elements_per_line) +{ + return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line); +} + /********************************************************************** * @@ -600,6 +620,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, int column_dir_file_descriptor; size_t header_size; size_t data_size; + size_t minimum_line_count; new_column = NULL; @@ -614,7 +635,51 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, obidebug(1, "\nCan't create column because of empty column name"); return NULL; } - //if (type < 1) + if ((type < 1) || (type > 4)) + { + obidebug(1, "\nCan't create column because of invalid data type"); + return NULL; + } + + // The initial line count should be between the minimum (corresponding to the page size) and the maximum allowed + minimum_line_count = get_line_count_per_page(type, nb_elements_per_line); + if (nb_lines > MAXIMUM_LINE_COUNT) + { + obidebug(1, "\nCan't create column because of line count greater than the maximum allowed (%lld)", MAXIMUM_LINE_COUNT); + return NULL; + } + else if (nb_lines < minimum_line_count) + nb_lines = minimum_line_count; + + // The number of elements names should be equal to the number of elements per line + + if ((elements_names == NULL) && (nb_elements_per_line > 1)) + { + obidebug(1, "\nCan't create column because no elements names were given for a 
number of elements per line greater than 1"); + return NULL; + } + else if ((elements_names != NULL) && (nb_elements_per_line > 1)) + { + char* token; + size_t n = 0; + token = strdup(elements_names); + token = strtok(token, ";"); + while (token != NULL) + { + token = strtok(NULL, ";"); + n++; + } + if (n != nb_elements_per_line) + { + obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line"); + return NULL; + } + } + else if ((nb_elements_per_line == 1) && (strcmp(elements_names, column_name) != 0)) + { + obidebug(1, "\nCan't create column because the element name does not match the column name"); + return NULL; + } // Get the column directory structure associated to the column column_directory = obi_column_directory(dms, column_name); @@ -892,13 +957,14 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms, const char* column_name, obiversi return NULL; } + data_type = (column_to_clone->header)->data_type; + + nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line; + if (clone_data) nb_lines = (column_to_clone->header)->line_count; else - nb_lines = INITIAL_LINE_COUNT; - - nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line; - data_type = (column_to_clone->header)->data_type; + nb_lines = get_line_count_per_page(data_type, nb_elements_per_line); // minimum line count corresponding to one memory page new_column = obi_create_column(dms, column_name, @@ -968,10 +1034,20 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column) { size_t file_size; size_t data_size; + size_t new_line_count; + double multiple; int column_dir_file_descriptor; int column_file_descriptor; char* column_file_name; + // Compute the new line count = the number of lines used rounded to the nearest multiple of page size + multiple = ceil((double) ((column->header)->lines_used * (column->header)->nb_elements_per_line * obi_sizeof((column->header)->data_type)) / (double) getpagesize()); + 
+ new_line_count = (int) multiple * getpagesize(); + + // If the new line count is equal to the current number of lines allocated in the file, there is no need to truncate + if ((column->header)->line_count == new_line_count) + return 0; + // Get the file descriptor associated to the column directory column_dir_file_descriptor = dirfd((column->column_directory)->directory); if (column_dir_file_descriptor < 0) @@ -999,7 +1075,7 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column) } // Unmap the data before truncating the file - data_size = (column->header)->line_count * (column->header)->nb_elements_per_line * sizeof((column->header)->data_type); + data_size = obi_array_sizeof((column->header)->data_type, (column->header)->line_count, (column->header)->nb_elements_per_line); if (munmap(column->data, data_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); @@ -1009,8 +1085,8 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column) return -1; } - // Truncate the column file at the number of lines used - data_size = (column->header)->lines_used * (column->header)->nb_elements_per_line * sizeof((column->header)->data_type); + // Truncate the column file + data_size = obi_array_sizeof((column->header)->data_type, new_line_count, (column->header)->nb_elements_per_line); file_size = (column->header)->header_size + data_size; if (ftruncate(column_file_descriptor, file_size) < 0) { @@ -1039,8 +1115,8 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column) return -1; } - // Set line_count to lines_used - (column->header)->line_count = (column->header)->lines_used; + // Set line_count to the new line count + (column->header)->line_count = new_line_count; free(column_file_name); close(column_file_descriptor); @@ -1060,6 +1136,7 @@ int obi_enlarge_column(OBIDMS_column_p column) int column_dir_file_descriptor; int column_file_descriptor; char* column_file_name; + void* new_data; // Get the file descriptor associated to the column directory 
column_dir_file_descriptor = dirfd((column->column_directory)->directory); @@ -1090,6 +1167,7 @@ int obi_enlarge_column(OBIDMS_column_p column) // Calculate the new file size old_line_count = (column->header)->line_count; new_line_count = old_line_count * GROWTH_FACTOR; + if (new_line_count > MAXIMUM_LINE_COUNT) { obi_set_errno(OBICOL_UNKNOWN_ERROR); @@ -1098,32 +1176,12 @@ int obi_enlarge_column(OBIDMS_column_p column) close(column_file_descriptor); return -1; } - old_data_size = (column->header)->line_count * (column->header)->nb_elements_per_line * sizeof((column->header)->data_type); + old_data_size = obi_array_sizeof((column->header)->data_type, old_line_count, (column->header)->nb_elements_per_line); new_data_size = old_data_size * GROWTH_FACTOR; header_size = (column->header)->header_size; file_size = header_size + new_data_size; - // Unmap the data - if (munmap(column->data, old_data_size) < 0) - { - obi_set_errno(OBICOL_UNKNOWN_ERROR); - obidebug(1, "\nError munmapping the data of a column before enlarging"); - free(column_file_name); - close(column_file_descriptor); - return -1; - } - - // Unmap the header - if (munmap(column->header, header_size) < 0) - { - obi_set_errno(OBICOL_UNKNOWN_ERROR); - obidebug(1, "\nError munmapping the header of a column before enlarging"); - free(column_file_name); - close(column_file_descriptor); - return -1; - } - - // Enlarge the file + // Enlarge the file // TODO isn't it possible that this makes the file "move"? if (ftruncate(column_file_descriptor, file_size) < 0) { obi_set_errno(OBICOL_UNKNOWN_ERROR); @@ -1133,34 +1191,17 @@ int obi_enlarge_column(OBIDMS_column_p column) return -1; } - // Remap the header (TODO not sure if necessary??) 
- column->header = mmap(NULL, - header_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - column_file_descriptor, - 0 - ); - - if (column->header == MAP_FAILED) - { - obi_set_errno(OBICOL_UNKNOWN_ERROR); - obidebug(1, "\nError mmapping the header of a column after enlarging file"); - close(column_file_descriptor); - free(column_file_name); - return -1; - } - - // Remap the data - column->data = mmap(NULL, - new_data_size, + // Remap the data: try enlarging mapped region (this actually never works on my mac without the MAP_FIXED flag which overwrites everything) + //obidebug(2, "\ntry enlarging mapped region: old size = %ld, new size = %ld, size = %ld", old_data_size, new_data_size, new_data_size - old_data_size); + new_data = mmap(column->data, + new_data_size - old_data_size, PROT_READ | PROT_WRITE, MAP_SHARED, column_file_descriptor, - header_size + old_data_size ); - if (column->data == MAP_FAILED) + if (new_data == MAP_FAILED) { obi_set_errno(OBICOL_UNKNOWN_ERROR); obidebug(1, "\nError re-mmapping the data of a column after enlarging the file"); @@ -1169,6 +1210,37 @@ int obi_enlarge_column(OBIDMS_column_p column) return -1; } + // If remap failed: Unmap and map the data again + if (new_data != (column->data)) // TODO check that this works without exception + { + //obidebug(2, "\nEnlarging mapped region failed: Unmap and map the data again, %x != %x", column->data, new_data); + if (munmap(column->data, old_data_size) < 0) + { + obi_set_errno(OBICOL_UNKNOWN_ERROR); + obidebug(1, "\nError munmapping the data of a column before enlarging"); + free(column_file_name); + close(column_file_descriptor); + return -1; + } + + column->data = mmap(NULL, + new_data_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + column_file_descriptor, + header_size + ); + + if (column->data == MAP_FAILED) + { + obi_set_errno(OBICOL_UNKNOWN_ERROR); + obidebug(1, "\nError re-mmapping the data of a column after enlarging the file"); + free(column_file_name); + close(column_file_descriptor); + 
return -1; + } + } + // Set new line count (column->header)->line_count = new_line_count; @@ -1455,7 +1527,7 @@ size_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char element_index = 0; - name = strtok (elements_names, ";"); // not thread safe, see strtok_r maybe + name = strtok (elements_names, ";"); // TODO not thread safe, see strtok_r maybe if (strcmp(element_name, name) == 0) { free(elements_names); @@ -1465,7 +1537,7 @@ size_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char while (name != NULL) { - name = strtok (NULL, ";"); // not thread safe, see strtok_r maybe + name = strtok (NULL, ";"); // TODO not thread safe, see strtok_r maybe if (strcmp(element_name, name) == 0) { free(elements_names); diff --git a/src/obidmscolumn.h b/src/obidmscolumn.h index 63ff5ff..3d670b3 100644 --- a/src/obidmscolumn.h +++ b/src/obidmscolumn.h @@ -26,7 +26,6 @@ #include "obidmscolumndir.h" #define ELEMENTS_NAMES_MAX (2048) -#define INITIAL_LINE_COUNT (1000) #define GROWTH_FACTOR (2) #define MAXIMUM_LINE_COUNT (1000000)