Column files now always have a size that is a multiple of the page size,

and the function that enlarges mapped column files first tries mapping at the
next byte before unmapping and mapping again
This commit is contained in:
Celine Mercier
2015-09-21 15:42:29 +02:00
parent 7d7dbb1bf9
commit 7b606c0477
2 changed files with 129 additions and 58 deletions

View File

@ -18,6 +18,7 @@
#include <unistd.h>
#include <fcntl.h>
#include <stdbool.h>
#include <math.h>
#include <sys/mman.h> /* mmap() is defined in this header */
#include "obidmscolumn.h"
@ -135,6 +136,20 @@ static int create_version_file(OBIDMS_column_directory_p column_directory);
int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names);
/**
* @brief Internal function computing how many lines of an OBIDMS column fit in one memory page.
*
* The result is the page size returned by getpagesize() divided by the size in bytes
* of one line (obi_sizeof(data_type) * nb_elements_per_line), using integer division.
* It is therefore rounded down, and is 0 when one line is larger than a page.
*
* @param data_type the data OBIType
* @param nb_elements_per_line the number of elements per line
*
* @return the number of whole lines that fit in one memory page
*
* @since September 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t get_line_count_per_page(OBIType_t data_type, size_t nb_elements_per_line);
/************************************************************************
*
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
@ -452,6 +467,11 @@ int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_n
return 0;
}
/*
 * Computes how many lines of an OBIDMS column fit in one memory page.
 *
 * The size in bytes of one line is obi_sizeof(data_type) * nb_elements_per_line.
 * The result is the page size (getpagesize()) divided by that line size, rounded
 * down, so it is 0 when a single line is larger than a page.
 *
 * Returns 0 as well when the line size is 0 (nb_elements_per_line == 0, or
 * obi_sizeof() returning 0 for the given type) instead of dividing by zero.
 */
size_t get_line_count_per_page(OBIType_t data_type, size_t nb_elements_per_line)
{
    size_t line_size = obi_sizeof(data_type) * nb_elements_per_line;

    // Guard the division: dividing by a zero line size is undefined behaviour
    if (line_size == 0)
        return 0;

    return (size_t) getpagesize() / line_size;
}
/**********************************************************************
*
@ -600,6 +620,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
int column_dir_file_descriptor;
size_t header_size;
size_t data_size;
size_t minimum_line_count;
new_column = NULL;
@ -614,7 +635,51 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
obidebug(1, "\nCan't create column because of empty column name");
return NULL;
}
//if (type < 1)
if ((type < 1) || (type > 4))
{
obidebug(1, "\nCan't create column because of invalid data type");
return NULL;
}
// The initial line count should be between the minimum (corresponding to the page size) and the maximum allowed
minimum_line_count = get_line_count_per_page(type, nb_elements_per_line);
if (nb_lines > MAXIMUM_LINE_COUNT)
{
obidebug(1, "\nCan't create column because of line count greater than the maximum allowed (%lld)", MAXIMUM_LINE_COUNT);
return NULL;
}
else if (nb_lines < minimum_line_count)
nb_lines = minimum_line_count;
// The number of elements names should be equal to the number of elements per line
if ((elements_names == NULL) && (nb_elements_per_line > 1))
{
obidebug(1, "\nCan't create column because no elements names were given for a number of elements per line greater than 1");
return NULL;
}
else if ((elements_names != NULL) && (nb_elements_per_line > 1))
{
char* token;
size_t n = 0;
token = strdup(elements_names);
token = strtok(token, ";");
while (token != NULL)
{
token = strtok(NULL, ";");
n++;
}
if (n != nb_elements_per_line)
{
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line");
return NULL;
}
}
else if ((nb_elements_per_line == 1) && (strcmp(elements_names, column_name) != 0))
{
obidebug(1, "\nCan't create column because the element name does not match the column name");
return NULL;
}
// Get the column directory structure associated to the column
column_directory = obi_column_directory(dms, column_name);
@ -892,13 +957,14 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms, const char* column_name, obiversi
return NULL;
}
data_type = (column_to_clone->header)->data_type;
nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line;
if (clone_data)
nb_lines = (column_to_clone->header)->line_count;
else
nb_lines = INITIAL_LINE_COUNT;
nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line;
data_type = (column_to_clone->header)->data_type;
nb_lines = get_line_count_per_page(data_type, nb_elements_per_line); // minimum line count corresponding to one memory page
new_column = obi_create_column(dms,
column_name,
@ -968,10 +1034,20 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column)
{
size_t file_size;
size_t data_size;
size_t new_line_count;
double multiple;
int column_dir_file_descriptor;
int column_file_descriptor;
char* column_file_name;
// Compute the new line count = the number of lines used rounded to the nearest multiple of page size
multiple = ceil((double) ((column->header)->lines_used * (column->header)->nb_elements_per_line * obi_sizeof((column->header)->data_type)) / (double) getpagesize());
new_line_count = (int) multiple * getpagesize();
// Check that it is actually greater than the current number of lines allocated in the file, otherwise no need to truncate
if ((column->header)->line_count == new_line_count)
return 0;
// Get the file descriptor associated to the column directory
column_dir_file_descriptor = dirfd((column->column_directory)->directory);
if (column_dir_file_descriptor < 0)
@ -999,7 +1075,7 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column)
}
// Unmap the data before truncating the file
data_size = (column->header)->line_count * (column->header)->nb_elements_per_line * sizeof((column->header)->data_type);
data_size = obi_array_sizeof((column->header)->data_type, (column->header)->line_count, (column->header)->nb_elements_per_line);
if (munmap(column->data, data_size) < 0)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
@ -1009,8 +1085,8 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column)
return -1;
}
// Truncate the column file at the number of lines used
data_size = (column->header)->lines_used * (column->header)->nb_elements_per_line * sizeof((column->header)->data_type);
// Truncate the column file
data_size = obi_array_sizeof((column->header)->data_type, new_line_count, (column->header)->nb_elements_per_line);
file_size = (column->header)->header_size + data_size;
if (ftruncate(column_file_descriptor, file_size) < 0)
{
@ -1039,8 +1115,8 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column)
return -1;
}
// Set line_count to lines_used
(column->header)->line_count = (column->header)->lines_used;
// Set line_count to the new line count
(column->header)->line_count = new_line_count;
free(column_file_name);
close(column_file_descriptor);
@ -1060,6 +1136,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
int column_dir_file_descriptor;
int column_file_descriptor;
char* column_file_name;
void* new_data;
// Get the file descriptor associated to the column directory
column_dir_file_descriptor = dirfd((column->column_directory)->directory);
@ -1090,6 +1167,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
// Calculate the new file size
old_line_count = (column->header)->line_count;
new_line_count = old_line_count * GROWTH_FACTOR;
if (new_line_count > MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
@ -1098,32 +1176,12 @@ int obi_enlarge_column(OBIDMS_column_p column)
close(column_file_descriptor);
return -1;
}
old_data_size = (column->header)->line_count * (column->header)->nb_elements_per_line * sizeof((column->header)->data_type);
old_data_size = obi_array_sizeof((column->header)->data_type, old_line_count, (column->header)->nb_elements_per_line);
new_data_size = old_data_size * GROWTH_FACTOR;
header_size = (column->header)->header_size;
file_size = header_size + new_data_size;
// Unmap the data
if (munmap(column->data, old_data_size) < 0)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError munmapping the data of a column before enlarging");
free(column_file_name);
close(column_file_descriptor);
return -1;
}
// Unmap the header
if (munmap(column->header, header_size) < 0)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError munmapping the header of a column before enlarging");
free(column_file_name);
close(column_file_descriptor);
return -1;
}
// Enlarge the file
// Enlarge the file // TODO isn't it possible that this makes the file "move"?
if (ftruncate(column_file_descriptor, file_size) < 0)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
@ -1133,34 +1191,17 @@ int obi_enlarge_column(OBIDMS_column_p column)
return -1;
}
// Remap the header (TODO not sure if necessary??)
column->header = mmap(NULL,
header_size,
PROT_READ | PROT_WRITE,
MAP_SHARED,
column_file_descriptor,
0
);
if (column->header == MAP_FAILED)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError mmapping the header of a column after enlarging file");
close(column_file_descriptor);
free(column_file_name);
return -1;
}
// Remap the data
column->data = mmap(NULL,
new_data_size,
// Remap the data: try enlarging mapped region (this actually never works on my mac without the MAP_FIXED flag which overwrites everything)
//obidebug(2, "\ntry enlarging mapped region: old size = %ld, new size = %ld, size = %ld", old_data_size, new_data_size, new_data_size - old_data_size);
new_data = mmap(column->data,
new_data_size - old_data_size,
PROT_READ | PROT_WRITE,
MAP_SHARED,
column_file_descriptor,
header_size
old_data_size
);
if (column->data == MAP_FAILED)
if (new_data == MAP_FAILED)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError re-mmapping the data of a column after enlarging the file");
@ -1169,6 +1210,37 @@ int obi_enlarge_column(OBIDMS_column_p column)
return -1;
}
// If remap failed: Unmap and map the data again
if (new_data != (column->data)) // TODO check that this works without exception
{
//obidebug(2, "\nEnlarging mapped region failed: Unmap and map the data again, %x != %x", column->data, new_data);
if (munmap(column->data, old_data_size) < 0)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError munmapping the data of a column before enlarging");
free(column_file_name);
close(column_file_descriptor);
return -1;
}
column->data = mmap(NULL,
new_data_size,
PROT_READ | PROT_WRITE,
MAP_SHARED,
column_file_descriptor,
header_size
);
if (column->data == MAP_FAILED)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError re-mmapping the data of a column after enlarging the file");
free(column_file_name);
close(column_file_descriptor);
return -1;
}
}
// Set new line count
(column->header)->line_count = new_line_count;
@ -1455,7 +1527,7 @@ size_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char
element_index = 0;
name = strtok (elements_names, ";"); // not thread safe, see strtok_r maybe
name = strtok (elements_names, ";"); // TODO not thread safe, see strtok_r maybe
if (strcmp(element_name, name) == 0)
{
free(elements_names);
@ -1465,7 +1537,7 @@ size_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char
while (name != NULL)
{
name = strtok (NULL, ";"); // not thread safe, see strtok_r maybe
name = strtok (NULL, ";"); // TODO not thread safe, see strtok_r maybe
if (strcmp(element_name, name) == 0)
{
free(elements_names);