Column files now always have a size that is a multiple of the page size,
and the function that enlarges mapped column files first tries to map the added part of the file right after the existing mapping, and only unmaps and remaps the whole data region if that fails
This commit is contained in:
@ -18,6 +18,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
#include <math.h>
|
||||||
#include <sys/mman.h> /* mmap() is defined in this header */
|
#include <sys/mman.h> /* mmap() is defined in this header */
|
||||||
|
|
||||||
#include "obidmscolumn.h"
|
#include "obidmscolumn.h"
|
||||||
@ -135,6 +136,20 @@ static int create_version_file(OBIDMS_column_directory_p column_directory);
|
|||||||
int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names);
|
int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_names);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal function computing how many lines of an OBIDMS column fill in a memory page.
|
||||||
|
*
|
||||||
|
* @param data_type the data OBIType
|
||||||
|
* @param nb_elements_per_line the number of elements per line
|
||||||
|
*
|
||||||
|
* @return the line count for one memory page
|
||||||
|
*
|
||||||
|
* @since September 2015
|
||||||
|
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||||
|
*/
|
||||||
|
size_t get_line_count_per_page(OBIType_t data_type, size_t nb_elements_per_line);
|
||||||
|
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
*
|
*
|
||||||
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
|
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||||
@ -452,6 +467,11 @@ int obi_column_set_elements_names(OBIDMS_column_p column, const char* elements_n
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t get_line_count_per_page(OBIType_t data_type, size_t nb_elements_per_line)
{
	// Size in bytes of one line of the column (element size * number of elements per line)
	size_t line_size = obi_sizeof(data_type) * nb_elements_per_line;

	// A zero-sized line (e.g. nb_elements_per_line == 0) would make the
	// division below undefined behaviour, so report that no line count
	// can be computed instead of crashing
	if (line_size == 0)
		return 0;

	// Integer division: the result is 0 when a single line is larger than one memory page
	return ((size_t) getpagesize()) / line_size;
}
|
||||||
|
|
||||||
|
|
||||||
/**********************************************************************
|
/**********************************************************************
|
||||||
*
|
*
|
||||||
@ -600,6 +620,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
int column_dir_file_descriptor;
|
int column_dir_file_descriptor;
|
||||||
size_t header_size;
|
size_t header_size;
|
||||||
size_t data_size;
|
size_t data_size;
|
||||||
|
size_t minimum_line_count;
|
||||||
|
|
||||||
new_column = NULL;
|
new_column = NULL;
|
||||||
|
|
||||||
@ -614,7 +635,51 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
obidebug(1, "\nCan't create column because of empty column name");
|
obidebug(1, "\nCan't create column because of empty column name");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
//if (type < 1)
|
if ((type < 1) || (type > 4))
|
||||||
|
{
|
||||||
|
obidebug(1, "\nCan't create column because of invalid data type");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The initial line count should be between the minimum (corresponding to the page size) and the maximum allowed
|
||||||
|
minimum_line_count = get_line_count_per_page(type, nb_elements_per_line);
|
||||||
|
if (nb_lines > MAXIMUM_LINE_COUNT)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nCan't create column because of line count greater than the maximum allowed (%lld)", MAXIMUM_LINE_COUNT);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else if (nb_lines < minimum_line_count)
|
||||||
|
nb_lines = minimum_line_count;
|
||||||
|
|
||||||
|
// The number of elements names should be equal to the number of elements per line
|
||||||
|
|
||||||
|
if ((elements_names == NULL) && (nb_elements_per_line > 1))
|
||||||
|
{
|
||||||
|
obidebug(1, "\nCan't create column because no elements names were given for a number of elements per line greater than 1");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else if ((elements_names != NULL) && (nb_elements_per_line > 1))
|
||||||
|
{
|
||||||
|
char* token;
|
||||||
|
size_t n = 0;
|
||||||
|
token = strdup(elements_names);
|
||||||
|
token = strtok(token, ";");
|
||||||
|
while (token != NULL)
|
||||||
|
{
|
||||||
|
token = strtok(NULL, ";");
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
if (n != nb_elements_per_line)
|
||||||
|
{
|
||||||
|
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ((nb_elements_per_line == 1) && (strcmp(elements_names, column_name) != 0))
|
||||||
|
{
|
||||||
|
obidebug(1, "\nCan't create column because the element name does not match the column name");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
// Get the column directory structure associated to the column
|
// Get the column directory structure associated to the column
|
||||||
column_directory = obi_column_directory(dms, column_name);
|
column_directory = obi_column_directory(dms, column_name);
|
||||||
@ -892,13 +957,14 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms, const char* column_name, obiversi
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data_type = (column_to_clone->header)->data_type;
|
||||||
|
|
||||||
|
nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line;
|
||||||
|
|
||||||
if (clone_data)
|
if (clone_data)
|
||||||
nb_lines = (column_to_clone->header)->line_count;
|
nb_lines = (column_to_clone->header)->line_count;
|
||||||
else
|
else
|
||||||
nb_lines = INITIAL_LINE_COUNT;
|
nb_lines = get_line_count_per_page(data_type, nb_elements_per_line); // minimum line count corresponding to one memory page
|
||||||
|
|
||||||
nb_elements_per_line = (column_to_clone->header)->nb_elements_per_line;
|
|
||||||
data_type = (column_to_clone->header)->data_type;
|
|
||||||
|
|
||||||
new_column = obi_create_column(dms,
|
new_column = obi_create_column(dms,
|
||||||
column_name,
|
column_name,
|
||||||
@ -968,10 +1034,20 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column)
|
|||||||
{
|
{
|
||||||
size_t file_size;
|
size_t file_size;
|
||||||
size_t data_size;
|
size_t data_size;
|
||||||
|
size_t new_line_count;
|
||||||
|
double multiple;
|
||||||
int column_dir_file_descriptor;
|
int column_dir_file_descriptor;
|
||||||
int column_file_descriptor;
|
int column_file_descriptor;
|
||||||
char* column_file_name;
|
char* column_file_name;
|
||||||
|
|
||||||
|
// Compute the new line count = the number of lines used rounded to the nearest multiple of page size
|
||||||
|
multiple = ceil((double) ((column->header)->lines_used * (column->header)->nb_elements_per_line * obi_sizeof((column->header)->data_type)) / (double) getpagesize());
|
||||||
|
new_line_count = (int) multiple * getpagesize();
|
||||||
|
|
||||||
|
// Check that it is actually greater than the current number of lines allocated in the file, otherwise no need to truncate
|
||||||
|
if ((column->header)->line_count == new_line_count)
|
||||||
|
return 0;
|
||||||
|
|
||||||
// Get the file descriptor associated to the column directory
|
// Get the file descriptor associated to the column directory
|
||||||
column_dir_file_descriptor = dirfd((column->column_directory)->directory);
|
column_dir_file_descriptor = dirfd((column->column_directory)->directory);
|
||||||
if (column_dir_file_descriptor < 0)
|
if (column_dir_file_descriptor < 0)
|
||||||
@ -999,7 +1075,7 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Unmap the data before truncating the file
|
// Unmap the data before truncating the file
|
||||||
data_size = (column->header)->line_count * (column->header)->nb_elements_per_line * sizeof((column->header)->data_type);
|
data_size = obi_array_sizeof((column->header)->data_type, (column->header)->line_count, (column->header)->nb_elements_per_line);
|
||||||
if (munmap(column->data, data_size) < 0)
|
if (munmap(column->data, data_size) < 0)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||||
@ -1009,8 +1085,8 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Truncate the column file at the number of lines used
|
// Truncate the column file
|
||||||
data_size = (column->header)->lines_used * (column->header)->nb_elements_per_line * sizeof((column->header)->data_type);
|
data_size = obi_array_sizeof((column->header)->data_type, new_line_count, (column->header)->nb_elements_per_line);
|
||||||
file_size = (column->header)->header_size + data_size;
|
file_size = (column->header)->header_size + data_size;
|
||||||
if (ftruncate(column_file_descriptor, file_size) < 0)
|
if (ftruncate(column_file_descriptor, file_size) < 0)
|
||||||
{
|
{
|
||||||
@ -1039,8 +1115,8 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set line_count to lines_used
|
// Set line_count to the new line count
|
||||||
(column->header)->line_count = (column->header)->lines_used;
|
(column->header)->line_count = new_line_count;
|
||||||
|
|
||||||
free(column_file_name);
|
free(column_file_name);
|
||||||
close(column_file_descriptor);
|
close(column_file_descriptor);
|
||||||
@ -1060,6 +1136,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
|||||||
int column_dir_file_descriptor;
|
int column_dir_file_descriptor;
|
||||||
int column_file_descriptor;
|
int column_file_descriptor;
|
||||||
char* column_file_name;
|
char* column_file_name;
|
||||||
|
void* new_data;
|
||||||
|
|
||||||
// Get the file descriptor associated to the column directory
|
// Get the file descriptor associated to the column directory
|
||||||
column_dir_file_descriptor = dirfd((column->column_directory)->directory);
|
column_dir_file_descriptor = dirfd((column->column_directory)->directory);
|
||||||
@ -1090,6 +1167,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
|||||||
// Calculate the new file size
|
// Calculate the new file size
|
||||||
old_line_count = (column->header)->line_count;
|
old_line_count = (column->header)->line_count;
|
||||||
new_line_count = old_line_count * GROWTH_FACTOR;
|
new_line_count = old_line_count * GROWTH_FACTOR;
|
||||||
|
|
||||||
if (new_line_count > MAXIMUM_LINE_COUNT)
|
if (new_line_count > MAXIMUM_LINE_COUNT)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||||
@ -1098,32 +1176,12 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
|||||||
close(column_file_descriptor);
|
close(column_file_descriptor);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
old_data_size = (column->header)->line_count * (column->header)->nb_elements_per_line * sizeof((column->header)->data_type);
|
old_data_size = obi_array_sizeof((column->header)->data_type, old_line_count, (column->header)->nb_elements_per_line);
|
||||||
new_data_size = old_data_size * GROWTH_FACTOR;
|
new_data_size = old_data_size * GROWTH_FACTOR;
|
||||||
header_size = (column->header)->header_size;
|
header_size = (column->header)->header_size;
|
||||||
file_size = header_size + new_data_size;
|
file_size = header_size + new_data_size;
|
||||||
|
|
||||||
// Unmap the data
|
// Enlarge the file // TODO isn't it possible that this makes the file "move"?
|
||||||
if (munmap(column->data, old_data_size) < 0)
|
|
||||||
{
|
|
||||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
|
||||||
obidebug(1, "\nError munmapping the data of a column before enlarging");
|
|
||||||
free(column_file_name);
|
|
||||||
close(column_file_descriptor);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Unmap the header
|
|
||||||
if (munmap(column->header, header_size) < 0)
|
|
||||||
{
|
|
||||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
|
||||||
obidebug(1, "\nError munmapping the header of a column before enlarging");
|
|
||||||
free(column_file_name);
|
|
||||||
close(column_file_descriptor);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enlarge the file
|
|
||||||
if (ftruncate(column_file_descriptor, file_size) < 0)
|
if (ftruncate(column_file_descriptor, file_size) < 0)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||||
@ -1133,34 +1191,17 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remap the header (TODO not sure if necessary??)
|
// Remap the data: try enlarging mapped region (this actually never works on my mac without the MAP_FIXED flag which overwrites everything)
|
||||||
column->header = mmap(NULL,
|
//obidebug(2, "\ntry enlarging mapped region: old size = %ld, new size = %ld, size = %ld", old_data_size, new_data_size, new_data_size - old_data_size);
|
||||||
header_size,
|
new_data = mmap(column->data,
|
||||||
PROT_READ | PROT_WRITE,
|
new_data_size - old_data_size,
|
||||||
MAP_SHARED,
|
|
||||||
column_file_descriptor,
|
|
||||||
0
|
|
||||||
);
|
|
||||||
|
|
||||||
if (column->header == MAP_FAILED)
|
|
||||||
{
|
|
||||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
|
||||||
obidebug(1, "\nError mmapping the header of a column after enlarging file");
|
|
||||||
close(column_file_descriptor);
|
|
||||||
free(column_file_name);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remap the data
|
|
||||||
column->data = mmap(NULL,
|
|
||||||
new_data_size,
|
|
||||||
PROT_READ | PROT_WRITE,
|
PROT_READ | PROT_WRITE,
|
||||||
MAP_SHARED,
|
MAP_SHARED,
|
||||||
column_file_descriptor,
|
column_file_descriptor,
|
||||||
header_size
|
old_data_size
|
||||||
);
|
);
|
||||||
|
|
||||||
if (column->data == MAP_FAILED)
|
if (new_data == MAP_FAILED)
|
||||||
{
|
{
|
||||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||||
obidebug(1, "\nError re-mmapping the data of a column after enlarging the file");
|
obidebug(1, "\nError re-mmapping the data of a column after enlarging the file");
|
||||||
@ -1169,6 +1210,37 @@ int obi_enlarge_column(OBIDMS_column_p column)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If remap failed: Unmap and map the data again
|
||||||
|
if (new_data != (column->data)) // TODO check that this works without exception
|
||||||
|
{
|
||||||
|
//obidebug(2, "\nEnlarging mapped region failed: Unmap and map the data again, %x != %x", column->data, new_data);
|
||||||
|
if (munmap(column->data, old_data_size) < 0)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||||
|
obidebug(1, "\nError munmapping the data of a column before enlarging");
|
||||||
|
free(column_file_name);
|
||||||
|
close(column_file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
column->data = mmap(NULL,
|
||||||
|
new_data_size,
|
||||||
|
PROT_READ | PROT_WRITE,
|
||||||
|
MAP_SHARED,
|
||||||
|
column_file_descriptor,
|
||||||
|
header_size
|
||||||
|
);
|
||||||
|
|
||||||
|
if (column->data == MAP_FAILED)
|
||||||
|
{
|
||||||
|
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||||
|
obidebug(1, "\nError re-mmapping the data of a column after enlarging the file");
|
||||||
|
free(column_file_name);
|
||||||
|
close(column_file_descriptor);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Set new line count
|
// Set new line count
|
||||||
(column->header)->line_count = new_line_count;
|
(column->header)->line_count = new_line_count;
|
||||||
|
|
||||||
@ -1455,7 +1527,7 @@ size_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char
|
|||||||
|
|
||||||
element_index = 0;
|
element_index = 0;
|
||||||
|
|
||||||
name = strtok (elements_names, ";"); // not thread safe, see strtok_r maybe
|
name = strtok (elements_names, ";"); // TODO not thread safe, see strtok_r maybe
|
||||||
if (strcmp(element_name, name) == 0)
|
if (strcmp(element_name, name) == 0)
|
||||||
{
|
{
|
||||||
free(elements_names);
|
free(elements_names);
|
||||||
@ -1465,7 +1537,7 @@ size_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char
|
|||||||
|
|
||||||
while (name != NULL)
|
while (name != NULL)
|
||||||
{
|
{
|
||||||
name = strtok (NULL, ";"); // not thread safe, see strtok_r maybe
|
name = strtok (NULL, ";"); // TODO not thread safe, see strtok_r maybe
|
||||||
if (strcmp(element_name, name) == 0)
|
if (strcmp(element_name, name) == 0)
|
||||||
{
|
{
|
||||||
free(elements_names);
|
free(elements_names);
|
||||||
|
@ -26,7 +26,6 @@
|
|||||||
#include "obidmscolumndir.h"
|
#include "obidmscolumndir.h"
|
||||||
|
|
||||||
#define ELEMENTS_NAMES_MAX (2048)
|
#define ELEMENTS_NAMES_MAX (2048)
|
||||||
#define INITIAL_LINE_COUNT (1000)
|
|
||||||
#define GROWTH_FACTOR (2)
|
#define GROWTH_FACTOR (2)
|
||||||
#define MAXIMUM_LINE_COUNT (1000000)
|
#define MAXIMUM_LINE_COUNT (1000000)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user