Columns: information about the elements names is now kept in a memory arena of
adapted size in the header, and a boolean was added to the header indicating whether the values should be evaluated (typically character strings to be evaluated in Python)
This commit is contained in:
@ -139,12 +139,11 @@ static char* build_default_elements_names(index_t nb_elements_per_line);
|
||||
* column with '\0' as separator (e.g. "0\01\02\0...\0n\0").
|
||||
*
|
||||
* @param elements_names The character string formatted with ';' as separator (e.g. "0;1;2;...;n\0").
|
||||
* @param elts_names_length A pointer on an integer where the function will store the length of the character string.
|
||||
*
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static void format_elements_names(char* elements_names, int* elts_names_length);
|
||||
static void format_elements_names(char* elements_names);
|
||||
|
||||
|
||||
/**
|
||||
@ -188,7 +187,7 @@ static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n
|
||||
*
|
||||
* @param column A pointer as returned by obi_create_column().
|
||||
* @param elements_names The names of the elements as formatted by format_elements_names().
|
||||
* @param elts_names_length The length of elements_names.
|
||||
* @param elts_names_length The length of elements_names including the two terminal '\0's.
|
||||
*
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
@ -196,7 +195,19 @@ static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n
|
||||
* @since July 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length);
|
||||
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int64_t elts_names_length, index_t nb_elements_per_line);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Internal function reading the information related to the elements names
|
||||
* of the lines of a column in the header of the OBIDMS column structure.
|
||||
*
|
||||
* @param header A pointer on the header of the column.
|
||||
*
|
||||
* @since December 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static void read_elt_names_informations(OBIDMS_column_header_p header);
|
||||
|
||||
|
||||
/**
|
||||
@ -224,7 +235,7 @@ static index_t check_elt_names_count(const char* elements_names, bool elt_names_
|
||||
* @since January 2017
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static int get_formatted_elt_names_length(const char* elements_names);
|
||||
static int get_formatted_elt_names_length(const char* elements_names, index_t nb_elements);
|
||||
|
||||
|
||||
/**
|
||||
@ -521,7 +532,15 @@ static char* build_default_elements_names(index_t nb_elements_per_line)
|
||||
int i;
|
||||
int len;
|
||||
|
||||
elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
|
||||
// if (nb_elements_per_line > NB_ELTS_MAX_IF_DEFAULT_NAME)
|
||||
// {
|
||||
// obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||
// obidebug(1, "\nError: too many elements per line to use the default names (max = %d elements)", NB_ELTS_MAX_IF_DEFAULT_NAME);
|
||||
// return NULL;
|
||||
// }
|
||||
|
||||
// TODO
|
||||
elements_names = (char*) malloc(nb_elements_per_line * 10 * sizeof(char));
|
||||
if (elements_names == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
@ -529,13 +548,6 @@ static char* build_default_elements_names(index_t nb_elements_per_line)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (nb_elements_per_line > NB_ELTS_MAX_IF_DEFAULT_NAME)
|
||||
{
|
||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError: too many elements per line to use the default names (max = %d elements)", NB_ELTS_MAX_IF_DEFAULT_NAME);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
len = 0;
|
||||
for (i = 0; i < nb_elements_per_line; i++)
|
||||
len += sprintf(elements_names+len, "%d;", i);
|
||||
@ -549,14 +561,15 @@ static char* build_default_elements_names(index_t nb_elements_per_line)
|
||||
|
||||
|
||||
|
||||
static void format_elements_names(char* elements_names, int* elts_names_length)
|
||||
static void format_elements_names(char* elements_names)
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
int64_t elts_names_length;
|
||||
|
||||
*elts_names_length = strlen(elements_names);
|
||||
elts_names_length = strlen(elements_names);
|
||||
|
||||
// Replace the ';' with '\0'
|
||||
for (i=0; i < *elts_names_length; i++)
|
||||
for (i=0; i < elts_names_length; i++)
|
||||
{
|
||||
if (elements_names[i] == ';')
|
||||
elements_names[i] = '\0';
|
||||
@ -570,11 +583,11 @@ static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_s
|
||||
char* name1=NULL;
|
||||
char* name2=NULL;
|
||||
|
||||
int name1_idx;
|
||||
int name2_idx;
|
||||
index_t name1_idx;
|
||||
index_t name2_idx;
|
||||
|
||||
int name1_sort_idx = *((int*)n1_sort_idx);
|
||||
int name2_sort_idx = *((int*)n2_sort_idx);
|
||||
index_t name1_sort_idx = *((index_t*)n1_sort_idx);
|
||||
index_t name2_sort_idx = *((index_t*)n2_sort_idx);
|
||||
OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
|
||||
|
||||
name1_idx = (header->elements_names_idx)[name1_sort_idx];
|
||||
@ -590,10 +603,10 @@ static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_s
|
||||
|
||||
static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h)
|
||||
{
|
||||
char* name2=NULL;
|
||||
int name2_idx;
|
||||
char* name2=NULL;
|
||||
index_t name2_idx;
|
||||
|
||||
int name2_sort_idx = *((int*)n2_sort_idx);
|
||||
index_t name2_sort_idx = *((index_t*)n2_sort_idx);
|
||||
OBIDMS_column_header_p header = (OBIDMS_column_header_p) h;
|
||||
|
||||
name2_idx = (header->elements_names_idx)[name2_sort_idx];
|
||||
@ -603,56 +616,67 @@ static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length)
|
||||
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int64_t elts_names_length, index_t nb_elements)
|
||||
{
|
||||
int i, j;
|
||||
OBIDMS_column_header_p header;
|
||||
index_t i, j;
|
||||
|
||||
// Check that the elements names are not too long
|
||||
if (elts_names_length+2 > ELEMENTS_NAMES_MAX)
|
||||
{
|
||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError: element names too long (%d, max: %d)", elts_names_length+2, ELEMENTS_NAMES_MAX);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Copy the elements names in the header
|
||||
memcpy((column->header)->elements_names, elements_names, elts_names_length*sizeof(char));
|
||||
|
||||
// Terminal characters
|
||||
(column->header)->elements_names[elts_names_length] = '\0';
|
||||
(column->header)->elements_names[elts_names_length + 1] = '\0';
|
||||
header = column->header;
|
||||
|
||||
// Store the length of the character array containing the elements names
|
||||
(column->header)->elements_names_length = elts_names_length;
|
||||
header->elements_names_length = elts_names_length;
|
||||
// Store the pointers pointing to the different elements stored in the memory arena
|
||||
header->elements_names = (char*)&(header->mem_arena)[0];
|
||||
header->elements_names_idx = (index_t*)&((char*)(header->mem_arena) + elts_names_length)[0];
|
||||
header->sorted_elements_idx = (header->elements_names_idx) + nb_elements;
|
||||
|
||||
// Copy the elements names in the header
|
||||
memcpy(header->elements_names, elements_names, (elts_names_length-2)*sizeof(char));
|
||||
|
||||
// Terminal characters
|
||||
header->elements_names[elts_names_length - 2] = '\0';
|
||||
header->elements_names[elts_names_length - 1] = '\0';
|
||||
|
||||
// Build the elements names index
|
||||
i = 0;
|
||||
j = 0;
|
||||
// Index the first element name
|
||||
((column->header)->elements_names_idx)[j] = i;
|
||||
((column->header)->sorted_elements_idx)[j] = j;
|
||||
(header->elements_names_idx)[j] = i;
|
||||
(header->sorted_elements_idx)[j] = j;
|
||||
i++;
|
||||
j++;
|
||||
while (i < elts_names_length)
|
||||
|
||||
while (i < elts_names_length-2)
|
||||
{
|
||||
if (elements_names[i] == '\0')
|
||||
{ // Index new element name
|
||||
((column->header)->elements_names_idx)[j] = i+1;
|
||||
((column->header)->sorted_elements_idx)[j] = j;
|
||||
(header->elements_names_idx)[j] = i+1;
|
||||
(header->sorted_elements_idx)[j] = j;
|
||||
j++;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
// Build the sorted index
|
||||
qsort_user_data((column->header)->sorted_elements_idx, j, sizeof(int), column->header, cmp_elements_names_with_idx);
|
||||
qsort_user_data(header->sorted_elements_idx, j, sizeof(index_t), column->header, cmp_elements_names_with_idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Recomputes the three pointers of a column header that point inside its
// memory arena: the elements names, the elements names index and the sorted
// index. The pointers are derived from elements_names_length and
// nb_elements_per_line, both read from the header itself.
//
// NOTE(review): the index pointers start elements_names_length bytes into the
// arena, so they are presumably not guaranteed to be index_t-aligned — confirm.
static void read_elt_names_informations(OBIDMS_column_header_p header)
{
	char*    arena        = (char*) (header->mem_arena);
	int64_t  names_length = header->elements_names_length;
	index_t* names_idx    = (index_t*) (arena + names_length);

	// The names string sits at the very start of the arena
	header->elements_names = arena;

	// The names index follows the names string
	header->elements_names_idx = names_idx;

	// The sorted index follows the names index (one slot per element)
	header->sorted_elements_idx = names_idx + header->nb_elements_per_line;
}
|
||||
|
||||
|
||||
|
||||
static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted)
|
||||
{
|
||||
char sep;
|
||||
@ -680,20 +704,19 @@ static index_t check_elt_names_count(const char* elements_names, bool elt_names_
|
||||
|
||||
|
||||
|
||||
// Computes the length of an already formatted elements names string
// ('\0'-separated names), counting the two terminal '\0's.
//
// elements_names The formatted names; must contain at least nb_elements '\0'
//                characters, since the scan only stops once that many were seen.
// nb_elements    The number of names expected in elements_names.
//
// Returns the offset just past the '\0' closing the last name, plus one for
// the second terminal '\0' (1 when nb_elements is not positive).
static int get_formatted_elt_names_length(const char* elements_names, index_t nb_elements)
{
	int     i = 0;
	index_t n = 0;

	// Walk the string until the '\0' closing the nb_elements-th name is consumed
	while (n < nb_elements)
	{
		if (elements_names[i] == '\0')
			n++;
		i++;
	}

	// i already counts the first terminal '\0'; add the second one
	return i+1;
}
|
||||
|
||||
|
||||
@ -866,14 +889,15 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c
|
||||
}
|
||||
|
||||
|
||||
|
||||
size_t obi_get_platform_header_size()
|
||||
// TODO make private
|
||||
size_t obi_calculate_header_size(index_t nb_elements_per_line, int64_t elts_names_length)
|
||||
{
|
||||
size_t header_size;
|
||||
size_t rounded_header_size;
|
||||
double multiple;
|
||||
|
||||
header_size = sizeof(OBIDMS_column_header_t);
|
||||
header_size = header_size + (nb_elements_per_line*2)*sizeof(int64_t) + elts_names_length*sizeof(char);
|
||||
|
||||
multiple = ceil((double) header_size / (double) getpagesize());
|
||||
|
||||
@ -891,6 +915,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
char* elements_names,
|
||||
bool elt_names_formatted,
|
||||
bool tuples,
|
||||
bool to_eval,
|
||||
const char* indexer_name,
|
||||
const char* associated_column_name,
|
||||
obiversion_t associated_column_version,
|
||||
@ -911,7 +936,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
OBIType_t stored_data_type;
|
||||
char* final_indexer_name;
|
||||
char* built_elements_names = NULL;
|
||||
int elts_names_length;
|
||||
int64_t elts_names_length;
|
||||
|
||||
new_column = NULL;
|
||||
|
||||
@ -1012,12 +1037,11 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
|
||||
// Format the elements names string
|
||||
if (! elt_names_formatted)
|
||||
format_elements_names(elements_names, &elts_names_length);
|
||||
else
|
||||
elts_names_length = get_formatted_elt_names_length(elements_names);
|
||||
format_elements_names(elements_names);
|
||||
elts_names_length = get_formatted_elt_names_length(elements_names, nb_elements_per_line);
|
||||
|
||||
// Calculate the size needed
|
||||
header_size = obi_get_platform_header_size();
|
||||
header_size = obi_calculate_header_size(nb_elements_per_line, elts_names_length);
|
||||
data_size = obi_array_sizeof(stored_data_type, nb_lines, nb_elements_per_line);
|
||||
file_size = header_size + data_size;
|
||||
|
||||
@ -1107,12 +1131,14 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
header->stored_data_type = stored_data_type;
|
||||
header->returned_data_type = returned_data_type;
|
||||
header->tuples = tuples;
|
||||
header->to_eval = to_eval;
|
||||
header->creation_date = time(NULL);
|
||||
header->version = version_number;
|
||||
header->cloned_from = -1;
|
||||
header->finished = false;
|
||||
|
||||
set_elements_names(new_column, elements_names, elts_names_length);
|
||||
set_elements_names(new_column, elements_names, elts_names_length, nb_elements_per_line);
|
||||
read_elt_names_informations(header);
|
||||
|
||||
// Free the element names if they were built
|
||||
if (built_elements_names != NULL)
|
||||
@ -1237,7 +1263,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||
}
|
||||
|
||||
// Open the column file, ALWAYS READ-ONLY
|
||||
column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDONLY);
|
||||
column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDWR);
|
||||
if (column_file_descriptor < 0)
|
||||
{
|
||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||
@ -1273,7 +1299,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||
column->column_directory = column_directory;
|
||||
column->header = mmap(NULL,
|
||||
header_size,
|
||||
PROT_READ,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED,
|
||||
column_file_descriptor,
|
||||
0
|
||||
@ -1288,6 +1314,9 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Set the pointers relative to elements names informations in the header
|
||||
read_elt_names_informations(column->header);
|
||||
|
||||
// Map the data
|
||||
column->data = mmap(NULL,
|
||||
(column->header)->data_size,
|
||||
@ -1380,6 +1409,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
||||
(column_to_clone->header)->elements_names,
|
||||
true,
|
||||
(column_to_clone->header)->tuples,
|
||||
(column_to_clone->header)->to_eval,
|
||||
(column_to_clone->header)->indexer_name,
|
||||
((column_to_clone->header)->associated_column).column_name,
|
||||
((column_to_clone->header)->associated_column).version,
|
||||
@ -1821,8 +1851,8 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char*
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Open the column file (READ-ONLY)
|
||||
column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDONLY);
|
||||
// Open the column file
|
||||
column_file_descriptor = openat(column_directory->dir_fd, column_file_name, O_RDWR);
|
||||
if (column_file_descriptor < 0)
|
||||
{
|
||||
obidebug(1, "\nError opening a column file");
|
||||
@ -1842,10 +1872,10 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char*
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Fill the header structure
|
||||
// Map the header structure
|
||||
header = mmap(NULL,
|
||||
header_size,
|
||||
PROT_READ,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED,
|
||||
column_file_descriptor,
|
||||
0
|
||||
@ -1859,6 +1889,9 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char*
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Read the element names informations (storing pointers on informations)
|
||||
read_elt_names_informations(header);
|
||||
|
||||
if (close(column_file_descriptor) < 0)
|
||||
{
|
||||
obi_set_errno(OBICOL_UNKNOWN_ERROR);
|
||||
@ -1888,7 +1921,7 @@ index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const cha
|
||||
{
|
||||
int* elt_names_idx;
|
||||
|
||||
elt_names_idx = bsearch_user_data(element_name, (column->header)->sorted_elements_idx, (column->header)->nb_elements_per_line, sizeof(int), column->header, cmp_elements_names_with_name_and_idx);
|
||||
elt_names_idx = bsearch_user_data(element_name, (column->header)->sorted_elements_idx, (column->header)->nb_elements_per_line, sizeof(index_t), column->header, cmp_elements_names_with_name_and_idx);
|
||||
|
||||
if (elt_names_idx != NULL)
|
||||
return (index_t)(*elt_names_idx);
|
||||
@ -1906,7 +1939,7 @@ char* obi_get_elements_names(OBIDMS_column_p column)
|
||||
int elt_idx;
|
||||
int len;
|
||||
|
||||
elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
|
||||
elements_names = (char*) malloc((column->header)->elements_names_length * sizeof(char));
|
||||
if (elements_names == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
@ -2148,6 +2181,8 @@ int obi_clean_unfinished_columns(OBIDMS_p dms)
|
||||
// dir_to_delete[ddir] = column_dir_path;
|
||||
// ddir++;
|
||||
// }
|
||||
// else
|
||||
// free(column_dir_path);
|
||||
|
||||
free(col_name);
|
||||
}
|
||||
|
Reference in New Issue
Block a user