The element names, when a column contains several elements per line, are now formatted with '\0' as the separator and handled in a more optimized way.
This commit is contained in:
Celine Mercier
2017-01-31 16:48:06 +01:00
parent 651c1d7845
commit e50da64ea1
6 changed files with 411 additions and 152 deletions

View File

@ -119,7 +119,7 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
/**
* @brief Internal function building the default elements names of the lines of a
* column (i.e. "0;1;2;...;n").
* column, with ';' as separator (i.e. "0;1;2;...;n\0").
*
* @warning The returned pointer has to be freed by the caller.
*
@ -134,12 +134,61 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
static char* build_default_elements_names(index_t nb_elements_per_line);
/**
* @brief Internal function formatting the elements names of the lines of a
* column with '\0' as separator (e.g. "0\01\02\0...\0n\0").
*
* @param elements_names The character string formatted with ';' as separator (e.g. "0;1;2;...;n\0").
* @param elts_names_length A pointer to an integer where the function stores the length of the character string, measured before the ';' separators are replaced.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static void format_elements_names(char* elements_names, int* elts_names_length);
/**
* @brief Internal function comparing two element names using their sorted index, using data stored in the column header.
*
* @param n1_sort_idx A pointer on the sorted index of the first name.
* @param n2_sort_idx A pointer on the sorted index of the second name.
* @param h A pointer on the column header.
*
* @returns A value < 0 if name1 < name2,
* a value > 0 if name1 > name2,
* and 0 if name1 == name2.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h);
/**
* @brief Internal function comparing two element names using a pointer on the first name and the sorted index of the second name,
* using data stored in the column header.
*
* @param name1 A pointer on the first name.
* @param n2_sort_idx A pointer on the sorted index of the second name.
* @param h A pointer on the column header.
*
* @returns A value < 0 if name1 < name2,
* a value > 0 if name1 > name2,
* and 0 if name1 == name2.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h);
/**
* @brief Internal function setting the elements names of the lines of a
* column in the header of the OBIDMS column structure.
*
* @param column A pointer as returned by obi_create_column().
* @param elements_names The names of the elements with ';' as separator.
* @param elements_names The names of the elements as formatted by format_elements_names().
* @param elts_names_length The length of elements_names.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
@ -147,7 +196,35 @@ static char* build_default_elements_names(index_t nb_elements_per_line);
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names);
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length);
/**
* @brief Internal function counting the number of elements names in a character array.
*
* @param elements_names A pointer on the character string corresponding to the elements names,
* formatted with ';' or with '\0' as separator.
* @param elt_names_formatted Whether the separator is ';' (false), or '\0' (true, as formatted by format_elements_names()).
*
* @returns The number of elements names in the character array.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted);
/**
* @brief Internal function computing the length of a character array containing elements names as formatted by format_elements_names().
*
* @param elements_names A pointer on the character string corresponding to the elements names as formatted by format_elements_names().
*
* @returns The length of the character array, i.e. the index of the first of the two consecutive '\0' characters that end it.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static int get_formatted_elt_names_length(const char* elements_names);
/**
@ -198,6 +275,7 @@ static char* build_column_file_name(const char* column_name, obiversion_t versio
}
static char* build_version_file_name(const char* column_name)
{
char* file_name;
@ -222,6 +300,7 @@ static char* build_version_file_name(const char* column_name)
}
static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_directory, bool block)
{
off_t loc_size;
@ -346,6 +425,7 @@ static obiversion_t obi_get_new_version_number(OBIDMS_column_directory_p column_
}
static obiversion_t create_version_file(OBIDMS_column_directory_p column_directory)
{
off_t loc_size;
@ -437,10 +517,12 @@ static obiversion_t create_version_file(OBIDMS_column_directory_p column_directo
}
static char* build_default_elements_names(index_t nb_elements_per_line)
{
char* elements_names;
int i;
int len;
elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
if (elements_names == NULL)
@ -457,31 +539,169 @@ static char* build_default_elements_names(index_t nb_elements_per_line)
return NULL;
}
for (i= 0; i < nb_elements_per_line; i++)
sprintf(elements_names, "%d", i);
len = 0;
for (i = 0; i < nb_elements_per_line; i++)
len += sprintf(elements_names+len, "%d;", i);
// Terminal character
elements_names[strlen(elements_names)] = '\0';
elements_names[len-1] = '\0'; // -1 to delete last ';'
len--;
return elements_names;
}
int obi_column_set_elements_names(OBIDMS_column_p column, char* elements_names)
/**
 * Converts, in place, a ';'-separated list of element names into the
 * '\0'-separated form.
 *
 * The length is measured with strlen() before any separator is replaced,
 * because afterwards the first '\0' would end the string at the first name.
 * No size check against ELEMENTS_NAMES_MAX is done here.
 *
 * @param elements_names    Writable, '\0'-terminated string with ';' as separator; modified in place.
 * @param elts_names_length Output: the strlen() of elements_names before the replacement.
 */
static void format_elements_names(char* elements_names, int* elts_names_length)
{
    int i;
    int length;

    // Measure first: once a ';' has become '\0', strlen() would stop at the first name
    length = (int) strlen(elements_names);
    *elts_names_length = length;

    // Replace the ';' with '\0'
    for (i = 0; i < length; i++)
    {
        if (elements_names[i] == ';')
            elements_names[i] = '\0';
    }
}
/**
 * Comparison callback ordering two element names of a column.
 *
 * Each of the first two arguments points to an int that is used as an index
 * into header->elements_names_idx; the value found there is the offset of the
 * corresponding name inside header->elements_names. The two names are then
 * compared with strcmp().
 *
 * @param n1_sort_idx Pointer to the int designating the first name.
 * @param n2_sort_idx Pointer to the int designating the second name.
 * @param h           Pointer to the column header holding the names and their offsets.
 *
 * @returns The result of strcmp() on the two names.
 */
static int cmp_elements_names_with_idx(const void* n1_sort_idx, const void* n2_sort_idx, const void* h)
{
    OBIDMS_column_header_p col_header = (OBIDMS_column_header_p) h;

    int first_elt  = *((int*) n1_sort_idx);
    int second_elt = *((int*) n2_sort_idx);

    // Offsets of the two names in the header's character array
    int first_offset  = (col_header->elements_names_idx)[first_elt];
    int second_offset = (col_header->elements_names_idx)[second_elt];

    char* first_name  = (col_header->elements_names) + first_offset;
    char* second_name = (col_header->elements_names) + second_offset;

    return strcmp(first_name, second_name);
}
/**
 * Comparison callback between a name given directly and a name stored in a
 * column header.
 *
 * The second argument points to an int used as an index into
 * header->elements_names_idx; the value found there is the offset of the
 * stored name inside header->elements_names.
 *
 * @param name1       Pointer to the first name ('\0'-terminated string).
 * @param n2_sort_idx Pointer to the int designating the second name.
 * @param h           Pointer to the column header holding the names and their offsets.
 *
 * @returns The result of strcmp() on the two names.
 */
static int cmp_elements_names_with_name_and_idx(const void* name1, const void* n2_sort_idx, const void* h)
{
    OBIDMS_column_header_p col_header = (OBIDMS_column_header_p) h;

    int stored_elt = *((int*) n2_sort_idx);

    // Offset of the stored name in the header's character array
    int stored_offset = (col_header->elements_names_idx)[stored_elt];
    char* stored_name = (col_header->elements_names) + stored_offset;

    return strcmp(name1, stored_name);
}
/**
 * Stores the elements names in the column header and builds the two indexes
 * used to find a name.
 *
 * The names are copied with memcpy() because they are separated by '\0': a
 * string copy would stop at the end of the first name.
 *
 * On success the header contains:
 *  - elements_names:        the names, followed by two '\0' characters;
 *  - elements_names_length: elts_names_length;
 *  - elements_names_idx:    for each name, its offset in elements_names;
 *  - sorted_elements_idx:   the name numbers, sorted with
 *                           cmp_elements_names_with_idx().
 *
 * @param column            The column whose header is filled.
 * @param elements_names    The names with '\0' as separator; the first elts_names_length characters are read.
 * @param elts_names_length The length of elements_names, not counting the terminal '\0' characters.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if the names do not fit in ELEMENTS_NAMES_MAX characters.
 */
static int set_elements_names(OBIDMS_column_p column, char* elements_names, int elts_names_length)
{
    int i;
    int name_count;

    // Check that the elements names are not too long (2 extra characters are needed for the terminal '\0's)
    if (elts_names_length+2 > ELEMENTS_NAMES_MAX)
    {
        obi_set_errno(OBICOL_UNKNOWN_ERROR);
        obidebug(1, "\nError: element names too long (max: %d)", ELEMENTS_NAMES_MAX);
        return -1;
    }

    // Copy the elements names in the header
    memcpy((column->header)->elements_names, elements_names, elts_names_length*sizeof(char));

    // Terminal characters: two consecutive '\0' mark the end of the names
    (column->header)->elements_names[elts_names_length] = '\0';
    (column->header)->elements_names[elts_names_length + 1] = '\0';

    // Store the length of the character array containing the elements names
    (column->header)->elements_names_length = elts_names_length;

    // Index the first element name, which always starts at offset 0
    ((column->header)->elements_names_idx)[0] = 0;
    ((column->header)->sorted_elements_idx)[0] = 0;
    name_count = 1;

    // Every separator found after the first character starts a new name right after it
    for (i = 1; i < elts_names_length; i++)
    {
        if (elements_names[i] == '\0')
        {
            ((column->header)->elements_names_idx)[name_count] = i+1;
            ((column->header)->sorted_elements_idx)[name_count] = name_count;
            name_count++;
        }
    }

    // Build the sorted index
    qsort_user_data((column->header)->sorted_elements_idx, name_count, sizeof(int), column->header, cmp_elements_names_with_idx);

    return 0;
}
index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
/**
 * Counts the elements names stored in a character array.
 *
 * Every separator and every '\0' met before the end of the array counts for
 * one name. The end of the array is a single '\0' when the names are not
 * formatted, and two consecutive '\0' when they are.
 *
 * @param elements_names      The names, with ';' or '\0' as separator.
 * @param elt_names_formatted Whether the names are formatted (true) or not (false).
 *
 * @returns The number of elements names in the character array.
 */
static index_t check_elt_names_count(const char* elements_names, bool elt_names_formatted)
{
    index_t nb_names = 0;
    char    separator;
    char    current;
    bool    at_end;
    int     pos;

    separator = elt_names_formatted ? FORMATTED_ELT_NAMES_SEPARATOR : NOT_FORMATTED_ELT_NAMES_SEPARATOR;

    for (pos = 0; ; pos++)
    {
        current = elements_names[pos];

        // The following character is only looked at when the current one is '\0' and the names are formatted
        at_end = (current == '\0') && ((! elt_names_formatted) || (elements_names[pos+1] == '\0'));

        if ((current == separator) || (current == '\0'))
            nb_names++;

        if (at_end)
            break;
    }

    return nb_names;
}
/**
 * Computes the length of a character array of elements names separated by
 * '\0' and ended by two consecutive '\0'.
 *
 * @param elements_names The names, ended by two consecutive '\0'.
 *
 * @returns The index of the first of the two consecutive '\0' ending the array.
 */
static int get_formatted_elt_names_length(const char* elements_names)
{
    int pos = 0;

    // Move forward until the current character and the next one are both '\0';
    // the next character is only read when the current one is '\0'
    while ((elements_names[pos] != '\0') || (elements_names[pos+1] != '\0'))
        pos++;

    return pos;
}
/*
 * Returns how many lines of a column fit in one memory page: the page size
 * given by getpagesize() divided by the size of one line, the latter being
 * obi_sizeof(data_type) multiplied by nb_elements_per_line.
 *
 * data_type:            the type of the data stored in the column.
 * nb_elements_per_line: the number of elements stored in each line.
 *
 * NOTE(review): presumably obi_sizeof() returns the size in bytes of one
 * element of the given type - confirm against its definition.
 * NOTE(review): there is no guard against a zero divisor here; callers are
 * expected to pass a valid type and a non-zero number of elements.
 */
static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_line)
{
return getpagesize() / (obi_sizeof(data_type) * nb_elements_per_line);
}
@ -493,6 +713,7 @@ index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_per_lin
*
**********************************************************************/
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory)
{
off_t loc_size;
@ -557,6 +778,7 @@ obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_dire
}
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name)
{
OBIDMS_column_directory_p column_directory;
@ -582,6 +804,7 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c
}
size_t obi_get_platform_header_size()
{
size_t header_size;
@ -607,7 +830,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
const char* indexer_name,
const char* associated_column_name,
obiversion_t associated_column_version,
const char* comments
const char* comments,
bool elt_names_formatted
)
{
OBIDMS_column_p new_column;
@ -623,6 +847,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
OBIType_t returned_data_type;
OBIType_t stored_data_type;
char* final_indexer_name;
char* built_elements_names = NULL;
int elts_names_length;
new_column = NULL;
@ -695,31 +921,29 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
else if (nb_lines < minimum_line_count)
nb_lines = minimum_line_count;
// Check and build if needed the element names
if ((elements_names == NULL) || (strcmp(elements_names, "") == 0)) // Build the default element names: str of the element index
// Check, format, and build if needed the element names
if ((elements_names == NULL) || (*elements_names == '\0')) // Build the default element names: str of the element index
{
elements_names = build_default_elements_names(nb_elements_per_line);
if (elements_names == NULL)
built_elements_names = build_default_elements_names(nb_elements_per_line);
if (built_elements_names == NULL)
return NULL;
elements_names = built_elements_names;
}
else if (((elements_names == NULL) || (strcmp(elements_names, "") != 0)) && (nb_elements_per_line > 1))
else
{ // The number of elements names should be equal to the number of elements per line
char* token;
index_t n = 0;
token = strdup(elements_names);
token = strtok(token, ";");
while (token != NULL)
if (check_elt_names_count(elements_names, elt_names_formatted) != nb_elements_per_line)
{
token = strtok(NULL, ";");
n++;
}
if (n != nb_elements_per_line)
{
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line");
obidebug(1, "\nCan't create column because the number of elements names given is not equal to the number of elements per line:"
"\n%lld elements per line\nelements names:%s\n", nb_elements_per_line, elements_names);
return NULL;
}
}
// TODO what if 1 element and name specified? doc
// Format the elements names string
if (! elt_names_formatted)
format_elements_names(elements_names, &elts_names_length);
else
elts_names_length = get_formatted_elt_names_length(elements_names);
// Calculate the size needed
header_size = obi_get_platform_header_size();
@ -816,11 +1040,11 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
header->version = version_number;
header->cloned_from = -1;
obi_column_set_elements_names(new_column, elements_names);
set_elements_names(new_column, elements_names, elts_names_length);
// Free the element names if they were built
if ((elements_names == NULL) || (strcmp(elements_names, "") == 0))
free(elements_names);
if (built_elements_names != NULL)
free(built_elements_names);
strncpy(header->name, column_name, OBIDMS_COLUMN_MAX_NAME);
@ -886,6 +1110,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
}
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
const char* column_name,
obiversion_t version_number)
@ -1043,6 +1268,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
}
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
OBIDMS_column_p line_selection,
const char* column_name,
@ -1083,7 +1309,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
(column_to_clone->header)->indexer_name,
((column_to_clone->header)->associated_column).column_name,
((column_to_clone->header)->associated_column).version,
(column_to_clone->header)->comments
(column_to_clone->header)->comments,
true
);
if (new_column == NULL)
@ -1097,6 +1324,8 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
return NULL;
}
(new_column->header)->cloned_from = (column_to_clone->header)->version;
if (clone_data && (line_selection == NULL))
@ -1137,6 +1366,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
}
int obi_close_column(OBIDMS_column_p column)
{
int ret_val = 0;
@ -1185,6 +1415,7 @@ int obi_close_column(OBIDMS_column_p column)
}
int obi_clone_column_indexer(OBIDMS_column_p column)
{
char* new_indexer_name;
@ -1208,6 +1439,7 @@ int obi_clone_column_indexer(OBIDMS_column_p column)
}
int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap/remap?
{
size_t file_size;
@ -1309,6 +1541,7 @@ int obi_truncate_column(OBIDMS_column_p column) // TODO is it necessary to unmap
}
int obi_enlarge_column(OBIDMS_column_p column)
{
size_t file_size;
@ -1363,7 +1596,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
header_size = (column->header)->header_size;
file_size = header_size + new_data_size;
// Enlarge the file // TODO isn't it possible that this makes the file "move"?
// Enlarge the file
if (ftruncate(column_file_descriptor, file_size) < 0)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
@ -1414,6 +1647,7 @@ int obi_enlarge_column(OBIDMS_column_p column)
}
void obi_ini_to_NA_values(OBIDMS_column_p column,
index_t first_line_nb,
index_t nb_lines)
@ -1479,6 +1713,7 @@ void obi_ini_to_NA_values(OBIDMS_column_p column,
}
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number)
{
OBIDMS_column_header_p header;
@ -1562,6 +1797,7 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char*
}
int obi_close_header(OBIDMS_column_header_p header)
{
if (munmap(header, header->header_size) < 0)
@ -1574,47 +1810,56 @@ int obi_close_header(OBIDMS_column_header_p header)
}
// TODO to be rewritten in an optimized and safe way if possible
/*
 * Finds the index of an element in the lines of a column from its name.
 *
 * The name is looked up in the sorted index of the column header
 * (sorted_elements_idx) with bsearch_user_data(), the names being compared by
 * cmp_elements_names_with_name_and_idx(). The entry found holds the index of
 * the element.
 *
 * The names stored in the header are separated by '\0', so they must not be
 * tokenized on ';' here.
 *
 * column:       the column in which the element is looked for.
 * element_name: the name of the element ('\0'-terminated string).
 *
 * Returns the index of the element, or OBIIdx_NA if no element has that name
 * (in which case the error number is set to OBICOL_UNKNOWN_ERROR).
 */
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name)
{
    int* elt_names_idx;

    elt_names_idx = bsearch_user_data(element_name,
                                      (column->header)->sorted_elements_idx,
                                      (column->header)->nb_elements_per_line,
                                      sizeof(int),
                                      column->header,
                                      cmp_elements_names_with_name_and_idx);

    if (elt_names_idx != NULL)
        return (index_t)(*elt_names_idx);

    obi_set_errno(OBICOL_UNKNOWN_ERROR);
    obidebug(1, "\nError: could not find element name %s", element_name);
    return OBIIdx_NA;
}
// TODO doc, returns elements names with ; as separator (discuss maybe char**)
/*
 * Builds a new character string containing the elements names of a column
 * with ';' as separator (e.g. "0;1;2").
 *
 * The names are read from the column header, in the order of
 * elements_names_idx, and written one after the other, each followed by ';'.
 * The last ';' is then replaced by the terminal '\0'.
 *
 * column: the column whose elements names are wanted.
 *
 * Returns a newly allocated string of ELEMENTS_NAMES_MAX characters that has
 * to be freed by the caller, or NULL if the memory could not be allocated
 * (the error number is then set to OBI_MALLOC_ERROR).
 */
char* obi_get_elements_names(OBIDMS_column_p column)
{
    char* elements_names;
    int   i;
    int   out_pos;
    int   name_offset;
    int   name_length;

    elements_names = (char*) malloc(ELEMENTS_NAMES_MAX * sizeof(char));
    if (elements_names == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for elements names");
        return NULL;
    }

    out_pos = 0;
    for (i = 0; i < (column->header)->nb_elements_per_line; i++)
    {
        name_offset = ((column->header)->elements_names_idx)[i];
        name_length = strlen(((column->header)->elements_names)+name_offset);

        memcpy(elements_names+out_pos, ((column->header)->elements_names)+name_offset, name_length*sizeof(char));
        out_pos = out_pos + name_length;

        elements_names[out_pos] = ';';
        out_pos++;
    }

    // The terminal '\0' replaces the last ';'. When no name was written there is
    // no ';' to replace, and writing at out_pos - 1 would be out of the buffer
    if (out_pos > 0)
        out_pos--;
    elements_names[out_pos] = '\0';

    return elements_names;
}
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
{
// Check if the column is read-only
@ -1649,6 +1894,7 @@ int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb)
}
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb)
{
if ((line_nb+1) > ((column->header)->line_count))