The elements names, when a column contains several elements per line, are now formatted with '\0' as separator and handled in a more optimized way
This commit is contained in:
Celine Mercier
2017-01-31 16:48:06 +01:00
parent 651c1d7845
commit e50da64ea1
6 changed files with 411 additions and 152 deletions

View File

@ -28,17 +28,21 @@
#include "obiblob_indexer.h"
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss
*/
#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names
* are used ("0;1;2;...;n"), considering ELEMENTS_NAMES_MAX.
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
*/
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO
*/
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
*/
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. // TODO Discuss
*/
#define NB_ELTS_MAX_IF_DEFAULT_NAME (539) /**< The maximum number of elements per line if the default element names
* are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX.
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
*/
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO
*/
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
*/
#define FORMATTED_ELT_NAMES_SEPARATOR '\0' /**< The separator between the elements names when they are formatted
* (as formatted by format_elements_names()).
*/
#define NOT_FORMATTED_ELT_NAMES_SEPARATOR ';' /**< The separator between the elements names when they are not formatted.
*/
/**
@ -56,42 +60,48 @@ typedef struct Column_reference {
* @brief OBIDMS column header structure.
*/
typedef struct OBIDMS_column_header {
size_t header_size; /**< Size of the header in bytes.
*/
size_t data_size; /**< Size of the data in bytes.
*/
index_t line_count; /**< Number of lines of data allocated.
*/
index_t lines_used; /**< Number of lines of data used.
*/
index_t nb_elements_per_line; /**< Number of elements per line.
*/
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with ';' as separator
* (no terminal ';').
* (default are the indices: "0;1;2;...;n").
*/
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
* element from the column.
*/
OBIType_t stored_data_type; /**< Type of the data that is actually stored in the data
* part of the column.
*/
time_t creation_date; /**< Date of creation of the file.
*/
obiversion_t version; /**< Version of the column.
*/
obiversion_t cloned_from; /**< Version of the column from which this column
* was cloned from (-1 if it was not created by cloning
* another column).
*/
char name[OBIDMS_COLUMN_MAX_NAME+1]; /**< The column name as a NULL terminated string.
*/
char indexer_name[INDEXER_MAX_NAME+1]; /**< If there is one, the indexer name as a NULL terminated string.
*/
Column_reference_t associated_column; /**< If there is one, the reference to the associated column.
*/
char comments[COMMENTS_MAX_LENGTH+1]; /**< Comments stored as a classical zero end C string.
*/
size_t header_size; /**< Size of the header in bytes.
*/
size_t data_size; /**< Size of the data in bytes.
*/
index_t line_count; /**< Number of lines of data allocated.
*/
index_t lines_used; /**< Number of lines of data used.
*/
index_t nb_elements_per_line; /**< Number of elements per line.
*/
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with '\0' as separator
* and '\0\0' as terminal flag
* (the default names are the indices: "0\01\02\0...\0n\0\0").
*/
int elements_names_length; /**< Length of the character array where the elements names are stored.
*/
int elements_names_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index of the start of each element name in elements_names.
*/
int sorted_elements_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the sorted element names in elements_names_idx.
*/
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
* element from the column.
*/
OBIType_t stored_data_type; /**< Type of the data that is actually stored in the data
* part of the column.
*/
time_t creation_date; /**< Date of creation of the file.
*/
obiversion_t version; /**< Version of the column.
*/
obiversion_t cloned_from; /**< Version of the column from which this column
* was cloned (-1 if it was not created by cloning
* another column).
*/
char name[OBIDMS_COLUMN_MAX_NAME+1]; /**< The column name as a null-terminated string.
*/
char indexer_name[INDEXER_MAX_NAME+1]; /**< If there is one, the indexer name as a null-terminated string.
*/
Column_reference_t associated_column; /**< If there is one, the reference to the associated column.
*/
char comments[COMMENTS_MAX_LENGTH+1]; /**< Comments stored as a classic null-terminated C string.
*/
} OBIDMS_column_header_t, *OBIDMS_column_header_p;
@ -184,12 +194,13 @@ size_t obi_get_platform_header_size();
* @param nb_lines The number of lines to be stored.
* @param nb_elements_per_line The number of elements per line. // TODO talk about default values
* @param elements_names The names of the elements, with ';' as separator (no terminal ';') if elt_names_formatted is false, or with '\0' as separator if it is true,
* NULL or "" if the default names are to be used ("0;1;2;...;n").
* NULL or "" if the default names are to be used ("0\01\02\0...\0n").
* @param indexer_name The name of the indexer if there is one associated with the column.
* If NULL or "", the indexer name is set as the column name.
* @param associated_column_name The name of the associated column if there is one.
* @param associated_column_version The version of the associated column if there is one.
* @param comments Optional comments associated with the column.
* @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()).
*
* @returns A pointer on the newly created column structure.
* @retval NULL if an error occurred.
@ -206,7 +217,8 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
const char* indexer_name,
const char* associated_column_name,
obiversion_t associated_column_version,
const char* comments
const char* comments,
bool elt_names_formatted
);
@ -353,7 +365,7 @@ int obi_close_header(OBIDMS_column_header_p header);
* @param element_name The name of the element.
*
* @returns The index of the element in a line of the column.
* @retval OBIIdx_NA if an error occurred. // TODO not sure if this is "clean".
* @retval OBIIdx_NA if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -361,6 +373,22 @@ int obi_close_header(OBIDMS_column_header_p header);
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
/**
* @brief Recovers the elements names of the lines of a column, with ';' as separator (i.e. "0;1;2;...;n\0").
*
* @warning The returned pointer has to be freed by the caller.
*
* @param column A pointer to an OBIDMS column.
*
* @returns A pointer to a character array where the elements names are stored,
* with ';' as separator (see the brief above for the format).
* @retval NULL if an error occurred.
*
* @since January 2017
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_get_elements_names(OBIDMS_column_p column);
/**
* @brief Prepares a column to set a value.
*