Columns: element name information is now kept in a memory arena of

adapted size in the header, and a boolean was added to the header indicating
whether the values should be evaluated (typically character strings to
be evaluated in Python).
This commit is contained in:
Celine Mercier
2017-12-13 22:46:50 +01:00
parent 2df5932b67
commit 1fd3323372
10 changed files with 177 additions and 111 deletions

View File

@ -28,9 +28,10 @@
#include "obiblob_indexer.h"
#define ELEMENTS_NAMES_MAX (20480) /**< The maximum length of the list of elements names. // TODO Discuss
// TODO delete useless ones, add default nb?
#define ELEMENTS_NAMES_MAX (1000000) /**< The maximum length of the list of elements names. // TODO Discuss
*/
#define NB_ELTS_MAX_IF_DEFAULT_NAME (20480) /**< The maximum number of elements per line if the default element names
#define NB_ELTS_MAX_IF_DEFAULT_NAME (1000000) /**< The maximum number of elements per line if the default element names
* are used ("0\01\02\0...\0n"), considering ELEMENTS_NAMES_MAX. // TODO not up to date
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
@ -70,16 +71,6 @@ typedef struct OBIDMS_column_header {
*/
index_t nb_elements_per_line; /**< Number of elements per line.
*/
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with '\0' as separator
* and '\0\0' as terminal flag.
* (default are the indices: "0\01\02\0...\0n\0\0").
*/
int elements_names_length; /**< Length of the character array where the elements names are stored.
*/
int elements_names_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the start of each element name in elements_names.
*/
int sorted_elements_idx[NB_ELTS_MAX_IF_DEFAULT_NAME]; /**< Index for the sorted element names in elements_names_idx.
*/
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
* element from the column.
*/
@ -88,6 +79,9 @@ typedef struct OBIDMS_column_header {
*/
bool tuples; /**< A boolean indicating whether the column contains indices referring to indexed tuples.
*/
bool to_eval; /**< A boolean indicating whether the column contains expressions that should be evaluated
* (typically OBI_STR columns containing character strings to be evaluated by Python).
*/
time_t creation_date; /**< Date of creation of the file.
*/
obiversion_t version; /**< Version of the column.
@ -106,6 +100,18 @@ typedef struct OBIDMS_column_header {
*/
char comments[COMMENTS_MAX_LENGTH+1]; /**< Comments stored as a classical zero end C string.
*/
int64_t elements_names_length; /**< Length of the character array where the elements names are stored.
*/
char* elements_names; /**< Pointer into mem_arena to the names of the line elements, with '\0' as separator
* and '\0\0' as terminal flag.
* (default are the indices: "0\01\02\0...\0n\0\0").
*/
int64_t* elements_names_idx; /**< Pointer into mem_arena to the index of the start of each element name in elements_names.
*/
int64_t* sorted_elements_idx; /**< Index for the sorted element names in elements_names_idx (stored in mem_arena).
*/
byte_t mem_arena[]; /**< Memory arena where the elements names, the elements names index and the sorted elements index are stored.
*/
} OBIDMS_column_header_t, *OBIDMS_column_header_p;
@ -212,17 +218,19 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c
/**
* @brief Returns the header size in bytes of a column on this platform.
* @brief Returns the header size in bytes of a column.
*
* The header size is defined as a multiple of the memory page size.
* As of now the header size is defined as one time the page size.
* The header size is rounded to a multiple of the memory page size.
*
* @param nb_elements_per_line The number of elements per line.
* @param elts_names_length The length of elements_names including the two terminal '\0's.
*
* @returns The header size in bytes.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t obi_get_platform_header_size();
size_t obi_calculate_header_size(index_t nb_elements_per_line, int64_t elts_names_length);
/**
@ -242,6 +250,8 @@ size_t obi_get_platform_header_size();
* NULL or "" if the default names are to be used ("0\01\02\0...\0n").
* @param elt_names_formatted Whether the separator for the elements names is ';' (false), or '\0' (true, as formatted by format_elements_names()).
* @param tuples A boolean indicating whether the column should contain indices referring to indexed tuples.
* @param to_eval A boolean indicating whether the column contains expressions that should be evaluated
* (typically OBI_STR columns containing character strings to be evaluated by Python).
* @param indexer_name The name of the indexer if there is one associated with the column.
* If NULL or "", the indexer name is set as the column name.
* @param associated_column_name The name of the associated column if there is one (otherwise NULL or "").
@ -262,6 +272,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
char* elements_names,
bool elt_names_formatted,
bool tuples,
bool to_eval,
const char* indexer_name,
const char* associated_column_name,
obiversion_t associated_column_version,