Read-only AVLs are now hard-linked instead of copied when cloning an AVL

group to make it writable. Also fixed several bugs when handling AVL
groups.
This commit is contained in:
Celine Mercier
2016-06-03 19:02:46 +02:00
parent 799b942017
commit fc3641d7ff
6 changed files with 254 additions and 39 deletions

View File

@ -107,6 +107,42 @@ static char* build_avl_file_name(const char* avl_name);
static char* build_avl_data_file_name(const char* avl_name);
/**
* @brief Internal function building the full path of an AVL tree file.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param dms A pointer to the OBIDMS to which the AVL tree belongs.
* @param avl_name The name of the AVL tree.
* @param avl_idx The index of the AVL if it's part of an AVL group, or -1 if not.
*
* @returns A pointer to the full path of the file where the AVL tree is stored.
* @retval NULL if an error occurred.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static char* get_full_path_of_avl_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx);
/**
* @brief Internal function building the full path of an AVL data file.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param dms A pointer to the OBIDMS to which the AVL tree belongs.
* @param avl_name The name of the AVL tree.
* @param avl_idx The index of the AVL if it's part of an AVL group, or -1 if not.
*
* @returns A pointer to the full path of the file where the data referred to by the AVL tree is stored.
* @retval NULL if an error occurred.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static char* get_full_path_of_avl_data_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx);
/**
* @brief Internal function returning the size of an AVL tree header on this platform,
* including the size of the bloom filter associated with the AVL tree.
@ -253,9 +289,12 @@ int remap_an_avl(OBIDMS_avl_p avl);
/**
* @brief Internal function (re)mapping the tree and data parts of an AVL tree structure.
* @brief Internal function creating and adding a new AVL in an AVL group.
*
* @param avl A pointer to the AVL tree group structure.
* @warning The previous AVL in the list of the group is unmapped,
* if it's not the 1st AVL being added.
*
* @param avl A pointer on the AVL tree group structure.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
@ -547,6 +586,102 @@ static char* build_avl_data_file_name(const char* avl_name)
}
/*
 * Builds the full path of an AVL tree file: the path of the AVL directory
 * (as returned by get_full_path_of_avl_dir() for the base name), followed by
 * "/" and the AVL file name (as returned by build_avl_file_name()).
 *
 * If avl_idx >= 0, the file name is built from the indexed AVL name
 * (build_avl_name_with_idx()), otherwise from avl_name itself.
 *
 * The returned string is allocated with malloc() and has to be freed by the
 * caller. NULL is returned if any step fails.
 */
static char* get_full_path_of_avl_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx)
{
	char*  complete_avl_name;
	char*  avl_file_name;
	char*  avl_dir_path;
	char*  full_path;
	size_t dir_length;
	size_t file_name_length;

	// Build the name of the AVL, with its index if it is part of a group
	if (avl_idx >= 0)
	{
		complete_avl_name = build_avl_name_with_idx(avl_name, avl_idx);
		if (complete_avl_name == NULL)
			return NULL;
	}
	else
	{
		complete_avl_name = (char*) malloc((strlen(avl_name)+1)*sizeof(char));
		if (complete_avl_name == NULL)
		{
			obi_set_errno(OBI_MALLOC_ERROR);
			obidebug(1, "\nError allocating memory for an AVL name");
			return NULL;
		}
		strcpy(complete_avl_name, avl_name);
	}

	avl_file_name = build_avl_file_name(complete_avl_name);
	if (avl_file_name == NULL)
	{
		free(complete_avl_name);
		return NULL;
	}

	// The directory is named after the base name, not the indexed name
	avl_dir_path = get_full_path_of_avl_dir(dms, avl_name);
	if (avl_dir_path == NULL)
	{
		free(complete_avl_name);
		free(avl_file_name);
		return NULL;
	}

	// Assemble "<dir>/<file>" in a buffer of exactly the needed size rather than
	// appending to the directory buffer, whose capacity is not known here
	dir_length = strlen(avl_dir_path);
	file_name_length = strlen(avl_file_name);
	full_path = (char*) malloc((dir_length + 1 + file_name_length + 1)*sizeof(char));
	if (full_path == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for the full path of an AVL file");
		free(complete_avl_name);
		free(avl_file_name);
		free(avl_dir_path);
		return NULL;
	}
	memcpy(full_path, avl_dir_path, dir_length);
	full_path[dir_length] = '/';
	memcpy(full_path + dir_length + 1, avl_file_name, file_name_length + 1);	// + 1 copies the terminating '\0'

	free(complete_avl_name);
	free(avl_file_name);	// was leaked on the success path
	free(avl_dir_path);

	return full_path;
}
/*
 * Builds the full path of an AVL data file: the path of the AVL directory
 * (as returned by get_full_path_of_avl_dir() for the base name), followed by
 * "/" and the AVL data file name (as returned by build_avl_data_file_name()).
 *
 * If avl_idx >= 0, the file name is built from the indexed AVL name
 * (build_avl_name_with_idx()), otherwise from avl_name itself.
 *
 * The returned string is allocated with malloc() and has to be freed by the
 * caller. NULL is returned if any step fails.
 */
static char* get_full_path_of_avl_data_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx)
{
	char*  complete_avl_name;
	char*  avl_data_file_name;
	char*  avl_dir_path;
	char*  full_path;
	size_t dir_length;
	size_t file_name_length;

	// Build the name of the AVL, with its index if it is part of a group
	if (avl_idx >= 0)
	{
		complete_avl_name = build_avl_name_with_idx(avl_name, avl_idx);
		if (complete_avl_name == NULL)
			return NULL;
	}
	else
	{
		complete_avl_name = (char*) malloc((strlen(avl_name)+1)*sizeof(char));
		if (complete_avl_name == NULL)
		{
			obi_set_errno(OBI_MALLOC_ERROR);
			obidebug(1, "\nError allocating memory for an AVL name");
			return NULL;
		}
		strcpy(complete_avl_name, avl_name);
	}

	avl_data_file_name = build_avl_data_file_name(complete_avl_name);
	if (avl_data_file_name == NULL)
	{
		free(complete_avl_name);
		return NULL;
	}

	// The directory is named after the base name, not the indexed name
	avl_dir_path = get_full_path_of_avl_dir(dms, avl_name);
	if (avl_dir_path == NULL)
	{
		free(complete_avl_name);
		free(avl_data_file_name);
		return NULL;
	}

	// Assemble "<dir>/<file>" in a buffer of exactly the needed size rather than
	// appending to the directory buffer, whose capacity is not known here
	dir_length = strlen(avl_dir_path);
	file_name_length = strlen(avl_data_file_name);
	full_path = (char*) malloc((dir_length + 1 + file_name_length + 1)*sizeof(char));
	if (full_path == NULL)
	{
		obi_set_errno(OBI_MALLOC_ERROR);
		obidebug(1, "\nError allocating memory for the full path of an AVL data file");
		free(complete_avl_name);
		free(avl_data_file_name);
		free(avl_dir_path);
		return NULL;
	}
	memcpy(full_path, avl_dir_path, dir_length);
	full_path[dir_length] = '/';
	memcpy(full_path + dir_length + 1, avl_data_file_name, file_name_length + 1);	// + 1 copies the terminating '\0'

	free(complete_avl_name);
	free(avl_data_file_name);	// was leaked on the success path
	free(avl_dir_path);

	return full_path;
}
size_t get_avl_header_size()
{
size_t header_size;
@ -646,7 +781,6 @@ int truncate_avl_to_size_used(OBIDMS_avl_p avl) // TODO is it necessary to unmap
file_descriptor,
(avl->header)->header_size
);
if (avl->tree == MAP_FAILED)
{
obi_set_errno(OBI_AVL_ERROR);
@ -930,9 +1064,10 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group)
return -1;
}
// Unmap the previous AVL
if (unmap_an_avl((avl_group->sub_avls)[avl_group->last_avl_idx]) < 0)
return -1;
// Unmap the previous AVL if it's not the 1st
if (avl_group->last_avl_idx > 0)
if (unmap_an_avl((avl_group->sub_avls)[avl_group->last_avl_idx]) < 0)
return -1;
// Increment current AVL index
(avl_group->last_avl_idx)++;
@ -949,6 +1084,36 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group)
}
/**
 * @brief Adds an existing AVL of a source group to a destination group by hard-linking its files.
 *
 * The AVL tree file and the AVL data file of index avl_idx in the source group are
 * hard-linked under the destination group's names with the same index. The AVL is
 * then opened and stored in the next free slot of the destination group, and the
 * destination group's last_avl_idx is incremented.
 *
 * @warning The AVL is opened through the source group's DMS and name, as in the
 *          original implementation (the hard-linked files share the same content).
 *
 * @param avl_group_dest A pointer to the AVL group receiving the AVL.
 * @param avl_group_source A pointer to the AVL group owning the existing AVL.
 * @param avl_idx The index of the AVL to link, used for both source and destination file names.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred; last_avl_idx of the destination group is then left unchanged.
 */
int add_existing_avl_in_group(OBIDMS_avl_group_p avl_group_dest, OBIDMS_avl_group_p avl_group_source, int avl_idx)
{
	char*  source_avl_path = NULL;
	char*  dest_avl_path = NULL;
	char*  source_data_path = NULL;
	char*  dest_data_path = NULL;
	size_t group_capacity;
	int    new_idx;
	int    ret_val = -1;

	// Refuse to write past the end of the sub_avls array
	group_capacity = sizeof(avl_group_dest->sub_avls) / sizeof((avl_group_dest->sub_avls)[0]);
	new_idx = (avl_group_dest->last_avl_idx) + 1;
	if ((new_idx < 0) || (((size_t) new_idx) >= group_capacity))
	{
		obi_set_errno(OBI_AVL_ERROR);
		obidebug(1, "\nError adding an existing AVL in an AVL group: the group is full");
		return -1;
	}

	// Build the 4 paths first so that they can be checked, and freed on every exit path
	source_avl_path = get_full_path_of_avl_file_name(avl_group_source->dms, avl_group_source->name, avl_idx);
	dest_avl_path = get_full_path_of_avl_file_name(avl_group_dest->dms, avl_group_dest->name, avl_idx);
	source_data_path = get_full_path_of_avl_data_file_name(avl_group_source->dms, avl_group_source->name, avl_idx);
	dest_data_path = get_full_path_of_avl_data_file_name(avl_group_dest->dms, avl_group_dest->name, avl_idx);
	if ((source_avl_path == NULL) || (dest_avl_path == NULL) || (source_data_path == NULL) || (dest_data_path == NULL))
	{
		obidebug(1, "\nError building the paths of the AVL files to hard link");
		goto cleanup;
	}

	if (link(source_avl_path, dest_avl_path) < 0)
	{
		obi_set_errno(OBI_AVL_ERROR);
		obidebug(1, "\nError creating a hard link to an existing AVL tree file");
		goto cleanup;
	}

	if (link(source_data_path, dest_data_path) < 0)
	{
		obi_set_errno(OBI_AVL_ERROR);
		obidebug(1, "\nError creating a hard link to an existing AVL data file");
		// Best effort: remove the tree file link just created so that no half-linked AVL is left behind
		unlink(dest_avl_path);
		goto cleanup;
	}

	// Open AVL for that group TODO ideally not needed, but needed for now
	(avl_group_dest->sub_avls)[new_idx] = obi_open_avl(avl_group_source->dms, avl_group_source->name, avl_idx);
	if ((avl_group_dest->sub_avls)[new_idx] == NULL)
	{
		obidebug(1, "\nError opening an AVL to add in an AVL group");
		goto cleanup;
	}

	// Only advance the current AVL index once the AVL is actually in place
	avl_group_dest->last_avl_idx = new_idx;
	ret_val = 0;

cleanup:
	free(source_avl_path);
	free(dest_avl_path);
	free(source_data_path);
	free(dest_data_path);

	return ret_val;
}
int maybe_in_avl(OBIDMS_avl_p avl, Obi_blob_p value)
{
return (bloom_check(&((avl->header)->bloom_filter), value, obi_blob_sizeof(value)));
@ -1529,8 +1694,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx)
// Bloom filter
bloom_init(&((avl->header)->bloom_filter), MAX_NODE_COUNT_PER_AVL);
if (avl_idx >= 0)
free(complete_avl_name);
free(complete_avl_name);
return avl;
}
@ -1777,8 +1941,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx)
avl->dir_fd = avl_dir_file_descriptor;
avl->avl_fd = avl_file_descriptor;
if (avl_idx >= 0)
free(complete_avl_name);
free(complete_avl_name);
return avl;
}
@ -1806,6 +1969,7 @@ OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name)
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name)
{
OBIDMS_avl_group_p avl_group;
char* avl_dir_name;
avl_group = (OBIDMS_avl_group_p) malloc(sizeof(OBIDMS_avl_group_t));
if (avl_group == NULL)
@ -1815,18 +1979,22 @@ OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name)
return NULL;
}
// Create 1st avl
(avl_group->sub_avls)[0] = obi_create_avl(dms, avl_name, 0);
if ((avl_group->sub_avls)[0] == NULL)
{
obidebug(1, "\nError creating the first AVL of an AVL group");
return NULL;
}
avl_group->last_avl_idx = 0;
avl_group->last_avl_idx = -1;
avl_group->dms = dms;
strcpy(avl_group->name, avl_name);
avl_group->dms = dms;
// Create the directory for that AVL group
avl_dir_name = get_full_path_of_avl_dir(dms, avl_name);
if (avl_dir_name == NULL)
return NULL;
if (mkdirat(dms->indexer_dir_fd, avl_dir_name, 00777) < 0)
{
obi_set_errno(OBI_AVL_ERROR);
obidebug(1, "\nError creating an AVL directory");
free(avl_dir_name);
return NULL;
}
// Add in the list of open indexers
obi_dms_list_indexer(dms, avl_group);
@ -1926,20 +2094,23 @@ OBIDMS_avl_group_p obi_clone_avl_group(OBIDMS_avl_group_p avl_group, const char*
// Create the new AVL group
new_avl_group = obi_create_avl_group(avl_group->dms, new_avl_name);
// Copy the data from each old AVL to the new ones
for (i=0; i<=(avl_group->last_avl_idx); i++)
// Create hard links to all the full AVLs that won't be modified: all but the last one
for (i=0; i<(avl_group->last_avl_idx); i++)
{
if (i > 0) // Don't need to create the 1st AVL
{
if (add_new_avl_in_group(new_avl_group) < 0)
{
obi_close_avl_group(new_avl_group);
return NULL;
}
}
obi_clone_avl((avl_group->sub_avls)[i], (new_avl_group->sub_avls)[i]);
if (add_existing_avl_in_group(new_avl_group, avl_group, i) < 0)
return NULL;
}
// Create the last AVL to copy data in it
if (add_new_avl_in_group(new_avl_group) < 0)
{
obi_close_avl_group(new_avl_group);
return NULL;
}
// Copy the data from the last AVL to a new one that can be modified
obi_clone_avl((avl_group->sub_avls)[avl_group->last_avl_idx], (new_avl_group->sub_avls)[new_avl_group->last_avl_idx]);
// Close old AVL group
if (obi_close_avl_group(avl_group) < 0)
{
@ -1959,7 +2130,7 @@ int obi_close_avl(OBIDMS_avl_p avl)
ret_val = truncate_avl_to_size_used(avl);
if (munmap(avl->tree, (((avl->header)->nb_items_max) * sizeof(AVL_node_t))) < 0)
if (munmap(avl->tree, (avl->header)->avl_size) < 0)
{
obi_set_errno(OBI_AVL_ERROR);
obidebug(1, "\nError munmapping the tree of an AVL tree file");
@ -1996,9 +2167,17 @@ int obi_close_avl_group(OBIDMS_avl_group_p avl_group)
ret_val = obi_dms_unlist_indexer(avl_group->dms, avl_group);
// Close each AVL of the group
for (i=0; i < (avl_group->last_avl_idx); i++)
for (i=0; i <= (avl_group->last_avl_idx); i++)
{
// Remap all but the last AVL (already mapped) before closing to truncate and close properly
if (i < (avl_group->last_avl_idx))
{
if (remap_an_avl((avl_group->sub_avls)[i]) < 0)
ret_val = -1;
}
if (obi_close_avl((avl_group->sub_avls)[i]) < 0)
ret_val = -1;
}
free(avl_group);
}
@ -2207,6 +2386,16 @@ index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value)
index_t index_with_avl;
int i;
// Create 1st AVL if group is empty
if (avl_group->last_avl_idx == -1)
{
if (add_new_avl_in_group(avl_group) < 0)
{
obidebug(1, "\nError creating the first AVL of an AVL group");
return -1;
}
}
if (maybe_in_avl((avl_group->sub_avls)[avl_group->last_avl_idx], value))
{
index_in_avl = (int32_t) obi_avl_find((avl_group->sub_avls)[avl_group->last_avl_idx], value);
@ -2218,6 +2407,7 @@ index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value)
return index_with_avl;
}
}
for (i=0; i < (avl_group->last_avl_idx); i++)
{
if (maybe_in_avl((avl_group->sub_avls)[i], value))

View File

@ -73,7 +73,7 @@ typedef struct AVL_node {
* @brief OBIDMS AVL tree data header structure.
*/
typedef struct OBIDMS_avl_data_header {
int header_size; /**< Size of the header in bytes.
size_t header_size; /**< Size of the header in bytes.
*/
index_t data_size_used; /**< Size of the data used in bytes.
*/
@ -105,7 +105,7 @@ typedef struct OBIDMS_avl_data {
* @brief OBIDMS AVL tree header structure.
*/
typedef struct OBIDMS_avl_header {
int header_size; /**< Size of the header in bytes.
size_t header_size; /**< Size of the header in bytes.
*/
size_t avl_size; /**< Size of the AVL tree in bytes.
*/
@ -160,7 +160,7 @@ typedef struct OBIDMS_avl {
typedef struct OBIDMS_avl_group {
OBIDMS_avl_p sub_avls[MAX_NB_OF_AVLS_IN_GROUP]; /**< Array containing the pointers to the AVL trees of the group.
*/
int last_avl_idx; /**< Index in the sub_avls array of the AVL tree currently being filled.
int last_avl_idx; /**< Index in the sub_avls array of the AVL tree currently being filled.
*/
char name[AVL_MAX_NAME+1]; /**< Base name of the AVL group. The AVL trees in it have names of the form basename_idx.
*/

View File

@ -33,6 +33,8 @@ int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t lin
int i;
int ret_value;
// TODO NA
int_value_length = strlen(value);
int_value = (uint8_t*) malloc(int_value_length * sizeof(uint8_t));
@ -56,17 +58,24 @@ int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
// TODO NA
// Add the value in the indexer
idx = obi_index_uint8(column->indexer, value, value_length);
if (idx == -1) // An error occurred
{
if (obi_errno == OBI_READ_ONLY_INDEXER_ERROR)
{
// TODO PUT IN A COLUMN FUNCTION
// If the error is that the indexer is read-only, clone it
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
if (new_indexer_name == NULL)
return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
strcpy((column->header)->indexer_name, new_indexer_name);
free(new_indexer_name);
obi_set_errno(0);
// Add the value in the new indexer
idx = obi_index_uint8(column->indexer, value, value_length);
if (idx == -1)
@ -91,6 +100,8 @@ char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t l
int_value = obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, &int_value_length);
// TODO NA
value = (char*) malloc((int_value_length + 1) * sizeof(char));
// Encode int quality to char quality

View File

@ -86,7 +86,7 @@ int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value, in the character string format.
* @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set.
* @retval OBIQual_char_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -172,7 +172,7 @@ int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t lin
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value, in the character string format.
* @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set.
* @retval OBIQual_char_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since May 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)

View File

@ -32,17 +32,24 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
// TODO NA
// Add the value in the indexer
idx = obi_index_dna_seq(column->indexer, value);
if (idx == -1) // An error occurred
{
if (obi_errno == OBI_READ_ONLY_INDEXER_ERROR)
{
// TODO PUT IN A COLUMN FUNCTION
// If the error is that the indexer is read-only, clone it
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
if (new_indexer_name == NULL)
return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
strcpy((column->header)->indexer_name, new_indexer_name);
free(new_indexer_name);
obi_set_errno(0);
// Add the value in the new indexer
idx = obi_index_dna_seq(column->indexer, value);
if (idx == -1)

View File

@ -32,17 +32,24 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
// TODO NA
// Add the value in the indexer
idx = obi_index_char_str(column->indexer, value);
if (idx == -1) // An error occurred
{
if (obi_errno == OBI_READ_ONLY_INDEXER_ERROR)
{
// TODO PUT IN A COLUMN FUNCTION
// If the error is that the indexer is read-only, clone it
new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
if (new_indexer_name == NULL)
return -1;
column->indexer = obi_clone_indexer(column->indexer, new_indexer_name); // TODO Need to lock this somehow?
strcpy((column->header)->indexer_name, new_indexer_name);
free(new_indexer_name);
obi_set_errno(0);
// Add the value in the new indexer
idx = obi_index_char_str(column->indexer, value);
if (idx == -1)
@ -64,7 +71,7 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l
index_t idx;
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBISeq_NA;
return OBIStr_NA;
idx = *(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);