From 7fc1b578cf08603a9ea4f068939648f399b90a4e Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Mon, 19 Nov 2018 11:19:07 +0100 Subject: [PATCH] AVLs: AVLs in a group are not unmapped and remapped constantly anymore when adding new values, fixed a bug when calculating if an AVL data file has reached the maximum size, fixed a casting bug, and added a boolean so read-only AVLs files are not truncated --- src/obiavl.c | 103 ++++++++++++++++++++++++--------------------------- 1 file changed, 48 insertions(+), 55 deletions(-) diff --git a/src/obiavl.c b/src/obiavl.c index 360049d..5a67552 100755 --- a/src/obiavl.c +++ b/src/obiavl.c @@ -197,6 +197,7 @@ int grow_avl_data(OBIDMS_avl_data_p avl_data); * @brief Internal function closing an AVL data structure where the data referred to by an AVL tree is stored. * * @param avl_data A pointer to the data structure referred to by an AVL tree. + * @param writable Whether the AVL is writable or not (and therefore if the files can and should be truncated to the used size). * * @retval 0 if the operation was successfully completed. * @retval -1 if an error occurred. @@ -204,7 +205,7 @@ int grow_avl_data(OBIDMS_avl_data_p avl_data); * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -int close_avl_data(OBIDMS_avl_data_p avl_data); +int close_avl_data(OBIDMS_avl_data_p avl_data, bool writable); /** @@ -586,7 +587,7 @@ int truncate_avl_to_size_used(OBIDMS_avl_p avl) // TODO is it necessary to unmap // Compute the new size: used size rounded to the nearest greater multiple of page size greater than 0 multiple = ceil((double) (ONE_IF_ZERO((avl->header)->nb_items * sizeof(AVL_node_t))) / (double) getpagesize()); - new_data_size = ((int) multiple) * getpagesize(); + new_data_size = ((size_t) multiple) * getpagesize(); // Check that it is actually greater than the current size of the file, otherwise no need to truncate if ((avl->header)->avl_size == new_data_size) @@ -644,7 +645,7 @@ int truncate_avl_data_to_size_used(OBIDMS_avl_data_p avl_data) // TODO is it nec // Compute the new size: used size rounded to the nearest greater multiple of page size greater than 0 multiple = ceil((double) (ONE_IF_ZERO((avl_data->header)->data_size_used)) / (double) getpagesize()); - new_data_size = ((int) multiple) * getpagesize(); + new_data_size = ((index_t) multiple) * getpagesize(); // Check that it is actually greater than the current size of the file, otherwise no need to truncate if ((avl_data->header)->data_size_max == new_data_size) @@ -809,11 +810,12 @@ int grow_avl_data(OBIDMS_avl_data_p avl_data) // TODO Lock when needed } -int close_avl_data(OBIDMS_avl_data_p avl_data) +int close_avl_data(OBIDMS_avl_data_p avl_data, bool writable) { int ret_val = 0; - ret_val = truncate_avl_data_to_size_used(avl_data); + if (writable) + ret_val = truncate_avl_data_to_size_used(avl_data); if (munmap(avl_data->data, (avl_data->header)->data_size_max) < 0) { @@ -903,9 +905,10 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group) } // Unmap the previous AVL if it's not the 1st - if (avl_group->last_avl_idx > 0) - if (unmap_an_avl((avl_group->sub_avls)[avl_group->last_avl_idx]) < 0) - return -1; + // Not done anymore, currently keeping all mapped for efficiency reasons +// if (avl_group->last_avl_idx > 0) +// if (unmap_an_avl((avl_group->sub_avls)[avl_group->last_avl_idx]) < 0) +// return -1; // Increment current AVL index (avl_group->last_avl_idx)++; @@ -1556,7 +1559,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) avl_file_name = build_avl_file_name(complete_avl_name); if (avl_file_name == NULL) { - close_avl_data(avl_data); + close_avl_data(avl_data, true); return NULL; } @@ -1573,7 +1576,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) obidebug(1, "\nError creating an AVL tree file"); if (avl_idx >= 0) free(complete_avl_name); - close_avl_data(avl_data); + close_avl_data(avl_data, true); free(avl_file_name); return NULL; } @@ -1586,7 +1589,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) obidebug(1, "\nError truncating an AVL tree file to the right size"); if (avl_idx >= 0) free(complete_avl_name); - close_avl_data(avl_data); + close_avl_data(avl_data, true); close(avl_file_descriptor); return NULL; } @@ -1599,7 +1602,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) obidebug(1, "\nError allocating the memory for the AVL tree structure"); if (avl_idx >= 0) free(complete_avl_name); - close_avl_data(avl_data); + close_avl_data(avl_data, true); close(avl_file_descriptor); return NULL; } @@ -1618,7 +1621,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) obidebug(1, "\nError mmapping the header of an AVL tree file"); if (avl_idx >= 0) free(complete_avl_name); - close_avl_data(avl_data); + close_avl_data(avl_data, true); close(avl_file_descriptor); free(avl); return NULL; @@ -1637,7 +1640,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) obidebug(1, "\nError mmapping the data of an AVL tree file"); if (avl_idx >= 0) free(complete_avl_name); - close_avl_data(avl_data); + close_avl_data(avl_data, true); munmap(avl->header, header_size); close(avl_file_descriptor); free(avl); @@ -1709,8 +1712,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) avl_dir_name = get_full_path_of_avl_dir(dms, avl_name); if (avl_dir_name == NULL) { - if (avl_idx >= 0) - free(complete_avl_name); + free(complete_avl_name); return NULL; } directory = opendir(avl_dir_name); @@ -1718,8 +1720,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError opening an AVL directory"); - if (avl_idx >= 0) - free(complete_avl_name); + free(complete_avl_name); free(avl_dir_name); return NULL; } @@ -1729,8 +1730,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError getting the file descriptor of an AVL directory"); - if (avl_idx >= 0) - free(complete_avl_name); + free(complete_avl_name); return NULL; } @@ -1748,8 +1748,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError opening an AVL tree data file"); - if (avl_idx >= 0) - free(complete_avl_name); + free(complete_avl_name); free(avl_data_file_name); return NULL; } @@ -1761,8 +1760,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for the AVL tree data structure"); - if (avl_idx >= 0) - free(complete_avl_name); + free(complete_avl_name); close(avl_data_file_descriptor); return NULL; } @@ -1772,8 +1770,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError reading the header size to open an AVL tree data file"); - if (avl_idx >= 0) - free(complete_avl_name); + free(complete_avl_name); close(avl_data_file_descriptor); return NULL; } @@ -1790,8 +1787,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the header of an AVL tree data file"); - if (avl_idx >= 0) - free(complete_avl_name); + free(complete_avl_name); close(avl_data_file_descriptor); free(avl_data); return NULL; @@ -1808,8 +1804,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the data of an AVL tree data file"); - if (avl_idx >= 0) - free(complete_avl_name); + free(complete_avl_name); munmap(avl_data->header, header_size); close(avl_data_file_descriptor); free(avl_data); @@ -1825,7 +1820,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) avl_file_name = build_avl_file_name(complete_avl_name); if (avl_file_name == NULL) { - close_avl_data(avl_data); + close_avl_data(avl_data, false); return NULL; } @@ -1835,9 +1830,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError opening an AVL tree file"); - if (avl_idx >= 0) - free(complete_avl_name); - close_avl_data(avl_data); + free(complete_avl_name); + close_avl_data(avl_data, false); free(avl_file_name); return NULL; } @@ -1849,9 +1843,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for the AVL tree structure"); - if (avl_idx >= 0) - free(complete_avl_name); - close_avl_data(avl_data); + free(complete_avl_name); + close_avl_data(avl_data, false); close(avl_file_descriptor); return NULL; } @@ -1861,8 +1854,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError reading the header size to open an AVL tree"); - if (avl_idx >= 0) - free(complete_avl_name); + free(complete_avl_name); + close_avl_data(avl_data, false); close(avl_file_descriptor); return NULL; } @@ -1879,9 +1872,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the header of an AVL tree file"); - if (avl_idx >= 0) - free(complete_avl_name); - close_avl_data(avl_data); + free(complete_avl_name); + close_avl_data(avl_data, false); close(avl_file_descriptor); free(avl); return NULL; @@ -1898,9 +1890,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the data of an AVL tree file"); - if (avl_idx >= 0) - free(complete_avl_name); - close_avl_data(avl_data); + free(complete_avl_name); + close_avl_data(avl_data, false); munmap(avl->header, header_size); close(avl_file_descriptor); free(avl); @@ -2141,13 +2132,14 @@ OBIDMS_avl_group_p obi_clone_avl_group(OBIDMS_avl_group_p avl_group, const char* } -int obi_close_avl(OBIDMS_avl_p avl) +int obi_close_avl(OBIDMS_avl_p avl, bool writable) { int ret_val = 0; - ret_val = close_avl_data(avl->data); + ret_val = close_avl_data(avl->data, writable); - ret_val = truncate_avl_to_size_used(avl); + if (writable) + ret_val = truncate_avl_to_size_used(avl); if (munmap(avl->tree, (avl->header)->avl_size) < 0) { @@ -2199,7 +2191,7 @@ int obi_close_avl_group(OBIDMS_avl_group_p avl_group) if (remap_an_avl((avl_group->sub_avls)[i]) < 0) ret_val = -1; } - if (obi_close_avl((avl_group->sub_avls)[i]) < 0) + if (obi_close_avl((avl_group->sub_avls)[i], avl_group->writable) < 0) ret_val = -1; } @@ -2427,6 +2419,7 @@ index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value) } } + // Check if already in current AVL if (maybe_in_avl((avl_group->sub_avls)[avl_group->last_avl_idx], value)) { index_in_avl = (int32_t) obi_avl_find((avl_group->sub_avls)[avl_group->last_avl_idx], value); @@ -2442,12 +2435,12 @@ index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value) for (i=0; i < (avl_group->last_avl_idx); i++) { if (maybe_in_avl((avl_group->sub_avls)[i], value)) - { - if (remap_an_avl((avl_group->sub_avls)[i]) < 0) - return -1; + { // AVLS are not unmapped and remapped anymore as it is very costly and keeping all mapped seems to be handled well + //if (remap_an_avl((avl_group->sub_avls)[i]) < 0) + // return -1; index_in_avl = (int32_t) obi_avl_find((avl_group->sub_avls)[i], value); - if (unmap_an_avl((avl_group->sub_avls)[i]) < 0) - return -1; + //if (unmap_an_avl((avl_group->sub_avls)[i]) < 0) + // return -1; if (index_in_avl >= 0) { index_with_avl = i; @@ -2468,7 +2461,7 @@ index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value) } // Check if need to make new AVL - if (((((avl_group->sub_avls)[avl_group->last_avl_idx])->header)->nb_items == MAX_NODE_COUNT_PER_AVL) || (((((avl_group->sub_avls)[avl_group->last_avl_idx])->data)->header)->data_size_used >= MAX_DATA_SIZE_PER_AVL)) + if (((((avl_group->sub_avls)[avl_group->last_avl_idx])->header)->nb_items == MAX_NODE_COUNT_PER_AVL) || ((((((avl_group->sub_avls)[avl_group->last_avl_idx])->data)->header)->data_size_used + obi_blob_sizeof(value)) >= MAX_DATA_SIZE_PER_AVL)) { if (add_new_avl_in_group(avl_group) < 0) return -1;