diff --git a/src/obiavl.c b/src/obiavl.c
index b8e10a2..8c68ae3 100644
--- a/src/obiavl.c
+++ b/src/obiavl.c
@@ -107,6 +107,42 @@ static char* build_avl_file_name(const char* avl_name);
 static char* build_avl_data_file_name(const char* avl_name);
 
 
+/**
+ * @brief Internal function building the full path of an AVL tree file.
+ *
+ * @warning The returned pointer has to be freed by the caller.
+ *
+ * @param dms A pointer to the OBIDMS to which the AVL tree belongs.
+ * @param avl_name The name of the AVL tree.
+ * @param avl_idx The index of the AVL if it's part of an AVL group, or -1 if not.
+ *
+ * @returns A pointer to the full path of the file where the AVL tree is stored.
+ * @retval NULL if an error occurred.
+ *
+ * @since May 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static char* get_full_path_of_avl_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx);
+
+
+/**
+ * @brief Internal function building the full path of an AVL data file.
+ *
+ * @warning The returned pointer has to be freed by the caller.
+ *
+ * @param dms A pointer to the OBIDMS to which the AVL tree belongs.
+ * @param avl_name The name of the AVL tree.
+ * @param avl_idx The index of the AVL if it's part of an AVL group, or -1 if not.
+ *
+ * @returns A pointer to the full path of the file where the data referred to by the AVL tree is stored.
+ * @retval NULL if an error occurred.
+ *
+ * @since May 2016
+ * @author Celine Mercier (celine.mercier@metabarcoding.org)
+ */
+static char* get_full_path_of_avl_data_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx);
+
+
 /**
  * @brief Internal function returning the size of an AVL tree header on this platform,
  * including the size of the bloom filter associated with the AVL tree.
@@ -253,9 +289,12 @@ int remap_an_avl(OBIDMS_avl_p avl);
 
 
 /**
- * @brief Internal function (re)mapping the tree and data parts of an AVL tree structure.
+ * @brief Internal function creating and adding a new AVL in an AVL group.
  *
- * @param avl A pointer to the AVL tree group structure.
+ * @warning The previous AVL in the list of the group is unmapped,
+ * 			if it's not the 1st AVL being added.
+ *
+ * @param avl_group A pointer to the AVL tree group structure.
  *
  * @retval 0 if the operation was successfully completed.
  * @retval -1 if an error occurred.
@@ -547,6 +586,104 @@ static char* build_avl_data_file_name(const char* avl_name)
 }
 
 
+static char* get_full_path_of_avl_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx)
+{
+	char* complete_avl_name;
+	char* full_path;
+	char* avl_file_name;
+
+	if (avl_idx >= 0)
+	{
+		complete_avl_name = build_avl_name_with_idx(avl_name, avl_idx);
+		if (complete_avl_name == NULL)
+			return NULL;
+	}
+	else
+	{
+		complete_avl_name = (char*) malloc((strlen(avl_name)+1)*sizeof(char));
+		if (complete_avl_name == NULL)
+		{
+			obi_set_errno(OBI_MALLOC_ERROR);
+			obidebug(1, "\nError allocating memory for an AVL name");
+			return NULL;
+		}
+		strcpy(complete_avl_name, avl_name);
+	}
+
+	avl_file_name = build_avl_file_name(complete_avl_name);
+	if (avl_file_name == NULL)
+	{
+		free(complete_avl_name);
+		return NULL;
+	}
+
+	full_path = get_full_path_of_avl_dir(dms, avl_name);
+	if (full_path == NULL)
+	{
+		free(complete_avl_name);
+		free(avl_file_name);
+		return NULL;
+	}
+
+	strcat(full_path, "/");
+	strcat(full_path, avl_file_name);
+
+	free(complete_avl_name);
+	free(avl_file_name);	// Only the copy appended to full_path is returned
+
+	return full_path;
+}
+
+
+static char* get_full_path_of_avl_data_file_name(OBIDMS_p dms, const char* avl_name, int avl_idx)
+{
+	char* complete_avl_name;
+	char* full_path;
+	char* avl_data_file_name;
+
+	if (avl_idx >= 0)
+	{
+		complete_avl_name = build_avl_name_with_idx(avl_name, avl_idx);
+		if (complete_avl_name == NULL)
+			return NULL;
+	}
+	else
+	{
+		complete_avl_name = (char*) malloc((strlen(avl_name)+1)*sizeof(char));
+		if (complete_avl_name == NULL)
+		{
+			obi_set_errno(OBI_MALLOC_ERROR);
+			obidebug(1, "\nError allocating memory for an AVL name");
+			return NULL;
+		}
+		strcpy(complete_avl_name, avl_name);
+	}
+
+	avl_data_file_name = build_avl_data_file_name(complete_avl_name);
+	if (avl_data_file_name == NULL)
+	{
+		free(complete_avl_name);
+		return NULL;
+	}
+
+	full_path = get_full_path_of_avl_dir(dms, avl_name);
+	if (full_path == NULL)
+	{
+		free(complete_avl_name);
+		free(avl_data_file_name);
+		return NULL;
+	}
+
+	strcat(full_path, "/");
+	strcat(full_path, avl_data_file_name);
+
+	free(complete_avl_name);
+	free(avl_data_file_name);	// Only the copy appended to full_path is returned
+
+	return full_path;
+}
+
+
 size_t get_avl_header_size()
 {
 	size_t header_size;
@@ -646,7 +783,6 @@ int truncate_avl_to_size_used(OBIDMS_avl_p avl) // TODO is it necessary to unmap
 						 file_descriptor,
 						 (avl->header)->header_size
 						);
-
 	if (avl->tree == MAP_FAILED)
 	{
 		obi_set_errno(OBI_AVL_ERROR);
@@ -930,9 +1066,10 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group)
 		return -1;
 	}
 
-	// Unmap the previous AVL
-	if (unmap_an_avl((avl_group->sub_avls)[avl_group->last_avl_idx]) < 0)
-		return -1;
+	// Unmap the previous AVL if it's not the 1st
+	if (avl_group->last_avl_idx > 0)
+		if (unmap_an_avl((avl_group->sub_avls)[avl_group->last_avl_idx]) < 0)
+			return -1;
 
 	// Increment current AVL index
 	(avl_group->last_avl_idx)++;
@@ -949,6 +1086,80 @@ int add_new_avl_in_group(OBIDMS_avl_group_p avl_group)
 }
 
 
+/**
+ * @brief Internal function adding an existing AVL in an AVL group, using hard links.
+ *
+ * The AVL tree file and the AVL data file at index avl_idx of the source group are hard-linked
+ * under the names they have in the destination group, then the AVL is opened and appended
+ * to the list of AVLs of the destination group.
+ *
+ * @param avl_group_dest A pointer to the AVL group to which the AVL is added.
+ * @param avl_group_source A pointer to the AVL group containing the existing AVL.
+ * @param avl_idx The index of the AVL in the source group.
+ *
+ * @retval 0 if the operation was successfully completed.
+ * @retval -1 if an error occurred.
+ */
+int add_existing_avl_in_group(OBIDMS_avl_group_p avl_group_dest, OBIDMS_avl_group_p avl_group_source, int avl_idx)
+{
+	OBIDMS_avl_p avl;
+	char*        source_path;
+	char*        dest_path;
+	int          link_ret;
+
+	// Check that there is room left in the destination group
+	if ((avl_group_dest->last_avl_idx + 1) >= MAX_NB_OF_AVLS_IN_GROUP)
+	{
+		obi_set_errno(OBI_AVL_ERROR);
+		obidebug(1, "\nError: trying to add an AVL in a full AVL group");
+		return -1;
+	}
+
+	// Hard link the AVL tree file; the paths are allocated for us and have to be freed here
+	source_path = get_full_path_of_avl_file_name(avl_group_source->dms, avl_group_source->name, avl_idx);
+	dest_path = get_full_path_of_avl_file_name(avl_group_dest->dms, avl_group_dest->name, avl_idx);
+	link_ret = -1;
+	if ((source_path != NULL) && (dest_path != NULL))
+		link_ret = link(source_path, dest_path);
+	free(source_path);
+	free(dest_path);
+	if (link_ret < 0)
+	{
+		obi_set_errno(OBI_AVL_ERROR);
+		obidebug(1, "\nError creating a hard link to an existing AVL tree file");
+		return -1;
+	}
+
+	// Hard link the AVL data file
+	source_path = get_full_path_of_avl_data_file_name(avl_group_source->dms, avl_group_source->name, avl_idx);
+	dest_path = get_full_path_of_avl_data_file_name(avl_group_dest->dms, avl_group_dest->name, avl_idx);
+	link_ret = -1;
+	if ((source_path != NULL) && (dest_path != NULL))
+		link_ret = link(source_path, dest_path);
+	free(source_path);
+	free(dest_path);
+	if (link_ret < 0)
+	{
+		obi_set_errno(OBI_AVL_ERROR);
+		obidebug(1, "\nError creating a hard link to an existing AVL data file");
+		return -1;
+	}
+
+	// Open AVL for that group TODO ideally not needed, but needed for now
+	avl = obi_open_avl(avl_group_source->dms, avl_group_source->name, avl_idx);
+	if (avl == NULL)
+	{
+		obidebug(1, "\nError opening an AVL to add in an AVL group");
+		return -1;
+	}
+
+	// Register the AVL only once it is open, so that last_avl_idx never designates an empty slot
+	(avl_group_dest->last_avl_idx)++;
+	(avl_group_dest->sub_avls)[avl_group_dest->last_avl_idx] = avl;
+
+	return 0;
+}
+
 int maybe_in_avl(OBIDMS_avl_p avl, Obi_blob_p value)
 {
 	return (bloom_check(&((avl->header)->bloom_filter), value, obi_blob_sizeof(value)));
@@ -1529,8 +1740,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx)
 	// Bloom filter
 	bloom_init(&((avl->header)->bloom_filter), MAX_NODE_COUNT_PER_AVL);
 
-	if (avl_idx >= 0)
-		free(complete_avl_name);
+	free(complete_avl_name);
 
 	return avl;
 }
@@ -1777,8 +1987,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx)
 	avl->dir_fd = avl_dir_file_descriptor;
 	avl->avl_fd = avl_file_descriptor;
 
-	if (avl_idx >= 0)
-		free(complete_avl_name);
+	free(complete_avl_name);
 
 	return avl;
 }
@@ -1806,6 +2015,7 @@ OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name)
 OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name)
 {
 	OBIDMS_avl_group_p avl_group;
+	char*			   avl_dir_name;
 
 	avl_group = (OBIDMS_avl_group_p) malloc(sizeof(OBIDMS_avl_group_t));
 	if (avl_group == NULL)
@@ -1815,18 +2025,22 @@ OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name)
 		return NULL;
 	}
 
-	// Create 1st avl
-	(avl_group->sub_avls)[0] = obi_create_avl(dms, avl_name, 0);
-	if ((avl_group->sub_avls)[0] == NULL)
-	{
-		obidebug(1, "\nError creating the first AVL of an AVL group");
-		return NULL;
-	}
-
-	avl_group->last_avl_idx = 0;
+	avl_group->last_avl_idx = -1;
+	avl_group->dms = dms;
 	strcpy(avl_group->name, avl_name);
 
-	avl_group->dms = dms;
+	// Create the directory for that AVL group
+	avl_dir_name = get_full_path_of_avl_dir(dms, avl_name);
+	if (avl_dir_name == NULL)
+		return NULL;
+
+	if (mkdirat(dms->indexer_dir_fd, avl_dir_name, 00777) < 0)
+	{
+		obi_set_errno(OBI_AVL_ERROR);
+		obidebug(1, "\nError creating an AVL directory");
+		free(avl_dir_name);
+		return NULL;
+	}
 
 	// Add in the list of open indexers
 	obi_dms_list_indexer(dms, avl_group);
@@ -1926,20 +2140,23 @@ OBIDMS_avl_group_p obi_clone_avl_group(OBIDMS_avl_group_p avl_group, const char*
 	// Create the new AVL group
 	new_avl_group = obi_create_avl_group(avl_group->dms, new_avl_name);
 
-	// Copy the data from each old AVL to the new ones
-	for (i=0; i<=(avl_group->last_avl_idx); i++)
+	// Create hard links to all the full AVLs that won't be modified: all but the last one
+	for (i=0; i<(avl_group->last_avl_idx); i++)
 	{
-		if (i > 0)	// Don't need to create the 1st AVL
-		{
-			if (add_new_avl_in_group(new_avl_group) < 0)
-			{
-				obi_close_avl_group(new_avl_group);
-				return NULL;
-			}
-		}
-		obi_clone_avl((avl_group->sub_avls)[i], (new_avl_group->sub_avls)[i]);
+		if (add_existing_avl_in_group(new_avl_group, avl_group, i) < 0)
+			return NULL;
 	}
 
+	// Create the last AVL to copy data in it
+	if (add_new_avl_in_group(new_avl_group) < 0)
+	{
+		obi_close_avl_group(new_avl_group);
+		return NULL;
+	}
+
+	// Copy the data from the last AVL to a new one that can be modified
+	obi_clone_avl((avl_group->sub_avls)[avl_group->last_avl_idx], (new_avl_group->sub_avls)[new_avl_group->last_avl_idx]);
+
 	// Close old AVL group
 	if (obi_close_avl_group(avl_group) < 0)
 	{
@@ -1959,7 +2176,7 @@ int obi_close_avl(OBIDMS_avl_p avl)
 
 	ret_val = truncate_avl_to_size_used(avl);
 
-	if (munmap(avl->tree, (((avl->header)->nb_items_max) * sizeof(AVL_node_t))) < 0)
+	if (munmap(avl->tree, (avl->header)->avl_size) < 0)
 	{
 		obi_set_errno(OBI_AVL_ERROR);
 		obidebug(1, "\nError munmapping the tree of an AVL tree file");
@@ -1996,9 +2213,17 @@ int obi_close_avl_group(OBIDMS_avl_group_p avl_group)
 		ret_val = obi_dms_unlist_indexer(avl_group->dms, avl_group);
 
 		// Close each AVL of the group
-		for (i=0; i < (avl_group->last_avl_idx); i++)
+		for (i=0; i <= (avl_group->last_avl_idx); i++)
+		{
+			// Remap all but the last AVL (already mapped) before closing to truncate and close properly
+			if (i < (avl_group->last_avl_idx))
+			{
+				if (remap_an_avl((avl_group->sub_avls)[i]) < 0)
+					ret_val = -1;
+			}
 			if (obi_close_avl((avl_group->sub_avls)[i]) < 0)
 				ret_val = -1;
+		}
 
 		free(avl_group);
 	}
@@ -2207,6 +2432,16 @@ index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value)
 	index_t index_with_avl;
 	int     i;
 
+	// Create 1st AVL if group is empty
+	if (avl_group->last_avl_idx == -1)
+	{
+		if (add_new_avl_in_group(avl_group) < 0)
+		{
+			obidebug(1, "\nError creating the first AVL of an AVL group");
+			return -1;
+		}
+	}
+
 	if (maybe_in_avl((avl_group->sub_avls)[avl_group->last_avl_idx], value))
 	{
 		index_in_avl = (int32_t) obi_avl_find((avl_group->sub_avls)[avl_group->last_avl_idx], value);
@@ -2218,6 +2453,7 @@ index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value)
 			return index_with_avl;
 		}
 	}
+
 	for (i=0; i < (avl_group->last_avl_idx); i++)
 	{
 		if (maybe_in_avl((avl_group->sub_avls)[i], value))
diff --git a/src/obiavl.h b/src/obiavl.h
index 6b05d4e..e3655f1 100644
--- a/src/obiavl.h
+++ b/src/obiavl.h
@@ -73,7 +73,7 @@ typedef struct AVL_node {
  * @brief OBIDMS AVL tree data header structure.
  */
 typedef struct OBIDMS_avl_data_header {
-	int			header_size;			/**< Size of the header in bytes.
+	size_t		header_size;			/**< Size of the header in bytes.
 										 */
 	index_t		data_size_used;			/**< Size of the data used in bytes.
 										 */
@@ -105,7 +105,7 @@ typedef struct OBIDMS_avl_data {
  * @brief OBIDMS AVL tree header structure.
  */
 typedef struct OBIDMS_avl_header {
-	int			header_size;			/**< Size of the header in bytes.
+	size_t		header_size;			/**< Size of the header in bytes.
 										 */
 	size_t		avl_size;				/**< Size of the AVL tree in bytes.
 										 */
@@ -160,7 +160,7 @@ typedef struct OBIDMS_avl {
 typedef struct OBIDMS_avl_group {
 	OBIDMS_avl_p	sub_avls[MAX_NB_OF_AVLS_IN_GROUP];	/**< Array containing the pointers to the AVL trees of the group.
 														 */
-	int 			last_avl_idx;						/**< Index in the sub_avls array of the AVL tree currently being filled.
+	int				last_avl_idx;						/**< Index in the sub_avls array of the AVL tree currently being filled.
 														 */
 	char			name[AVL_MAX_NAME+1];				/**< Base name of the AVL group. The AVL trees in it have names of the form basename_idx.
 														 */
diff --git a/src/obidmscolumn_qual.c b/src/obidmscolumn_qual.c
index a789f64..3559524 100644
--- a/src/obidmscolumn_qual.c
+++ b/src/obidmscolumn_qual.c
@@ -33,6 +33,8 @@ int obi_column_set_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t lin
 	int      i;
 	int      ret_value;
 
+	// TODO NA
+
 	int_value_length = strlen(value);
 	int_value = (uint8_t*) malloc(int_value_length * sizeof(uint8_t));
 
@@ -56,17 +58,24 @@ int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line
 	if (obi_column_prepare_to_set_value(column, line_nb) < 0)
 		return -1;
 
+	// TODO NA
+
 	// Add the value in the indexer
 	idx = obi_index_uint8(column->indexer, value, value_length);
 	if (idx == -1)	// An error occurred
 	{
 		if (obi_errno == OBI_READ_ONLY_INDEXER_ERROR)
 		{
+			// TODO PUT IN A COLUMN FUNCTION
 			// If the error is that the indexer is read-only, clone it
 			new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
 			if (new_indexer_name == NULL)
 				return -1;
 			column->indexer = obi_clone_indexer(column->indexer, new_indexer_name);	// TODO Need to lock this somehow?
+			strcpy((column->header)->indexer_name, new_indexer_name);
+			free(new_indexer_name);
+			obi_set_errno(0);
+
 			// Add the value in the new indexer
 			idx = obi_index_uint8(column->indexer, value, value_length);
 			if (idx == -1)
@@ -91,6 +100,8 @@ char* obi_column_get_obiqual_char_with_elt_idx(OBIDMS_column_p column, index_t l
 
 	int_value = obi_column_get_obiqual_int_with_elt_idx(column, line_nb, element_idx, &int_value_length);
 
+	// TODO NA
+
 	value = (char*) malloc((int_value_length + 1) * sizeof(char));
 
 	// Encode int quality to char quality
diff --git a/src/obidmscolumn_qual.h b/src/obidmscolumn_qual.h
index 9057983..90178de 100644
--- a/src/obidmscolumn_qual.h
+++ b/src/obidmscolumn_qual.h
@@ -86,7 +86,7 @@ int obi_column_set_obiqual_int_with_elt_idx(OBIDMS_column_p column, index_t line
  * @param element_idx The index of the element that should be recovered in the line.
  *
  * @returns The recovered value, in the character string format.
- * @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set.
+ * @retval OBIQual_char_NA the NA value of the type if an error occurred and obi_errno is set.
  *
  * @since May 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
@@ -172,7 +172,7 @@ int obi_column_set_obiqual_int_with_elt_name(OBIDMS_column_p column, index_t lin
  * @param element_name The name of the element that should be recovered in the line.
  *
  * @returns The recovered value, in the character string format.
- * @retval OBIQual_str_NA the NA value of the type if an error occurred and obi_errno is set.
+ * @retval OBIQual_char_NA the NA value of the type if an error occurred and obi_errno is set.
  *
  * @since May 2016
  * @author Celine Mercier (celine.mercier@metabarcoding.org)
diff --git a/src/obidmscolumn_seq.c b/src/obidmscolumn_seq.c
index c7daadf..7bfaacd 100644
--- a/src/obidmscolumn_seq.c
+++ b/src/obidmscolumn_seq.c
@@ -32,17 +32,24 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
 	if (obi_column_prepare_to_set_value(column, line_nb) < 0)
 		return -1;
 
+	// TODO NA
+
 	// Add the value in the indexer
 	idx = obi_index_dna_seq(column->indexer, value);
 	if (idx == -1)	// An error occurred
 	{
 		if (obi_errno == OBI_READ_ONLY_INDEXER_ERROR)
 		{
+			// TODO PUT IN A COLUMN FUNCTION
 			// If the error is that the indexer is read-only, clone it
 			new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
 			if (new_indexer_name == NULL)
 				return -1;
 			column->indexer = obi_clone_indexer(column->indexer, new_indexer_name);	// TODO Need to lock this somehow?
+			strcpy((column->header)->indexer_name, new_indexer_name);
+			free(new_indexer_name);
+			obi_set_errno(0);
+
 			// Add the value in the new indexer
 			idx = obi_index_dna_seq(column->indexer, value);
 			if (idx == -1)
diff --git a/src/obidmscolumn_str.c b/src/obidmscolumn_str.c
index 111ab8c..81a006a 100644
--- a/src/obidmscolumn_str.c
+++ b/src/obidmscolumn_str.c
@@ -32,17 +32,24 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
 	if (obi_column_prepare_to_set_value(column, line_nb) < 0)
 		return -1;
 
+	// TODO NA
+
 	// Add the value in the indexer
 	idx = obi_index_char_str(column->indexer, value);
 	if (idx == -1)	// An error occurred
 	{
 		if (obi_errno == OBI_READ_ONLY_INDEXER_ERROR)
 		{
+			// TODO PUT IN A COLUMN FUNCTION
 			// If the error is that the indexer is read-only, clone it
 			new_indexer_name = obi_build_indexer_name((column->header)->name, (column->header)->version);
 			if (new_indexer_name == NULL)
 				return -1;
 			column->indexer = obi_clone_indexer(column->indexer, new_indexer_name);	// TODO Need to lock this somehow?
+			strcpy((column->header)->indexer_name, new_indexer_name);
+			free(new_indexer_name);
+			obi_set_errno(0);
+
 			// Add the value in the new indexer
 			idx = obi_index_char_str(column->indexer, value);
 			if (idx == -1)
@@ -64,7 +71,7 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l
 	index_t idx;
 
 	if (obi_column_prepare_to_get_value(column, line_nb) < 0)
-		return OBISeq_NA;
+		return OBIStr_NA;
 
 	idx = *(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
 