/**************************************************************************** * OBIDMS AVL tree header file * ****************************************************************************/ /** * @file obiavl.h * @author Celine Mercier * @date December 3rd 2015 * @brief Header file for handling AVL trees for storing and retrieving byte arrays (i.e. coding for character strings). */ #ifndef OBIAVL_H_ #define OBIAVL_H_ #include #include #include #include #include #include #include #include "obidms.h" #include "obitypes.h" #include "bloom.h" #define NB_OF_AVLS (64) #define MASK (63) #define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name. */ #define AVL_GROWTH_FACTOR (2) /**< The growth factor when an AVL tree is enlarged. */ #define AVL_MAX_DEPTH (1000) /**< The maximum depth of an AVL tree. */ #define LEFT_CHILD(node) (avl->tree)+(node->left_child) /**< Pointer to the left child of a node in an AVL tree. */ #define RIGHT_CHILD(node) (avl->tree)+(node->right_child) /**< Pointer to the right child of a node in an AVL tree. */ #define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array. */ typedef struct bloom bloom_t; /** * @brief AVL tree node structure. */ typedef struct AVL_node { index_t left_child; /**< Index of left less child node. */ index_t right_child; /**< Index of right greater child node. */ int8_t balance_factor; /**< Balance factor of the node. */ index_t value; /**< Index of the value associated with the node in the data array. */ } AVL_node_t, *AVL_node_p; /** * @brief OBIDMS AVL tree data header structure. */ typedef struct OBIDMS_avl_data_header { int header_size; /**< Size of the header in bytes. */ index_t data_size_used; /**< Size of the data used in bytes. */ index_t data_size_max; /**< Max size of the data in bytes. */ index_t nb_items; /**< Number of items. */ char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a NULL terminated string. */ time_t creation_date; /**< Date of creation of the file. */ } OBIDMS_avl_data_header_t, *OBIDMS_avl_data_header_p; /** * @brief OBIDMS AVL tree data structure. */ typedef struct OBIDMS_avl_data { OBIDMS_avl_data_header_p header; /**< A pointer to the header of the AVL tree data. */ byte_t* data; /**< A pointer to the beginning of the data. */ } OBIDMS_avl_data_t, *OBIDMS_avl_data_p; /** * @brief OBIDMS AVL tree header structure. */ typedef struct OBIDMS_avl_header { int header_size; /**< Size of the header in bytes. */ size_t avl_size; /**< Size of the AVL tree in bytes. */ index_t nb_items; /**< Number of items in the AVL tree. */ index_t nb_items_max; /**< Maximum number of items in the AVL tree before it has to be enlarged. */ index_t root_idx; /**< Index of the root of the AVL tree. */ char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a NULL terminated string. */ time_t creation_date; /**< Date of creation of the file. */ bloom_t bloom_filter; } OBIDMS_avl_header_t, *OBIDMS_avl_header_p; /** * @brief OBIDMS AVL tree structure. */ typedef struct OBIDMS_avl { OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the AVL tree belongs. */ OBIDMS_avl_header_p header; /**< A pointer to the header of the AVL tree. */ struct AVL_node* tree; /**< A pointer to the root of the AVL tree. */ index_t path_idx[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of node indices. */ int8_t path_dir[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of directions * (0 for left, -1 for right). */ OBIDMS_avl_data_p data; /**< A pointer to the structure containing the data * that the AVL tree references. */ DIR* directory; /**< A directory entry usable to * refer and scan the AVL tree directory. */ int dir_fd; /**< The file descriptor of the directory entry * usable to refer and scan the AVL tree directory. */ size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL tree is used. */ int avl_fd; int data_fd; } OBIDMS_avl_t, *OBIDMS_avl_p; /** * @brief OBIDMS AVL tree group structure. */ typedef struct OBIDMS_avl_group { // TODO put each group in a directory later OBIDMS_avl_p sub_avls[64]; // TODO macro for max int current_avl_idx; char avl_name[AVL_MAX_NAME+1]; OBIDMS_p dms; } OBIDMS_avl_group_t, *OBIDMS_avl_group_p; OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name); index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value); /** * @brief Checks if an AVL tree already exists or not. * * @param dms The OBIDMS to which the AVL tree belongs. * @param avl_name The name of the AVL tree. * * @returns A value indicating whether the AVL tree exists or not. * @retval 1 if the AVL tree exists. * @retval 0 if the AVL tree does not exist. * @retval -1 if an error occurred. * * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ int obi_avl_exists(OBIDMS_p dms, const char* avl_name); /** * @brief Opens an AVL tree and creates it if it does not already exist. * * Note: An AVL tree is made of two files (referred to by two structures). * One file contains the indices referring to the data, and the other * file contains the data itself. The AVL tree as a whole is referred * to via the OBIDMS_avl structure. * * @param dms The OBIDMS to which the AVL tree belongs. * @param avl_name The name of the AVL tree. * * @returns A pointer to the AVL tree structure. * @retval NULL if an error occurred. * * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name); /** * @brief Creates an AVL tree. Fails if it already exists. * * Note: An AVL tree is made of two files (referred to by two structures). * One file contains the indices referring to the data, and the other * file contains the data itself. The AVL tree as a whole is referred * to via the OBIDMS_avl structure. * * @param dms The OBIDMS to which the AVL tree belongs. * @param avl_name The name of the AVL tree. * * @returns A pointer to the newly created AVL tree structure. * @retval NULL if an error occurred. * * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name); /** * @brief Opens an AVL tree. Fails if it does not already exist. * * Note: An AVL tree is made of two files (referred to by two structures). * One file contains the indices referring to the data, and the other * file contains the data itself. The AVL tree as a whole is referred * to via the OBIDMS_avl structure. * * @param dms The OBIDMS to which the AVL tree belongs. * @param avl_name The name of the AVL tree. * * @returns A pointer to the AVL tree structure. * @retval NULL if an error occurred. * * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name); /** * @brief Closes an AVL tree. * * Note: An AVL tree is made of two files (referred to by two structures). * One file contains the indices referring to the data, and the other * file contains the data itself. The AVL tree as a whole is referred * to via the OBIDMS_avl structure. * * @param avl A pointer to the AVL tree structure to close and free. * * @retval 0 if the operation was successfully completed. * @retval -1 if an error occurred. * * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ int obi_close_avl(OBIDMS_avl_p avl); /** * @brief Adds a value (byte array) in an AVL tree, checking if it is already in it. * * @warning The byte array to add must already be encoded and contain its header. * * @param avl A pointer to the AVL tree. * @param value The byte array to add in the AVL tree. * * @returns The index of the value, whether it was added or already in the AVL tree. * @retval -1 if an error occurred. * * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value); /** * @brief Finds a value (byte array) in an AVL tree, checking first if it is already in it. * * @warning The byte array to add must already be encoded and contain its header. * * @param avl A pointer to the AVL tree. * @param value The byte array to add in the AVL tree. * * @returns The data index of the value. * @retval -1 if the value is not in the tree. * * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value); /** * @brief Recovers a value (byte array) in an AVL tree. * * @warning The byte array recovered is encoded and contains its header. * * @param avl A pointer to the AVL tree. * @param index The index of the value in the data array. * * @returns A pointer to the byte array recovered. * * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ byte_t* obi_avl_get(OBIDMS_avl_p avl, index_t index); /** * @brief Converts a character string to a byte array with a header. * * @warning The byte array must be freed by the caller. * * @param value The character string to convert. * * @returns A pointer to the byte array created. * @retval NULL if an error occurred. * * @since October 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ byte_t* obi_str_to_obibytes(char* value); /** * @brief Converts a byte array to a character string. * * @param value_b The byte array to convert. * * @returns A pointer to the character string contained in the byte array. * * @since October 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ const char* obi_obibytes_to_str(byte_t* value_b); /** * @brief Converts a DNA sequence to a byte array with a header. * * @warning The byte array must be freed by the caller. * * @param value The DNA sequence to convert. * * @returns A pointer to the byte array created. * @retval NULL if an error occurred. * * @since November 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ byte_t* obi_seq_to_obibytes(char* seq); /** * @brief Converts a byte array to a DNA sequence. * * @param value_b The byte array to convert. * * @returns A pointer to the DNA sequence contained in the byte array. * @retval NULL if an error occurred. * * @since November 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ const char* obi_obibytes_to_seq(byte_t* value_b); #endif /* OBIAVL_H_ */