Branch to refactor and debug (AVLs bugged)
This commit is contained in:
324
src/obiavl.h
324
src/obiavl.h
@ -25,41 +25,46 @@
|
||||
#include "obidms.h"
|
||||
#include "obitypes.h"
|
||||
#include "bloom.h"
|
||||
#include "utils.h"
|
||||
#include "encode.h"
|
||||
|
||||
|
||||
#define NODE_COUNT_PER_AVL (10000000)
|
||||
|
||||
#define BLOOM_FILTER_ERROR_RATE (0.001)
|
||||
|
||||
#define MAX_NB_OF_AVLS_IN_GROUP (100) /**< The maximum number of AVL trees in a group. // TODO discuss
|
||||
*/
|
||||
#define MAX_NODE_COUNT_PER_AVL (10000000) /**< The maximum number of nodes in an AVL tree.
|
||||
* Only used to decide when to create a new AVL in a group, and to initialize the bloom filter // TODO discuss.
|
||||
*/
|
||||
#define MAX_DATA_SIZE_PER_AVL (1073741824) /**< The maximum size of the data referred to by an AVL tree in a group.
|
||||
* Only used to decide when to create a new AVL in a group.
|
||||
* Should not be greater than int32_t max (2,147,483,647), as indexes will have to be stored on 32 bits.
|
||||
* Here 1073741824 B = 1 GB
|
||||
*/
|
||||
#define AVL_MAX_DEPTH (1024) /**< The maximum depth of an AVL tree. Used to save paths through the tree.
|
||||
*/
|
||||
#define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name.
|
||||
*/
|
||||
#define AVL_GROWTH_FACTOR (2) /**< The growth factor when an AVL tree is enlarged.
|
||||
*/
|
||||
#define AVL_MAX_DEPTH (1000) /**< The maximum depth of an AVL tree.
|
||||
*/
|
||||
#define LEFT_CHILD(node) ((avl->tree)+((node)->left_child)) /**< Pointer to the left child of a node in an AVL tree.
                                                             *   The expansion reads a variable named `avl` that must be
                                                             *   in scope where the macro is used. The argument and the
                                                             *   whole expansion are parenthesized so that the macro can
                                                             *   be combined with `->`, `*`, `&` or arithmetic safely.
                                                             */
|
||||
#define RIGHT_CHILD(node) ((avl->tree)+((node)->right_child)) /**< Pointer to the right child of a node in an AVL tree.
                                                               *   The expansion reads a variable named `avl` that must be
                                                               *   in scope where the macro is used. The argument and the
                                                               *   whole expansion are parenthesized so that the macro can
                                                               *   be combined with `->`, `*`, `&` or arithmetic safely.
                                                               */
|
||||
#define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array.
|
||||
*/
|
||||
|
||||
typedef struct bloom bloom_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief AVL tree node structure.
|
||||
*/
|
||||
typedef struct AVL_node {
|
||||
index_t left_child; /**< Index of left less child node.
|
||||
*/
|
||||
index_t right_child; /**< Index of right greater child node.
|
||||
*/
|
||||
int8_t balance_factor; /**< Balance factor of the node.
|
||||
*/
|
||||
index_t value; /**< Index of the value associated with the node in the data array.
|
||||
*/
|
||||
uint64_t crc64; // TODO
|
||||
index_t left_child; /**< Index of left less child node.
|
||||
*/
|
||||
index_t right_child; /**< Index of right greater child node.
|
||||
*/
|
||||
int8_t balance_factor; /**< Balance factor of the node.
|
||||
*/
|
||||
index_t value; /**< Index of the value associated with the node in the data array.
|
||||
*/
|
||||
uint64_t crc64; /**< Cyclic Redundancy Check code on 64 bits associated with the value.
|
||||
*/
|
||||
} AVL_node_t, *AVL_node_p;
|
||||
|
||||
|
||||
@ -88,8 +93,10 @@ typedef struct OBIDMS_avl_data_header {
|
||||
typedef struct OBIDMS_avl_data {
|
||||
OBIDMS_avl_data_header_p header; /**< A pointer to the header of the AVL tree data.
|
||||
*/
|
||||
byte_t* data; /**< A pointer to the beginning of the data.
|
||||
byte_t* data; /**< A pointer to the beginning of the data.
|
||||
*/
|
||||
int data_fd; /**< File descriptor of the file containing the data.
|
||||
*/
|
||||
} OBIDMS_avl_data_t, *OBIDMS_avl_data_p;
|
||||
|
||||
|
||||
@ -111,7 +118,9 @@ typedef struct OBIDMS_avl_header {
|
||||
*/
|
||||
time_t creation_date; /**< Date of creation of the file.
|
||||
*/
|
||||
bloom_t bloom_filter;
|
||||
bloom_t bloom_filter; /**< Bloom filter associated with the AVL tree, enabling to know if a value
|
||||
* might already be stored in the data associated with the tree.
|
||||
*/
|
||||
} OBIDMS_avl_header_t, *OBIDMS_avl_header_p;
|
||||
|
||||
|
||||
@ -119,30 +128,30 @@ typedef struct OBIDMS_avl_header {
|
||||
* @brief OBIDMS AVL tree structure.
|
||||
*/
|
||||
typedef struct OBIDMS_avl {
|
||||
OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the AVL tree belongs.
|
||||
*/
|
||||
OBIDMS_avl_header_p header; /**< A pointer to the header of the AVL tree.
|
||||
*/
|
||||
struct AVL_node* tree; /**< A pointer to the root of the AVL tree.
|
||||
*/
|
||||
OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the AVL tree belongs.
|
||||
*/
|
||||
OBIDMS_avl_header_p header; /**< A pointer to the header of the AVL tree.
|
||||
*/
|
||||
struct AVL_node* tree; /**< A pointer to the root of the AVL tree.
|
||||
*/
|
||||
index_t path_idx[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of node indices.
|
||||
*/
|
||||
int8_t path_dir[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of directions
|
||||
* (0 for left, -1 for right).
|
||||
* (0 for left, -1 for right).
|
||||
*/
|
||||
OBIDMS_avl_data_p data; /**< A pointer to the structure containing the data
|
||||
* that the AVL tree references.
|
||||
*/
|
||||
DIR* directory; /**< A directory entry usable to
|
||||
* refer and scan the AVL tree directory.
|
||||
*/
|
||||
int dir_fd; /**< The file descriptor of the directory entry
|
||||
* usable to refer and scan the AVL tree directory.
|
||||
*/
|
||||
size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL tree is used.
|
||||
*/
|
||||
int avl_fd;
|
||||
int data_fd;
|
||||
OBIDMS_avl_data_p data; /**< A pointer to the structure containing the data
|
||||
* that the AVL tree references.
|
||||
*/
|
||||
DIR* directory; /**< A directory entry usable to
|
||||
* refer and scan the AVL tree directory.
|
||||
*/
|
||||
int dir_fd; /**< The file descriptor of the directory entry
|
||||
* usable to refer and scan the AVL tree directory.
|
||||
*/
|
||||
int avl_fd; /**< The file descriptor of the file containing the AVL tree.
|
||||
*/
|
||||
size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL tree is used.
|
||||
*/
|
||||
} OBIDMS_avl_t, *OBIDMS_avl_p;
|
||||
|
||||
|
||||
@ -150,28 +159,26 @@ typedef struct OBIDMS_avl {
|
||||
* @brief OBIDMS AVL tree group structure.
|
||||
*/
|
||||
typedef struct OBIDMS_avl_group {
|
||||
// TODO put each group in a directory later
|
||||
OBIDMS_avl_p sub_avls[64]; // TODO macro for max
|
||||
int current_avl_idx;
|
||||
char avl_name[AVL_MAX_NAME+1];
|
||||
OBIDMS_p dms;
|
||||
OBIDMS_avl_p sub_avls[MAX_NB_OF_AVLS_IN_GROUP]; /**< Array containing the pointers to the AVL trees of the group.
|
||||
*/
|
||||
int current_avl_idx; /**< Index in the sub_avls array of the AVL tree currently being filled.
|
||||
*/
|
||||
char avl_name[AVL_MAX_NAME+1]; /**< Base name of the AVL group. The AVL trees in it have names of the form basename_idx.
|
||||
*/
|
||||
OBIDMS_p dms; /**< Pointer to the OBIDMS structure to which the AVL group belongs.
|
||||
*/
|
||||
} OBIDMS_avl_group_t, *OBIDMS_avl_group_p;
|
||||
|
||||
|
||||
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name);
|
||||
index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if an AVL tree already exists or not.
|
||||
* @brief Checks if an AVL tree or AVL tree group already exists or not.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The name of the AVL tree.
|
||||
* @param dms The OBIDMS to which the AVL tree or AVL tree group belongs.
|
||||
* @param avl_name The name of the AVL tree or the base name of the AVL tree group.
|
||||
*
|
||||
* @returns A value indicating whether the AVL tree exists or not.
|
||||
* @retval 1 if the AVL tree exists.
|
||||
* @retval 0 if the AVL tree does not exist.
|
||||
* @returns A value indicating whether the AVL tree or AVL tree group exists or not.
|
||||
* @retval 1 if the AVL tree or AVL tree group exists.
|
||||
* @retval 0 if the AVL tree or AVL tree group does not exist.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since December 2015
|
||||
@ -180,36 +187,19 @@ index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value);
|
||||
int obi_avl_exists(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an AVL tree and creates it if it does not already exist.
|
||||
*
|
||||
* Note: An AVL tree is made of two files (referred to by two structures).
|
||||
* One file contains the indices referring to the data, and the other
|
||||
* file contains the data itself. The AVL tree as a whole is referred
|
||||
* to via the OBIDMS_avl structure.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The name of the AVL tree.
|
||||
*
|
||||
* @returns A pointer to the AVL tree structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Creates an AVL tree. Fails if it already exists.
|
||||
*
|
||||
* Note: An AVL tree is made of two files (referred to by two structures).
|
||||
* One file contains the indices referring to the data, and the other
|
||||
* file contains the data itself. The AVL tree as a whole is referred
|
||||
* to via the OBIDMS_avl structure.
|
||||
* to via the OBIDMS_avl structure. An AVL tree is stored in a directory
|
||||
* with the same name, or with the base name of the AVL group if it is
|
||||
* part of an AVL group.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The name of the AVL tree.
|
||||
* @param avl_idx The index of the AVL tree if it is part of an AVL group.
|
||||
*
|
||||
* @returns A pointer to the newly created AVL tree structure.
|
||||
* @retval NULL if an error occurred.
|
||||
@ -217,7 +207,7 @@ OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name);
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name);
|
||||
OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
|
||||
|
||||
|
||||
/**
|
||||
@ -230,6 +220,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name);
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The name of the AVL tree.
|
||||
* @param avl_idx The index of the AVL tree if it is part of an AVL group.
|
||||
*
|
||||
* @returns A pointer to the AVL tree structure.
|
||||
* @retval NULL if an error occurred.
|
||||
@ -237,17 +228,66 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name);
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name);
|
||||
OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an AVL tree group and creates it if it does not already exist.
|
||||
*
|
||||
* Note: An AVL tree group is composed of multiple AVL trees that all have the
|
||||
* same base name, and an index differentiating them.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The base name of the AVL tree group.
|
||||
*
|
||||
* @returns A pointer to the AVL tree group structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Creates an AVL tree group.
|
||||
*
|
||||
* Note: An AVL tree group is composed of multiple AVL trees that all have the
|
||||
* same base name, and an index differentiating them.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The base name of the AVL tree group.
|
||||
*
|
||||
* @returns A pointer to the AVL tree group structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an AVL tree group.
|
||||
*
|
||||
* Note: An AVL tree group is composed of multiple AVL trees that all have the
|
||||
* same base name, and an index differentiating them.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The base name of the AVL tree group.
|
||||
*
|
||||
* @returns A pointer to the AVL tree group structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_group_p obi_open_avl_group(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Closes an AVL tree.
|
||||
*
|
||||
* Note: An AVL tree is made of two files (referred to by two structures).
|
||||
* One file contains the indices referring to the data, and the other
|
||||
* file contains the data itself. The AVL tree as a whole is referred
|
||||
* to via the OBIDMS_avl structure.
|
||||
*
|
||||
* @param avl A pointer to the AVL tree structure to close and free.
|
||||
*
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
@ -260,26 +300,56 @@ int obi_close_avl(OBIDMS_avl_p avl);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Adds a value (byte array) in an AVL tree, checking if it is already in it.
|
||||
* @brief Closes an AVL tree group.
|
||||
*
|
||||
* @warning The byte array to add must already be encoded and contain its header.
|
||||
* @param avl_group A pointer to the AVL tree group structure to close and free.
|
||||
*
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_close_avl_group(OBIDMS_avl_group_p avl_group);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value (byte array) in an AVL tree.
|
||||
*
|
||||
* @warning The byte array recovered must be decoded to get the original value.
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param index The index of the value in the data array.
|
||||
*
|
||||
* @returns A pointer to the byte array recovered.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_byte_array_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Adds a value (byte array) in an AVL tree NOT checking first if it is already in it. // TODO to discuss
|
||||
*
|
||||
* @warning The value given must already be encoded into a byte array structure (Obi_byte_array_t).
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param value The byte array to add in the AVL tree.
|
||||
*
|
||||
* @returns The index of the value, whether it was added or already in the AVL tree.
|
||||
* @returns The index of the value newly added in the AVL tree.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value);
|
||||
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Finds a value (byte array) in an AVL tree, checking first if it is already in it.
|
||||
* @brief Finds a value (byte array) in an AVL tree.
|
||||
*
|
||||
* @warning The byte array to add must already be encoded and contain its header.
|
||||
* @warning The value given must already be encoded into a byte array structure (Obi_byte_array_t).
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param value The byte array to find in the AVL tree.
|
||||
@ -290,86 +360,40 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value);
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value);
|
||||
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value (byte array) in an AVL tree.
|
||||
*
|
||||
* @warning The byte array recovered is encoded and contains its header.
|
||||
* @warning The byte array recovered must be decoded to get the original value.
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param avl_group A pointer to the AVL tree group.
|
||||
* @param index The index of the value in the data array.
|
||||
*
|
||||
* @returns A pointer to the byte array recovered.
|
||||
*
|
||||
* @since December 2015
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t* obi_avl_get(OBIDMS_avl_p avl, index_t index);
|
||||
Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a character string to a byte array with a header.
|
||||
* @brief Adds a value (byte array) in an AVL tree group, checking if it is already in it.
|
||||
*
|
||||
* @warning The byte array must be freed by the caller.
|
||||
* @warning The value given must already be encoded into a byte array structure (Obi_byte_array_t).
|
||||
*
|
||||
* @param value The character string to convert.
|
||||
* @param avl_group A pointer to the AVL tree group.
|
||||
* @param value The byte array to add in the AVL tree group.
|
||||
*
|
||||
* @returns A pointer to the byte array created.
|
||||
* @retval NULL if an error occurred.
|
||||
* @returns The index of the value newly added in the AVL tree group.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t* obi_str_to_obibytes(char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a byte array to a character string.
|
||||
*
|
||||
* @param value_b The byte array to convert.
|
||||
*
|
||||
* @returns A pointer to the character string contained in the byte array.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_obibytes_to_str(byte_t* value_b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a DNA sequence to a byte array with a header.
|
||||
*
|
||||
* @warning The byte array must be freed by the caller.
|
||||
*
|
||||
* @param value The DNA sequence to convert.
|
||||
*
|
||||
* @returns A pointer to the byte array created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t* obi_seq_to_obibytes(char* seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a byte array to a DNA sequence.
|
||||
*
|
||||
* @param value_b The byte array to convert.
|
||||
*
|
||||
* @returns A pointer to the DNA sequence contained in the byte array.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_obibytes_to_seq(byte_t* value_b); // TODO move to encode source files
|
||||
|
||||
|
||||
// TODO
|
||||
byte_t* obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
|
||||
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_byte_array_p value);
|
||||
|
||||
|
||||
#endif /* OBIAVL_H_ */
|
||||
|
Reference in New Issue
Block a user