diff --git a/python/obitools3/obidms/_obidms.cfiles b/python/obitools3/obidms/_obidms.cfiles index a141e81..1428221 100644 --- a/python/obitools3/obidms/_obidms.cfiles +++ b/python/obitools3/obidms/_obidms.cfiles @@ -12,7 +12,7 @@ ../../../src/obitypes.c ../../../src/private_at_functions.h ../../../src/private_at_functions.c -../../../src/obiarray.h -../../../src/obiarray.c +../../../src/obiavl.h +../../../src/obiavl.c ../../../src/encode.h ../../../src/encode.c \ No newline at end of file diff --git a/python/obitools3/obidms/_obidms.pxd b/python/obitools3/obidms/_obidms.pxd index 0e2fbda..7093304 100644 --- a/python/obitools3/obidms/_obidms.pxd +++ b/python/obitools3/obidms/_obidms.pxd @@ -25,7 +25,7 @@ cdef class OBIDMS: index_t nb_lines=*, index_t nb_elements_per_line=*, list elements_names=*, - str array_name=*, + str avl_name=*, str comments=*) diff --git a/python/obitools3/obidms/_obidms.pyx b/python/obitools3/obidms/_obidms.pyx index 8880e0a..3478d6c 100644 --- a/python/obitools3/obidms/_obidms.pyx +++ b/python/obitools3/obidms/_obidms.pyx @@ -133,7 +133,7 @@ cdef class OBIDMS : index_t nb_lines=0, index_t nb_elements_per_line=0, list elements_names=None, - str array_name="default_obiarray", + str avl_name="default_AVL_tree", str comments=""): # Declarations @@ -263,7 +263,7 @@ cdef class OBIDMS : referring, version_number, data_type, nb_lines, nb_elements_per_line, - elements_names, array_name, + elements_names, avl_name, comments) return column @@ -284,13 +284,13 @@ cdef class OBIDMS_column : index_t nb_lines, index_t nb_elements_per_line, list elements_names, - str array_name, + str avl_name, str comments): # Declarations cdef bytes column_name_b cdef bytes dms_name_b - cdef bytes array_name_b + cdef bytes avl_name_b cdef bytes elements_names_b cdef bytes comments_b @@ -304,7 +304,7 @@ cdef class OBIDMS_column : # Format the character strings to send them to C functions column_name_b = str2bytes(column_name) dms_name_b = str2bytes(self.dms.dms_name) - 
array_name_b = str2bytes(array_name) + avl_name_b = str2bytes(avl_name) comments_b = str2bytes(comments) # Create, clone or open column @@ -315,7 +315,7 @@ cdef class OBIDMS_column : elements_names_b = str2bytes(";".join(elements_names)) self.pointer = obi_create_column(self.dms.pointer, column_name_b, type, nb_lines, nb_elements_per_line, - elements_names_b, array_name_b, comments_b, + elements_names_b, avl_name_b, comments_b, referring) else : if clone : diff --git a/python/obitools3/obidms/_obidmscolumn_bool.cfiles b/python/obitools3/obidms/_obidmscolumn_bool.cfiles index 04bda7a..c96979c 100644 --- a/python/obitools3/obidms/_obidmscolumn_bool.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_bool.cfiles @@ -14,5 +14,5 @@ ../../../src/obitypes.c ../../../src/private_at_functions.h ../../../src/private_at_functions.c -../../../src/obiarray.h -../../../src/obiarray.c +../../../src/obiavl.h +../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_char.cfiles b/python/obitools3/obidms/_obidmscolumn_char.cfiles index dc88b30..25f9a80 100644 --- a/python/obitools3/obidms/_obidmscolumn_char.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_char.cfiles @@ -14,5 +14,5 @@ ../../../src/obitypes.c ../../../src/private_at_functions.h ../../../src/private_at_functions.c -../../../src/obiarray.h -../../../src/obiarray.c +../../../src/obiavl.h +../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_float.cfiles b/python/obitools3/obidms/_obidmscolumn_float.cfiles index 7a6df19..3878e83 100644 --- a/python/obitools3/obidms/_obidmscolumn_float.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_float.cfiles @@ -14,5 +14,5 @@ ../../../src/obitypes.c ../../../src/private_at_functions.h ../../../src/private_at_functions.c -../../../src/obiarray.h -../../../src/obiarray.c +../../../src/obiavl.h +../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_int.cfiles b/python/obitools3/obidms/_obidmscolumn_int.cfiles index 
ab45f82..6a303aa 100644 --- a/python/obitools3/obidms/_obidmscolumn_int.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_int.cfiles @@ -14,5 +14,5 @@ ../../../src/obitypes.c ../../../src/private_at_functions.h ../../../src/private_at_functions.c -../../../src/obiarray.h -../../../src/obiarray.c +../../../src/obiavl.h +../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_seq.cfiles b/python/obitools3/obidms/_obidmscolumn_seq.cfiles index b8a9119..857c9dc 100644 --- a/python/obitools3/obidms/_obidmscolumn_seq.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_seq.cfiles @@ -14,5 +14,5 @@ ../../../src/obitypes.c ../../../src/private_at_functions.h ../../../src/private_at_functions.c -../../../src/obiarray.h -../../../src/obiarray.c +../../../src/obiavl.h +../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_str.cfiles b/python/obitools3/obidms/_obidmscolumn_str.cfiles index 7875117..9f7af7b 100644 --- a/python/obitools3/obidms/_obidmscolumn_str.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_str.cfiles @@ -14,5 +14,5 @@ ../../../src/obitypes.c ../../../src/private_at_functions.h ../../../src/private_at_functions.c -../../../src/obiarray.h -../../../src/obiarray.c +../../../src/obiavl.h +../../../src/obiavl.c diff --git a/python/obitools3/obidms/capi/obidmscolumn.pxd b/python/obitools3/obidms/capi/obidmscolumn.pxd index 4c13e61..d3bae82 100644 --- a/python/obitools3/obidms/capi/obidmscolumn.pxd +++ b/python/obitools3/obidms/capi/obidmscolumn.pxd @@ -30,7 +30,7 @@ cdef extern from "obidmscolumn.h" nogil: bint referring obiversion_t referred_column_version const_char_p name - const_char_p array_name + const_char_p avl_name const_char_p comments ctypedef OBIDMS_column_header_t* OBIDMS_column_header_p @@ -47,7 +47,7 @@ cdef extern from "obidmscolumn.h" nogil: index_t nb_lines, index_t nb_elements_per_line, const_char_p elements_names, - const_char_p array_name, + const_char_p avl_name, const_char_p comments, bint referring) diff 
--git a/src/encode.c b/src/encode.c index 4d437e7..f145e6c 100644 --- a/src/encode.c +++ b/src/encode.c @@ -16,7 +16,8 @@ #include #include "encode.h" -#include "obiarray.h" +#include "obierrno.h" +#include "obitypes.h" // For byte_t type #include "obidebug.h" diff --git a/src/encode.h b/src/encode.h index 9bb7954..83093e6 100644 --- a/src/encode.h +++ b/src/encode.h @@ -15,7 +15,7 @@ #include #include -#include "obiarray.h" +#include "obitypes.h" #define NUC_MASK_2B 0x3 /**< Binary: 11 to use when decoding 2 bits sequences */ diff --git a/src/obiarray.c b/src/obiarray.c deleted file mode 100644 index af2ebcc..0000000 --- a/src/obiarray.c +++ /dev/null @@ -1,1279 +0,0 @@ -/**************************************************************************** - * OBIDMS array functions * - ****************************************************************************/ - -/** - * @file obiarray.c - * @author Celine Mercier - * @date October 26th 2015 - * @brief Functions handling arrays for storing and retrieving bit arrays. - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "obiarray.h" -#include "obierrno.h" -#include "obitypes.h" -#include "obidebug.h" -#include "private_at_functions.h" -#include "encode.h" - - -#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) - - -/************************************************************************** - * - * D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S - * - **************************************************************************/ - -/** - * @brief Internal function building the file name for an array file. - * - * @warning The returned pointer has to be freed by the caller. - * - * @param array_name The name of the array. - * - * @returns A pointer to the array file name. - * @retval NULL if an error occurred. 
- * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -static char* build_array_file_name(const char* array_name); - - -/** - * @brief Internal function building the file name for an array data file. - * - * @warning The returned pointer has to be freed by the caller. - * - * @param array_name The name of the array. - * - * @returns A pointer to the array data file name. - * @retval NULL if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -static char* build_array_data_file_name(const char* array_name); - - -/** - * @brief Internal function returning the size of an array header on this platform. - * - * @returns The size of an array header in bytes. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -size_t get_array_header_size(); - - -/** - * @brief Internal function returning the initial size of an array on this platform. - * - * @returns The initial size of an array in bytes. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -size_t get_initial_array_size(); - - -/** - * @brief Internal function returning the size of a data array header on this platform. - * - * @returns The size of a data array header in bytes. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -size_t get_array_data_header_size(); - - -/** - * @brief Internal function returning the initial size of a data array on this platform. - * - * @returns The initial size of a data array in bytes. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -size_t get_initial_array_data_size(); - - -/** - * @brief Internal function closing a data array structure. - * - * @param array_data A pointer to the data array. - * - * @retval 0 if the operation was successfully completed. - * @retval -1 if an error occurred. 
- * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -int close_array_data(OBIDMS_array_data_p array_data); - - -/** - * @brief Internal function enlarging an array. - * - * @param array A pointer to the array structure. - * - * @retval 0 if the operation was successfully completed. - * @retval -1 if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -int grow_array(OBIDMS_array_p array); - - -/** - * @brief Internal function enlarging a data array. - * - * @param array A pointer to the array structure. - * - * @retval 0 if the operation was successfully completed. - * @retval -1 if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -int grow_array_data(OBIDMS_array_p array); - - -/** - * @brief Internal function comparing two byte arrays. - * - * The encoding is compared first, then the length of the - * values, then the values themselves. - * - * @param value_1 A pointer to the first byte array. - * @param value_2 A pointer to the second byte array. - * - * @returns A value < 0 if value_1 < value_2, - * a value > 0 if value_1 > value_2, - * and 0 if value_1 == value_2. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -int array_compare(byte_t* value_1, byte_t* value_2); - - -/** - * @brief Internal function calculating the size in bytes of a byte array. - * - * @param value A pointer to the byte array. - * - * @returns The size of the byte array in bytes. 
- * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -size_t array_sizeof(byte_t* value); - - -/************************************************************************ - * - * D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S - * - ************************************************************************/ - -static char* build_array_file_name(const char* array_name) -{ - char* file_name; - - // Build the file name - if (asprintf(&file_name,"%s.oda", array_name) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError building an array file name"); - return NULL; - } - - // Test if the array name is not too long - if (strlen(file_name) >= ARRAY_MAX_NAME) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError due to array name too long"); - free(file_name); - return NULL; - } - - return file_name; -} - - -static char* build_array_data_file_name(const char* array_name) -{ - char* file_name; - - // Build the file name - if (asprintf(&file_name,"%s.odd", array_name) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError building an array data file name"); - return NULL; - } - - return file_name; -} - - - -size_t get_array_header_size() -{ - size_t header_size; - size_t rounded_header_size; - double multiple; - - header_size = sizeof(OBIDMS_array_header_t); - - multiple = ceil((double) header_size / (double) getpagesize()); - - rounded_header_size = multiple * getpagesize(); - - return rounded_header_size; -} - - -size_t get_initial_array_size() -{ - return getpagesize() * 1; -} - - -size_t get_array_data_header_size() -{ - size_t header_size; - size_t rounded_header_size; - double multiple; - - header_size = sizeof(OBIDMS_array_data_header_t); - - multiple = ceil((double) header_size / (double) getpagesize()); - - rounded_header_size = multiple * getpagesize(); - - return rounded_header_size; -} - - -size_t get_initial_array_data_size() -{ - return getpagesize() * 1; -} - - -int 
close_array_data(OBIDMS_array_data_p array_data) -{ - int ret_val = 0; - - if (munmap(array_data->data, (array_data->header)->data_size_max) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError munmapping the data of an array data file"); - ret_val = -1; - } - - if (munmap(array_data->header, (array_data->header)->header_size) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError munmapping the header of an array data file"); - ret_val = -1; - } - - free(array_data); - - return ret_val; -} - - -int grow_array(OBIDMS_array_p array) // TODO Lock when needed -{ - size_t file_size; - size_t old_data_size; - size_t new_data_size; - size_t header_size; - int array_file_descriptor; - char* array_file_name; - - // Get the array file name - array_file_name = build_array_file_name((array->header)->array_name); - if (array_file_name == NULL) - return -1; - - // Open the array file - array_file_descriptor = openat(array->dir_fd, array_file_name, O_RDWR); - if (array_file_descriptor < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError opening an array file"); - free(array_file_name); - return -1; - } - free(array_file_name); - - // Calculate the new file size - old_data_size = (array->header)->array_size; - new_data_size = old_data_size * ARRAY_GROWTH_FACTOR; - header_size = (array->header)->header_size; - file_size = header_size + new_data_size; - - // Enlarge the file - if (ftruncate(array_file_descriptor, file_size) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError enlarging an array file"); - close(array_file_descriptor); - return -1; - } - - // Unmap and re-map the data - - if (munmap(array->first, old_data_size) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError munmapping the array of an array file before enlarging"); - close(array_file_descriptor); - return -1; - } - - array->first = mmap(NULL, - new_data_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_file_descriptor, - header_size - ); - - if 
(array->first == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError re-mmapping the array of an array file after enlarging the file"); - close(array_file_descriptor); - return -1; - } - - // Set new maximum number of items - (array->header)->nb_items_max = ceil(((double) new_data_size) / ((double) sizeof(index_t))); - - // Set the new array size - (array->header)->array_size = new_data_size; - - // Initialize new data to 0 - memset(((uint8_t*)(array->first))+old_data_size, 0, (new_data_size-old_data_size)); - - close(array_file_descriptor); - - return 0; -} - - -int grow_array_data(OBIDMS_array_p array) // TODO Lock when needed -{ - size_t file_size; - index_t old_data_size; - index_t new_data_size; - size_t header_size; - int array_data_file_descriptor; - char* array_data_file_name; - - // Get the array data file name - array_data_file_name = build_array_data_file_name((array->header)->array_name); - if (array_data_file_name == NULL) - return -1; - - // Open the array data file - array_data_file_descriptor = openat(array->dir_fd, array_data_file_name, O_RDWR); - if (array_data_file_descriptor < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError opening an array data file"); - free(array_data_file_name); - return -1; - } - free(array_data_file_name); - - // Calculate the new file size - old_data_size = ((array->data)->header)->data_size_max; - new_data_size = old_data_size * ARRAY_GROWTH_FACTOR; - header_size = ((array->data)->header)->header_size; - file_size = header_size + new_data_size; - - // Enlarge the file - if (ftruncate(array_data_file_descriptor, file_size) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError enlarging an array data file"); - close(array_data_file_descriptor); - return -1; - } - - // Unmap and re-map the data - - if (munmap((array->data)->data, old_data_size) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError munmapping the data of an array data file before enlarging"); - 
close(array_data_file_descriptor); - return -1; - } - - (array->data)->data = mmap(NULL, - new_data_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_data_file_descriptor, - header_size - ); - - if ((array->data)->data == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError re-mmapping the data of an array data file after enlarging the file"); - close(array_data_file_descriptor); - return -1; - } - - // Set new data size - ((array->data)->header)->data_size_max = new_data_size; - - // Initialize new data to 0 - memset(((array->data)->data)+old_data_size, 0, new_data_size - old_data_size); - - close(array_data_file_descriptor); - - return 0; -} - - -int array_compare(byte_t* value_1, byte_t* value_2) -{ - int comp; - uint8_t size_1; - uint8_t size_2; - int32_t len_1; - int32_t len_2; - int32_t ini_len_1; - int32_t ini_len_2; - int32_t b; - - //obidebug(1, "\nCOMPARING 1=%d,%.*s; 2=%d,%.*s", *((int32_t*)(value_1+1)), *((int32_t*)(value_1+1)), value_1+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_2+1)), *((int32_t*)(value_2+1)), value_2+BYTE_ARRAY_HEADER_SIZE); - - size_1 = (uint8_t) *(value_1); - size_2 = (uint8_t) *(value_2); - - if (size_1 != size_2) - return (size_1 - size_2); - - len_1 = *((int32_t*)(value_1+1)); - len_2 = *((int32_t*)(value_2+1)); - - if (len_1 != len_2) - return (len_1 - len_2); - - if (size_1 != 8) - { - ini_len_1 = *((int32_t*)(value_1+5)); - ini_len_2 = *((int32_t*)(value_2+5)); - - if (ini_len_1 != ini_len_2) - return (ini_len_1 - ini_len_2); - } - - b = BYTE_ARRAY_HEADER_SIZE; - comp = 0; - while (!comp && (b < len_1+BYTE_ARRAY_HEADER_SIZE)) - { - comp = *(value_1+b) - *(value_2+b); - b++; - } - return comp; -} - - -size_t array_sizeof(byte_t* value) -{ - return (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1))); -} - - -/********************************************************************** - * - * D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S - * - 
**********************************************************************/ - -int obi_array_exists(OBIDMS_p dms, const char* array_name) -{ - struct stat buffer; - char* array_file_path; - char* array_file_name; - int check_dir; - - // Build file name - array_file_name = build_array_file_name(array_name); - if (array_file_name == NULL) - return -1; - - // Build the array file path - array_file_path = get_full_path(dms->array_dir_fd, array_file_name); - if (array_file_path == NULL) - { - obidebug(1, "\nError getting the file path for an array file"); - return -1; - } - - check_dir = stat(array_file_path, &buffer); - - free(array_file_path); - free(array_file_name); - - if (check_dir == 0) - return 1; - else - return 0; -} - - -OBIDMS_array_p obi_array(OBIDMS_p dms, const char* array_name) -{ - int exists; - - exists = obi_array_exists(dms, array_name); - - switch (exists) - { - case 0: - return obi_create_array(dms, array_name); - case 1: - return obi_open_array(dms, array_name); - }; - - obidebug(1, "\nError checking if an array already exists"); - return NULL; -} - - -OBIDMS_array_p obi_create_array(OBIDMS_p dms, const char* array_name) -{ - char* array_file_name; - char* array_data_file_name; - size_t header_size; - size_t data_size; - size_t file_size; - int array_file_descriptor; - int array_data_file_descriptor; - int array_dir_file_descriptor; - OBIDMS_array_data_p array_data; - OBIDMS_array_p array; - - // Create the data file - - // Build file name - array_data_file_name = build_array_data_file_name(array_name); - if (array_data_file_name == NULL) - return NULL; - - // Get the file descriptor of the array directory - array_dir_file_descriptor = dms->array_dir_fd; - - // Create file - array_data_file_descriptor = openat(array_dir_file_descriptor, array_data_file_name, O_RDWR | O_CREAT | O_EXCL, 0777); - if (array_data_file_descriptor < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError creating an array data file"); - free(array_data_file_name); - 
return NULL; - } - free(array_data_file_name); - - // Calculate the size needed - header_size = get_array_data_header_size(); - data_size = get_initial_array_data_size(); - file_size = header_size + data_size; - - // Truncate the array data file to the right size - if (ftruncate(array_data_file_descriptor, file_size) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError truncating an array data file to the right size"); - close(array_data_file_descriptor); - return NULL; - } - - // Allocate the memory for the array data structure - array_data = (OBIDMS_array_data_p) malloc(sizeof(OBIDMS_array_data_t)); - if (array_data == NULL) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError allocating the memory for the array data structure"); - close(array_data_file_descriptor); - return NULL; - } - - // Fill the array data structure - array_data->header = mmap(NULL, - header_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_data_file_descriptor, - 0 - ); - if (array_data->header == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError mmapping the header of an array data file"); - close(array_data_file_descriptor); - free(array_data); - return NULL; - } - - array_data->data = mmap(NULL, - data_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_data_file_descriptor, - header_size - ); - if (array_data->data == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError mmapping the data of an array data file"); - munmap(array_data->header, header_size); - close(array_data_file_descriptor); - free(array_data); - return NULL; - } - - (array_data->header)->header_size = header_size; - (array_data->header)->data_size_max = data_size; - (array_data->header)->data_size_used = 0; - (array_data->header)->nb_items = 0; - (array_data->header)->creation_date = time(NULL); - strcpy((array_data->header)->array_name, array_name); - - // Initialize all bits to 0 - memset(array_data->data, 0, (array_data->header)->data_size_max); - 
- close(array_data_file_descriptor); - - - // Create the array file - - // Build file name - array_file_name = build_array_file_name(array_name); - if (array_file_name == NULL) - { - close_array_data(array_data); - return NULL; - } - - // Calculate the size needed - header_size = get_array_header_size(); - data_size = get_initial_array_size(); - file_size = header_size + data_size; - - // Create file - array_file_descriptor = openat(array_dir_file_descriptor, array_file_name, O_RDWR | O_CREAT | O_EXCL, 0777); - if (array_file_descriptor < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError creating an array file"); - close_array_data(array_data); - free(array_file_name); - return NULL; - } - free(array_file_name); - - // Truncate the array file to the right size - if (ftruncate(array_file_descriptor, file_size) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError truncating an array file to the right size"); - close_array_data(array_data); - close(array_file_descriptor); - return NULL; - } - - // Allocate the memory for the array structure - array = (OBIDMS_array_p) malloc(sizeof(OBIDMS_array_t)); - if (array == NULL) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError allocating the memory for the array structure"); - close_array_data(array_data); - close(array_file_descriptor); - return NULL; - } - - // Fill the array structure - array->header = mmap(NULL, - header_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_file_descriptor, - 0 - ); - if (array->header == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError mmapping the header of an array file"); - close_array_data(array_data); - close(array_file_descriptor); - free(array); - return NULL; - } - - array->first = mmap(NULL, - data_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_file_descriptor, - header_size - ); - if (array->first == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError mmapping the data of an array file"); - 
close_array_data(array_data); - munmap(array->header, header_size); - close(array_file_descriptor); - free(array); - return NULL; - } - - array->dms = dms; - array->data = array_data; - array->directory = dms->array_directory; - array->dir_fd = array_dir_file_descriptor; - - (array->header)->header_size = header_size; - (array->header)->array_size = data_size; - (array->header)->nb_items = 0; - (array->header)->nb_items_max = (index_t) ceil(((double) get_initial_array_size()) / ((double) sizeof(index_t))); - (array->header)->creation_date = time(NULL); - strcpy((array->header)->array_name, array_name); - - // Initialize to 0 (TODO: unnecessary?) - memset(array->first, 0, data_size); - - close(array_file_descriptor); - - // Add in the list of opened arrays - *(((dms->opened_arrays)->arrays)+((dms->opened_arrays)->nb_opened_arrays)) = array; - ((dms->opened_arrays)->nb_opened_arrays)++; - array->counter = 1; - - return array; -} - - -OBIDMS_array_p obi_open_array(OBIDMS_p dms, const char* array_name) -{ - char* array_file_name; - char* array_data_file_name; - size_t header_size; - int array_file_descriptor; - int array_data_file_descriptor; - int array_dir_file_descriptor; - OBIDMS_array_data_p array_data; - OBIDMS_array_p array; - size_t i; - - // Check if the array is already in the list of opened arrays - for (i=0; i < ((dms->opened_arrays)->nb_opened_arrays); i++) - { - if (!strcmp(((*(((dms->opened_arrays)->arrays)+i))->header)->array_name, array_name)) - { // Found the array already opened - ((*(((dms->opened_arrays)->arrays)+i))->counter)++; - return *(((dms->opened_arrays)->arrays)+i); - } - } - - // Open the data file - - // Get the file descriptor of the array directory - array_dir_file_descriptor = dms->array_dir_fd; - - // Build file name - array_data_file_name = build_array_data_file_name(array_name); - if (array_data_file_name == NULL) - return NULL; - - // Open file - array_data_file_descriptor = openat(array_dir_file_descriptor, array_data_file_name, 
O_RDWR, 0777); - if (array_data_file_descriptor < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError opening an array data file"); - free(array_data_file_name); - return NULL; - } - free(array_data_file_name); - - // Allocate the memory for the array data structure - array_data = (OBIDMS_array_data_p) malloc(sizeof(OBIDMS_array_data_t)); - if (array_data == NULL) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError allocating the memory for the array data structure"); - close(array_data_file_descriptor); - return NULL; - } - - // Read the header size - if (read(array_data_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t))) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError reading the header size to open a data array"); - close(array_data_file_descriptor); - return NULL; - } - - // Fill the array data structure - array_data->header = mmap(NULL, - header_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_data_file_descriptor, - 0 - ); - if (array_data->header == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError mmapping the header of an array data file"); - close(array_data_file_descriptor); - free(array_data); - return NULL; - } - - array_data->data = mmap(NULL, - (array_data->header)->data_size_max, - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_data_file_descriptor, - header_size - ); - if (array_data->data == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError mmapping the data of an array data file"); - munmap(array_data->header, header_size); - close(array_data_file_descriptor); - free(array_data); - return NULL; - } - - close(array_data_file_descriptor); - - - // Open the array file - - // Build file name - array_file_name = build_array_file_name(array_name); - if (array_file_name == NULL) - { - close_array_data(array_data); - return NULL; - } - - // Open file - array_file_descriptor = openat(array_dir_file_descriptor, array_file_name, O_RDWR, 0777); - if 
(array_file_descriptor < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError opening an array file"); - close_array_data(array_data); - free(array_file_name); - return NULL; - } - free(array_file_name); - - // Allocate the memory for the array structure - array = (OBIDMS_array_p) malloc(sizeof(OBIDMS_array_t)); - if (array == NULL) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError allocating the memory for the array structure"); - close_array_data(array_data); - close(array_file_descriptor); - return NULL; - } - - // Read the header size - if (read(array_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t))) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError reading the header size to open an array"); - close(array_file_descriptor); - return NULL; - } - - // Fill the array structure - array->header = mmap(NULL, - header_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_file_descriptor, - 0 - ); - if (array->header == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError mmapping the header of an array file"); - close_array_data(array_data); - close(array_file_descriptor); - free(array); - return NULL; - } - - array->first = mmap(NULL, - (((array->header)->nb_items_max) * sizeof(index_t)), - PROT_READ | PROT_WRITE, - MAP_SHARED, - array_file_descriptor, - header_size - ); - if (array->first == MAP_FAILED) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError mmapping the data of an array file"); - close_array_data(array_data); - munmap(array->header, header_size); - close(array_file_descriptor); - free(array); - return NULL; - } - - array->dms = dms; - array->data = array_data; - array->directory = dms->array_directory; - array->dir_fd = array_dir_file_descriptor; - - close(array_file_descriptor); - - // Add in the list of opened arrays - *(((dms->opened_arrays)->arrays)+((dms->opened_arrays)->nb_opened_arrays)) = array; - ((dms->opened_arrays)->nb_opened_arrays)++; - array->counter 
= 1; - - return array; -} - - -int obi_close_array(OBIDMS_array_p array) -{ - int ret_val = 0; - size_t i; - Opened_arrays_list_p arrays_list; - OBIDMS_p dms; - - dms = array->dms; - - arrays_list = dms->opened_arrays; - - (array->counter)--; - - if (array->counter == 0) - { - // Delete from the list of opened arrays - for (i=0; i < (arrays_list->nb_opened_arrays); i++) - { - if (!strcmp(((*((arrays_list->arrays)+i))->header)->array_name, (array->header)->array_name)) - { // Found the array. Rearrange list - (arrays_list->nb_opened_arrays)--; - (arrays_list->arrays)[i] = (arrays_list->arrays)[arrays_list->nb_opened_arrays]; - } - } - - ret_val = close_array_data(array->data); - - if (munmap(array->first, (((array->header)->nb_items_max) * sizeof(index_t))) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError munmapping the array of an array file"); - ret_val = -1; - } - - if (munmap(array->header, (array->header)->header_size) < 0) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError munmapping the header of an array file"); - ret_val = -1; - } - - free(array); - } - - return ret_val; -} - - -index_t obi_array_add(OBIDMS_array_p array, byte_t* value) -{ - index_t idx; - index_t nb_items; - index_t nb_items_max; - index_t data_size_used; - index_t* data_size_max_p; - size_t value_size; - - nb_items = (array->header)->nb_items; - nb_items_max = (array->header)->nb_items_max; - data_size_used = ((array->data)->header)->data_size_used; - data_size_max_p = &(((array->data)->header)->data_size_max); - - // Check if the value is already in the array - if (nb_items > 0) - { - idx = obi_array_search(array, value); - if (idx >= 0) // The value is already in the array - return idx; - else - idx = -(idx+1); - } - else // First item - idx = 0; - - // Grow the array if needed - if (nb_items == nb_items_max) - { - if (grow_array(array) < 0) - return -1; - } - - // Grow the data if needed - value_size = array_sizeof(value); - while (*data_size_max_p < 
(data_size_used + (int64_t) value_size)) - { - if (grow_array_data(array) < 0) - return -1; - } - - // Store the index of the value in the array - - if (nb_items > 0) - memmove(((array->first)+idx+1), ((array->first)+idx), ((nb_items - idx) * sizeof(index_t))); - (array->first)[idx] = data_size_used; - - // Store the value itself at the end of the data - memcpy((((array->data)->data)+data_size_used), value, value_size); - - // Update the data size - ((array->data)->header)->data_size_used = data_size_used + value_size; - - // Update the number of items - ((array->header)->nb_items)++; - (((array->data)->header)->nb_items)++; - - // PRINT ARRAY -// fprintf(stderr, "\nARRAY:"); -// int i; -// for (i=0; i<=nb_items; i++) -// fprintf(stderr, "\narray[%d] = %d -> %s", i, a[i], data+a[i]+BYTE_ARRAY_HEADER_SIZE); - - // Return the data index of the value - return data_size_used; -} - - -byte_t* obi_array_get(OBIDMS_array_p array, index_t idx) -{ - return (((array->data)->data)+idx); -} - - -index_t obi_array_search(OBIDMS_array_p array, byte_t* value) -{ - index_t idx_min; - index_t idx_max; - index_t idx_mid; - index_t idx; - index_t* a; - byte_t* data; - byte_t* to_compare; - index_t nb_items; - int comp; - - nb_items = (array->header)->nb_items; - data = (array->data)->data; - a = array->first; - - if (nb_items == 0) - return -1; - - idx_max = nb_items-1; - idx_min = 0; - - while (idx_min <= idx_max) - { - idx_mid = (idx_min+idx_max)/2; - to_compare = obi_array_get(array, a[idx_mid]); - comp = array_compare(to_compare, value); - if (!comp) - { - // Found - return a[idx_mid]; - } - else if (comp < 0) - { - // Search in upper half - idx = idx_mid + 1; - idx_min = idx_mid + 1; - } - else - { - // Search in lower half - idx = idx_mid; - idx_max = idx_mid - 1; - } - } - - // Not found: return where the value should be in the array - return -(idx+1); -} - - -byte_t* obi_str_to_obibytes(char* value) -{ - byte_t* value_b; - int32_t length; - - // Compute the number of bytes on 
which the value will be encoded - length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster) - - // Allocate the memory for the encoded value - value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length); - if (value_b == NULL) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError allocating memory for a byte array"); - return NULL; - } - - // Store the number of bits on which each element is encoded - *(value_b) = 8; - - // Store the length (in bytes) of the encoded value (same as decoded for character strings) - *((int32_t*)(value_b+1)) = length; - - // Store the initial length (in bytes) of the decoded value (same as encoded for character strings) - *((int32_t*)(value_b+5)) = length; - - // Store the character string - strcpy(value_b+BYTE_ARRAY_HEADER_SIZE, value); - - return value_b; -} - - -const char* obi_obibytes_to_str(byte_t* value_b) -{ - const char* value; - - value = value_b+BYTE_ARRAY_HEADER_SIZE; - - return value; -} - - -byte_t* obi_seq_to_obibytes(char* seq) -{ - byte_t* value_b; - int32_t length; // length of the value (without the header) in bytes - uint8_t size; // size of one element in bits - int32_t seq_length; - byte_t* encoded_seq; - - // Check if just ATGC and set size of a nucleotide accordingly (2 bits or 4 bits) - if (only_ATGC(seq)) - size = 2; - else - size = 4; - - // Compute the length (in bytes) of the encoded sequence - seq_length = strlen(seq); - if (size == 2) - length = ceil((double) seq_length / (double) 4.0); - else // size == 4 - length = ceil((double) seq_length / (double) 2.0); - - // Encode - if (size == 2) - encoded_seq = encode_seq_on_2_bits(seq, seq_length); - else // size == 4 - encoded_seq = encode_seq_on_4_bits(seq, seq_length); - if (encoded_seq == NULL) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError encoding a DNA sequence"); - return NULL; - } - - // Allocate the memory for the encoded value - value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length); - if (value_b == 
NULL) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError allocating memory for a byte array"); - return NULL; - } - - // Store the number of bits on which each nucleotide is encoded - *(value_b) = size; - - // Store the length (in bytes) of the encoded sequence - *((int32_t*)(value_b+1)) = length; - - // Store the length (in bytes) of the initial sequence (necessary for decoding) - *((int32_t*)(value_b+5)) = seq_length; - - // Store the encoded sequence - memcpy(value_b+BYTE_ARRAY_HEADER_SIZE, encoded_seq, length); - - free(encoded_seq); - - return value_b; -} - - -const char* obi_obibytes_to_seq(byte_t* value_b) -{ - const char* value; - uint8_t size; // size of one element in bits - - // Check the encoding (each nucleotide on 2 bits or 4 bits) - size = *(value_b); - - // Decode - if (size == 2) - value = decode_seq_on_2_bits(value_b+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_b+5))); - else - value = decode_seq_on_4_bits(value_b+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_b+5))); - - if (value == NULL) - { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError decoding a DNA sequence"); - return NULL; - } - - return value; -} - diff --git a/src/obiarray.h b/src/obiarray.h deleted file mode 100644 index c8bfe69..0000000 --- a/src/obiarray.h +++ /dev/null @@ -1,322 +0,0 @@ -/**************************************************************************** - * OBIDMS array header file * - ****************************************************************************/ - -/** - * @file obiarray.h - * @author Celine Mercier - * @date October 19th 2015 - * @brief Header file for handling arrays for storing and retrieving byte arrays (i.e. coding for character strings). - */ - - -#ifndef OBIARRAY_H_ -#define OBIARRAY_H_ - - -#include -#include -#include -#include -#include -#include - -#include "obidms.h" -#include "obitypes.h" - - -#define ARRAY_MAX_NAME (1024) /**< The maximum length of an array name. 
- */ -#define ARRAY_GROWTH_FACTOR (2) /**< The growth factor when an array is enlarged. - */ -#define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array. - */ - - -typedef char byte_t; /**< Defining byte type since data is stored in bits - * and char (stored on one byte) is the smallest addressable unit. - */ - - -/** - * @brief OBIDMS array data header structure. - */ -typedef struct OBIDMS_array_data_header { - int header_size; /**< Size of the header in bytes. - */ - index_t data_size_used; /**< Size of the data used in bytes. - */ - index_t data_size_max; /**< Max size of the data in bytes. - */ - index_t nb_items; /**< Number of items. - */ - char array_name[ARRAY_MAX_NAME+1]; /**< The array name as a NULL terminated string. - */ - time_t creation_date; /**< Date of creation of the file. - */ -} OBIDMS_array_data_header_t, *OBIDMS_array_data_header_p; - - -/** - * @brief OBIDMS array data structure. - */ -typedef struct OBIDMS_array_data { - OBIDMS_array_data_header_p header; /**< A pointer to the header of the array data. - */ - byte_t* data; /**< A pointer to the beginning of the data. - */ -} OBIDMS_array_data_t, *OBIDMS_array_data_p; - - -/** - * @brief OBIDMS array header structure. - */ -typedef struct OBIDMS_array_header { - int header_size; /**< Size of the header in bytes. - */ - size_t array_size; /**< Size of the array in bytes. - */ - index_t nb_items; /**< Number of items in the array. - */ - index_t nb_items_max; /**< Maximum number of items in the array before it has to be enlarged. - */ - char array_name[ARRAY_MAX_NAME+1]; /**< The array name as a NULL terminated string. - */ - time_t creation_date; /**< Date of creation of the file. - */ -} OBIDMS_array_header_t, *OBIDMS_array_header_p; - - -/** - * @brief OBIDMS array structure. - */ -typedef struct OBIDMS_array { - OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the array belongs. 
- */ - OBIDMS_array_header_p header; /**< A pointer to the header of the array. - */ - index_t* first; /**< A pointer to the beginning of the array itself. - */ - OBIDMS_array_data_p data; /**< A pointer to the structure containing the data - * that the array references. - */ - DIR* directory; /**< A directory entry usable to - * refer and scan the array directory. - */ - int dir_fd; /**< The file descriptor of the directory entry - * usable to refer and scan the array directory. - */ - size_t counter; /**< Indicates by how many threads/programs (TODO) the array is used. - */ -} OBIDMS_array_t, *OBIDMS_array_p; - - -/** - * @brief Checks if an obiarray already exists or not. - * - * @param dms The OBIDMS to which the obiarray belongs. - * @param array_name The name of the obiarray. - * - * @returns A value indicating whether the obiarray exists or not. - * @retval 1 if the obiarray exists. - * @retval 0 if the obiarray does not exist. - * @retval -1 if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -int obi_array_exists(OBIDMS_p dms, const char* array_name); - - -/** - * @brief Opens an obiarray and creates it if it does not already exist. - * - * Note: An obiarray is made of two files (referred to by two structures). - * One file contains the indices referring to the data, and the other - * file contains the data itself. The obiarray as a whole is referred - * to via the OBIDMS_array structure. - * - * @param dms The OBIDMS to which the obiarray belongs. - * @param array_name The name of the obiarray. - * - * @returns A pointer to the obiarray structure. - * @retval NULL if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -OBIDMS_array_p obi_array(OBIDMS_p dms, const char* array_name); - - -/** - * @brief Creates an obiarray. Fails if it already exists. - * - * Note: An obiarray is made of two files (referred to by two structures). 
- * One file contains the indices referring to the data, and the other - * file contains the data itself. The obiarray as a whole is referred - * to via the OBIDMS_array structure. - * - * @param dms The OBIDMS to which the obiarray belongs. - * @param array_name The name of the obiarray. - * - * @returns A pointer to the newly created obiarray structure. - * @retval NULL if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -OBIDMS_array_p obi_create_array(OBIDMS_p dms, const char* array_name); - - -/** - * @brief Opens an obiarray. Fails if it does not already exist. - * - * Note: An obiarray is made of two files (referred to by two structures). - * One file contains the indices referring to the data, and the other - * file contains the data itself. The obiarray as a whole is referred - * to via the OBIDMS_array structure. - * - * @param dms The OBIDMS to which the obiarray belongs. - * @param array_name The name of the obiarray. - * - * @returns A pointer to the obiarray structure. - * @retval NULL if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -OBIDMS_array_p obi_open_array(OBIDMS_p dms, const char* array_name); - - -/** - * @brief Closes an obiarray. - * - * Note: An obiarray is made of two files (referred to by two structures). - * One file contains the indices referring to the data, and the other - * file contains the data itself. The obiarray as a whole is referred - * to via the OBIDMS_array structure. - * - * @param array A pointer to the obiarray structure to close and free. - * - * @retval 0 if the operation was successfully completed. - * @retval -1 if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -int obi_close_array(OBIDMS_array_p array); - - -/** - * @brief Adds a value (byte array) in an obiarray, checking first if it is already in it. 
- * - * @warning The byte array to add must already be encoded and contain its header. - * - * @param array A pointer to the obiarray. - * @param value The byte array to add in the obiarray. - * - * @returns The index of the value, whether it was added or already in the obiarray. - * @retval -1 if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -index_t obi_array_add(OBIDMS_array_p array, byte_t* value); - - -/** - * @brief Recovers a value (byte array) in an obiarray. - * - * @warning The byte array recovered is encoded and contains its header. - * - * @param array A pointer to the obiarray. - * @param index The index of the value in the data array. - * - * @returns A pointer to the byte array recovered. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -byte_t* obi_array_get(OBIDMS_array_p array, index_t index); - - -/** - * @brief Searches a value (byte array) in an obiarray performing a binary search. - * - * @warning The byte array to search must already be encoded and contain its header. - * - * @param array A pointer to the obiarray. - * @param value The byte array to add in the obiarray. - * - * @returns If the value is found, its data index is returned. - * If the value is not found, the array index indicating where the value's data index - * should be in the array is returned in the form (- (index + 1)), as data indices in an - * obiarray are sorted according to the ascending order of the values (byte arrays) themselves. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -index_t obi_array_search(OBIDMS_array_p array, byte_t* value); - - -/** - * @brief Converts a character string to a byte array with a header. - * - * @warning The byte array must be freed by the caller. - * - * @param value The character string to convert. - * - * @returns A pointer to the byte array created. 
- * @retval NULL if an error occurred. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -byte_t* obi_str_to_obibytes(char* value); - - -/** - * @brief Converts a byte array to a character string. - * - * @param value_b The byte array to convert. - * - * @returns A pointer to the character string contained in the byte array. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -const char* obi_obibytes_to_str(byte_t* value_b); - - -/** - * @brief Converts a DNA sequence to a byte array with a header. - * - * @warning The byte array must be freed by the caller. - * - * @param value The DNA sequence to convert. - * - * @returns A pointer to the byte array created. - * @retval NULL if an error occurred. - * - * @since November 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -byte_t* obi_seq_to_obibytes(char* seq); - - -/** - * @brief Converts a byte array to a DNA sequence. - * - * @param value_b The byte array to convert. - * - * @returns A pointer to the DNA sequence contained in the byte array. - * @retval NULL if an error occurred. - * - * @since November 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -const char* obi_obibytes_to_seq(byte_t* value_b); - - -#endif /* OBIARRAY_H_ */ - diff --git a/src/obiavl.c b/src/obiavl.c new file mode 100644 index 0000000..14f8899 --- /dev/null +++ b/src/obiavl.c @@ -0,0 +1,1717 @@ +/**************************************************************************** + * OBIDMS AVL tree functions * + ****************************************************************************/ + +/** + * @file obiavl.c + * @author Celine Mercier + * @date December 3rd 2015 + * @brief Functions handling AVL trees for storing and retrieving bit arrays. 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "obiavl.h" +#include "obierrno.h" +#include "obitypes.h" +#include "obidebug.h" +#include "private_at_functions.h" +#include "encode.h" + + +#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) + + +/************************************************************************** + * + * D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S + * + **************************************************************************/ + +/** + * @brief Internal function building the file name for an AVL tree file. + * + * @warning The returned pointer has to be freed by the caller. + * + * @param avl_name The name of the AVL tree. + * + * @returns A pointer to the name of the file where the AVL tree is stored. + * @retval NULL if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +static char* build_avl_file_name(const char* avl_name); + + +/** + * @brief Internal function building the file name for an AVL tree file. + * + * @warning The returned pointer has to be freed by the caller. + * + * @param avl_name The name of the AVL tree. + * + * @returns A pointer to the name of the file where the data referred to by the AVL tree is stored. + * @retval NULL if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +static char* build_avl_data_file_name(const char* avl_name); + + +/** + * @brief Internal function returning the size of an AVL tree header on this platform. + * + * @returns The size of an AVL tree header in bytes. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +size_t get_avl_header_size(); + + +/** + * @brief Internal function returning the initial size of an AVL tree on this platform. + * + * @returns The initial size of an AVL tree in bytes. 
+ * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +size_t get_initial_avl_size(); + + +/** + * @brief Internal function returning the size, on this platform, of the header of the data + * referred to by an AVL tree. + * + * @returns The size of an AVL data header in bytes. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +size_t get_avl_data_header_size(); + + +/** + * @brief Internal function returning the initial size, on this platform, of the data + * referred to by an AVL tree. + * + * @returns The initial size of an AVL data array in bytes. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +size_t get_initial_avl_data_size(); + + +/** + * @brief Internal function closing an AVL data structure where the data referred to by an AVL tree is stored. + * + * @param avl_data A pointer to the data structure referred to by an AVL tree. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int close_avl_data(OBIDMS_avl_data_p avl_data); + + +/** + * @brief Internal function enlarging an AVL tree. + * + * @param avl A pointer to the AVL tree structure. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int grow_avl(OBIDMS_avl_p avl); + + +/** + * @brief Internal function enlarging the data array referred to by an AVL tree. + * + * @param avl A pointer to the AVL tree structure. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. 
+ * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int grow_avl_data(OBIDMS_avl_p avl); + + +/** + * @brief Internal function storing a value (byte array) in the data array referred to by an AVL tree. + * + * @param avl A pointer to the AVL tree structure. + * @param value A pointer to the value (byte array). + * + * @returns The index of the stored value. + * @retval -1 if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, byte_t* value); + + +/** + * @brief Internal function comparing two byte arrays. + * + * The encoding is compared first, then the length of the + * values, then the values themselves. + * + * @param value_1 A pointer to the first byte array. + * @param value_2 A pointer to the second byte array. + * + * @returns A value < 0 if value_1 < value_2, + * a value > 0 if value_1 > value_2, + * and 0 if value_1 == value_2. + * + * @since October 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int byte_array_compare(byte_t* value_1, byte_t* value_2); + + +/** + * @brief Internal function calculating the size in bytes of a byte array. + * + * @param value A pointer to the byte array. + * + * @returns The size of the byte array in bytes. + * + * @since October 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +size_t byte_array_sizeof(byte_t* value); + + +/** + * @brief Internal function initializing a node in an AVL tree. + * + * @param avl A pointer to the AVL tree structure. + * @param node_idx The index of the node to initialize in the mmapped AVL tree. + * + * @returns The node structure initialized. 
+ * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +AVL_node_p avl_create_node(OBIDMS_avl_p avl, index_t node_idx); + + +/** + * @brief Internal function updating the balance factors in an AVL tree + * after adding a node, only in the subtree that will have to be balanced. + * That subtree is found using the avl->path_idx array and the directions taken + * down the tree to add the new node are stored in the path->dir array. + * + * @param avl A pointer to the AVL tree structure. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +void avl_update_balance_factors(OBIDMS_avl_p avl); + + +/** + * @brief Internal function rotating a node with a "left left rotation". + * + * @param avl A pointer to the AVL tree structure. + * @param node A pointer to the node that has to be rotated. + * @param node_idx The index of the node that has to be rotated. + * + * @returns The new root of the subtree. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t avl_rotate_leftleft(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx); + +/** + * @brief Internal function rotating a node with a "left right rotation". + * + * @param avl A pointer to the AVL tree structure. + * @param node A pointer to the node that has to be rotated. + * @param node_idx The index of the node that has to be rotated. + * + * @returns The new root of the subtree. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t avl_rotate_leftright(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx); + + +/** + * @brief Internal function rotating a node with a "right left rotation". + * + * @param avl A pointer to the AVL tree structure. + * @param node A pointer to the node that has to be rotated. + * @param node_idx The index of the node that has to be rotated. + * + * @returns The new root of the subtree. 
+ * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t avl_rotate_rightleft(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx); + + +/** + * @brief Internal function rotating a node with a "right right rotation". + * + * @param avl A pointer to the AVL tree structure. + * @param node A pointer to the node that has to be rotated. + * @param node_idx The index of the node that has to be rotated. + * + * @returns The new root of the subtree. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t avl_rotate_rightright(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx); + + +/** + * @brief Internal function balancing one node. + * + * @param avl A pointer to the AVL tree structure. + * @param node A pointer to the node that has to be balanced. + * @param node_idx The index of the node that has to be balanced. + * + * @returns The new root of the subtree. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t avl_balance_node(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx); + + +/** + * @brief Internal function balancing the nodes of an AVL tree after adding a node, + * only in the subtree that eventually has to be balanced. + * That subtree is found using the avl->path_idx array. + * + * @param avl A pointer to the AVL tree structure. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +void avl_balance(OBIDMS_avl_p avl); + + +/** + * @brief Internal function printing a depth first traverse of a node. + * + * @param avl A pointer to the AVL tree structure. + * @param node A pointer to the node. + * @param node_idx The index of the node. + * @param depth The depth of the node. 
+ * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +void avl_print_node(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx, int depth); + + +/** + * @brief Internal function printing a depth first traverse of an AVL tree. + * + * @param avl A pointer to the AVL tree structure. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +void avl_print(OBIDMS_avl_p avl); + + +/************************************************************************ + * + * D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S + * + ************************************************************************/ + +static char* build_avl_file_name(const char* avl_name) +{ + char* file_name; + + // Build the file name + if (asprintf(&file_name,"%s.oda", avl_name) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError building an avl file name"); + return NULL; + } + + // Test if the avl name is not too long + if (strlen(file_name) >= AVL_MAX_NAME) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError due to avl name too long"); + free(file_name); + return NULL; + } + + return file_name; +} + + +static char* build_avl_data_file_name(const char* avl_name) +{ + char* file_name; + + // Build the file name + if (asprintf(&file_name,"%s.odd", avl_name) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError building an avl data file name"); + return NULL; + } + + return file_name; +} + + +size_t get_avl_header_size() +{ + size_t header_size; + size_t rounded_header_size; + double multiple; + + header_size = sizeof(OBIDMS_avl_header_t); + + multiple = ceil((double) header_size / (double) getpagesize()); + + rounded_header_size = multiple * getpagesize(); + + return rounded_header_size; +} + + +size_t get_initial_avl_size() +{ + return getpagesize() * 1; +} + + +size_t get_avl_data_header_size() +{ + size_t header_size; + size_t rounded_header_size; + double multiple; + + header_size = 
sizeof(OBIDMS_avl_data_header_t); + + multiple = ceil((double) header_size / (double) getpagesize()); + + rounded_header_size = multiple * getpagesize(); + + return rounded_header_size; +} + + +size_t get_initial_avl_data_size() +{ + return getpagesize() * 1; +} + + +int close_avl_data(OBIDMS_avl_data_p avl_data) +{ + int ret_val = 0; + + if (munmap(avl_data->data, (avl_data->header)->data_size_max) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError munmapping the data of an avl data file"); + ret_val = -1; + } + + if (munmap(avl_data->header, (avl_data->header)->header_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError munmapping the header of an avl data file"); + ret_val = -1; + } + + free(avl_data); + + return ret_val; +} + + +int grow_avl(OBIDMS_avl_p avl) // TODO Lock when needed +{ + size_t file_size; + size_t old_data_size; + size_t new_data_size; + size_t header_size; + int avl_file_descriptor; + char* avl_file_name; + + // Get the avl file name + avl_file_name = build_avl_file_name((avl->header)->avl_name); + if (avl_file_name == NULL) + return -1; + + // Open the avl file + avl_file_descriptor = openat(avl->dir_fd, avl_file_name, O_RDWR); + if (avl_file_descriptor < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError opening an avl file"); + free(avl_file_name); + return -1; + } + free(avl_file_name); + + // Calculate the new file size + old_data_size = (avl->header)->avl_size; + new_data_size = old_data_size * AVL_GROWTH_FACTOR; + header_size = (avl->header)->header_size; + file_size = header_size + new_data_size; + + // Enlarge the file + if (ftruncate(avl_file_descriptor, file_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError enlarging an avl file"); + close(avl_file_descriptor); + return -1; + } + + // Unmap and re-map the data + + if (munmap(avl->tree, old_data_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError munmapping the tree of an avl file before enlarging"); + 
close(avl_file_descriptor); + return -1; + } + + avl->tree = mmap(NULL, + new_data_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_file_descriptor, + header_size + ); + + if (avl->tree == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError re-mmapping the tree of an avl file after enlarging the file"); + close(avl_file_descriptor); + return -1; + } + + // Set new maximum number of items + (avl->header)->nb_items_max = floor(((double) new_data_size) / ((double) sizeof(AVL_node_t))); + + // Set the new avl size + (avl->header)->avl_size = new_data_size; + + close(avl_file_descriptor); + + return 0; +} + + +int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed +{ + size_t file_size; + index_t old_data_size; + index_t new_data_size; + size_t header_size; + int avl_data_file_descriptor; + char* avl_data_file_name; + + // Get the avl data file name + avl_data_file_name = build_avl_data_file_name((avl->header)->avl_name); + if (avl_data_file_name == NULL) + return -1; + + // Open the avl data file + avl_data_file_descriptor = openat(avl->dir_fd, avl_data_file_name, O_RDWR); + if (avl_data_file_descriptor < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError opening an avl data file"); + free(avl_data_file_name); + return -1; + } + free(avl_data_file_name); + + // Calculate the new file size + old_data_size = ((avl->data)->header)->data_size_max; + new_data_size = old_data_size * AVL_GROWTH_FACTOR; + header_size = ((avl->data)->header)->header_size; + file_size = header_size + new_data_size; + + // Enlarge the file + if (ftruncate(avl_data_file_descriptor, file_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError enlarging an avl data file"); + close(avl_data_file_descriptor); + return -1; + } + + // Unmap and re-map the data + + if (munmap((avl->data)->data, old_data_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError munmapping the data of an avl data file before enlarging"); + 
close(avl_data_file_descriptor); + return -1; + } + + (avl->data)->data = mmap(NULL, + new_data_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_data_file_descriptor, + header_size + ); + + if ((avl->data)->data == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError re-mmapping the data of an avl data file after enlarging the file"); + close(avl_data_file_descriptor); + return -1; + } + + // Set new data size + ((avl->data)->header)->data_size_max = new_data_size; + + // Initialize new data to 0 + memset(((avl->data)->data)+old_data_size, 0, new_data_size - old_data_size); + + close(avl_data_file_descriptor); + + return 0; +} + + +index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, byte_t* value) +{ + index_t value_idx; + size_t value_size; + + value_idx = ((avl->data)->header)->data_size_used; + + // Grow the data if needed + value_size = byte_array_sizeof(value); + while (((avl->data)->header)->data_size_max < (value_idx + (int64_t) value_size)) + { + if (grow_avl_data(avl) < 0) + return -1; + } + + // Store the value itself at the end of the data + memcpy((((avl->data)->data)+value_idx), value, value_size); + + // Update the data size + ((avl->data)->header)->data_size_used = value_idx + value_size; + + // Update the number of items + (((avl->data)->header)->nb_items)++; + + return value_idx; +} + + +int byte_array_compare(byte_t* value_1, byte_t* value_2) +{ + int comp; + uint8_t size_1; + uint8_t size_2; + int32_t len_1; + int32_t len_2; + int32_t ini_len_1; + int32_t ini_len_2; + int32_t b; + + size_1 = (uint8_t) *(value_1); + size_2 = (uint8_t) *(value_2); + + if (size_1 != size_2) + return (size_1 - size_2); + + len_1 = *((int32_t*)(value_1+1)); + len_2 = *((int32_t*)(value_2+1)); + + if (len_1 != len_2) + return (len_1 - len_2); + + if (size_1 != 8) + { + ini_len_1 = *((int32_t*)(value_1+5)); + ini_len_2 = *((int32_t*)(value_2+5)); + + if (ini_len_1 != ini_len_2) + return (ini_len_1 - ini_len_2); + } + + b = BYTE_ARRAY_HEADER_SIZE; 
+ comp = 0; + while (!comp && (b < len_1+BYTE_ARRAY_HEADER_SIZE)) + { + comp = *(value_1+b) - *(value_2+b); + b++; + } + return comp; +} + + +size_t byte_array_sizeof(byte_t* value) +{ + return (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1))); +} + + +// Initialize a new node +AVL_node_p avl_create_node(OBIDMS_avl_p avl, index_t node_idx) +{ + AVL_node_p node; + + node = (avl->tree)+node_idx; + + node->left_child = -1; + node->right_child = -1; + node->balance_factor = 0; + node->value = -1; + + return node; +} + + +// Update the balance factors of the nodes from the node that will need balancing +void avl_update_balance_factors(OBIDMS_avl_p avl) +{ + uint8_t n; + AVL_node_p node; + + // Update balance factors from the node where balancing might be needed + node=(avl->tree)+((avl->path_idx)[1]); + + for (n=1; (avl->path_dir)[n] != -1; n++) + { + if ((avl->path_dir)[n]) // Went right + { + (node->balance_factor)--; + node=RIGHT_CHILD(node); + } + else // Went left + { + (node->balance_factor)++; + node=LEFT_CHILD(node); + } + } +} + + +// Left Left Rotate +index_t avl_rotate_leftleft(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx) +{ + AVL_node_p left_child = LEFT_CHILD(node); + index_t left_child_idx = node->left_child; + + node->left_child = left_child->right_child; + left_child->right_child = node_idx; + + node->balance_factor = 0; + left_child->balance_factor = 0; + + return left_child_idx; +} + + +// Left Right Rotate +index_t avl_rotate_leftright(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx) +{ + AVL_node_p left_child = LEFT_CHILD(node); + index_t left_child_idx = node->left_child; + AVL_node_p rc_of_lc = RIGHT_CHILD(left_child); + index_t rc_of_lc_idx = left_child->right_child; + + node->left_child = rc_of_lc->right_child; + left_child->right_child = rc_of_lc->left_child; + rc_of_lc->left_child = left_child_idx; + rc_of_lc->right_child = node_idx; + + if (rc_of_lc->balance_factor == -1) + { + left_child->balance_factor = 1; + node->balance_factor 
= 0; + } + else if (rc_of_lc->balance_factor == 0) + { + left_child->balance_factor = 0; + node->balance_factor = 0; + } + else // if (rc_of_lc->balance_factor == 1) + { + left_child->balance_factor = 0; + node->balance_factor = -1; + } + + rc_of_lc->balance_factor = 0; + + return rc_of_lc_idx; +} + + +// Right Left Rotate +index_t avl_rotate_rightleft(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx) +{ + AVL_node_p right_child = RIGHT_CHILD(node); + index_t right_child_idx = node->right_child;; + AVL_node_p lc_of_rc = LEFT_CHILD(right_child); + index_t lc_of_rc_idx = right_child->left_child; + + node->right_child = lc_of_rc->left_child; + right_child->left_child = lc_of_rc->right_child; + lc_of_rc->right_child = right_child_idx; + lc_of_rc->left_child = node_idx; + + if (lc_of_rc->balance_factor == 1) + { + right_child->balance_factor = 1; + node->balance_factor = 0; + } + else if (lc_of_rc->balance_factor == 0) + { + right_child->balance_factor = 0; + node->balance_factor = 0; + } + else // if (lc_of_rc->balance_factor == -1) + { + right_child->balance_factor = 0; + node->balance_factor = 1; + } + + lc_of_rc->balance_factor = 0; + + return lc_of_rc_idx; +} + + +// Right Right Rotate +index_t avl_rotate_rightright(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx) +{ + AVL_node_p right_child = RIGHT_CHILD(node); + index_t right_child_idx = node->right_child; + + node->right_child = right_child->left_child; + right_child->left_child = node_idx; + + node->balance_factor = 0; + right_child->balance_factor = 0; + + return right_child_idx; +} + + +// Balance a given node +index_t avl_balance_node(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx) +{ + index_t new_root = 0; + + if (node->balance_factor == 2) + { // Left Heavy + if ((LEFT_CHILD(node))->balance_factor == -1) + new_root = avl_rotate_leftright(avl, node, node_idx); + else + new_root = avl_rotate_leftleft(avl, node, node_idx); + } + else if (node->balance_factor == -2) + { // Right Heavy + if 
((RIGHT_CHILD(node))->balance_factor == 1) + new_root = avl_rotate_rightleft(avl, node, node_idx); + else + new_root = avl_rotate_rightright(avl, node, node_idx); + } + else + // Node is balanced + new_root = node_idx; + + return new_root; +} + + +// Balance a given tree +void avl_balance(OBIDMS_avl_p avl) +{ + index_t new_root; + index_t node_index; + AVL_node_p node_to_balance; + AVL_node_p parent_of_node_to_balance; + + node_index = (avl->path_idx)[1]; + node_to_balance = (avl->tree)+node_index; + parent_of_node_to_balance = (avl->tree)+((avl->path_idx)[0]); + + // Balance the 2nd node stored in the path (the first is only kept to connect the new root + // of the subtree if needed). + new_root = avl_balance_node(avl, node_to_balance, node_index); + + if (new_root != node_index) + // If the root of the subtree has changed + { + // If the subtree's root is the tree's root, store the new root + if (node_index == (avl->header)->root_idx) + (avl->header)->root_idx = new_root; + // Else, connect the new subtree's root to the parent of the subtree + else if ((avl->path_dir)[0]) // Subtree is the right child of its parent + parent_of_node_to_balance->right_child = new_root; + else // Subtree is the left child of its parent + parent_of_node_to_balance->left_child = new_root; + } +} + + +// Print a depth first traverse of a node +void avl_print_node(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx, int depth) +{ + int i = 0; + + if (node->left_child != -1) + avl_print_node(avl, LEFT_CHILD(node), node->left_child, depth+2); + + for (i = 0; i < depth; i++) + putchar(' '); + + fprintf(stderr, "Node idx: %lld, Value idx: %lld, Left child: %lld, Right child: %lld, " + "Balance factor: %d\n", node_idx, node->value, node->left_child, node->right_child, node->balance_factor); + + if (node->right_child != -1) + avl_print_node(avl, RIGHT_CHILD(node), node->right_child, depth+2); +} + + +// Print a depth first traverse of a tree +void avl_print(OBIDMS_avl_p avl) +{ + 
fprintf(stderr, "\nRoot index: %lld\n", (avl->header)->root_idx); + avl_print_node(avl, (avl->tree)+((avl->header)->root_idx), (avl->header)->root_idx, 0); +} + + +/********************************************************************** + * + * D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S + * + **********************************************************************/ + +int obi_avl_exists(OBIDMS_p dms, const char* avl_name) +{ + struct stat buffer; + char* avl_file_path; + char* avl_file_name; + int check_dir; + + // Build file name + avl_file_name = build_avl_file_name(avl_name); + if (avl_file_name == NULL) + return -1; + + // Build the avl file path + avl_file_path = get_full_path(dms->avl_dir_fd, avl_file_name); + if (avl_file_path == NULL) + { + obidebug(1, "\nError getting the file path for an avl file"); + return -1; + } + + check_dir = stat(avl_file_path, &buffer); + + free(avl_file_path); + free(avl_file_name); + + if (check_dir == 0) + return 1; + else + return 0; +} + + +OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name) +{ + int exists; + + exists = obi_avl_exists(dms, avl_name); + + switch (exists) + { + case 0: + return obi_create_avl(dms, avl_name); + case 1: + return obi_open_avl(dms, avl_name); + }; + + obidebug(1, "\nError checking if an avl already exists"); + return NULL; +} + + +OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) +{ + char* avl_file_name; + char* avl_data_file_name; + size_t header_size; + size_t data_size; + size_t file_size; + int avl_file_descriptor; + int avl_data_file_descriptor; + int avl_dir_file_descriptor; + OBIDMS_avl_data_p avl_data; + OBIDMS_avl_p avl; + + // Create the data file + + // Build file name + avl_data_file_name = build_avl_data_file_name(avl_name); + if (avl_data_file_name == NULL) + return NULL; + + // Get the file descriptor of the avl directory + avl_dir_file_descriptor = dms->avl_dir_fd; + + // Create file + avl_data_file_descriptor = openat(avl_dir_file_descriptor, 
avl_data_file_name, O_RDWR | O_CREAT | O_EXCL, 0777); + if (avl_data_file_descriptor < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError creating an avl data file"); + free(avl_data_file_name); + return NULL; + } + free(avl_data_file_name); + + // Calculate the size needed + header_size = get_avl_data_header_size(); + data_size = get_initial_avl_data_size(); + file_size = header_size + data_size; + + // Truncate the avl data file to the right size + if (ftruncate(avl_data_file_descriptor, file_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError truncating an avl data file to the right size"); + close(avl_data_file_descriptor); + return NULL; + } + + // Allocate the memory for the avl data structure + avl_data = (OBIDMS_avl_data_p) malloc(sizeof(OBIDMS_avl_data_t)); + if (avl_data == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError allocating the memory for the avl data structure"); + close(avl_data_file_descriptor); + return NULL; + } + + // Fill the avl data structure + avl_data->header = mmap(NULL, + header_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_data_file_descriptor, + 0 + ); + if (avl_data->header == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError mmapping the header of an avl data file"); + close(avl_data_file_descriptor); + free(avl_data); + return NULL; + } + + avl_data->data = mmap(NULL, + data_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_data_file_descriptor, + header_size + ); + if (avl_data->data == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError mmapping the data of an avl data file"); + munmap(avl_data->header, header_size); + close(avl_data_file_descriptor); + free(avl_data); + return NULL; + } + + (avl_data->header)->header_size = header_size; + (avl_data->header)->data_size_max = data_size; + (avl_data->header)->data_size_used = 0; + (avl_data->header)->nb_items = 0; + (avl_data->header)->creation_date = time(NULL); + 
strcpy((avl_data->header)->avl_name, avl_name); + + // Initialize all bits to 0 + memset(avl_data->data, 0, (avl_data->header)->data_size_max); + + close(avl_data_file_descriptor); + + + // Create the avl file + + // Build file name + avl_file_name = build_avl_file_name(avl_name); + if (avl_file_name == NULL) + { + close_avl_data(avl_data); + return NULL; + } + + // Calculate the size needed + header_size = get_avl_header_size(); + data_size = get_initial_avl_size(); + file_size = header_size + data_size; + + // Create file + avl_file_descriptor = openat(avl_dir_file_descriptor, avl_file_name, O_RDWR | O_CREAT | O_EXCL, 0777); + if (avl_file_descriptor < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError creating an avl file"); + close_avl_data(avl_data); + free(avl_file_name); + return NULL; + } + free(avl_file_name); + + // Truncate the avl file to the right size + if (ftruncate(avl_file_descriptor, file_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError truncating an avl file to the right size"); + close_avl_data(avl_data); + close(avl_file_descriptor); + return NULL; + } + + // Allocate the memory for the avl structure + avl = (OBIDMS_avl_p) malloc(sizeof(OBIDMS_avl_t)); + if (avl == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError allocating the memory for the avl structure"); + close_avl_data(avl_data); + close(avl_file_descriptor); + return NULL; + } + + // Fill the avl structure + avl->header = mmap(NULL, + header_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_file_descriptor, + 0 + ); + if (avl->header == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError mmapping the header of an avl file"); + close_avl_data(avl_data); + close(avl_file_descriptor); + free(avl); + return NULL; + } + + avl->tree = mmap(NULL, + data_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_file_descriptor, + header_size + ); + if (avl->tree == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, 
"\nError mmapping the data of an avl file"); + close_avl_data(avl_data); + munmap(avl->header, header_size); + close(avl_file_descriptor); + free(avl); + return NULL; + } + + avl->dms = dms; + avl->data = avl_data; + avl->directory = dms->avl_directory; + avl->dir_fd = avl_dir_file_descriptor; + + (avl->header)->header_size = header_size; + (avl->header)->avl_size = data_size; + (avl->header)->nb_items = 0; + (avl->header)->nb_items_max = (index_t) floor(((double) get_initial_avl_size()) / ((double) sizeof(AVL_node_t))); + (avl->header)->root_idx = -1; + (avl->header)->creation_date = time(NULL); + strcpy((avl->header)->avl_name, avl_name); + + close(avl_file_descriptor); + + // Add in the list of opened avls + *(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl; + ((dms->opened_avls)->nb_opened_avls)++; + avl->counter = 1; + + return avl; +} + + +OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) +{ + char* avl_file_name; + char* avl_data_file_name; + size_t header_size; + int avl_file_descriptor; + int avl_data_file_descriptor; + int avl_dir_file_descriptor; + OBIDMS_avl_data_p avl_data; + OBIDMS_avl_p avl; + size_t i; + + // Check if the avl is already in the list of opened avls + for (i=0; i < ((dms->opened_avls)->nb_opened_avls); i++) + { + if (!strcmp(((*(((dms->opened_avls)->avls)+i))->header)->avl_name, avl_name)) + { // Found the avl already opened + ((*(((dms->opened_avls)->avls)+i))->counter)++; + return *(((dms->opened_avls)->avls)+i); + } + } + + // Open the data file + + // Get the file descriptor of the avl directory + avl_dir_file_descriptor = dms->avl_dir_fd; + + // Build file name + avl_data_file_name = build_avl_data_file_name(avl_name); + if (avl_data_file_name == NULL) + return NULL; + + // Open file + avl_data_file_descriptor = openat(avl_dir_file_descriptor, avl_data_file_name, O_RDWR, 0777); + if (avl_data_file_descriptor < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError opening an avl data 
file"); + free(avl_data_file_name); + return NULL; + } + free(avl_data_file_name); + + // Allocate the memory for the avl data structure + avl_data = (OBIDMS_avl_data_p) malloc(sizeof(OBIDMS_avl_data_t)); + if (avl_data == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError allocating the memory for the avl data structure"); + close(avl_data_file_descriptor); + return NULL; + } + + // Read the header size + if (read(avl_data_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t))) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError reading the header size to open a data avl"); + close(avl_data_file_descriptor); + return NULL; + } + + // Fill the avl data structure + avl_data->header = mmap(NULL, + header_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_data_file_descriptor, + 0 + ); + if (avl_data->header == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError mmapping the header of an avl data file"); + close(avl_data_file_descriptor); + free(avl_data); + return NULL; + } + + avl_data->data = mmap(NULL, + (avl_data->header)->data_size_max, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_data_file_descriptor, + header_size + ); + if (avl_data->data == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError mmapping the data of an avl data file"); + munmap(avl_data->header, header_size); + close(avl_data_file_descriptor); + free(avl_data); + return NULL; + } + + close(avl_data_file_descriptor); + + + // Open the avl file + + // Build file name + avl_file_name = build_avl_file_name(avl_name); + if (avl_file_name == NULL) + { + close_avl_data(avl_data); + return NULL; + } + + // Open file + avl_file_descriptor = openat(avl_dir_file_descriptor, avl_file_name, O_RDWR, 0777); + if (avl_file_descriptor < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError opening an avl file"); + close_avl_data(avl_data); + free(avl_file_name); + return NULL; + } + free(avl_file_name); + + // Allocate the 
memory for the avl structure + avl = (OBIDMS_avl_p) malloc(sizeof(OBIDMS_avl_t)); + if (avl == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError allocating the memory for the avl structure"); + close_avl_data(avl_data); + close(avl_file_descriptor); + return NULL; + } + + // Read the header size + if (read(avl_file_descriptor, &header_size, sizeof(size_t)) < ((ssize_t) sizeof(size_t))) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError reading the header size to open an avl"); + close(avl_file_descriptor); + return NULL; + } + + // Fill the avl structure + avl->header = mmap(NULL, + header_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_file_descriptor, + 0 + ); + if (avl->header == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError mmapping the header of an avl file"); + close_avl_data(avl_data); + close(avl_file_descriptor); + free(avl); + return NULL; + } + + avl->tree = mmap(NULL, + (((avl->header)->nb_items_max) * sizeof(AVL_node_t)), + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_file_descriptor, + header_size + ); + if (avl->tree == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError mmapping the data of an avl file"); + close_avl_data(avl_data); + munmap(avl->header, header_size); + close(avl_file_descriptor); + free(avl); + return NULL; + } + + avl->dms = dms; + avl->data = avl_data; + avl->directory = dms->avl_directory; + avl->dir_fd = avl_dir_file_descriptor; + + close(avl_file_descriptor); + + // Add in the list of opened avls + *(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl; + ((dms->opened_avls)->nb_opened_avls)++; + avl->counter = 1; + + return avl; +} + + +int obi_close_avl(OBIDMS_avl_p avl) +{ + int ret_val = 0; + size_t i; + Opened_avls_list_p avls_list; + OBIDMS_p dms; + + dms = avl->dms; + + avls_list = dms->opened_avls; + + (avl->counter)--; + + if (avl->counter == 0) + { + // Delete from the list of opened avls + for (i=0; i < 
(avls_list->nb_opened_avls); i++) + { + if (!strcmp(((*((avls_list->avls)+i))->header)->avl_name, (avl->header)->avl_name)) + { // Found the avl. Rearrange list + (avls_list->nb_opened_avls)--; + (avls_list->avls)[i] = (avls_list->avls)[avls_list->nb_opened_avls]; + } + } + + ret_val = close_avl_data(avl->data); + + if (munmap(avl->tree, (((avl->header)->nb_items_max) * sizeof(AVL_node_t))) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError munmapping the avl of an avl file"); + ret_val = -1; + } + + if (munmap(avl->header, (avl->header)->header_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError munmapping the header of an avl file"); + ret_val = -1; + } + + free(avl); + } + + return ret_val; +} + + +byte_t* obi_avl_get(OBIDMS_avl_p avl, index_t idx) +{ + return (((avl->data)->data)+idx); +} + + +// Insert a new node +index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value) +{ + AVL_node_p node_to_add = NULL; + AVL_node_p current_node; + index_t next, parent; + index_t value_data_idx; + index_t node_idx; + byte_t* to_compare; + int comp; + uint8_t n = 0; + uint8_t depth = 0; + + // Check if first node + if (!((avl->header)->nb_items)) + { + node_to_add = avl_create_node(avl, 0); + + // Add the value in the data array and store its index + value_data_idx = avl_add_value_in_data_array(avl, value); + node_to_add->value = value_data_idx; + + // Update the number of items + ((avl->header)->nb_items)++; + + // Set the AVL tree root + (avl->header)->root_idx = 0; + + return 0; + } + + // Not first node + next = (avl->header)->root_idx; + parent = next; + comp = 0; + + while (next != -1) + { + current_node = (avl->tree)+next; + + // Store path from the lowest node with a balance factor different than 0, + // as it is the node that will have to be balanced. 
+ if (current_node->balance_factor != 0) + // New lowest node with a balance factor different than 0 + n=0; + (avl->path_idx)[n] = parent; // Store parent + (avl->path_dir)[n] = comp < 0; // Store direction (0 if left, 1 if right) + n++; + + parent = next; + + // Compare value with value of current node + to_compare = obi_avl_get(avl, current_node->value); + comp = byte_array_compare(to_compare, value); + + if (comp > 0) + // Go to left child + next = current_node->left_child; + else if (comp < 0) + // Go to right child + next = current_node->right_child; + else if (comp == 0) + // Value already stored + return current_node->value; + + depth++; + } + + // Check if the AVL tree has not become too big + if (depth == AVL_MAX_DEPTH) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nThis AVL tree has reached the maximum height (50)."); + return -1; + } + + // Grow the AVL tree if needed + if ((avl->header)->nb_items == (avl->header)->nb_items_max) + { + if (grow_avl(avl) < 0) + return -1; + } + + // Initialize node at the end of the tree + node_idx = (avl->header)->nb_items; + node_to_add = avl_create_node(avl, node_idx); + + // Add the value in the data array and store its index + value_data_idx = avl_add_value_in_data_array(avl, value); + node_to_add->value = value_data_idx; + + // Update the number of items + ((avl->header)->nb_items)++; + + // Add either as right or left child + if (comp > 0) // Add as left child + ((avl->tree)+parent)->left_child = node_idx; + else // Add as right child + ((avl->tree)+parent)->right_child = node_idx; + + // End path + (avl->path_idx)[n] = parent; + (avl->path_dir)[n] = comp < 0; // 0 if went left, 1 if went right + n++; + (avl->path_idx)[n] = -1; // flag path end + (avl->path_dir)[n] = -1; + + // Update balance factors + avl_update_balance_factors(avl); + + // Balance tree + avl_balance(avl); + + // Print tree + //avl_print(avl); + + return value_data_idx; +} + + +// Find if a value is already in an AVL tree +index_t 
obi_avl_find(OBIDMS_avl_p avl, byte_t* value) +{ + int comp; + index_t next; + byte_t* to_compare; + AVL_node_p current_node; + + next = (avl->header)->root_idx; + while (next != -1) + { + current_node = (avl->tree)+next; + + // Compare value with value of current node + to_compare = obi_avl_get(avl, current_node->value); + comp = byte_array_compare(to_compare, value); + + if (comp > 0) + // Go to left child + next = current_node->left_child; + else if (comp < 0) + // Go to right child + next = current_node->right_child; + else if (comp == 0) + // Value found + return current_node->value; + } + // Value not found + return -1; +} + + +byte_t* obi_str_to_obibytes(char* value) +{ + byte_t* value_b; + int32_t length; + + // Compute the number of bytes on which the value will be encoded + length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster) + + // Allocate the memory for the encoded value + value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length); + if (value_b == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError allocating memory for a byte avl"); + return NULL; + } + + // Store the number of bits on which each element is encoded + *(value_b) = 8; + + // Store the length (in bytes) of the encoded value (same as decoded for character strings) + *((int32_t*)(value_b+1)) = length; + + // Store the initial length (in bytes) of the decoded value (same as encoded for character strings) + *((int32_t*)(value_b+5)) = length; + + // Store the character string + strcpy(value_b+BYTE_ARRAY_HEADER_SIZE, value); + + return value_b; +} + + +const char* obi_obibytes_to_str(byte_t* value_b) +{ + const char* value; + + value = value_b+BYTE_ARRAY_HEADER_SIZE; + + return value; +} + + +byte_t* obi_seq_to_obibytes(char* seq) +{ + byte_t* value_b; + int32_t length; // length of the value (without the header) in bytes + uint8_t size; // size of one element in bits + int32_t seq_length; + byte_t* encoded_seq; + + // Check if just ATGC and set 
size of a nucleotide accordingly (2 bits or 4 bits) + if (only_ATGC(seq)) + size = 2; + else + size = 4; + + // Compute the length (in bytes) of the encoded sequence + seq_length = strlen(seq); + if (size == 2) + length = ceil((double) seq_length / (double) 4.0); + else // size == 4 + length = ceil((double) seq_length / (double) 2.0); + + // Encode + if (size == 2) + encoded_seq = encode_seq_on_2_bits(seq, seq_length); + else // size == 4 + encoded_seq = encode_seq_on_4_bits(seq, seq_length); + if (encoded_seq == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError encoding a DNA sequence"); + return NULL; + } + + // Allocate the memory for the encoded value + value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length); + if (value_b == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError allocating memory for a byte avl"); + return NULL; + } + + // Store the number of bits on which each nucleotide is encoded + *(value_b) = size; + + // Store the length (in bytes) of the encoded sequence + *((int32_t*)(value_b+1)) = length; + + // Store the length (in bytes) of the initial sequence (necessary for decoding) + *((int32_t*)(value_b+5)) = seq_length; + + // Store the encoded sequence + memcpy(value_b+BYTE_ARRAY_HEADER_SIZE, encoded_seq, length); + + free(encoded_seq); + + return value_b; +} + + +const char* obi_obibytes_to_seq(byte_t* value_b) +{ + const char* value; + uint8_t size; // size of one element in bits + + // Check the encoding (each nucleotide on 2 bits or 4 bits) + size = *(value_b); + + // Decode + if (size == 2) + value = decode_seq_on_2_bits(value_b+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_b+5))); + else + value = decode_seq_on_4_bits(value_b+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_b+5))); + + if (value == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError decoding a DNA sequence"); + return NULL; + } + + return value; +} + diff --git a/src/obiavl.h b/src/obiavl.h new file mode 100644 index 0000000..4abfe22 --- 
/dev/null +++ b/src/obiavl.h @@ -0,0 +1,344 @@ +/**************************************************************************** + * OBIDMS AVL tree header file * + ****************************************************************************/ + +/** + * @file obiavl.h + * @author Celine Mercier + * @date December 3rd 2015 + * @brief Header file for handling AVL trees for storing and retrieving byte arrays (i.e. coding for character strings). + */ + + +#ifndef OBIAVL_H_ +#define OBIAVL_H_ + + +#include +#include +#include +#include +#include +#include +#include + +#include "obidms.h" +#include "obitypes.h" + + +#define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name. + */ +#define AVL_GROWTH_FACTOR (2) /**< The growth factor when an AVL tree is enlarged. + */ +#define AVL_MAX_DEPTH (50) /**< The maximum depth of an AVL tree. + */ +#define LEFT_CHILD(node) (avl->tree)+(node->left_child) /**< Pointer to the left child of a node in an AVL tree. + */ +#define RIGHT_CHILD(node) (avl->tree)+(node->right_child) /**< Pointer to the right child of a node in an AVL tree. + */ +#define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array. + */ + + +/** + * @brief AVL tree node structure. + */ +typedef struct AVL_node { + index_t left_child; /**< Index of left less child node. + */ + index_t right_child; /**< Index of right greater child node. + */ + int8_t balance_factor; /**< Balance factor of the node. + */ + index_t value; /**< Index of the value associated with the node in the data array. + */ +} AVL_node_t, *AVL_node_p; + + +/** + * @brief OBIDMS AVL tree data header structure. + */ +typedef struct OBIDMS_avl_data_header { + int header_size; /**< Size of the header in bytes. + */ + index_t data_size_used; /**< Size of the data used in bytes. + */ + index_t data_size_max; /**< Max size of the data in bytes. + */ + index_t nb_items; /**< Number of items. + */ + char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a NULL terminated string. 
+ */ + time_t creation_date; /**< Date of creation of the file. + */ +} OBIDMS_avl_data_header_t, *OBIDMS_avl_data_header_p; + + +/** + * @brief OBIDMS AVL tree data structure. + */ +typedef struct OBIDMS_avl_data { + OBIDMS_avl_data_header_p header; /**< A pointer to the header of the AVL tree data. + */ + byte_t* data; /**< A pointer to the beginning of the data. + */ +} OBIDMS_avl_data_t, *OBIDMS_avl_data_p; + + +/** + * @brief OBIDMS AVL tree header structure. + */ +typedef struct OBIDMS_avl_header { + int header_size; /**< Size of the header in bytes. + */ + size_t avl_size; /**< Size of the AVL tree in bytes. + */ + index_t nb_items; /**< Number of items in the AVL tree. + */ + index_t nb_items_max; /**< Maximum number of items in the AVL tree before it has to be enlarged. + */ + index_t root_idx; /**< Index of the root of the AVL tree. + */ + char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a NULL terminated string. + */ + time_t creation_date; /**< Date of creation of the file. + */ +} OBIDMS_avl_header_t, *OBIDMS_avl_header_p; + + +/** + * @brief OBIDMS AVL tree structure. + */ +typedef struct OBIDMS_avl { + OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the AVL tree belongs. + */ + OBIDMS_avl_header_p header; /**< A pointer to the header of the AVL tree. + */ + struct AVL_node* tree; /**< A pointer to the root of the AVL tree. + */ + index_t path_idx[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of node indices. + */ + int8_t path_dir[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of directions + * (0 for left, -1 for right). + */ + OBIDMS_avl_data_p data; /**< A pointer to the structure containing the data + * that the AVL tree references. + */ + DIR* directory; /**< A directory entry usable to + * refer and scan the AVL tree directory. + */ + int dir_fd; /**< The file descriptor of the directory entry + * usable to refer and scan the AVL tree directory. 
+ */ + size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL tree is used. + */ +} OBIDMS_avl_t, *OBIDMS_avl_p; + + +/** + * @brief Checks if an AVL tree already exists or not. + * + * @param dms The OBIDMS to which the AVL tree belongs. + * @param avl_name The name of the AVL tree. + * + * @returns A value indicating whether the AVL tree exists or not. + * @retval 1 if the AVL tree exists. + * @retval 0 if the AVL tree does not exist. + * @retval -1 if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_avl_exists(OBIDMS_p dms, const char* avl_name); + + +/** + * @brief Opens an AVL tree and creates it if it does not already exist. + * + * Note: An AVL tree is made of two files (referred to by two structures). + * One file contains the indices referring to the data, and the other + * file contains the data itself. The AVL tree as a whole is referred + * to via the OBIDMS_avl structure. + * + * @param dms The OBIDMS to which the AVL tree belongs. + * @param avl_name The name of the AVL tree. + * + * @returns A pointer to the AVL tree structure. + * @retval NULL if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name); + + +/** + * @brief Creates an AVL tree. Fails if it already exists. + * + * Note: An AVL tree is made of two files (referred to by two structures). + * One file contains the indices referring to the data, and the other + * file contains the data itself. The AVL tree as a whole is referred + * to via the OBIDMS_avl structure. + * + * @param dms The OBIDMS to which the AVL tree belongs. + * @param avl_name The name of the AVL tree. + * + * @returns A pointer to the newly created AVL tree structure. + * @retval NULL if an error occurred. 
+ * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name); + + +/** + * @brief Opens an AVL tree. Fails if it does not already exist. + * + * Note: An AVL tree is made of two files (referred to by two structures). + * One file contains the indices referring to the data, and the other + * file contains the data itself. The AVL tree as a whole is referred + * to via the OBIDMS_avl structure. + * + * @param dms The OBIDMS to which the AVL tree belongs. + * @param avl_name The name of the AVL tree. + * + * @returns A pointer to the AVL tree structure. + * @retval NULL if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name); + + +/** + * @brief Closes an AVL tree. + * + * Note: An AVL tree is made of two files (referred to by two structures). + * One file contains the indices referring to the data, and the other + * file contains the data itself. The AVL tree as a whole is referred + * to via the OBIDMS_avl structure. + * + * @param avl A pointer to the AVL tree structure to close and free. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_close_avl(OBIDMS_avl_p avl); + + +/** + * @brief Adds a value (byte array) in an AVL tree, checking if it is already in it. + * + * @warning The byte array to add must already be encoded and contain its header. + * + * @param avl A pointer to the AVL tree. + * @param value The byte array to add in the AVL tree. + * + * @returns The index of the value, whether it was added or already in the AVL tree. + * @retval -1 if an error occurred. 
+ * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value); + + +/** + * @brief Finds a value (byte array) in an AVL tree, if it is present. + * + * @warning The byte array to find must already be encoded and contain its header. + * + * @param avl A pointer to the AVL tree. + * @param value The byte array to find in the AVL tree. + * + * @returns The data index of the value. + * @retval -1 if the value is not in the tree. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value); + + +/** + * @brief Recovers a value (byte array) in an AVL tree. + * + * @warning The byte array recovered is encoded and contains its header. + * + * @param avl A pointer to the AVL tree. + * @param index The index of the value in the data array. + * + * @returns A pointer to the byte array recovered. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +byte_t* obi_avl_get(OBIDMS_avl_p avl, index_t index); + + +/** + * @brief Converts a character string to a byte array with a header. + * + * @warning The byte array must be freed by the caller. + * + * @param value The character string to convert. + * + * @returns A pointer to the byte array created. + * @retval NULL if an error occurred. + * + * @since October 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +byte_t* obi_str_to_obibytes(char* value); + + +/** + * @brief Converts a byte array to a character string. + * + * @param value_b The byte array to convert. + * + * @returns A pointer to the character string contained in the byte array. + * + * @since October 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +const char* obi_obibytes_to_str(byte_t* value_b); + + +/** + * @brief Converts a DNA sequence to a byte array with a header.
+ * + * @warning The byte array must be freed by the caller. + * + * @param seq The DNA sequence to convert. + * + * @returns A pointer to the byte array created. + * @retval NULL if an error occurred. + * + * @since November 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +byte_t* obi_seq_to_obibytes(char* seq); + + +/** + * @brief Converts a byte array to a DNA sequence. + * + * @param value_b The byte array to convert. + * + * @returns A pointer to the DNA sequence contained in the byte array. + * @retval NULL if an error occurred. + * + * @since November 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +const char* obi_obibytes_to_seq(byte_t* value_b); + + +#endif /* OBIAVL_H_ */ + diff --git a/src/obidms.c b/src/obidms.c index 4688178..d2cb509 100644 --- a/src/obidms.c +++ b/src/obidms.c @@ -247,7 +247,7 @@ OBIDMS_p obi_create_dms(const char* dms_name) return NULL; } - // Get file descriptor of DMS directory to create the arrays directory + // Get file descriptor of DMS directory to create the AVL trees directory dms_dir = opendir(directory_name); if (dms_dir == NULL) { @@ -267,11 +267,11 @@ OBIDMS_p obi_create_dms(const char* dms_name) return NULL; } - // Create the arrays directory - if (mkdirat(dms_file_descriptor, ARRAYS_DIR_NAME, 00777) < 0) + // Create the AVL trees directory + if (mkdirat(dms_file_descriptor, AVL_TREES_DIR_NAME, 00777) < 0) { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nProblem creating an arrays directory"); + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nProblem creating an AVL trees directory"); return NULL; } @@ -390,24 +390,24 @@ OBIDMS_p obi_open_dms(const char* dms_name) dms->little_endian = little_endian_dms; - // Open the arrays directory - dms->array_directory = private_opendirat(dms->dir_fd, ARRAYS_DIR_NAME); - if (dms->array_directory == NULL) + // Open the AVL trees directory + dms->avl_directory = private_opendirat(dms->dir_fd, AVL_TREES_DIR_NAME); + if
(dms->avl_directory == NULL) { obi_set_errno(OBIDMS_UNKNOWN_ERROR); - obidebug(1, "\nError opening the arrays directory"); + obidebug(1, "\nError opening the AVL trees directory"); closedir(dms->directory); free(dms); return NULL; } - // Store the array directory's file descriptor - dms->array_dir_fd = dirfd(dms->array_directory); - if (dms->array_dir_fd < 0) + // Store the AVL trees directory's file descriptor + dms->avl_dir_fd = dirfd(dms->avl_directory); + if (dms->avl_dir_fd < 0) { obi_set_errno(OBIDMS_UNKNOWN_ERROR); - obidebug(1, "\nError getting the file descriptor of the arrays directory"); - closedir(dms->array_directory); + obidebug(1, "\nError getting the file descriptor of the AVL trees directory"); + closedir(dms->avl_directory); closedir(dms->directory); free(dms); return NULL; @@ -418,10 +418,10 @@ OBIDMS_p obi_open_dms(const char* dms_name) (dms->opened_columns)->columns = (OBIDMS_column_p*) malloc(MAX_NB_OPENED_COLUMNS*sizeof(OBIDMS_column_p)); (dms->opened_columns)->nb_opened_columns = 0; - // Initialize the list of opened arrays - dms->opened_arrays = (Opened_arrays_list_p) malloc(sizeof(Opened_arrays_list_t)); - (dms->opened_arrays)->arrays = (OBIDMS_array_p*) malloc(MAX_NB_OPENED_ARRAYS*sizeof(OBIDMS_array_p)); - (dms->opened_arrays)->nb_opened_arrays = 0; + // Initialize the list of opened AVL trees + dms->opened_avls = (Opened_avls_list_p) malloc(sizeof(Opened_avls_list_t)); + (dms->opened_avls)->avls = (OBIDMS_avl_p*) malloc(MAX_NB_OPENED_AVL_TREES*sizeof(OBIDMS_avl_p)); + (dms->opened_avls)->nb_opened_avls = 0; return dms; } @@ -454,7 +454,7 @@ int obi_close_dms(OBIDMS_p dms) while ((dms->opened_columns)->nb_opened_columns > 0) obi_close_column(*((dms->opened_columns)->columns)); - // Close dms and array directories + // Close dms and AVL trees directories if (closedir(dms->directory) < 0) { obi_set_errno(OBIDMS_MEMORY_ERROR); @@ -462,10 +462,10 @@ int obi_close_dms(OBIDMS_p dms) free(dms); return -1; } - if (closedir(dms->array_directory) 
< 0) + if (closedir(dms->avl_directory) < 0) { - obi_set_errno(OBI_ARRAY_ERROR); - obidebug(1, "\nError closing an array directory"); + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError closing an AVL trees directory"); free(dms); return -1; } diff --git a/src/obidms.h b/src/obidms.h index 96e980b..6f94b0e 100644 --- a/src/obidms.h +++ b/src/obidms.h @@ -25,14 +25,14 @@ #include "obierrno.h" -#define OBIDMS_MAX_NAME (2048) /**< The maximum length of an OBIDMS name. - */ -#define ARRAYS_DIR_NAME "arrays" /**< The name of the arrays directory. - */ -#define MAX_NB_OPENED_COLUMNS (100) /**< The maximum number of columns open at the same time. - */ -#define MAX_NB_OPENED_ARRAYS (100) /**< The maximum number of arrays open at the same time. - */ +#define OBIDMS_MAX_NAME (2048) /**< The maximum length of an OBIDMS name. + */ +#define AVL_TREES_DIR_NAME "AVL_trees" /**< The name of the AVL trees directory. + */ +#define MAX_NB_OPENED_COLUMNS (100) /**< The maximum number of columns open at the same time. + */ +#define MAX_NB_OPENED_AVL_TREES (100) /**< The maximum number of AVL trees open at the same time. + */ struct OBIDMS_column; // TODO @@ -43,12 +43,12 @@ typedef struct Opened_columns_list { } Opened_columns_list_t, *Opened_columns_list_p; -struct OBIDMS_array; // TODO +struct OBIDMS_avl; // TODO -typedef struct Opened_arrays_list { - size_t nb_opened_arrays; - struct OBIDMS_array** arrays; -} Opened_arrays_list_t, *Opened_arrays_list_p; +typedef struct Opened_avls_list { + size_t nb_opened_avls; + struct OBIDMS_avl** avls; +} Opened_avls_list_t, *Opened_avls_list_p; /** @@ -67,17 +67,17 @@ typedef struct OBIDMS { int dir_fd; /**< The file descriptor of the directory entry * usable to refer and scan the database directory. */ - DIR* array_directory; /**< A directory entry usable to - * refer and scan the array directory. + DIR* avl_directory; /**< A directory entry usable to + * refer and scan the AVL trees directory. 
*/ - int array_dir_fd; /**< The file descriptor of the directory entry - * usable to refer and scan the array directory. + int avl_dir_fd; /**< The file descriptor of the directory entry + * usable to refer and scan the AVL trees directory. */ bool little_endian; /**< Endianness of the database. */ Opened_columns_list_p opened_columns; /**< List of opened columns. */ - Opened_arrays_list_p opened_arrays; /**< List of opened arrays. + Opened_avls_list_p opened_avls; /**< List of opened AVL trees. */ } OBIDMS_t, *OBIDMS_p; @@ -105,7 +105,7 @@ int obi_dms_exists(const char* dms_name); * if a directory with this name does not already exist * before creating the new database. * - * A directory to store obiarrays is also created. + * A directory to store AVL trees is also created. * * @param dms_name A pointer to a C string containing the name of the database. * The actual directory name used to store the DMS will be diff --git a/src/obidmscolumn.c b/src/obidmscolumn.c index 47b7b45..01898d4 100644 --- a/src/obidmscolumn.c +++ b/src/obidmscolumn.c @@ -29,7 +29,7 @@ #include "obierrno.h" #include "obidebug.h" #include "obilittlebigman.h" -#include "obiarray.h" +#include "obiavl.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) 
@@ -514,14 +514,14 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, index_t nb_lines, index_t nb_elements_per_line, const char* elements_names, - const char* array_name, + const char* avl_name, const char* comments, bool referring) { OBIDMS_column_p new_column; OBIDMS_column_directory_p column_directory; OBIDMS_column_header_p header; - OBIDMS_array_p array; + OBIDMS_avl_p avl; size_t file_size; obiversion_t version_number; char* column_file_name; @@ -552,9 +552,9 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, obidebug(1, "\nCan't create column because of invalid data type"); return NULL; } - if (((data_type == OBI_STR) || (data_type == OBI_SEQ)) && (array_name == NULL)) + if (((data_type == OBI_STR) || (data_type == OBI_SEQ)) && (avl_name == NULL)) { - obidebug(1, "\nCan't create column because of empty array name"); + obidebug(1, "\nCan't create column because of empty avl name"); return NULL; } @@ -732,20 +732,20 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, if (comments != NULL) strncpy(header->comments, comments, COMMENTS_MAX_LENGTH); - // If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened or created + // If the data type is OBI_STR or OBI_SEQ, the associated obi_avl is opened or created if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ)) { - array = obi_array(dms, array_name); - if (array == NULL) + avl = obi_avl(dms, avl_name); + if (avl == NULL) { - obidebug(1, "\nError opening or creating the array associated with a column"); + obidebug(1, "\nError opening or creating the aVL tree associated with a column"); munmap(new_column->header, header_size); close(column_file_descriptor); free(new_column); return NULL; } - new_column->array = array; - strncpy(header->array_name, array_name, ARRAY_MAX_NAME); + new_column->avl = avl; + strncpy(header->avl_name, avl_name, AVL_MAX_NAME); } // Fill the data with NA values @@ -768,7 +768,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, { OBIDMS_column_p column; 
OBIDMS_column_directory_p column_directory; - OBIDMS_array_p array; + OBIDMS_avl_p avl; char* column_file_name; int column_file_descriptor; size_t header_size; @@ -886,19 +886,19 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, column->writable = false; - // If the data type is OBI_STR or OBI_SEQ, the associated obi_array is opened + // If the data type is OBI_STR or OBI_SEQ, the associated AVL tree is opened if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ)) { - array = obi_array(dms, (column->header)->array_name); - if (array == NULL) + avl = obi_avl(dms, (column->header)->avl_name); + if (avl == NULL) { - obidebug(1, "\nError opening the array associated with a column"); + obidebug(1, "\nError opening the AVL tree associated with a column"); munmap(column->header, header_size); close(column_file_descriptor); free(column); return NULL; } - column->array = array; + column->avl = avl; } if ((column->header)->referring) @@ -973,7 +973,7 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms, nb_lines, nb_elements_per_line, (column_to_clone->header)->elements_names, - (column_to_clone->header)->array_name, + (column_to_clone->header)->avl_name, (column_to_clone->header)->comments, referring); @@ -1052,10 +1052,10 @@ int obi_close_column(OBIDMS_column_p column) if ((column->header)->referring) obi_close_column(column->referred_column); - // If the data type is OBI_STR or OBI_SEQ, the associated obi_array is closed + // If the data type is OBI_STR or OBI_SEQ, the associated AVL tree is closed if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ)) { - if (obi_close_array(column->array) < 0) + if (obi_close_avl(column->avl) < 0) return -1; } diff --git a/src/obidmscolumn.h b/src/obidmscolumn.h index 09ebd17..e295ccf 100644 --- a/src/obidmscolumn.h +++ b/src/obidmscolumn.h @@ -25,7 +25,7 @@ #include "obierrno.h" #include "obilittlebigman.h" #include "obidmscolumndir.h" 
-#include "obiarray.h" +#include "obiavl.h" #define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< If x is equal to 0, x takes the value 1. @@ -86,7 +86,7 @@ typedef struct OBIDMS_column_header { */ char name[OBIDMS_COLUMN_MAX_NAME+1]; /**< The column name as a NULL terminated string. */ - char array_name[ARRAY_MAX_NAME+1]; /**< If there is one, the obi_array name as a NULL terminated string. + char avl_name[AVL_MAX_NAME+1]; /**< If there is one, the AVL tree name as a NULL terminated string. */ char comments[COMMENTS_MAX_LENGTH+1]; /**< Comments stored as a classical zero end C string. */ @@ -106,7 +106,7 @@ typedef struct OBIDMS_column { */ OBIDMS_column_header_p header; /**< A pointer to the header of the column. */ - OBIDMS_array_p array; /**< A pointer to the array associated with the column if there is one. + OBIDMS_avl_p avl; /**< A pointer to the AVL tree associated with the column if there is one. */ struct OBIDMS_column* referred_column; /**< A pointer to the referred column if the column is referring. */ @@ -174,7 +174,7 @@ size_t obi_get_platform_header_size(); * @brief Creates a column. * * The minimum data size allocated is one memory page, and the data is initialized to the NA value of the OBIType. - * If there is an array associated with the column, it is opened or created if it does not already exist. + * If there is an AVL tree associated with the column, it is opened or created if it does not already exist. * * @warning If there is one element per line, elements_names should be equal to column_name. // TODO change this condition? * @@ -182,9 +182,9 @@ size_t obi_get_platform_header_size(); * @param column_name The name of the new column. * @param data_type The OBIType code of the data. * @param nb_lines The number of lines to be stored. - * @param nb_elements_per_line The number of elements per line. + * @param nb_elements_per_line The number of elements per line. 
// TODO talk about default values * @param elements_names The names of the elements with ';' as separator. - * @param array_name The name of the array if there is one associated with the column. + * @param avl_name The name of the AVL tree if there is one associated with the column. * @param comments Optional comments associated with the column. * @param referring * @@ -200,7 +200,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, index_t nb_lines, index_t nb_elements_per_line, const char* elements_names, - const char* array_name, + const char* avl_name, const char* comments, bool referring); diff --git a/src/obidmscolumn_seq.c b/src/obidmscolumn_seq.c index cf91590..beabf51 100644 --- a/src/obidmscolumn_seq.c +++ b/src/obidmscolumn_seq.c @@ -17,7 +17,7 @@ #include "obitypes.h" #include "obierrno.h" #include "obidebug.h" -#include "obiarray.h" +#include "obiavl.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) @@ -67,8 +67,8 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, if (value_b == NULL) return -1; - // Add in the obiarray - idx = obi_array_add(column->array, value_b); + // Add in the AVL tree + idx = obi_avl_add(column->avl, value_b); if (idx == -1) return -1; @@ -106,7 +106,7 @@ const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t l if (idx == OBIIdx_NA) return "\0"; // TODO - value_b = obi_array_get(column->array, idx); + value_b = obi_avl_get(column->avl, idx); return obi_obibytes_to_seq(value_b); } diff --git a/src/obidmscolumn_seq.h b/src/obidmscolumn_seq.h index d35ec8e..ca934a2 100644 --- a/src/obidmscolumn_seq.h +++ b/src/obidmscolumn_seq.h @@ -23,7 +23,7 @@ /** * @brief Sets a value in an OBIDMS column containing data in the form of indices referring - * to DNA sequences in an obiarray, using the index of the element in the line. + * to DNA sequences in an AVL tree, using the index of the element in the line. 
* * @warning Pointers returned by obi_open_column() don't allow writing. * @@ -44,7 +44,7 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, /** * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring - * to DNA sequences in an obiarray, using the index of the element in the line. + * to DNA sequences in an AVL tree, using the index of the element in the line. * * @param column A pointer as returned by obi_create_column(). * @param line_nb The number of the line where the value should be recovered. @@ -61,7 +61,7 @@ const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t l /** * @brief Sets a value in an OBIDMS column containing data in the form of indices referring - * to DNA sequences in an obiarray, using the name of the element in the line. + * to DNA sequences in an AVL tree, using the name of the element in the line. * * @warning Pointers returned by obi_open_column() don't allow writing. * @@ -82,7 +82,7 @@ int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, /** * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring - * to DNA sequences in an obiarray, using the name of the element in the line. + * to DNA sequences in an AVL tree, using the name of the element in the line. * * @param column A pointer as returned by obi_create_column() or obi_clone_column(). * @param line_nb The number of the line where the value should be recovered. diff --git a/src/obidmscolumn_str.c b/src/obidmscolumn_str.c index d654a8b..72e332c 100644 --- a/src/obidmscolumn_str.c +++ b/src/obidmscolumn_str.c @@ -17,7 +17,7 @@ #include "obitypes.h" #include "obierrno.h" #include "obidebug.h" -#include "obiarray.h" +#include "obiavl.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) 
@@ -67,8 +67,8 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, if (value_b == NULL) return -1; - // Add in the obiarray - idx = obi_array_add(column->array, value_b); + // Add in the AVL tree + idx = obi_avl_add(column->avl, value_b); if (idx == -1) return -1; @@ -106,7 +106,7 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l if (idx == OBIIdx_NA) return "\0"; // TODO - value_b = obi_array_get(column->array, idx); + value_b = obi_avl_get(column->avl, idx); return obi_obibytes_to_str(value_b); } diff --git a/src/obidmscolumn_str.h b/src/obidmscolumn_str.h index b1c98de..8bf7fdd 100644 --- a/src/obidmscolumn_str.h +++ b/src/obidmscolumn_str.h @@ -23,7 +23,7 @@ /** * @brief Sets a value in an OBIDMS column containing data in the form of indices referring - * to character strings in an obiarray, using the index of the element in the line. + * to character strings in an AVL tree, using the index of the element in the line. * * @warning Pointers returned by obi_open_column() don't allow writing. * @@ -44,7 +44,7 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, /** * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring - * to character strings in an obiarray, using the index of the element in the line. + * to character strings in an AVL tree, using the index of the element in the line. * * @param column A pointer as returned by obi_create_column(). * @param line_nb The number of the line where the value should be recovered. @@ -61,7 +61,7 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l /** * @brief Sets a value in an OBIDMS column containing data in the form of indices referring - * to character strings in an obiarray, using the name of the element in the line. + * to character strings in an AVL tree, using the name of the element in the line. 
* * @warning Pointers returned by obi_open_column() don't allow writing. * @@ -82,7 +82,7 @@ int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, /** * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring - * to character strings in an obiarray, using the name of the element in the line. + * to character strings in an AVL tree, using the name of the element in the line. * * @param column A pointer as returned by obi_create_column() or obi_clone_column(). * @param line_nb The number of the line where the value should be recovered. diff --git a/src/obierrno.h b/src/obierrno.h index 5a0c478..5679f93 100644 --- a/src/obierrno.h +++ b/src/obierrno.h @@ -98,7 +98,7 @@ extern int obi_errno; */ #define OBICOL_ACCESS_ERROR (19) /**< Permission error trying to access an OBIDSM column directory */ -#define OBI_ARRAY_ERROR (20) /** Error while handling an array +#define OBI_AVL_ERROR (20) /** Error while handling an AVL tree */ /**@}*/ diff --git a/src/obitypes.h b/src/obitypes.h index 97c4b20..4f22cb9 100644 --- a/src/obitypes.h +++ b/src/obitypes.h @@ -56,6 +56,12 @@ typedef double obifloat_t; typedef char obichar_t; // TODO same for obistr_t and obiseq_t ? + +typedef char byte_t; /**< Defining byte type since the data referred to by AVL trees is stored in bits + * and char (stored on one byte) is the smallest addressable unit. + */ + + /** * @brief Union used to compute the NA value of the OBI_FLOAT OBIType. */