From 019dfc01b4c50e3810f27ec6e731cb007c3957f1 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Fri, 8 Apr 2016 15:38:57 +0200 Subject: [PATCH] Branch to refactor and debug (AVLs bugged) --- python/obitools3/obidms/_obidms.cfiles | 4 +- .../obidms/_obidmscolumn_bool.cfiles | 4 +- .../obidms/_obidmscolumn_char.cfiles | 4 +- .../obidms/_obidmscolumn_float.cfiles | 4 +- .../obitools3/obidms/_obidmscolumn_int.cfiles | 4 +- .../obitools3/obidms/_obidmscolumn_seq.cfiles | 4 +- python/obitools3/obidms/_obidmscolumn_seq.pyx | 32 +- .../obitools3/obidms/_obidmscolumn_str.cfiles | 4 +- python/obitools3/obidms/_obidmscolumn_str.pyx | 31 +- python/obitools3/obidms/_obiseq.cfiles | 4 +- python/obitools3/obidms/_obiseq.pyx | 13 +- python/obitools3/obidms/_obitaxo.cfiles | 4 +- python/obitools3/obiimport.py | 48 +- requirements.txt | 2 +- src/bloom.c | 48 +- src/bloom.h | 15 +- src/encode.c | 133 ++ src/encode.h | 93 +- src/obiavl.c | 1421 +++++++++++------ src/obiavl.h | 324 ++-- src/obidms.c | 2 +- src/obidms.h | 2 +- src/obidms_taxonomy.c | 2 +- src/obidmscolumn.c | 48 +- src/obidmscolumn.h | 2 - src/obidmscolumn_seq.c | 15 +- src/obidmscolumn_str.c | 10 +- src/obidmscolumndir.c | 2 +- src/obierrno.h | 8 + src/obiview.c | 2 +- src/{private_at_functions.c => utils.c} | 45 +- src/{private_at_functions.h => utils.h} | 31 +- 32 files changed, 1553 insertions(+), 812 deletions(-) rename src/{private_at_functions.c => utils.c} (62%) rename src/{private_at_functions.h => utils.h} (73%) diff --git a/python/obitools3/obidms/_obidms.cfiles b/python/obitools3/obidms/_obidms.cfiles index aa6abc7..c9f9455 100644 --- a/python/obitools3/obidms/_obidms.cfiles +++ b/python/obitools3/obidms/_obidms.cfiles @@ -12,8 +12,6 @@ ../../../src/obilittlebigman.c ../../../src/obitypes.h ../../../src/obitypes.c -../../../src/private_at_functions.h -../../../src/private_at_functions.c ../../../src/obiavl.h ../../../src/obiavl.c ../../../src/encode.h @@ -28,3 +26,5 @@ ../../../src/murmurhash2.h 
../../../src/crc64.c ../../../src/crc64.h +../../../src/utils.c +../../../src/utils.h diff --git a/python/obitools3/obidms/_obidmscolumn_bool.cfiles b/python/obitools3/obidms/_obidmscolumn_bool.cfiles index c0ce92d..cf17ca3 100644 --- a/python/obitools3/obidms/_obidmscolumn_bool.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_bool.cfiles @@ -14,7 +14,7 @@ ../../../src/obilittlebigman.c ../../../src/obitypes.h ../../../src/obitypes.c -../../../src/private_at_functions.h -../../../src/private_at_functions.c +../../../src/utils.c +../../../src/utils.h ../../../src/obiavl.h ../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_char.cfiles b/python/obitools3/obidms/_obidmscolumn_char.cfiles index 381f1e0..f4c8b31 100644 --- a/python/obitools3/obidms/_obidmscolumn_char.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_char.cfiles @@ -14,7 +14,7 @@ ../../../src/obilittlebigman.c ../../../src/obitypes.h ../../../src/obitypes.c -../../../src/private_at_functions.h -../../../src/private_at_functions.c +../../../src/utils.c +../../../src/utils.h ../../../src/obiavl.h ../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_float.cfiles b/python/obitools3/obidms/_obidmscolumn_float.cfiles index baf7e74..25ab2dc 100644 --- a/python/obitools3/obidms/_obidmscolumn_float.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_float.cfiles @@ -14,7 +14,7 @@ ../../../src/obilittlebigman.c ../../../src/obitypes.h ../../../src/obitypes.c -../../../src/private_at_functions.h -../../../src/private_at_functions.c +../../../src/utils.c +../../../src/utils.h ../../../src/obiavl.h ../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_int.cfiles b/python/obitools3/obidms/_obidmscolumn_int.cfiles index 35fb144..0c4ce07 100644 --- a/python/obitools3/obidms/_obidmscolumn_int.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_int.cfiles @@ -14,7 +14,7 @@ ../../../src/obilittlebigman.c ../../../src/obitypes.h ../../../src/obitypes.c 
-../../../src/private_at_functions.h -../../../src/private_at_functions.c +../../../src/utils.c +../../../src/utils.h ../../../src/obiavl.h ../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_seq.cfiles b/python/obitools3/obidms/_obidmscolumn_seq.cfiles index 379c552..a9fd569 100644 --- a/python/obitools3/obidms/_obidmscolumn_seq.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_seq.cfiles @@ -14,7 +14,7 @@ ../../../src/obilittlebigman.c ../../../src/obitypes.h ../../../src/obitypes.c -../../../src/private_at_functions.h -../../../src/private_at_functions.c +../../../src/utils.c +../../../src/utils.h ../../../src/obiavl.h ../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_seq.pyx b/python/obitools3/obidms/_obidmscolumn_seq.pyx index 7648719..6d9a55e 100644 --- a/python/obitools3/obidms/_obidmscolumn_seq.pyx +++ b/python/obitools3/obidms/_obidmscolumn_seq.pyx @@ -10,19 +10,23 @@ from .capi.obitypes cimport OBISeq_NA, const_char_p from obitools3.utils cimport str2bytes, bytes2str +from libc.stdlib cimport free +from libc.string cimport strcmp + cdef class OBIDMS_column_seq(OBIDMS_column): cpdef object get_line(self, index_t line_nb): - cdef bytes value + cdef char* value cdef object result - value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0) + value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0) if obi_errno > 0 : raise IndexError(line_nb) - if value == OBISeq_NA : # TODO + if strcmp(value, OBISeq_NA) == 0 : result = None else : result = bytes2str(value) + free(value) return result cpdef set_line(self, index_t line_nb, object value): @@ -38,33 +42,35 @@ cdef class OBIDMS_column_seq(OBIDMS_column): cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts): cpdef object get_item(self, index_t line_nb, str element_name): - cdef bytes value + cdef char* value cdef object result - value = 
obi_column_get_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name)) + value = obi_column_get_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name)) if obi_errno > 0 : raise IndexError(line_nb, element_name) - if value == OBISeq_NA : + if strcmp(value, OBISeq_NA) == 0 : result = None else : result = bytes2str(value) + free(value) return result cpdef object get_line(self, index_t line_nb) : - cdef bytes value - cdef object value_in_result - cdef dict result + cdef char* value + cdef object value_in_result + cdef dict result cdef index_t i - cdef bint all_NA + cdef bint all_NA result = {} all_NA = True for i in range(self.nb_elements_per_line) : - value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i) + value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i) if obi_errno > 0 : raise IndexError(line_nb) - if value == OBISeq_NA : + if strcmp(value, OBISeq_NA) == 0 : value_in_result = None else : - value_in_result = bytes2str(value) + value_in_result = bytes2str(value) + free(value) result[self.elements_names[i]] = value_in_result if all_NA and (value_in_result is not None) : all_NA = False diff --git a/python/obitools3/obidms/_obidmscolumn_str.cfiles b/python/obitools3/obidms/_obidmscolumn_str.cfiles index 006aa64..dd79069 100644 --- a/python/obitools3/obidms/_obidmscolumn_str.cfiles +++ b/python/obitools3/obidms/_obidmscolumn_str.cfiles @@ -14,7 +14,7 @@ ../../../src/obilittlebigman.c ../../../src/obitypes.h ../../../src/obitypes.c -../../../src/private_at_functions.h -../../../src/private_at_functions.c +../../../src/utils.c +../../../src/utils.h ../../../src/obiavl.h ../../../src/obiavl.c diff --git a/python/obitools3/obidms/_obidmscolumn_str.pyx b/python/obitools3/obidms/_obidmscolumn_str.pyx index 0041872..e1636f9 100644 --- a/python/obitools3/obidms/_obidmscolumn_str.pyx +++ 
b/python/obitools3/obidms/_obidmscolumn_str.pyx @@ -10,19 +10,22 @@ from .capi.obitypes cimport OBIStr_NA, const_char_p from obitools3.utils cimport str2bytes, bytes2str +from libc.string cimport strcmp + cdef class OBIDMS_column_str(OBIDMS_column): cpdef object get_line(self, index_t line_nb): - cdef bytes value + cdef char* value cdef object result - value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0) + value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0) if obi_errno > 0 : raise IndexError(line_nb) - if value == OBIStr_NA : # TODO + if strcmp(value, OBIStr_NA) == 0 : result = None else : result = bytes2str(value) + # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss) return result cpdef set_line(self, index_t line_nb, object value): @@ -38,33 +41,35 @@ cdef class OBIDMS_column_str(OBIDMS_column): cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts): cpdef object get_item(self, index_t line_nb, str element_name): - cdef bytes value + cdef char* value cdef object result - value = obi_column_get_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name)) + value = obi_column_get_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name)) if obi_errno > 0 : raise IndexError(line_nb, element_name) - if value == OBIStr_NA : + if strcmp(value, OBIStr_NA) == 0 : result = None else : result = bytes2str(value) + # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. 
(TODO discuss) return result cpdef object get_line(self, index_t line_nb) : - cdef bytes value - cdef object value_in_result - cdef dict result + cdef char* value + cdef object value_in_result + cdef dict result cdef index_t i - cdef bint all_NA + cdef bint all_NA result = {} all_NA = True for i in range(self.nb_elements_per_line) : - value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i) + value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i) if obi_errno > 0 : raise IndexError(line_nb) - if value == OBIStr_NA : + if strcmp(value, OBIStr_NA) == 0 : value_in_result = None else : - value_in_result = bytes2str(value) + value_in_result = bytes2str(value) + # NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss) result[self.elements_names[i]] = value_in_result if all_NA and (value_in_result is not None) : all_NA = False diff --git a/python/obitools3/obidms/_obiseq.cfiles b/python/obitools3/obidms/_obiseq.cfiles index b5b7b4f..9758342 100644 --- a/python/obitools3/obidms/_obiseq.cfiles +++ b/python/obitools3/obidms/_obiseq.cfiles @@ -12,8 +12,8 @@ ../../../src/obilittlebigman.c ../../../src/obitypes.h ../../../src/obitypes.c -../../../src/private_at_functions.h -../../../src/private_at_functions.c +../../../src/utils.c +../../../src/utils.h ../../../src/obiavl.h ../../../src/obiavl.c ../../../src/encode.h diff --git a/python/obitools3/obidms/_obiseq.pyx b/python/obitools3/obidms/_obiseq.pyx index 4e10350..1f50c70 100644 --- a/python/obitools3/obidms/_obiseq.pyx +++ b/python/obitools3/obidms/_obiseq.pyx @@ -26,7 +26,7 @@ cdef class OBI_Seq(dict) : self[bytes2str(DESCRIPTION_COLUMN)] = description cpdef get_description(self) : - return self.description + return self.description # TODO no cpdef get_sequence(self) : return self.sequence @@ -48,28 +48,25 @@ cdef class OBI_Nuc_Seq(OBI_Seq) : cdef class OBI_Nuc_Seq_Stored(OBIView_line) : cpdef 
set_id(self, str id) : - self.id = id self[bytes2str(ID_COLUMN)] = id cpdef get_id(self) : - return self.id + return self[bytes2str(ID_COLUMN)] cpdef set_description(self, str description) : - self.description = description self[bytes2str(DESCRIPTION_COLUMN)] = description cpdef get_description(self) : - return self.description + return self[bytes2str(DESCRIPTION_COLUMN)] cpdef set_sequence(self, str sequence) : - self.sequence = sequence self[bytes2str(NUC_SEQUENCE_COLUMN)] = sequence cpdef get_sequence(self) : - return self.sequence + return self[bytes2str(NUC_SEQUENCE_COLUMN)] def __str__(self) : - return self.sequence # or not + return self[bytes2str(NUC_SEQUENCE_COLUMN)] # or not # cpdef str reverse_complement(self) : TODO in C ? # pass diff --git a/python/obitools3/obidms/_obitaxo.cfiles b/python/obitools3/obidms/_obitaxo.cfiles index 939bafa..5054bdb 100644 --- a/python/obitools3/obidms/_obitaxo.cfiles +++ b/python/obitools3/obidms/_obitaxo.cfiles @@ -12,8 +12,8 @@ ../../../src/obilittlebigman.c ../../../src/obitypes.h ../../../src/obitypes.c -../../../src/private_at_functions.h -../../../src/private_at_functions.c +../../../src/utils.c +../../../src/utils.h ../../../src/obiavl.h ../../../src/obiavl.c ../../../src/encode.h diff --git a/python/obitools3/obiimport.py b/python/obitools3/obiimport.py index 99580a1..f0d7ff5 100644 --- a/python/obitools3/obiimport.py +++ b/python/obitools3/obiimport.py @@ -5,7 +5,7 @@ import time from obitools3.obidms._obidms import OBIDMS -def bufferedRead(fileobj,size=100000000): +def bufferedRead(fileobj,size=209715200): ## 200 MB buffer = fileobj.readlines(size) while buffer: for l in buffer: @@ -26,14 +26,16 @@ if __name__ == '__main__': view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW") -# for i in range(35000000) : -# if (not (i%500000)) : -# print(str(time.time())+'\t'+str(i)) -# id = "@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"+str(i) -# view[i].set_id(id) + for i in range(35000000) : + if (not 
(i%500000)) : + print(str(time.time())+'\t'+str(i)) + id = "@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"+str(i) + view[i].set_id(id) + if id != view[i]["ID"] : + print("nope", id, view[i]["ID"]) - input_file = open(args.input_file, 'r') - input_file_buffered = bufferedRead(input_file) +# input_file = open(args.input_file, 'r') +# input_file_buffered = bufferedRead(input_file) # # if args.input_file[-1:] == "a" : @@ -111,37 +113,37 @@ if __name__ == '__main__': # l = 0 # next = False # - l=0 - i=0 +# l=0 +# i=0 # while (True): # l+=1 # line = input_file.readline() # if line=="": # break - for line in input_file_buffered : +# for line in input_file_buffered : # # #if i > 1E7 : # # print('hmm?') # -# #if i == 10000000 : -# # break +# if i == 6000000 : +# break # - if l%4 == 0 : +# if l%4 == 0 : # - if (not (i%500000)) : - print(str(time.time())+'\t'+str(i)) +# if (not (i%500000)) : +# print(str(time.time())+'\t'+str(i)) # # # # #print("header", line) # # - id = line.split(" ", 1)[0][1:] - print(id) +# id = line.split(" ", 1)[0][1:] +# print(id) # # #rest = (line[:-1].split(" ", 1)[1]).split(";") - view[i].set_id(id) - #print(view[i]["ID"]) +# view[i].set_id(id) +# print(view[i]["ID"]) # - i+=1 +# i+=1 - l+=1 +# l+=1 # # # description = "" # # for j in range(len(rest)) : @@ -186,7 +188,7 @@ if __name__ == '__main__': # l+=1 # # - input_file.close() +# input_file.close() #print(view) print(view.__repr__()) diff --git a/requirements.txt b/requirements.txt index 6b08d55..ed97ae6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ --extra-index-url https://pypi.python.org/simple/ -Cython>=0.21 +Cython==0.23.5 Sphinx>=1.2.0 ipython>=3.0.0 breathe>=4.0.0 diff --git a/src/bloom.c b/src/bloom.c index 518c125..935e2d9 100755 --- a/src/bloom.c +++ b/src/bloom.c @@ -122,6 +122,40 @@ static void setup_buckets(struct bloom * bloom, unsigned int cache_size) } +// TODO +int bloom_filter_size(int entries, double error) +{ + int bytes; + double num; + 
double denom; + double bpe; + int bits; + unsigned bucket_bytes; + int not_even_by; + + num = log(error); + denom = 0.480453013918201; // ln(2)^2 + bpe = -(num / denom); + bits = (int)(((double)entries) * bpe); + + if (bits % 8) { + bytes = (bits / 8) + 1; + } + else { + bytes = bits / 8; + } + + bucket_bytes = BLOOM_BUCKET_SIZE_FALLBACK; + not_even_by = bytes % bucket_bytes; + if (not_even_by) { + // adjust bytes + bytes += (bucket_bytes - not_even_by); + } + + return bytes; +} + + int bloom_init_size(struct bloom * bloom, int entries, double error, unsigned int cache_size) { @@ -151,19 +185,21 @@ int bloom_init_size(struct bloom * bloom, int entries, double error, setup_buckets(bloom, cache_size); - bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char)); - if (bloom->bf == NULL) { - return 1; - } + // TODO comment + memset(bloom->bf, 0, bloom->bytes); + //bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char)); + //if (bloom->bf == NULL) { + // return 1; + //} bloom->ready = 1; return 0; } -int bloom_init(struct bloom * bloom, int entries, double error) +int bloom_init(struct bloom * bloom, int entries) //, double error) { - return bloom_init_size(bloom, entries, error, 0); + return bloom_init_size(bloom, entries, BLOOM_FILTER_ERROR_RATE, 0); } diff --git a/src/bloom.h b/src/bloom.h index 203584a..e225b70 100755 --- a/src/bloom.h +++ b/src/bloom.h @@ -9,6 +9,10 @@ #define _BLOOM_H +// TODO +#define BLOOM_FILTER_ERROR_RATE (0.001) + + /** *************************************************************************** * On Linux, the code attempts to compute a bucket size based on CPU cache * size info, if available. 
If that fails for any reason, this fallback size @@ -60,10 +64,17 @@ struct bloom unsigned bucket_bits_fast_mod_operand; double bpe; - unsigned char * bf; int ready; + + unsigned char bf[]; }; +typedef struct bloom bloom_t; + + +// TODO +int bloom_filter_size(int entries, double error); + /** *************************************************************************** * Initialize the bloom filter for use. @@ -91,7 +102,7 @@ struct bloom * 1 - on failure * */ -int bloom_init(struct bloom * bloom, int entries, double error); +int bloom_init(struct bloom * bloom, int entries); //, double error); /** *************************************************************************** diff --git a/src/encode.c b/src/encode.c index 2a1f669..ca5df44 100644 --- a/src/encode.c +++ b/src/encode.c @@ -64,6 +64,12 @@ byte_t* encode_seq_on_2_bits(char* seq, int32_t length) length_b = ceil((double) length / (double) 4.0); seq_b = (byte_t*) malloc(length_b * sizeof(byte_t)); + if (seq_b == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); // TODO + obidebug(1, "\nError allocating memory for an encoded DNA sequence"); + return NULL; + } // Initialize all the bits to 0 memset(seq_b, 0, length_b); @@ -93,6 +99,7 @@ byte_t* encode_seq_on_2_bits(char* seq, int32_t length) seq_b[i/4] |= NUC_T_2b; break; default: + obi_set_errno(OBI_ENCODE_ERROR); // TODO obidebug(1, "\nInvalid nucleotide base when encoding (not [atgcATGC])"); return NULL; } @@ -116,6 +123,12 @@ char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq) uint8_t nuc; seq = (char*) malloc((length_seq+1) * sizeof(char)); + if (seq == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); // TODO + obidebug(1, "\nError allocating memory for a decoded DNA sequence"); + return NULL; + } for (i=0; ielement_size = element_size; + + // Store the length (in bytes) of the encoded value + byte_array->length_encoded_value = length_encoded_value; + + // Store the initial length (in bytes) of the decoded value + byte_array->length_decoded_value = 
length_decoded_value; + + // Store the encoded value + memcpy(byte_array->value, encoded_value, length_encoded_value); + + return byte_array; +} + + +Obi_byte_array_p obi_str_to_obibytes(char* value) +{ + Obi_byte_array_p value_b; + int32_t length; + + // Compute the number of bytes on which the value will be encoded + length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster) + + value_b = obi_byte_array(value, ELEMENT_SIZE_STR, length, length); + if (value_b == NULL) + { + obidebug(1, "\nError encoding a character string in a byte array"); + return NULL; + } + + return value_b; +} + + +const char* obi_obibytes_to_str(Obi_byte_array_p value_b) +{ + fprintf(stderr, "\n%s", value_b->value); + return value_b->value; +} + + +Obi_byte_array_p obi_seq_to_obibytes(char* seq) +{ + Obi_byte_array_p value_b; + int32_t length_encoded_seq; // length of the encoded sequence in bytes + int32_t seq_length; + byte_t* encoded_seq; + + seq_length = strlen(seq); + + // Check if just ATGC and encode accordingly + if (only_ATGC(seq)) + { + // Compute the length (in bytes) of the encoded sequence + length_encoded_seq = ceil((double) seq_length / (double) 4.0); + // Encode + encoded_seq = encode_seq_on_2_bits(seq, seq_length); + if (encoded_seq == NULL) + return NULL; + value_b = obi_byte_array(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length); + } + else + { + // Compute the length (in bytes) of the encoded sequence + length_encoded_seq = ceil((double) seq_length / (double) 2.0); + // Encode + encoded_seq = encode_seq_on_4_bits(seq, seq_length); + if (encoded_seq == NULL) + return NULL; + value_b = obi_byte_array(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length); + } + + free(encoded_seq); + + return value_b; +} + + +const char* obi_obibytes_to_seq(Obi_byte_array_p value_b) +{ + // Decode + if (value_b->element_size == 2) + return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value); + else + return 
decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value); +} + + +// TODO same for int + + ///////////////////// FOR DEBUGGING /////////////////////////// //NOTE: The first byte is printed the first (at the left-most). diff --git a/src/encode.h b/src/encode.h index 3a01802..c6a6a7a 100644 --- a/src/encode.h +++ b/src/encode.h @@ -10,6 +10,10 @@ */ +#ifndef ENCODE_H_ +#define ENCODE_H_ + + #include #include #include @@ -18,8 +22,31 @@ #include "obitypes.h" -#define NUC_MASK_2B 0x3 /**< Binary: 11 to use when decoding 2 bits sequences */ -#define NUC_MASK_4B 0xF /**< Binary: 1111 to use when decoding 4 bits sequences */ +#define NUC_MASK_2B 0x3 /**< Binary: 11 to use when decoding 2 bits sequences + */ +#define NUC_MASK_4B 0xF /**< Binary: 1111 to use when decoding 4 bits sequences + */ +#define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string. + */ +#define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits. + */ +#define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits. + */ + + +/** + * @brief Byte array structure. + */ +typedef struct Obi_byte_array { + uint8_t element_size; /**< Size in bits of one element from the value. + */ + int32_t length_encoded_value; /**< Length in bytes of the encoded value. + */ + int32_t length_decoded_value; /**< Length in bytes of the decoded value. + */ + byte_t value[]; /**< Encoded value. + */ +} Obi_byte_array_t, *Obi_byte_array_p; /** @@ -174,8 +201,70 @@ byte_t* encode_seq_on_4_bits(char* seq, int32_t length); char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq); +/** + * @brief Converts a character string to a byte array with a header. + * + * @warning The byte array must be freed by the caller. + * + * @param value The character string to convert. + * + * @returns A pointer to the byte array created. + * @retval NULL if an error occurred. 
+ * + * @since October 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +Obi_byte_array_p obi_str_to_obibytes(char* value); + + +/** + * @brief Converts a byte array to a character string. + * + * @param value_b The byte array to convert. + * + * @returns A pointer to the character string contained in the byte array. + * + * @since October 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +const char* obi_obibytes_to_str(Obi_byte_array_p value_b); + + +/** + * @brief Converts a DNA sequence to a byte array with a header. + * + * @warning The byte array must be freed by the caller. + * + * @param seq The DNA sequence to convert. + * + * @returns A pointer to the byte array created. + * @retval NULL if an error occurred. + * + * @since November 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +Obi_byte_array_p obi_seq_to_obibytes(char* seq); + + +/** + * @brief Converts a byte array to a DNA sequence. + * + * @param value_b The byte array to convert. + * + * @returns A pointer to the DNA sequence contained in the byte array. + * @retval NULL if an error occurred. + * + * @since November 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +const char* obi_obibytes_to_seq(Obi_byte_array_p value_b); // TODO move to encode source files + + ////////// FOR DEBUGGING /////////// // little endian void print_bits(void* ptr, int32_t length); + +#endif /* ENCODE_H_ */ + diff --git a/src/obiavl.c b/src/obiavl.c index e9dc24c..f1346fb 100644 --- a/src/obiavl.c +++ b/src/obiavl.c @@ -25,8 +25,8 @@ #include "obierrno.h" #include "obitypes.h" #include "obidebug.h" -#include "private_at_functions.h" #include "encode.h" +#include "utils.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
@@ -39,6 +39,41 @@ * **************************************************************************/ + +/** + * @brief Internal function building the complete AVL name for an AVL with an associated index (for AVL groups). + * + * @warning The returned pointer has to be freed by the caller. + * + * @param avl_name The base name of the AVL tree. + * @param avl_idx The index associated with that AVL. + * + * @returns A pointer to the complete name of the AVL. + * @retval NULL if an error occurred. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +static char* build_avl_name_with_idx(const char* avl_name, int avl_idx); + + +/** + * @brief Internal function building the full path of an AVL directory containing an AVL or an AVL group. + * + * @warning The returned pointer has to be freed by the caller. + * + * @param dms A pointer to the OBIDMS to which the AVL tree belongs. + * @param avl_name The name of the AVL tree or the base name of the AVL tree group. + * + * @returns A pointer to the full path of the AVL directory. + * @retval NULL if an error occurred. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +char* get_full_path_of_avl_dir(OBIDMS_p dms, const char* avl_name); + + /** * @brief Internal function building the file name for an AVL tree file. * @@ -118,17 +153,31 @@ size_t get_initial_avl_data_size(); /** - * @brief Internal function closing an AVL data structure where the data referred to by an AVL tree is stored. + * @brief Internal function truncating an AVL tree file to the minimum size that is a multiple of the page size. * - * @param avl_data A pointer to the data structure referred to by an AVL tree. + * @param avl A pointer to the AVL tree structure. * * @retval 0 if the operation was successfully completed. * @retval -1 if an error occurred. 
* - * @since December 2015 + * @since April 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -int close_avl_data(OBIDMS_avl_data_p avl_data); +int truncate_avl_to_size_used(OBIDMS_avl_p avl); + + +/** + * @brief Internal function truncating an AVL tree data file to the minimum size that is a multiple of the page size. + * + * @param avl_data A pointer to the data structure referred to by an AVL tree. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int truncate_avl_data_to_size_used(OBIDMS_avl_data_p avl_data); /** @@ -156,22 +205,80 @@ int grow_avl(OBIDMS_avl_p avl); * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -int grow_avl_data(OBIDMS_avl_p avl); +int grow_avl_data(OBIDMS_avl_data_p avl_data); /** - * @brief Internal function storing a value (byte array) in the data array referred to by an AVL tree. + * @brief Internal function closing an AVL data structure where the data referred to by an AVL tree is stored. * - * @param avl A pointer to the AVL tree structure. - * @param value A pointer to the value (byte array). + * @param avl_data A pointer to the data structure referred to by an AVL tree. * - * @returns The index of the stored value. + * @retval 0 if the operation was successfully completed. * @retval -1 if an error occurred. * * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, byte_t* value); +int close_avl_data(OBIDMS_avl_data_p avl_data); + + +/** + * @brief Internal function unmapping the tree and data parts of an AVL tree structure. + * + * @param avl A pointer to the AVL tree structure. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred.
+ * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int unmap_an_avl(OBIDMS_avl_p avl); + + +/** + * @brief Internal function (re)mapping the tree and data parts of an AVL tree structure. + * + * @param avl A pointer to the AVL tree structure. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int remap_an_avl(OBIDMS_avl_p avl); + + +/** + * @brief Internal function adding a new AVL tree in an AVL tree group. + * + * @param avl_group A pointer to the AVL tree group structure. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int add_new_avl_in_group(OBIDMS_avl_group_p avl_group); + + +/** + * @brief Internal function testing if a value might already be stored in an AVL tree. + * + * The function checks a bloom filter. No false negatives, possible false positives. + * + * @param avl A pointer to the AVL tree structure. + * @param value A pointer to the byte array structure. + * + * @retval 0 if the value is definitely not already stored in the AVL tree. + * @retval 1 if the value might already be stored in the AVL tree. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int maybe_in_avl(OBIDMS_avl_p avl, Obi_byte_array_p value); /** @@ -180,8 +287,8 @@ index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, byte_t* value); * The encoding is compared first, then the length of the * values, then the values themselves. * - * @param value_1 A pointer to the first byte array. - * @param value_2 A pointer to the second byte array. + * @param value_1 A pointer to the first byte array structure. + * @param value_2 A pointer to the second byte array structure.
* * @returns A value < 0 if value_1 < value_2, * a value > 0 if value_1 > value_2, @@ -190,20 +297,35 @@ index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, byte_t* value); * @since October 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -int byte_array_compare(byte_t* value_1, byte_t* value_2); +int byte_array_compare(Obi_byte_array_p value_1, Obi_byte_array_p value_2); /** * @brief Internal function calculating the size in bytes of a byte array. * - * @param value A pointer to the byte array. + * @param value A pointer to the byte array structure. * * @returns The size of the byte array in bytes. * * @since October 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -size_t byte_array_sizeof(byte_t* value); +int32_t byte_array_sizeof(Obi_byte_array_p value); + + +/** + * @brief Internal function storing a value (byte array) in the data array referred to by an AVL tree. + * + * @param avl A pointer to the AVL tree structure. + * @param value A pointer to the value (byte array structure). + * + * @returns The index of the stored value. + * @retval -1 if an error occurred. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_byte_array_p value); /** @@ -352,24 +474,78 @@ void avl_print(OBIDMS_avl_p avl); * ************************************************************************/ +static char* build_avl_name_with_idx(const char* avl_name, int avl_idx) +{ + char* avl_name_with_idx; + int avl_idx_length; + + if (avl_idx < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError building an AVL tree name with index: index < 0"); + return NULL; + } + + avl_idx_length = avl_idx == 0 ? 
1 : (int)(log10(avl_idx)+1); + avl_name_with_idx = malloc((strlen(avl_name) + avl_idx_length + 2)*sizeof(char)); + if (avl_name_with_idx == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for an AVL name"); + return NULL; + } + if (sprintf(avl_name_with_idx, "%s_%u", avl_name, avl_idx) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError building an AVL tree name with index"); + return NULL; + } + + return avl_name_with_idx; +} + + +char* get_full_path_of_avl_dir(OBIDMS_p dms, const char* avl_name) +{ + char* avl_dir_name; + + avl_dir_name = get_full_path(dms, AVL_TREES_DIR_NAME); + if (avl_dir_name == NULL) + { + obidebug(1, "\nError getting path for the DMS AVL directory"); + return NULL; + } + strcat(avl_dir_name, "/"); + strcat(avl_dir_name, avl_name); + + return avl_dir_name; +} + + static char* build_avl_file_name(const char* avl_name) { char* file_name; + // Test if the AVL name is not too long + if (strlen(avl_name) >= AVL_MAX_NAME) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError due to AVL tree name too long"); + return NULL; + } + // Build the file name file_name = (char*) malloc((strlen(avl_name) + 5)*sizeof(char)); + if (file_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for an AVL file name"); + return NULL; + } if (sprintf(file_name,"%s.oda", avl_name) < 0) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError building an AVL tree file name"); - return NULL; - } - - // Test if the avl name is not too long - if (strlen(file_name) >= AVL_MAX_NAME) - { - obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nError due to AVL tree name too long"); free(file_name); return NULL; } @@ -384,6 +560,12 @@ static char* build_avl_data_file_name(const char* avl_name) // Build the file name file_name = (char*) malloc((strlen(avl_name) + 5)*sizeof(char)); + if (file_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for an AVL 
data file name"); + return NULL; + } if (sprintf(file_name,"%s.odd", avl_name) < 0) { obi_set_errno(OBI_AVL_ERROR); @@ -401,7 +583,7 @@ size_t get_avl_header_size() size_t rounded_header_size; double multiple; - header_size = sizeof(OBIDMS_avl_header_t); + header_size = sizeof(OBIDMS_avl_header_t) + bloom_filter_size(MAX_NODE_COUNT_PER_AVL, BLOOM_FILTER_ERROR_RATE); multiple = ceil((double) header_size / (double) getpagesize()); @@ -451,27 +633,120 @@ size_t get_initial_avl_data_size() } -int close_avl_data(OBIDMS_avl_data_p avl_data) +int truncate_avl_to_size_used(OBIDMS_avl_p avl) // TODO is it necessary to unmap/remap? { - int ret_val = 0; + size_t file_size; + size_t new_data_size; + double multiple; + int file_descriptor; + // Compute the new size: used size rounded to the nearest greater multiple of page size greater than 0 + multiple = ceil((double) (ONE_IF_ZERO((avl->header)->nb_items * sizeof(AVL_node_t))) / (double) getpagesize()); + new_data_size = ((int) multiple) * getpagesize(); + + // Check that it is actually greater than the current size of the file, otherwise no need to truncate + if ((avl->header)->avl_size == new_data_size) + return 0; + + // Get the file descriptor + file_descriptor = avl->avl_fd; + + // Unmap the tree before truncating the file + if (munmap(avl->tree, (avl->header)->avl_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError munmapping the tree of an AVL before truncating"); + return -1; + } + + // Truncate the file + file_size = (avl->header)->header_size + new_data_size; + if (ftruncate(file_descriptor, file_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError truncating an AVL file"); + return -1; + } + + // Remap the data + avl->tree = mmap(NULL, + new_data_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + file_descriptor, + (avl->header)->header_size + ); + + if (avl->tree == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError re-mmapping the tree of an AVL after 
truncating"); + return -1; + } + + // Set new data size and new max node count + (avl->header)->avl_size = new_data_size; + (avl->header)->nb_items_max = floor(new_data_size / sizeof(AVL_node_t)); + + return 0; +} + + +int truncate_avl_data_to_size_used(OBIDMS_avl_data_p avl_data) // TODO is it necessary to unmap/remap? +{ + size_t file_size; + index_t new_data_size; + double multiple; + int file_descriptor; + + // Compute the new size: used size rounded to the nearest greater multiple of page size greater than 0 + multiple = ceil((double) (ONE_IF_ZERO((avl_data->header)->data_size_used)) / (double) getpagesize()); + new_data_size = ((int) multiple) * getpagesize(); + + // Check that it is actually greater than the current size of the file, otherwise no need to truncate + if ((avl_data->header)->data_size_max == new_data_size) + return 0; + + // Get the file descriptor + file_descriptor = avl_data->data_fd; + + // Unmap the data before truncating the file if (munmap(avl_data->data, (avl_data->header)->data_size_max) < 0) { obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nError munmapping the data of an AVL tree data file"); - ret_val = -1; + obidebug(1, "\nError munmapping the data of an AVL before truncating"); + return -1; } - if (munmap(avl_data->header, (avl_data->header)->header_size) < 0) + // Truncate the file + file_size = (avl_data->header)->header_size + new_data_size; + if (ftruncate(file_descriptor, file_size) < 0) { obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nError munmapping the header of an AVL tree data file"); - ret_val = -1; + obidebug(1, "\nError truncating an AVL data file"); + return -1; } - free(avl_data); + // Remap the data + avl_data->data = mmap(NULL, + new_data_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + file_descriptor, + (avl_data->header)->header_size + ); - return ret_val; + if (avl_data->data == MAP_FAILED) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError re-mmapping the data of an AVL after truncating"); + return -1; 
+ } + + // Set new data size + (avl_data->header)->data_size_max = new_data_size; + + return 0; } @@ -532,13 +807,11 @@ int grow_avl(OBIDMS_avl_p avl) // TODO Lock when needed // Set the new avl size (avl->header)->avl_size = new_data_size; - //close(avl_file_descriptor); - return 0; } -int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed +int grow_avl_data(OBIDMS_avl_data_p avl_data) // TODO Lock when needed { size_t file_size; index_t old_data_size; @@ -546,12 +819,12 @@ int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed size_t header_size; int avl_data_file_descriptor; - avl_data_file_descriptor = avl->data_fd; + avl_data_file_descriptor = avl_data->data_fd; // Calculate the new file size - old_data_size = ((avl->data)->header)->data_size_max; + old_data_size = (avl_data->header)->data_size_max; new_data_size = old_data_size * AVL_GROWTH_FACTOR; - header_size = ((avl->data)->header)->header_size; + header_size = (avl_data->header)->header_size; file_size = header_size + new_data_size; // Enlarge the file @@ -565,7 +838,7 @@ int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed // Unmap and re-map the data - if (munmap((avl->data)->data, old_data_size) < 0) + if (munmap(avl_data->data, old_data_size) < 0) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError munmapping the data of an AVL tree data file before enlarging"); @@ -573,15 +846,15 @@ int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed return -1; } - (avl->data)->data = mmap(NULL, - new_data_size, - PROT_READ | PROT_WRITE, - MAP_SHARED, - avl_data_file_descriptor, - header_size - ); + avl_data->data = mmap(NULL, + new_data_size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + avl_data_file_descriptor, + header_size + ); - if ((avl->data)->data == MAP_FAILED) + if (avl_data->data == MAP_FAILED) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError re-mmapping the data of an AVL tree data file after enlarging the file"); @@ -590,31 +863,175 @@ int grow_avl_data(OBIDMS_avl_p avl) 
// TODO Lock when needed } // Set new data size - ((avl->data)->header)->data_size_max = new_data_size; - - //fprintf(stderr, "\nGrowing AVL, new data size = %lld, count = %ld\n", new_data_size, (avl->header)->nb_items); + (avl_data->header)->data_size_max = new_data_size; // Initialize new data to 0 - memset(((avl->data)->data)+old_data_size, 0, new_data_size - old_data_size); - - //close(avl_data_file_descriptor); + memset((avl_data->data)+old_data_size, 0, new_data_size - old_data_size); return 0; } -index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, byte_t* value) +int close_avl_data(OBIDMS_avl_data_p avl_data) +{ + int ret_val = 0; + + ret_val = truncate_avl_data_to_size_used(avl_data); + + if (munmap(avl_data->data, (avl_data->header)->data_size_max) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError munmapping the data of an AVL tree data file"); + ret_val = -1; + } + + if (munmap(avl_data->header, (avl_data->header)->header_size) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError munmapping the header of an AVL tree data file"); + ret_val = -1; + } + + close(avl_data->data_fd); + + free(avl_data); + + return ret_val; +} + + +int unmap_an_avl(OBIDMS_avl_p avl) +{ + if (munmap((avl->data)->data, ((avl->data)->header)->data_size_max) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError unmapping the data of an AVL tree"); + return -1; + } + if (munmap(avl->tree, (((avl->header)->nb_items_max) * sizeof(AVL_node_t))) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError unmapping the tree of an AVL tree"); + return -1; + } + return 0; +} + + +int remap_an_avl(OBIDMS_avl_p avl) +{ + (avl->data)->data = mmap(NULL, + ((avl->data)->header)->data_size_max, + PROT_READ, + MAP_SHARED, + (avl->data)->data_fd, + ((avl->data)->header)->header_size); + if ((avl->data)->data == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError mapping the data of an AVL tree"); + return -1; + } + + avl->tree = mmap(NULL, + 
((avl->header)->nb_items_max) * sizeof(AVL_node_t), + PROT_READ, + MAP_SHARED, + avl->avl_fd, + (avl->header)->header_size); + if (avl->tree == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError mapping the tree of an AVL tree"); + return -1; + } + + return 0; +} + + +int add_new_avl_in_group(OBIDMS_avl_group_p avl_group) +{ + // Check that maximum number of AVLs in a group was not reached + if (avl_group->current_avl_idx == (MAX_NB_OF_AVLS_IN_GROUP-1)) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError: Trying to add new AVL in AVL group but maximum number of AVLs in a group reached"); + return -1; + } + + // Unmap the previous AVL + if (unmap_an_avl((avl_group->sub_avls)[avl_group->current_avl_idx]) < 0) + return -1; + + // Increment current AVL index + (avl_group->current_avl_idx)++; + + // Create the new AVL + (avl_group->sub_avls)[avl_group->current_avl_idx] = obi_create_avl(avl_group->dms, avl_group->avl_name, avl_group->current_avl_idx); + if ((avl_group->sub_avls)[avl_group->current_avl_idx] == NULL) + { + obidebug(1, "\nError creating a new AVL tree in a group"); + return -1; + } + + return 0; +} + + +int maybe_in_avl(OBIDMS_avl_p avl, Obi_byte_array_p value) +{ + return (bloom_check(&((avl->header)->bloom_filter), value, byte_array_sizeof(value))); +} + + +int byte_array_compare(Obi_byte_array_p value_1, Obi_byte_array_p value_2) +{ + int comp; + int32_t b; + + if (value_1->element_size != value_2->element_size) + return (value_1->element_size - value_2->element_size); + + if (value_1->length_encoded_value != value_2->length_encoded_value) + return (value_1->length_encoded_value - value_2->length_encoded_value); + + if (value_1->element_size != ELEMENT_SIZE_STR) // because if so, length_decoded_value == length_encoded_value + { + if (value_1->length_decoded_value != value_2->length_decoded_value) + return (value_1->length_decoded_value - value_2->length_decoded_value); + } + + b = 0; + comp = 0; + while (!comp && (b < 
value_1->length_encoded_value)) + { + //fprintf(stderr, "\ncomparing %c and %c", *((value_1->value)+b), *((value_2->value)+b)); + comp = *((value_1->value)+b) - *((value_2->value)+b); + b++; + } + return comp; +} + + +int32_t byte_array_sizeof(Obi_byte_array_p value) +{ + return (sizeof(Obi_byte_array_t) + (value->length_encoded_value)); +} + + +index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, Obi_byte_array_p value) { index_t value_idx; - size_t value_size; + int32_t value_size; value_idx = ((avl->data)->header)->data_size_used; // Grow the data if needed value_size = byte_array_sizeof(value); - while (((avl->data)->header)->data_size_max < (value_idx + (int64_t) value_size)) + while (((avl->data)->header)->data_size_max < (value_idx + value_size)) { - if (grow_avl_data(avl) < 0) + if (grow_avl_data(avl->data) < 0) return -1; } @@ -631,67 +1048,17 @@ index_t avl_add_value_in_data_array(OBIDMS_avl_p avl, byte_t* value) } -int byte_array_compare(byte_t* value_1, byte_t* value_2) -{ - int comp; - uint8_t size_1; - uint8_t size_2; - int32_t len_1; - int32_t len_2; - int32_t ini_len_1; - int32_t ini_len_2; - int32_t b; - - size_1 = (uint8_t) *(value_1); - size_2 = (uint8_t) *(value_2); - - if (size_1 != size_2) - return (size_1 - size_2); - - len_1 = *((int32_t*)(value_1+1)); - len_2 = *((int32_t*)(value_2+1)); - - if (len_1 != len_2) - return (len_1 - len_2); - - if (size_1 != 8) - { - ini_len_1 = *((int32_t*)(value_1+5)); - ini_len_2 = *((int32_t*)(value_2+5)); - - if (ini_len_1 != ini_len_2) - return (ini_len_1 - ini_len_2); - } - - b = BYTE_ARRAY_HEADER_SIZE; - comp = 0; - while (!comp && (b < len_1+BYTE_ARRAY_HEADER_SIZE)) - { - comp = *(value_1+b) - *(value_2+b); - b++; - } - return comp; -} - - -size_t byte_array_sizeof(byte_t* value) -{ - return (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1))); -} - - -// Initialize a new node AVL_node_p avl_create_node(OBIDMS_avl_p avl, index_t node_idx) { AVL_node_p node; node = (avl->tree)+node_idx; - node->left_child 
= -1; - node->right_child = -1; + node->left_child = -1; + node->right_child = -1; node->balance_factor = 0; - node->value = -1; - node->crc64 = 0; // TODO + node->value = -1; + node->crc64 = 0; // TODO no NA value return node; } @@ -894,7 +1261,7 @@ void avl_print_node(OBIDMS_avl_p avl, AVL_node_p node, index_t node_idx, int dep putchar(' '); fprintf(stderr, "Node idx: %lld, Value idx: %lld, Left child: %lld, Right child: %lld, " - "Balance factor: %d\n", node_idx, node->value, node->left_child, node->right_child, node->balance_factor); + "Balance factor: %d, CRC: %llu\n", node_idx, node->value, node->left_child, node->right_child, node->balance_factor, node->crc64); if (node->right_child != -1) avl_print_node(avl, RIGHT_CHILD(node), node->right_child, depth+2); @@ -918,33 +1285,34 @@ void avl_print(OBIDMS_avl_p avl) int obi_avl_exists(OBIDMS_p dms, const char* avl_name) { struct stat buffer; - char* avl_file_path; - char* avl_file_name; - char* avl_file_relative_path; + char* avl_dir_path; + char* avl_dir_relative_path; int relative_path_size; int check_dir; // Build the AVL tree file path - avl_file_name = build_avl_file_name(avl_name); - if (avl_file_name == NULL) - return -1; - relative_path_size = strlen(avl_file_name) + strlen(AVL_TREES_DIR_NAME) + 2; - avl_file_relative_path = (char*) malloc(relative_path_size*sizeof(char)); - strcpy(avl_file_relative_path, AVL_TREES_DIR_NAME); - strcat(avl_file_relative_path, "/"); - strcat(avl_file_relative_path, avl_file_name); - avl_file_path = get_full_path(dms, avl_file_relative_path); - if (avl_file_path == NULL) + relative_path_size = strlen(avl_name) + strlen(AVL_TREES_DIR_NAME) + 2; + avl_dir_relative_path = (char*) malloc(relative_path_size*sizeof(char)); + if (avl_dir_relative_path == NULL) { - obidebug(1, "\nError getting the file path for an AVL tree file"); + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for the path to the AVL directory"); + return -1; + } + 
strcpy(avl_dir_relative_path, AVL_TREES_DIR_NAME); + strcat(avl_dir_relative_path, "/"); + strcat(avl_dir_relative_path, avl_name); + avl_dir_path = get_full_path(dms, avl_dir_relative_path); + if (avl_dir_path == NULL) + { + obidebug(1, "\nError getting the directory path for an AVL tree"); return -1; } - check_dir = stat(avl_file_path, &buffer); + check_dir = stat(avl_dir_path, &buffer); - free(avl_file_path); - free(avl_file_name); - free(avl_file_relative_path); + free(avl_dir_path); + free(avl_dir_relative_path); if (check_dir == 0) return 1; @@ -953,120 +1321,10 @@ int obi_avl_exists(OBIDMS_p dms, const char* avl_name) } -OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name) -{ - int exists; - - exists = obi_avl_exists(dms, avl_name); - - switch (exists) - { - case 0: - return obi_create_avl(dms, avl_name); - case 1: - return obi_open_avl(dms, avl_name); - }; - - obidebug(1, "\nError checking if an AVL tree already exists"); - return NULL; -} - - -OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name) -{ - OBIDMS_avl_group_p avl_group; - char* avl_name_with_idx; - - avl_group = (OBIDMS_avl_group_p) malloc(sizeof(OBIDMS_avl_group_t)); - - // Create 1st avl - avl_name_with_idx = malloc((strlen(avl_name) + 3)*sizeof(char)); - if (sprintf(avl_name_with_idx, "%s_%u", avl_name, 0) < 0) - { - obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nError building an AVL tree file name"); - return NULL; - } - (avl_group->sub_avls)[0] = obi_create_avl(dms, avl_name_with_idx); - if ((avl_group->sub_avls)[0] == NULL) - { - obidebug(1, "\nError creating the first AVL of an AVL group"); - return NULL; - } - - avl_group->current_avl_idx = 0; - strcpy(avl_group->avl_name, avl_name); - - avl_group->dms = dms; - - return avl_group; -} - - -int unmap_an_avl(OBIDMS_avl_p avl) -{ - if (munmap((avl->data)->data, ((avl->data)->header)->data_size_max) < 0) - return -1; - if (munmap(avl->tree, (((avl->header)->nb_items_max) * sizeof(AVL_node_t))) < 0) - return -1; - 
return 0; -} - - -int remap_an_avl(OBIDMS_avl_p avl) -{ - (avl->data)->data = mmap(NULL, - ((avl->data)->header)->data_size_max, - PROT_READ, - MAP_SHARED, // TODO test MAP_PRIVATE? - avl->data_fd, - ((avl->data)->header)->header_size); - if ((avl->data)->data == NULL) - return -1; - - avl->tree = mmap(NULL, - ((avl->header)->nb_items_max) * sizeof(AVL_node_t), - PROT_READ, - MAP_SHARED, // TODO test MAP_PRIVATE? - avl->avl_fd, - (avl->header)->header_size); - if (avl->tree == NULL) - return -1; - - return 0; -} - - -int obi_add_new_avl_in_group(OBIDMS_avl_group_p avl_group) // TODO check for errors -{ - char* avl_name_with_idx; - int avl_idx_length; - - // unmap older - unmap_an_avl((avl_group->sub_avls)[avl_group->current_avl_idx]); - (avl_group->current_avl_idx)++; - avl_idx_length = ((avl_group->current_avl_idx) == 0 ? 1 : (int)(log10(avl_group->current_avl_idx)+1)); - avl_name_with_idx = malloc((strlen(avl_group->avl_name) + avl_idx_length + 2)*sizeof(char)); - if (sprintf(avl_name_with_idx, "%s_%u", avl_group->avl_name, avl_group->current_avl_idx) < 0) - { - obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nError building an AVL tree file name"); - return -1; - } - - (avl_group->sub_avls)[avl_group->current_avl_idx] = obi_create_avl(avl_group->dms, avl_name_with_idx); - if ((avl_group->sub_avls)[avl_group->current_avl_idx] == NULL) - { - obidebug(1, "\nError creating a new AVL tree in a group"); - return -1; - } - - return 0; -} - - -OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) +OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { + char* complete_avl_name; + char* avl_dir_name; char* avl_file_name; char* avl_data_file_name; size_t header_size; @@ -1074,26 +1332,90 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) size_t file_size; int avl_file_descriptor; int avl_data_file_descriptor; - int avl_dir_file_descriptor; + int avl_dir_fd; OBIDMS_avl_data_p avl_data; OBIDMS_avl_p avl; + DIR* directory; + 
struct stat buffer; + int check_dir; + + // Get complete name of AVL if index + if (avl_idx >= 0) + { + complete_avl_name = build_avl_name_with_idx(avl_name, avl_idx); + if (complete_avl_name == NULL) + return NULL; + } + else + { + complete_avl_name = (char*) malloc((strlen(avl_name)+1)*sizeof(char)); + if (complete_avl_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for an AVL name"); + return NULL; + } + strcpy(complete_avl_name, avl_name); + } + + // Create that AVL's directory if needed + avl_dir_name = get_full_path_of_avl_dir(dms, avl_name); + if (avl_dir_name == NULL) + return NULL; + // Check if the AVL's directory already exists + check_dir = stat(avl_dir_name, &buffer); + // Create that AVL's directory if it doesn't already exist + if (check_dir < 0) + { + if (mkdirat(dms->avl_dir_fd, avl_dir_name, 00777) < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError creating an AVL directory"); + if (avl_idx >= 0) + free(complete_avl_name); + free(avl_dir_name); + return NULL; + } + } + // Open the AVL directory + directory = opendir(avl_dir_name); + if (directory == NULL) + { + obidebug(1, "\nError opening an AVL directory"); + if (avl_idx >= 0) + free(complete_avl_name); + free(avl_dir_name); + return NULL; + } + free(avl_dir_name); + avl_dir_fd = dirfd(directory); + if (avl_dir_fd < 0) + { + obidebug(1, "\nError getting an AVL directory file descriptor"); + if (avl_idx >= 0) + free(complete_avl_name); + return NULL; + } // Create the data file // Build file name - avl_data_file_name = build_avl_data_file_name(avl_name); + avl_data_file_name = build_avl_data_file_name(complete_avl_name); if (avl_data_file_name == NULL) + { + if (avl_idx >= 0) + free(complete_avl_name); return NULL; - - // Get the file descriptor of the avl directory - avl_dir_file_descriptor = dms->avl_dir_fd; + } // Create file - avl_data_file_descriptor = openat(avl_dir_file_descriptor, avl_data_file_name, O_RDWR | O_CREAT | O_EXCL, 
0777); + avl_data_file_descriptor = openat(avl_dir_fd, avl_data_file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (avl_data_file_descriptor < 0) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError creating an AVL tree data file"); + if (avl_idx >= 0) + free(complete_avl_name); free(avl_data_file_name); return NULL; } @@ -1109,6 +1431,8 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError truncating an AVL tree data file to the right size"); + if (avl_idx >= 0) + free(complete_avl_name); close(avl_data_file_descriptor); return NULL; } @@ -1117,8 +1441,10 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) avl_data = (OBIDMS_avl_data_p) malloc(sizeof(OBIDMS_avl_data_t)); if (avl_data == NULL) { - obi_set_errno(OBI_AVL_ERROR); + obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for the AVL tree data structure"); + if (avl_idx >= 0) + free(complete_avl_name); close(avl_data_file_descriptor); return NULL; } @@ -1135,6 +1461,8 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the header of an AVL tree data file"); + if (avl_idx >= 0) + free(complete_avl_name); close(avl_data_file_descriptor); free(avl_data); return NULL; @@ -1151,6 +1479,8 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the data of an AVL tree data file"); + if (avl_idx >= 0) + free(complete_avl_name); munmap(avl_data->header, header_size); close(avl_data_file_descriptor); free(avl_data); @@ -1162,18 +1492,18 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) (avl_data->header)->data_size_used = 0; (avl_data->header)->nb_items = 0; (avl_data->header)->creation_date = time(NULL); - strcpy((avl_data->header)->avl_name, avl_name); + strcpy((avl_data->header)->avl_name, complete_avl_name); + + avl_data->data_fd = 
avl_data_file_descriptor; // Initialize all bits to 0 memset(avl_data->data, 0, (avl_data->header)->data_size_max); - //close(avl_data_file_descriptor); - // Create the AVL tree file // Build file name - avl_file_name = build_avl_file_name(avl_name); + avl_file_name = build_avl_file_name(complete_avl_name); if (avl_file_name == NULL) { close_avl_data(avl_data); @@ -1186,11 +1516,13 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) file_size = header_size + data_size; // Create file - avl_file_descriptor = openat(avl_dir_file_descriptor, avl_file_name, O_RDWR | O_CREAT | O_EXCL, 0777); + avl_file_descriptor = openat(avl_dir_fd, avl_file_name, O_RDWR | O_CREAT | O_EXCL, 0777); if (avl_file_descriptor < 0) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError creating an AVL tree file"); + if (avl_idx >= 0) + free(complete_avl_name); close_avl_data(avl_data); free(avl_file_name); return NULL; @@ -1202,6 +1534,8 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError truncating an AVL tree file to the right size"); + if (avl_idx >= 0) + free(complete_avl_name); close_avl_data(avl_data); close(avl_file_descriptor); return NULL; @@ -1211,8 +1545,10 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) avl = (OBIDMS_avl_p) malloc(sizeof(OBIDMS_avl_t)); if (avl == NULL) { - obi_set_errno(OBI_AVL_ERROR); + obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for the AVL tree structure"); + if (avl_idx >= 0) + free(complete_avl_name); close_avl_data(avl_data); close(avl_file_descriptor); return NULL; @@ -1230,6 +1566,8 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the header of an AVL tree file"); + if (avl_idx >= 0) + free(complete_avl_name); close_avl_data(avl_data); close(avl_file_descriptor); free(avl); @@ -1247,6 +1585,8 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const 
char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the data of an AVL tree file"); + if (avl_idx >= 0) + free(complete_avl_name); close_avl_data(avl_data); munmap(avl->header, header_size); close(avl_file_descriptor); @@ -1254,65 +1594,115 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name) return NULL; } - avl->dms = dms; - avl->data = avl_data; + avl->dms = dms; + avl->data = avl_data; avl->directory = dms->avl_directory; - avl->dir_fd = avl_dir_file_descriptor; + avl->dir_fd = avl_dir_fd; + avl->avl_fd = avl_file_descriptor; (avl->header)->header_size = header_size; (avl->header)->avl_size = data_size; (avl->header)->nb_items = 0; (avl->header)->nb_items_max = (index_t) floor(((double) get_initial_avl_size()) / ((double) sizeof(AVL_node_t))); - (avl->header)->root_idx = -1; + (avl->header)->root_idx = -1; (avl->header)->creation_date = time(NULL); - strcpy((avl->header)->avl_name, avl_name); - - avl->avl_fd = avl_file_descriptor; - avl->data_fd = avl_data_file_descriptor; + strcpy((avl->header)->avl_name, complete_avl_name); // Bloom filter - bloom_init(&((avl->header)->bloom_filter), NODE_COUNT_PER_AVL, BLOOM_FILTER_ERROR_RATE); - - //close(avl_file_descriptor); + bloom_init(&((avl->header)->bloom_filter), MAX_NODE_COUNT_PER_AVL); // Add in the list of opened AVL trees *(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl; ((dms->opened_avls)->nb_opened_avls)++; avl->counter = 1; + if (avl_idx >= 0) + free(complete_avl_name); + return avl; } -OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) +OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx) { char* avl_file_name; + char* complete_avl_name; + char* avl_dir_name; char* avl_data_file_name; + DIR* directory; size_t header_size; int avl_file_descriptor; int avl_data_file_descriptor; int avl_dir_file_descriptor; OBIDMS_avl_data_p avl_data; OBIDMS_avl_p avl; - size_t i; + size_t i; + + // Get complete name of AVL if 
index + if (avl_idx >= 0) + { + complete_avl_name = build_avl_name_with_idx(avl_name, avl_idx); + if (complete_avl_name == NULL) + return NULL; + } + else + { + complete_avl_name = (char*) malloc((strlen(avl_name)+1)*sizeof(char)); + if (complete_avl_name == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating memory for an AVL name"); + return NULL; + } + strcpy(complete_avl_name, avl_name); + } // Check if the AVL tree is already in the list of opened AVL trees for (i=0; i < ((dms->opened_avls)->nb_opened_avls); i++) { - if (!strcmp(((*(((dms->opened_avls)->avls)+i))->header)->avl_name, avl_name)) + if (!strcmp(((*(((dms->opened_avls)->avls)+i))->header)->avl_name, complete_avl_name)) { // Found the AVL tree already opened ((*(((dms->opened_avls)->avls)+i))->counter)++; + if (avl_idx >= 0) + free(complete_avl_name); return *(((dms->opened_avls)->avls)+i); } } + // Open the AVL directory + avl_dir_name = get_full_path_of_avl_dir(dms, avl_name); + if (avl_dir_name == NULL) + { + if (avl_idx >= 0) + free(complete_avl_name); + return NULL; + } + directory = opendir(avl_dir_name); + if (directory == NULL) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError opening an AVL directory"); + if (avl_idx >= 0) + free(complete_avl_name); + free(avl_dir_name); + return NULL; + } + free(avl_dir_name); + avl_dir_file_descriptor = dirfd(directory); + if (avl_dir_file_descriptor < 0) + { + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nError getting the file descriptor of an AVL directory"); + if (avl_idx >= 0) + free(complete_avl_name); + return NULL; + } + + // Open the data file - // Get the file descriptor of the AVL trees directory - avl_dir_file_descriptor = dms->avl_dir_fd; - // Build file name - avl_data_file_name = build_avl_data_file_name(avl_name); + avl_data_file_name = build_avl_data_file_name(complete_avl_name); if (avl_data_file_name == NULL) return NULL; @@ -1322,6 +1712,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* 
avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError opening an AVL tree data file"); + if (avl_idx >= 0) + free(complete_avl_name); free(avl_data_file_name); return NULL; } @@ -1331,8 +1723,10 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) avl_data = (OBIDMS_avl_data_p) malloc(sizeof(OBIDMS_avl_data_t)); if (avl_data == NULL) { - obi_set_errno(OBI_AVL_ERROR); + obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for the AVL tree data structure"); + if (avl_idx >= 0) + free(complete_avl_name); close(avl_data_file_descriptor); return NULL; } @@ -1342,6 +1736,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError reading the header size to open an AVL tree data file"); + if (avl_idx >= 0) + free(complete_avl_name); close(avl_data_file_descriptor); return NULL; } @@ -1358,6 +1754,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the header of an AVL tree data file"); + if (avl_idx >= 0) + free(complete_avl_name); close(avl_data_file_descriptor); free(avl_data); return NULL; @@ -1374,19 +1772,21 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the data of an AVL tree data file"); + if (avl_idx >= 0) + free(complete_avl_name); munmap(avl_data->header, header_size); close(avl_data_file_descriptor); free(avl_data); return NULL; } - //close(avl_data_file_descriptor); + avl_data->data_fd = avl_data_file_descriptor; // Open the AVL tree file // Build file name - avl_file_name = build_avl_file_name(avl_name); + avl_file_name = build_avl_file_name(complete_avl_name); if (avl_file_name == NULL) { close_avl_data(avl_data); @@ -1399,6 +1799,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError opening an AVL tree file"); + if (avl_idx >= 0) + 
free(complete_avl_name); close_avl_data(avl_data); free(avl_file_name); return NULL; @@ -1409,8 +1811,10 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) avl = (OBIDMS_avl_p) malloc(sizeof(OBIDMS_avl_t)); if (avl == NULL) { - obi_set_errno(OBI_AVL_ERROR); + obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating the memory for the AVL tree structure"); + if (avl_idx >= 0) + free(complete_avl_name); close_avl_data(avl_data); close(avl_file_descriptor); return NULL; @@ -1421,6 +1825,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError reading the header size to open an AVL tree"); + if (avl_idx >= 0) + free(complete_avl_name); close(avl_file_descriptor); return NULL; } @@ -1437,6 +1843,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the header of an AVL tree file"); + if (avl_idx >= 0) + free(complete_avl_name); close_avl_data(avl_data); close(avl_file_descriptor); free(avl); @@ -1454,6 +1862,8 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) { obi_set_errno(OBI_AVL_ERROR); obidebug(1, "\nError mmapping the data of an AVL tree file"); + if (avl_idx >= 0) + free(complete_avl_name); close_avl_data(avl_data); munmap(avl->header, header_size); close(avl_file_descriptor); @@ -1461,31 +1871,122 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name) return NULL; } - avl->dms = dms; - avl->data = avl_data; + avl->dms = dms; + avl->data = avl_data; avl->directory = dms->avl_directory; - avl->dir_fd = avl_dir_file_descriptor; - - avl->avl_fd = avl_file_descriptor; - avl->data_fd = avl_data_file_descriptor; - - //close(avl_file_descriptor); + avl->dir_fd = avl_dir_file_descriptor; + avl->avl_fd = avl_file_descriptor; // Add in the list of opened AVL trees *(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl; ((dms->opened_avls)->nb_opened_avls)++; avl->counter = 
1; + if (avl_idx >= 0) + free(complete_avl_name); + return avl; } +OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name) +{ + int exists; + + exists = obi_avl_exists(dms, avl_name); + + switch (exists) + { + case 0: + return obi_create_avl_group(dms, avl_name); + case 1: + return obi_open_avl_group(dms, avl_name); + }; + + obidebug(1, "\nError checking if an AVL tree already exists"); + return NULL; +} + + +OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name) +{ + OBIDMS_avl_group_p avl_group; + + avl_group = (OBIDMS_avl_group_p) malloc(sizeof(OBIDMS_avl_group_t)); + if (avl_group == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating the memory for an AVL group"); + return NULL; + } + + // Create 1st avl + (avl_group->sub_avls)[0] = obi_create_avl(dms, avl_name, 0); + if ((avl_group->sub_avls)[0] == NULL) + { + obidebug(1, "\nError creating the first AVL of an AVL group"); + return NULL; + } + + avl_group->current_avl_idx = 0; + strcpy(avl_group->avl_name, avl_name); + + avl_group->dms = dms; + + return avl_group; +} + + +OBIDMS_avl_group_p obi_open_avl_group(OBIDMS_p dms, const char* avl_name) +{ + OBIDMS_avl_group_p avl_group; + char* avl_dir_name; + int avl_count; + int i; + + // TODO check that the group isn't already open + + avl_group = (OBIDMS_avl_group_p) malloc(sizeof(OBIDMS_avl_group_t)); + if (avl_group == NULL) + { + obi_set_errno(OBI_MALLOC_ERROR); + obidebug(1, "\nError allocating the memory for an AVL group"); + return NULL; + } + + // Count the AVLs + avl_dir_name = get_full_path_of_avl_dir(dms, avl_name); + if (avl_dir_name == NULL) + return NULL; + avl_count = count_dir(avl_dir_name) / 2; + if (avl_count < 0) + { + obidebug(1, "\nError counting the AVLs in an AVL directory: %s", avl_name); + return NULL; + } + + // Open the AVLs + for (i=0; isub_avls)[i] = obi_open_avl(dms, avl_name, i); + if ((avl_group->sub_avls)[i] == NULL) + return NULL; + } + avl_group->current_avl_idx = 
avl_count-1; // TODO latest. discuss + strcpy(avl_group->avl_name, avl_name); + + avl_group->dms = dms; + + return avl_group; +} + + int obi_close_avl(OBIDMS_avl_p avl) { - int ret_val = 0; - size_t i; - Opened_avls_list_p avls_list; - OBIDMS_p dms; + int ret_val = 0; + size_t i; + Opened_avls_list_p avls_list; + OBIDMS_p dms; dms = avl->dms; @@ -1507,6 +2008,8 @@ int obi_close_avl(OBIDMS_avl_p avl) ret_val = close_avl_data(avl->data); + ret_val = truncate_avl_to_size_used(avl); + if (munmap(avl->tree, (((avl->header)->nb_items_max) * sizeof(AVL_node_t))) < 0) { obi_set_errno(OBI_AVL_ERROR); @@ -1521,6 +2024,8 @@ int obi_close_avl(OBIDMS_avl_p avl) ret_val = -1; } + close(avl->avl_fd); + free(avl); } @@ -1528,100 +2033,44 @@ int obi_close_avl(OBIDMS_avl_p avl) } -byte_t* obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx) +int obi_close_avl_group(OBIDMS_avl_group_p avl_group) { - int32_t avl_idx; - index_t idx_in_avl; + int i; + int ret_val; - avl_idx = (int32_t) (idx >> 32); - idx_in_avl = idx & 0x00000000FFFFFFFF; + ret_val = 0; - return obi_avl_get((avl_group->sub_avls)[avl_idx], idx_in_avl); -} - - -byte_t* obi_avl_get(OBIDMS_avl_p avl, index_t idx) -{ - return (((avl->data)->data)+idx); -} - - -int maybe_in_avl(OBIDMS_avl_p avl, byte_t* value) -{ - return (bloom_check(&((avl->header)->bloom_filter), value, (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1))))); -} - - -int64_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value) // TODO won't be index_t -{ - int32_t index_in_avl; - int64_t index_with_avl; - int i; - - if (maybe_in_avl((avl_group->sub_avls)[avl_group->current_avl_idx], value)) - { - index_in_avl = (int32_t) obi_avl_find((avl_group->sub_avls)[avl_group->current_avl_idx], value); - if (index_in_avl >= 0) - { - index_with_avl = avl_group->current_avl_idx; - index_with_avl = index_with_avl << 32; - index_with_avl = index_with_avl + index_in_avl; - return index_with_avl; - } - } for (i=0; i < (avl_group->current_avl_idx); i++) - { 
- if (maybe_in_avl((avl_group->sub_avls)[i], value)) - { - if (remap_an_avl((avl_group->sub_avls)[i]) < 0) - return -1; - index_in_avl = (int32_t) obi_avl_find((avl_group->sub_avls)[i], value); - if (unmap_an_avl((avl_group->sub_avls)[i]) < 0) - return -1; - if (index_in_avl >= 0) - { - index_with_avl = i; - index_with_avl = index_with_avl << 32; - index_with_avl = index_with_avl + index_in_avl; - return index_with_avl; - } - } - } - - // Not found in any AVL: add in current - // First, check if make new AVL - if ((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->nb_items == NODE_COUNT_PER_AVL) // TODO add condition with data size - obi_add_new_avl_in_group(avl_group); - - // Add in the current AVL - bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1))); - - // Build the index containing the AVL index - index_in_avl = (int32_t) obi_avl_add((avl_group->sub_avls)[avl_group->current_avl_idx], value); - index_with_avl = avl_group->current_avl_idx; - index_with_avl = index_with_avl << 32; - index_with_avl = index_with_avl + index_in_avl; - - return index_with_avl; + if (obi_close_avl((avl_group->sub_avls)[i]) < 0) + ret_val = -1; + free(avl_group); + return ret_val; } -// Insert a new node -index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value) +Obi_byte_array_p obi_avl_get(OBIDMS_avl_p avl, index_t idx) { - AVL_node_p node_to_add = NULL; - AVL_node_p current_node; - index_t next, parent; - index_t value_data_idx; - index_t node_idx; - byte_t* to_compare; - int comp; - int n = 0; - int depth = 0; + obidebug(1, "\ngetting >%s", ((Obi_byte_array_p)((avl->data)->data)+idx)->value); + return ((Obi_byte_array_p)((avl->data)->data)+idx); +} - uint64_t crc; - crc = crc64(value, BYTE_ARRAY_HEADER_SIZE + ((uint64_t) (*((int32_t*)(value+1))))); // TODO warning +index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value) +{ + AVL_node_p node_to_add = NULL; + AVL_node_p 
current_node; + index_t next, parent; + index_t value_data_idx; + index_t node_idx; + Obi_byte_array_p to_compare; + int comp; + int n; + int depth; + uint64_t crc; + + n = 0; + depth = 0; + crc = crc64((byte_t*)value, byte_array_sizeof(value)); // Check if first node if (!((avl->header)->nb_items)) @@ -1662,9 +2111,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value) parent = next; - // Compare value with value of current node - //to_compare = obi_avl_get(avl, current_node->value); - //comp = byte_array_compare(to_compare, value); + // Compare the crc of the value with the crc of the current node comp = (current_node->crc64) - crc; if (comp == 0) @@ -1681,9 +2128,10 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value) next = current_node->right_child; else if (comp == 0) // Value already stored - { - fprintf(stderr, "\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items); - return current_node->value; // TODO should trigger error if using bloom filters + { // TODO add an option to eventually return the value index? 
(useful for simple AVLs (not in groups)) + obi_set_errno(OBI_AVL_ERROR); + obidebug(1, "\nValue to add already in AVL"); + return -1; } depth++; @@ -1693,7 +2141,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value) if (depth == AVL_MAX_DEPTH) { obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nThis AVL tree has reached the maximum height (%d).", AVL_MAX_DEPTH); + obidebug(1, "\nThis AVL tree has reached the maximum depth (%d).", AVL_MAX_DEPTH); return -1; } @@ -1705,7 +2153,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value) } // Initialize node at the end of the tree - node_idx = (avl->header)->nb_items; + node_idx = (avl->header)->nb_items; node_to_add = avl_create_node(avl, node_idx); // Add the value in the data array and store its index @@ -1743,31 +2191,31 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value) // Find if a value is already in an AVL tree -index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value) +index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value) { - int comp; - index_t next; - byte_t* to_compare; - AVL_node_p current_node; + int comp; + index_t next; + Obi_byte_array_p to_compare; + AVL_node_p current_node; + uint64_t crc; - uint64_t crc; - crc = crc64(value, BYTE_ARRAY_HEADER_SIZE + ((uint64_t) (*((int32_t*)(value+1))))); // TODO warning + crc = crc64((byte_t*)value, byte_array_sizeof(value)); next = (avl->header)->root_idx; while (next != -1) { current_node = (avl->tree)+next; - // Compare value with value of current node - //to_compare = obi_avl_get(avl, current_node->value); - //comp = byte_array_compare(to_compare, value); - + // Compare the crc of the value with the crc of the current node comp = (current_node->crc64) - crc; if (comp == 0) - { // check if really same value + { // Check if really same value + //fprintf(stderr, "\ngonna get\n"); to_compare = obi_avl_get(avl, current_node->value); + //fprintf(stderr, "\ngot, gonna compare\n"); comp = byte_array_compare(to_compare, value); + //fprintf(stderr, 
"\ncompared\n"); } if (comp > 0) @@ -1778,7 +2226,6 @@ index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value) next = current_node->right_child; else if (comp == 0) { // Value found - fprintf(stderr, "\n>>>ALREADY IN in find, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items); return current_node->value; } } @@ -1787,130 +2234,84 @@ index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value) } -byte_t* obi_str_to_obibytes(char* value) +Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx) { - byte_t* value_b; - int32_t length; + int32_t avl_idx; + index_t idx_in_avl; - // Compute the number of bytes on which the value will be encoded - length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster) + //fprintf(stderr, "\noriginal index = %lld", idx); - // Allocate the memory for the encoded value - value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length); - if (value_b == NULL) - { - obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nError allocating memory for a byte array"); - return NULL; - } + avl_idx = (int32_t) (idx >> 32); + idx_in_avl = idx & 0x00000000FFFFFFFF; - // Store the number of bits on which each element is encoded - *(value_b) = 8; + //fprintf(stderr, "\navl index = %d", avl_idx); + //fprintf(stderr, "\nindex in avl = %lld", idx_in_avl); - // Store the length (in bytes) of the encoded value (same as decoded for character strings) - *((int32_t*)(value_b+1)) = length; - - // Store the initial length (in bytes) of the decoded value (same as encoded for character strings) - *((int64_t*)(value_b+5)) = length; - - // Store the character string - strcpy(value_b+BYTE_ARRAY_HEADER_SIZE, value); - - return value_b; + return obi_avl_get((avl_group->sub_avls)[avl_idx], idx_in_avl); } -const char* obi_obibytes_to_str(byte_t* value_b) +index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_byte_array_p value) { - const char* value; + int32_t index_in_avl; + index_t index_with_avl; + int i; - value = 
value_b+BYTE_ARRAY_HEADER_SIZE; + //fprintf(stderr, "\nAdding %s", value->value); - return value; + if (maybe_in_avl((avl_group->sub_avls)[avl_group->current_avl_idx], value)) + { + //fprintf(stderr, "\nmaybe in current AVL"); + index_in_avl = (int32_t) obi_avl_find((avl_group->sub_avls)[avl_group->current_avl_idx], value); + //fprintf(stderr, "\nfind went fine\n"); + if (index_in_avl >= 0) + { + index_with_avl = avl_group->current_avl_idx; + index_with_avl = index_with_avl << 32; + index_with_avl = index_with_avl + index_in_avl; + return index_with_avl; + } + } + for (i=0; i < (avl_group->current_avl_idx); i++) + { + if (maybe_in_avl((avl_group->sub_avls)[i], value)) + { + //fprintf(stderr, "\nmaybe in AVL %d", i); + if (remap_an_avl((avl_group->sub_avls)[i]) < 0) + return -1; + index_in_avl = (int32_t) obi_avl_find((avl_group->sub_avls)[i], value); + if (unmap_an_avl((avl_group->sub_avls)[i]) < 0) + return -1; + if (index_in_avl >= 0) + { + index_with_avl = i; + index_with_avl = index_with_avl << 32; + index_with_avl = index_with_avl + index_in_avl; + return index_with_avl; + } + } + } + + // Not found in any AVL: add in current + // First, check if make new AVL + if (((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->nb_items == MAX_NODE_COUNT_PER_AVL) || (((((avl_group->sub_avls)[avl_group->current_avl_idx])->data)->header)->data_size_used >= MAX_DATA_SIZE_PER_AVL)) + { + if (add_new_avl_in_group(avl_group) < 0) + return -1; + } + + //fprintf(stderr, "\nAdding in current AVL"); + + // Add in the current AVL + bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, byte_array_sizeof(value)); + index_in_avl = (int32_t) obi_avl_add((avl_group->sub_avls)[avl_group->current_avl_idx], value); + + // Build the index containing the AVL index + index_with_avl = avl_group->current_avl_idx; + index_with_avl = index_with_avl << 32; + index_with_avl = index_with_avl + index_in_avl; + + return index_with_avl; } -byte_t* 
obi_seq_to_obibytes(char* seq) -{ - byte_t* value_b; - int32_t length; // length of the value (without the header) in bytes - uint8_t size; // size of one element in bits - int32_t seq_length; - byte_t* encoded_seq; - - // Check if just ATGC and set size of a nucleotide accordingly (2 bits or 4 bits) - if (only_ATGC(seq)) - size = 2; - else - size = 4; - - // Compute the length (in bytes) of the encoded sequence - seq_length = strlen(seq); - if (size == 2) - length = ceil((double) seq_length / (double) 4.0); - else // size == 4 - length = ceil((double) seq_length / (double) 2.0); - - // Encode - if (size == 2) - encoded_seq = encode_seq_on_2_bits(seq, seq_length); - else // size == 4 - encoded_seq = encode_seq_on_4_bits(seq, seq_length); - if (encoded_seq == NULL) - { - obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nError encoding a DNA sequence"); - return NULL; - } - - // Allocate the memory for the encoded value - value_b = (byte_t*) malloc(BYTE_ARRAY_HEADER_SIZE + length); - if (value_b == NULL) - { - obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nError allocating memory for a byte array"); - return NULL; - } - - // Store the number of bits on which each nucleotide is encoded - *(value_b) = size; - - // Store the length (in bytes) of the encoded sequence - *((int32_t*)(value_b+1)) = length; - - // Store the length (in bytes) of the initial sequence (necessary for decoding) - *((int32_t*)(value_b+5)) = seq_length; - - // Store the encoded sequence - memcpy(value_b+BYTE_ARRAY_HEADER_SIZE, encoded_seq, length); - - free(encoded_seq); - - return value_b; -} - - -const char* obi_obibytes_to_seq(byte_t* value_b) -{ - const char* value; - uint8_t size; // size of one element in bits - - // Check the encoding (each nucleotide on 2 bits or 4 bits) - size = *(value_b); - - // Decode - if (size == 2) - value = decode_seq_on_2_bits(value_b+BYTE_ARRAY_HEADER_SIZE, *((int32_t*)(value_b+5))); - else - value = decode_seq_on_4_bits(value_b+BYTE_ARRAY_HEADER_SIZE, 
*((int32_t*)(value_b+5))); - - if (value == NULL) - { - obi_set_errno(OBI_AVL_ERROR); - obidebug(1, "\nError decoding a DNA sequence"); - return NULL; - } - - return value; -} - diff --git a/src/obiavl.h b/src/obiavl.h index 2e83fc1..1127b42 100644 --- a/src/obiavl.h +++ b/src/obiavl.h @@ -25,41 +25,46 @@ #include "obidms.h" #include "obitypes.h" #include "bloom.h" +#include "utils.h" +#include "encode.h" -#define NODE_COUNT_PER_AVL (10000000) - -#define BLOOM_FILTER_ERROR_RATE (0.001) - +#define MAX_NB_OF_AVLS_IN_GROUP (100) /**< The maximum number of AVL trees in a group. // TODO discuss + */ +#define MAX_NODE_COUNT_PER_AVL (10000000) /**< The maximum number of nodes in an AVL tree. + * Only used to decide when to create a new AVL in a group, and to initialize the bloom filter // TODO discuss. + */ +#define MAX_DATA_SIZE_PER_AVL (1073741824) /**< The maximum size of the data referred to by an AVL tree in a group. + * Only used to decide when to create a new AVL in a group. + * Should not be greater than int32_t max (2,147,483,647), as indexes will have to be stored on 32 bits. + * Here 1073741824 B = 1 GB + */ +#define AVL_MAX_DEPTH (1024) /**< The maximum depth of an AVL tree. Used to save paths through the tree. + */ #define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name. */ #define AVL_GROWTH_FACTOR (2) /**< The growth factor when an AVL tree is enlarged. */ -#define AVL_MAX_DEPTH (1000) /**< The maximum depth of an AVL tree. - */ #define LEFT_CHILD(node) (avl->tree)+(node->left_child) /**< Pointer to the left child of a node in an AVL tree. */ #define RIGHT_CHILD(node) (avl->tree)+(node->right_child) /**< Pointer to the right child of a node in an AVL tree. */ -#define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array. - */ - -typedef struct bloom bloom_t; /** * @brief AVL tree node structure. */ typedef struct AVL_node { - index_t left_child; /**< Index of left less child node. 
- */ - index_t right_child; /**< Index of right greater child node. - */ - int8_t balance_factor; /**< Balance factor of the node. - */ - index_t value; /**< Index of the value associated with the node in the data array. - */ - uint64_t crc64; // TODO + index_t left_child; /**< Index of left less child node. + */ + index_t right_child; /**< Index of right greater child node. + */ + int8_t balance_factor; /**< Balance factor of the node. + */ + index_t value; /**< Index of the value associated with the node in the data array. + */ + uint64_t crc64; /**< Cyclic Redundancy Check code on 64 bits associated with the value. + */ } AVL_node_t, *AVL_node_p; @@ -88,8 +93,10 @@ typedef struct OBIDMS_avl_data_header { typedef struct OBIDMS_avl_data { OBIDMS_avl_data_header_p header; /**< A pointer to the header of the AVL tree data. */ - byte_t* data; /**< A pointer to the beginning of the data. + byte_t* data; /**< A pointer to the beginning of the data. */ + int data_fd; /**< File descriptor of the file containing the data. + */ } OBIDMS_avl_data_t, *OBIDMS_avl_data_p; @@ -111,7 +118,9 @@ typedef struct OBIDMS_avl_header { */ time_t creation_date; /**< Date of creation of the file. */ - bloom_t bloom_filter; + bloom_t bloom_filter; /**< Bloom filter associated with the AVL tree, enabling to know if a value + * might already be stored in the data associated with the tree. + */ } OBIDMS_avl_header_t, *OBIDMS_avl_header_p; @@ -119,30 +128,30 @@ typedef struct OBIDMS_avl_header { * @brief OBIDMS AVL tree structure. */ typedef struct OBIDMS_avl { - OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the AVL tree belongs. - */ - OBIDMS_avl_header_p header; /**< A pointer to the header of the AVL tree. - */ - struct AVL_node* tree; /**< A pointer to the root of the AVL tree. - */ + OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the AVL tree belongs. + */ + OBIDMS_avl_header_p header; /**< A pointer to the header of the AVL tree. 
+ */ + struct AVL_node* tree; /**< A pointer to the root of the AVL tree. + */ index_t path_idx[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of node indices. */ int8_t path_dir[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of directions - * (0 for left, -1 for right). + * (0 for left, -1 for right). */ - OBIDMS_avl_data_p data; /**< A pointer to the structure containing the data - * that the AVL tree references. - */ - DIR* directory; /**< A directory entry usable to - * refer and scan the AVL tree directory. - */ - int dir_fd; /**< The file descriptor of the directory entry - * usable to refer and scan the AVL tree directory. - */ - size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL tree is used. - */ - int avl_fd; - int data_fd; + OBIDMS_avl_data_p data; /**< A pointer to the structure containing the data + * that the AVL tree references. + */ + DIR* directory; /**< A directory entry usable to + * refer and scan the AVL tree directory. + */ + int dir_fd; /**< The file descriptor of the directory entry + * usable to refer and scan the AVL tree directory. + */ + int avl_fd; /**< The file descriptor of the file containing the AVL tree. + */ + size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL tree is used. + */ } OBIDMS_avl_t, *OBIDMS_avl_p; @@ -150,28 +159,26 @@ typedef struct OBIDMS_avl { * @brief OBIDMS AVL tree group structure. */ typedef struct OBIDMS_avl_group { - // TODO put each group in a directory later - OBIDMS_avl_p sub_avls[64]; // TODO macro for max - int current_avl_idx; - char avl_name[AVL_MAX_NAME+1]; - OBIDMS_p dms; + OBIDMS_avl_p sub_avls[MAX_NB_OF_AVLS_IN_GROUP]; /**< Array containing the pointers to the AVL trees of the group. + */ + int current_avl_idx; /**< Index in the sub_avls array of the AVL tree currently being filled. + */ + char avl_name[AVL_MAX_NAME+1]; /**< Base name of the AVL group. 
The AVL trees in it have names of the form basename_idx. + */ + OBIDMS_p dms; /**< Pointer to the OBIDMS structure to which the AVL group belongs. + */ } OBIDMS_avl_group_t, *OBIDMS_avl_group_p; -OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name); -index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value); - - - /** - * @brief Checks if an AVL tree already exists or not. + * @brief Checks if an AVL tree or AVL tree group already exists or not. * - * @param dms The OBIDMS to which the AVL tree belongs. - * @param avl_name The name of the AVL tree. + * @param dms The OBIDMS to which the AVL tree or AVL tree group belongs. + * @param avl_name The name of the AVL treeor the base name of the AVL tree group. * - * @returns A value indicating whether the AVL tree exists or not. - * @retval 1 if the AVL tree exists. - * @retval 0 if the AVL tree does not exist. + * @returns A value indicating whether the AVL tree or AVL tree group exists or not. + * @retval 1 if the AVL tree or AVL tree group exists. + * @retval 0 if the AVL tree or AVL tree group does not exist. * @retval -1 if an error occurred. * * @since December 2015 @@ -180,36 +187,19 @@ index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value); int obi_avl_exists(OBIDMS_p dms, const char* avl_name); -/** - * @brief Opens an AVL tree and creates it if it does not already exist. - * - * Note: An AVL tree is made of two files (referred to by two structures). - * One file contains the indices referring to the data, and the other - * file contains the data itself. The AVL tree as a whole is referred - * to via the OBIDMS_avl structure. - * - * @param dms The OBIDMS to which the AVL tree belongs. - * @param avl_name The name of the AVL tree. - * - * @returns A pointer to the AVL tree structure. - * @retval NULL if an error occurred. 
- * - * @since December 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name); - - /** * @brief Creates an AVL tree. Fails if it already exists. * * Note: An AVL tree is made of two files (referred to by two structures). * One file contains the indices referring to the data, and the other * file contains the data itself. The AVL tree as a whole is referred - * to via the OBIDMS_avl structure. + * to via the OBIDMS_avl structure. An AVL tree is stored in a directory + * with the same name, or with the base name of the AVL group if it is + * part of an AVL group. * * @param dms The OBIDMS to which the AVL tree belongs. * @param avl_name The name of the AVL tree. + * @param avl_idx The index of the AVL tree if it is part of an AVL group. * * @returns A pointer to the newly created AVL tree structure. * @retval NULL if an error occurred. @@ -217,7 +207,7 @@ OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name); * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name); +OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx); /** @@ -230,6 +220,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name); * * @param dms The OBIDMS to which the AVL tree belongs. * @param avl_name The name of the AVL tree. + * @param avl_idx The index of the AVL tree if it is part of an AVL group. * * @returns A pointer to the AVL tree structure. * @retval NULL if an error occurred. @@ -237,17 +228,66 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name); * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name); +OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx); + + +/** + * @brief Opens an AVL tree group and creates it if it does not already exist. 
+ * + * Note: An AVL tree group is composed of multiple AVL trees that all have the + * same base name, and an index differentiating them. + * + * @param dms The OBIDMS to which the AVL tree belongs. + * @param avl_name The base name of the AVL tree group. + * + * @returns A pointer to the AVL tree group structure. + * @retval NULL if an error occurred. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name); + + +/** + * @brief Creates an AVL tree group. + * + * Note: An AVL tree group is composed of multiple AVL trees that all have the + * same base name, and an index differentiating them. + * + * @param dms The OBIDMS to which the AVL tree belongs. + * @param avl_name The base name of the AVL tree group. + * + * @returns A pointer to the AVL tree group structure. + * @retval NULL if an error occurred. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name); + + +/** + * @brief Opens an AVL tree group. + * + * Note: An AVL tree group is composed of multiple AVL trees that all have the + * same base name, and an index differentiating them. + * + * @param dms The OBIDMS to which the AVL tree belongs. + * @param avl_name The base name of the AVL tree group. + * + * @returns A pointer to the AVL tree group structure. + * @retval NULL if an error occurred. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +OBIDMS_avl_group_p obi_open_avl_group(OBIDMS_p dms, const char* avl_name); /** * @brief Closes an AVL tree. * - * Note: An AVL tree is made of two files (referred to by two structures). - * One file contains the indices referring to the data, and the other - * file contains the data itself. The AVL tree as a whole is referred - * to via the OBIDMS_avl structure. 
- * * @param avl A pointer to the AVL tree structure to close and free. * * @retval 0 if the operation was successfully completed. @@ -260,26 +300,56 @@ int obi_close_avl(OBIDMS_avl_p avl); /** - * @brief Adds a value (byte array) in an AVL tree, checking if it is already in it. + * @brief Closes an AVL tree group. * - * @warning The byte array to add must already be encoded and contain its header. + * @param avl_group A pointer to the AVL tree group structure to close and free. + * + * @retval 0 if the operation was successfully completed. + * @retval -1 if an error occurred. + * + * @since April 2016 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +int obi_close_avl_group(OBIDMS_avl_group_p avl_group); + + +/** + * @brief Recovers a value (byte array) in an AVL tree. + * + * @warning The byte array recovered must be decoded to get the original value. + * + * @param avl A pointer to the AVL tree. + * @param index The index of the value in the data array. + * + * @returns A pointer to the byte array recovered. + * + * @since December 2015 + * @author Celine Mercier (celine.mercier@metabarcoding.org) + */ +Obi_byte_array_p obi_avl_get(OBIDMS_avl_p avl, index_t index); + + +/** + * @brief Adds a value (byte array) in an AVL tree NOT checking first if it is already in it. // TODO to discuss + * + * @warning The value given must be already be encoded into a byte array structure (Obi_byte_array_t). * * @param avl A pointer to the AVL tree. * @param value The byte array to add in the AVL tree. * - * @returns The index of the value, whether it was added or already in the AVL tree. + * @returns The index of the value newly added in the AVL tree. * @retval -1 if an error occurred. 
* * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value); +index_t obi_avl_add(OBIDMS_avl_p avl, Obi_byte_array_p value); /** - * @brief Finds a value (byte array) in an AVL tree, checking first if it is already in it. + * @brief Finds a value (byte array) in an AVL tree. * - * @warning The byte array to add must already be encoded and contain its header. + * @warning The value given must be already be encoded into a byte array structure (Obi_byte_array_t). * * @param avl A pointer to the AVL tree. * @param value The byte array to add in the AVL tree. @@ -290,86 +360,40 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value); * @since December 2015 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value); +index_t obi_avl_find(OBIDMS_avl_p avl, Obi_byte_array_p value); /** * @brief Recovers a value (byte array) in an AVL tree. * - * @warning The byte array recovered is encoded and contains its header. + * @warning The byte array recovered must be decoded to get the original value. * - * @param avl A pointer to the AVL tree. + * @param avl_group A pointer to the AVL tree. * @param index The index of the value in the data array. * * @returns A pointer to the byte array recovered. * - * @since December 2015 + * @since April 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -byte_t* obi_avl_get(OBIDMS_avl_p avl, index_t index); +Obi_byte_array_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx); /** - * @brief Converts a character string to a byte array with a header. + * @brief Adds a value (byte array) in an AVL tree group, checking if it is already in it. * - * @warning The byte array must be freed by the caller. + * @warning The value given must be already be encoded into a byte array structure (Obi_byte_array_t). * - * @param value The character string to convert. 
+ * @param avl_group A pointer to the AVL tree group. + * @param value The byte array to add in the AVL tree group. * - * @returns A pointer to the byte array created. - * @retval NULL if an error occurred. + * @returns The index of the value newly added in the AVL tree group. + * @retval -1 if an error occurred. * - * @since October 2015 + * @since April 2016 * @author Celine Mercier (celine.mercier@metabarcoding.org) */ -byte_t* obi_str_to_obibytes(char* value); - - -/** - * @brief Converts a byte array to a character string. - * - * @param value_b The byte array to convert. - * - * @returns A pointer to the character string contained in the byte array. - * - * @since October 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -const char* obi_obibytes_to_str(byte_t* value_b); - - -/** - * @brief Converts a DNA sequence to a byte array with a header. - * - * @warning The byte array must be freed by the caller. - * - * @param value The DNA sequence to convert. - * - * @returns A pointer to the byte array created. - * @retval NULL if an error occurred. - * - * @since November 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -byte_t* obi_seq_to_obibytes(char* seq); - - -/** - * @brief Converts a byte array to a DNA sequence. - * - * @param value_b The byte array to convert. - * - * @returns A pointer to the DNA sequence contained in the byte array. - * @retval NULL if an error occurred. 
- * - * @since November 2015 - * @author Celine Mercier (celine.mercier@metabarcoding.org) - */ -const char* obi_obibytes_to_seq(byte_t* value_b); // TODO move to encode source files - - -// TODO -byte_t* obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx); +index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_byte_array_p value); #endif /* OBIAVL_H_ */ diff --git a/src/obidms.c b/src/obidms.c index 4d3a646..183ec17 100644 --- a/src/obidms.c +++ b/src/obidms.c @@ -23,7 +23,7 @@ #include "obierrno.h" #include "obidebug.h" #include "obidmscolumn.h" -#include "private_at_functions.h" +#include "utils.h" #include "obilittlebigman.h" diff --git a/src/obidms.h b/src/obidms.h index 2825d14..7e5d81f 100644 --- a/src/obidms.h +++ b/src/obidms.h @@ -33,7 +33,7 @@ */ #define MAX_NB_OPENED_COLUMNS (100) /**< The maximum number of columns open at the same time. */ -#define MAX_NB_OPENED_AVL_TREES (100) /**< The maximum number of AVL trees open at the same time. +#define MAX_NB_OPENED_AVL_TREES (1000) /**< The maximum number of AVL trees open at the same time. */ diff --git a/src/obidms_taxonomy.c b/src/obidms_taxonomy.c index 4701215..1922805 100644 --- a/src/obidms_taxonomy.c +++ b/src/obidms_taxonomy.c @@ -20,7 +20,7 @@ #include "obidms.h" #include "obidebug.h" #include "obierrno.h" -#include "private_at_functions.h" +#include "utils.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) diff --git a/src/obidmscolumn.c b/src/obidmscolumn.c index 13a5169..ac1000a 100644 --- a/src/obidmscolumn.c +++ b/src/obidmscolumn.c @@ -30,6 +30,7 @@ #include "obidebug.h" #include "obilittlebigman.h" #include "obiavl.h" +#include "utils.h" #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) 
@@ -726,15 +727,15 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, // If the data type is OBI_STR or OBI_SEQ, the associated obi_avl is opened or created if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ)) { - new_column->avl = obi_create_avl_group(dms, avl_name); -// if (avl == NULL) TODO -// { -// obidebug(1, "\nError opening or creating the aVL tree associated with a column"); -// munmap(new_column->header, header_size); -// close(column_file_descriptor); -// free(new_column); -// return NULL; -// } + new_column->avl = obi_avl_group(dms, avl_name); + if (new_column->avl == NULL) + { + obidebug(1, "\nError opening or creating the AVL group associated with a column"); + munmap(new_column->header, header_size); + close(column_file_descriptor); + free(new_column); + return NULL; + } strncpy(header->avl_name, avl_name, AVL_MAX_NAME); } @@ -762,7 +763,6 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, int column_file_descriptor; size_t header_size; size_t i; - OBIDMS_avl_p avl; column = NULL; @@ -770,7 +770,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, column_directory = obi_open_column_directory(dms, column_name); if (column_directory == NULL) { - //obidebug(1, "\nError opening a column directory structure"); + //obidebug(1, "\nError opening a column directory structure"); // TODO return NULL; } @@ -879,8 +879,8 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, // If the data type is OBI_STR or OBI_SEQ, the associated AVL tree is opened if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ)) { - avl = obi_avl(dms, (column->header)->avl_name); - if (avl == NULL) + column->avl = obi_open_avl_group(dms, (column->header)->avl_name); + if (column->avl == NULL) { obidebug(1, "\nError opening the AVL tree associated with a column"); munmap(column->header, header_size); @@ -888,7 +888,6 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, free(column); return NULL; } - //column->avl = avl; TODO } 
close(column_file_descriptor); @@ -964,18 +963,13 @@ OBIDMS_column_p obi_clone_column(OBIDMS_p dms, } else if (clone_data && (line_selection != NULL)) { - obidebug(1, "\nCloning data from line selection\n"); line_size = obi_sizeof((new_column->header)->stored_data_type) * (new_column->header)->nb_elements_per_line; - fprintf(stderr, "\nline size = %ld\n", line_size); for (i=0; i<((line_selection->header)->lines_used); i++) { index = *(((index_t*) (line_selection->data)) + i); - fprintf(stderr, "\nindex = %lld, i = %lld\n", index, i); memcpy((new_column->data)+(i*line_size), (column_to_clone->data)+(index*line_size), line_size); - fprintf(stderr, "\nmemcpied\n"); } (new_column->header)->lines_used = (line_selection->header)->lines_used; - obidebug(1, "\nCloned data from line selection\n"); } // Close column_to_clone @@ -1022,12 +1016,12 @@ int obi_close_column(OBIDMS_column_p column) } } - // If the data type is OBI_STR or OBI_SEQ, the associated AVL tree is closed TODO -// if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ)) -// { -// if (obi_close_avl(column->avl) < 0) -// return -1; -// } + // If the data type is OBI_STR or OBI_SEQ, the associated AVL group is closed + if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ)) + { + if (obi_close_avl_group(column->avl) < 0) + return -1; + } // Munmap data if (munmap(column->data, (column->header)->data_size) < 0) @@ -1045,10 +1039,10 @@ int obi_close_column(OBIDMS_column_p column) return -1; } - free(column); - if (close_dir) obi_close_column_directory(column->column_directory); + + free(column); } return 0; diff --git a/src/obidmscolumn.h b/src/obidmscolumn.h index 39d9e04..159f09f 100644 --- a/src/obidmscolumn.h +++ b/src/obidmscolumn.h @@ -28,8 +28,6 @@ #include "obiavl.h" -#define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< If x is equal to 0, x takes the value 1. 
- */ #define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names. */ #define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged. diff --git a/src/obidmscolumn_seq.c b/src/obidmscolumn_seq.c index 236018c..7ab4d11 100644 --- a/src/obidmscolumn_seq.c +++ b/src/obidmscolumn_seq.c @@ -33,7 +33,7 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value) { - byte_t* value_b; + Obi_byte_array_p value_b; index_t idx; // Check that the line number is not greater than the maximum allowed @@ -56,18 +56,13 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, if ((line_nb+1) > (column->header)->lines_used) (column->header)->lines_used = line_nb+1; - // Encode the value on a byte array with a header + // Encode the value on a byte array with a header // TODO make function value_b = obi_seq_to_obibytes(value); if (value_b == NULL) return -1; - //if (strlen(value_b) == 0) - // fprintf(stderr, "\nPOUIC"); - - //fprintf(stderr, "\n>%s||%s", value, obi_obibytes_to_seq(value_b)); - // Add in the AVL tree - idx = insert_in_avl_group(column->avl, value_b); + idx = obi_avl_group_add(column->avl, value_b); if (idx == -1) return -1; @@ -119,8 +114,8 @@ int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p c const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) { - index_t idx; - byte_t* value_b; + index_t idx; + Obi_byte_array_p value_b; if ((line_nb+1) > ((column->header)->line_count)) { diff --git a/src/obidmscolumn_str.c b/src/obidmscolumn_str.c index 92c057a..83a8680 100644 --- a/src/obidmscolumn_str.c +++ b/src/obidmscolumn_str.c @@ -32,8 +32,8 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value) { - byte_t* value_b; - index_t idx; + Obi_byte_array_p value_b; + index_t idx; // Check that the line number is 
not greater than the maximum allowed if (line_nb >= MAXIMUM_LINE_COUNT) @@ -61,7 +61,7 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, return -1; // Add in the AVL tree - idx = insert_in_avl_group(column->avl, value_b); + idx = obi_avl_group_add(column->avl, value_b); if (idx == -1) return -1; @@ -113,8 +113,8 @@ int obi_column_set_obistr_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p c const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx) { - index_t idx; - byte_t* value_b; + index_t idx; + Obi_byte_array_p value_b; if ((line_nb+1) > ((column->header)->line_count)) { diff --git a/src/obidmscolumndir.c b/src/obidmscolumndir.c index bfe6d1e..eab7adb 100644 --- a/src/obidmscolumndir.c +++ b/src/obidmscolumndir.c @@ -20,7 +20,7 @@ #include "obidmscolumndir.h" #include "obidms.h" -#include "private_at_functions.h" +#include "utils.h" #include "obierrno.h" #include "obidebug.h" diff --git a/src/obierrno.h b/src/obierrno.h index 54a9d7b..a74e42b 100644 --- a/src/obierrno.h +++ b/src/obierrno.h @@ -104,6 +104,14 @@ extern int obi_errno; */ #define OBI_TAXONOMY_ERROR (22) /** Error while handling binary taxonomy files */ +#define OBI_MALLOC_ERROR (23) /** Error while allocating memory + */ +#define OBI_ENCODE_ERROR (24) /** Error while encoding a value + */ +#define OBI_DECODE_ERROR (25) /** Error while decoding a value + */ +#define OBI_UTILS_ERROR (26) /** Error in a utils function + */ /**@}*/ #endif /* OBIERRNO_H_ */ diff --git a/src/obiview.c b/src/obiview.c index 73da4be..5c0acb4 100644 --- a/src/obiview.c +++ b/src/obiview.c @@ -22,7 +22,7 @@ #include "obierrno.h" #include "obidebug.h" #include "obidmscolumn.h" -#include "private_at_functions.h" +#include "utils.h" #include "obilittlebigman.h" #include "obidmscolumn_idx.h" diff --git a/src/private_at_functions.c b/src/utils.c similarity index 62% rename from src/private_at_functions.c rename to src/utils.c index 
3207fd7..cb287dc 100644 --- a/src/private_at_functions.c +++ b/src/utils.c @@ -1,15 +1,14 @@ /**************************************************************************** - * Private *at functions * + * Utility functions * ****************************************************************************/ /** - * @file private_at_functions.c + * @file utils.c * @author Celine Mercier (celine.mercier@metabarcoding.org) - * @date 15 June 2015 - * @brief Private replacement functions for *at functions. + * @date 29 March 2016 + * @brief Code for utility functions. */ - #include #include #include @@ -19,7 +18,7 @@ #include #include -#include "private_at_functions.h" +#include "utils.h" #include "obidebug.h" #include "obierrno.h" #include "obidms.h" @@ -28,6 +27,13 @@ #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) + +/********************************************************************** + * + * D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S + * + **********************************************************************/ + char* get_full_path(OBIDMS_p dms, const char* path_name) { char* full_path; @@ -35,17 +41,18 @@ char* get_full_path(OBIDMS_p dms, const char* path_name) full_path = (char*) malloc((MAX_PATH_LEN)*sizeof(char)); if (full_path == NULL) { + obi_set_errno(OBI_MALLOC_ERROR); obidebug(1, "\nError allocating memory for the char* path to a file or directory"); return NULL; } if (getcwd(full_path, MAX_PATH_LEN) == NULL) { + obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nError getting the path to a file or directory"); return NULL; } - // TODO check errors? 
strcat(full_path, "/"); strcat(full_path, dms->directory_name); strcat(full_path, "/"); @@ -66,7 +73,10 @@ DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name) directory = opendir(full_path); if (directory == NULL) + { + obi_set_errno(OBI_UTILS_ERROR); obidebug(1, "\nError opening a directory"); + } free(full_path); @@ -74,3 +84,24 @@ DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name) } +int count_dir(char *dir) +{ + struct dirent *dp; + DIR *fd; + int count; + + count = 0; + if ((fd = opendir(dir)) == NULL) + { + obi_set_errno(OBI_UTILS_ERROR); + obidebug(1, "Error opening a directory: %s\n", dir); + return -1; + } + while ((dp = readdir(fd)) != NULL) + { + if ((dp->d_name)[0] == '.') + continue; + count++; + } + return count; +} diff --git a/src/private_at_functions.h b/src/utils.h similarity index 73% rename from src/private_at_functions.h rename to src/utils.h index 668f80a..20fddf2 100644 --- a/src/private_at_functions.h +++ b/src/utils.h @@ -1,25 +1,30 @@ /**************************************************************************** - * Header file for private *at functions * + * Header file for utility functions * ****************************************************************************/ /** - * @file private_at_functions.h + * @file utils.h * @author Celine Mercier (celine.mercier@metabarcoding.org) - * @date 15 June 2015 - * @brief Header file for the private replacement functions for *at functions. + * @date 29 March 2016 + * @brief Header file for utility functions. */ -#ifndef PRIVATE_OPENAT_H_ -#define PRIVATE_OPENAT_H_ - +#ifndef UTILS_H_ +#define UTILS_H_ +#include #include #include "obidms.h" -#define MAX_PATH_LEN 4096 /**< Maximum length for the character string defining a - file or directory path */ + +#define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< If x is equal to 0, x takes the value 1. + */ + +#define MAX_PATH_LEN 4096 /**< Maximum length for the character string defining a + * file or directory path. 
+ */ /** @@ -56,4 +61,10 @@ char* get_full_path(OBIDMS_p dms, const char* path_name); DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name); -#endif /* PRIVATEOPENAT_H_ */ +/* + * TODO + */ +int count_dir(char *dir); + + +#endif /* UTILS_H_ */