Untested (and with no retrieval possible) use of a CRC to represent data in AVL trees
This commit is contained in:
Celine Mercier
2016-03-24 16:38:11 +01:00
parent 58ac860cc7
commit 69856f18dd
5 changed files with 252 additions and 17 deletions

View File

@ -19,9 +19,8 @@
#include <fcntl.h>
#include <math.h>
//#include <libbloom.h>
#include "bloom.h"
#include "crc64.h"
#include "obiavl.h"
#include "obierrno.h"
#include "obitypes.h"
@ -692,6 +691,7 @@ AVL_node_p avl_create_node(OBIDMS_avl_p avl, index_t node_idx)
node->right_child = -1;
node->balance_factor = 0;
node->value = -1;
node->crc64 = 0; // TODO
return node;
}
@ -1018,7 +1018,7 @@ int remap_an_avl(OBIDMS_avl_p avl)
(avl->data)->data = mmap(NULL,
((avl->data)->header)->data_size_max,
PROT_READ,
MAP_SHARED,
MAP_SHARED, // TODO test MAP_PRIVATE?
avl->data_fd,
((avl->data)->header)->header_size);
if ((avl->data)->data == NULL)
@ -1027,7 +1027,7 @@ int remap_an_avl(OBIDMS_avl_p avl)
avl->tree = mmap(NULL,
((avl->header)->nb_items_max) * sizeof(AVL_node_t),
PROT_READ,
MAP_SHARED,
MAP_SHARED, // TODO test MAP_PRIVATE?
avl->avl_fd,
(avl->header)->header_size);
if (avl->tree == NULL)
@ -1054,8 +1054,6 @@ int obi_add_new_avl_in_group(OBIDMS_avl_group_p avl_group) // TODO check for err
return -1;
}
//fprintf(stderr, "\nindex length = %d, file name = %s\n", avl_idx_length, avl_name_with_idx);
(avl_group->sub_avls)[avl_group->current_avl_idx] = obi_create_avl(avl_group->dms, avl_name_with_idx);
if ((avl_group->sub_avls)[avl_group->current_avl_idx] == NULL)
{
@ -1578,7 +1576,8 @@ index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value) // TODO
if ((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->nb_items == NODE_COUNT_PER_AVL) // TODO add condition with data size
obi_add_new_avl_in_group(avl_group);
bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1))));
bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)));
return obi_avl_add((avl_group->sub_avls)[avl_group->current_avl_idx], value);
}
@ -1596,6 +1595,10 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
int n = 0;
int depth = 0;
uint64_t crc;
crc = crc64(value, BYTE_ARRAY_HEADER_SIZE + ((uint64_t) (*((int32_t*)(value+1))))); // TODO warning
// Check if first node
if (!((avl->header)->nb_items))
{
@ -1604,6 +1607,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
// Add the value in the data array and store its index
value_data_idx = avl_add_value_in_data_array(avl, value);
node_to_add->value = value_data_idx;
node_to_add->crc64 = crc;
// Update the number of items
((avl->header)->nb_items)++;
@ -1635,8 +1639,15 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
parent = next;
// Compare value with value of current node
to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value);
//to_compare = obi_avl_get(avl, current_node->value);
//comp = byte_array_compare(to_compare, value);
comp = (current_node->crc64) - crc;
if (comp == 0)
{ // check if really same value
to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value);
}
if (comp > 0)
// Go to left child
@ -1647,7 +1658,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
else if (comp == 0)
// Value already stored
{
//fprintf(stderr, "\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
fprintf(stderr, "\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
return current_node->value; // TODO should trigger error if using bloom filters
}
@ -1676,6 +1687,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
// Add the value in the data array and store its index
value_data_idx = avl_add_value_in_data_array(avl, value);
node_to_add->value = value_data_idx;
node_to_add->crc64 = crc;
// Update the number of items
((avl->header)->nb_items)++;
@ -1706,7 +1718,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
}
// Find if a value is already in an AVL tree TODO use bloom
// Find if a value is already in an AVL tree
index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
{
int comp;
@ -1714,14 +1726,25 @@ index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
byte_t* to_compare;
AVL_node_p current_node;
uint64_t crc;
crc = crc64(value, BYTE_ARRAY_HEADER_SIZE + ((uint64_t) (*((int32_t*)(value+1))))); // TODO warning
next = (avl->header)->root_idx;
while (next != -1)
{
current_node = (avl->tree)+next;
// Compare value with value of current node
to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value);
//to_compare = obi_avl_get(avl, current_node->value);
//comp = byte_array_compare(to_compare, value);
comp = (current_node->crc64) - crc;
if (comp == 0)
{ // check if really same value
to_compare = obi_avl_get(avl, current_node->value);
comp = byte_array_compare(to_compare, value);
}
if (comp > 0)
// Go to left child
@ -1730,8 +1753,10 @@ index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
// Go to right child
next = current_node->right_child;
else if (comp == 0)
// Value found
{ // Value found
fprintf(stderr, "\n>>>ALREADY IN in find, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
return current_node->value;
}
}
// Value not found
return -1;