diff --git a/src/obiavl.c b/src/obiavl.c index 0d9ac28..50022cf 100644 --- a/src/obiavl.c +++ b/src/obiavl.c @@ -30,6 +30,159 @@ #define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?) +////crc crcTable[256]; +//static crc crcTable[] = { +//0x00, 0xd8, 0x68, 0xb0, 0xd0, 0x8, 0xb8, 0x60, 0x78, 0xa0, 0x10, 0xc8, 0xa8, 0x70, 0xc0, 0x18, 0xf0, 0x28, 0x98, 0x40, 0x20, 0xf8, 0x48, 0x90, 0x88, 0x50, 0xe0, 0x38, 0x58, 0x80, 0x30, 0xe8, 0x38, 0xe0, 0x50, 0x88, 0xe8, 0x30, 0x80, 0x58, 0x40, 0x98, 0x28, 0xf0, 0x90, 0x48, 0xf8, 0x20, 0xc8, 0x10, 0xa0, 0x78, 0x18, 0xc0, 0x70, 0xa8, 0xb0, 0x68, 0xd8, 0, 0x60, 0xb8, 0x8, 0xd0, 0x70, 0xa8, 0x18, 0xc0, 0xa0, 0x78, 0xc8, 0x10, 0x8, 0xd0, 0x60, 0xb8, 0xd8, 0, 0xb0, 0x68, 0x80, 0x58, 0xe8, 0x30, 0x50, 0x88, 0x38, 0xe0, 0xf8, 0x20, 0x90, 0x48, 0x28, 0xf0, 0x40, 0x98, 0x48, 0x90, 0x20, 0xf8, 0x98, 0x40, 0xf0, 0x28, 0x30, 0xe8, 0x58, 0x80, 0xe0, 0x38, 0x88, 0x50, 0xb8, 0x60, 0xd0, 0x8, 0x68, 0xb0, 0, 0xd8, 0xc0, 0x18, 0xa8, 0x70, 0x10, 0xc8, 0x78, 0xa0, 0xe0, 0x38, 0x88, 0x50, 0x30, 0xe8, 0x58, 0x80, 0x98, 0x40, 0xf0, 0x28, 0x48, 0x90, 0x20, 0xf8, 0x10, 0xc8, 0x78, 0xa0, 0xc0, 0x18, 0xa8, 0x70, 0x68, 0xb0, 0, 0xd8, 0xb8, 0x60, 0xd0, 0x8, 0xd8, 0, 0xb0, 0x68, 0x8, 0xd0, 0x60, 0xb8, 0xa0, 0x78, 0xc8, 0x10, 0x70, 0xa8, 0x18, 0xc0, 0x28, 0xf0, 0x40, 0x98, 0xf8, 0x20, 0x90, 0x48, 0x50, 0x88, 0x38, 0xe0, 0x80, 0x58, 0xe8, 0x30, 0x90, 0x48, 0xf8, 0x20, 0x40, 0x98, 0x28, 0xf0, 0xe8, 0x30, 0x80, 0x58, 0x38, 0xe0, 0x50, 0x88, 0x60, 0xb8, 0x8, 0xd0, 0xb0, 0x68, 0xd8, 0, 0x18, 0xc0, 0x70, 0xa8, 0xc8, 0x10, 0xa0, 0x78, 0xa8, 0x70, 0xc0, 0x18, 0x78, 0xa0, 0x10, 0xc8, 0xd0, 0x8, 0xb8, 0x60, 0, 0xd8, 0x68, 0xb0, 0x58, 0x80, 0x30, 0xe8, 0x88, 0x50, 0xe0, 0x38, 0x20, 0xf8, 0x48, 0x90, 0xf0, 0x28, 0x98, 0x40 +//}; +// +// +//void crcInit(void) +//{ +// crc remainder; +// +// fprintf(stderr, "\n"); +// +// /* +// * Compute the remainder of each possible dividend. 
+// */ +// for (int dividend = 0; dividend < 256; ++dividend) +// { +// /* +// * Start with the dividend followed by zeros. +// */ +// remainder = dividend << (WIDTH - 8); +// +// /* +// * Perform modulo-2 division, a bit at a time. +// */ +// for (uint8_t bit = 8; bit > 0; --bit) +// { +// /* +// * Try to divide the current data bit. +// */ +// if (remainder & TOPBIT) +// { +// remainder = (remainder << 1) ^ POLYNOMIAL; +// } +// else +// { +// remainder = (remainder << 1); +// } +// } +// +// /* +// * Store the result into the table. +// */ +// crcTable[dividend] = remainder; +// fprintf(stderr, "%#x, ", remainder); +// } +// +//} /* crcInit() */ +// +// +//crc crcFast(uint8_t const message[], int nBytes) +//{ +// uint8_t data; +// crc remainder = 0; +// +// +// /* +// * Divide the message by the polynomial, a byte at a time. +// */ +// for (int byte = 0; byte < nBytes; ++byte) +// { +// data = message[byte] ^ (remainder >> (WIDTH - 8)); +// remainder = crcTable[data] ^ (remainder << 8); +// } +// +// /* +// * The final remainder is the CRC. 
+// */
+// return (remainder);
+//
+//} /* crcFast() */
+//
+//
+//crc compute_crc(const char* s)
+//{
+// crc c;
+// //uint8_t cache;
+//
+// //cache = 15;
+//
+//// crcInit();
+//
+// c = crcFast(s, strlen(s));
+//
+// //fprintf(stderr, "\nlen = %d", strlen(argv[1]));
+//
+// //fprintf(stderr, "\ncrc = %u\n\n", c);
+// //fprintf(stderr, "\ncrc mod 8 = %u\n\n", c%8);
+//
+// c = c >> 3;
+// //fprintf(stderr, "\nshifted crc = %u\n\n", c);
+//
+// //c = c & cache;
+// //c = c % 32;
+//
+// //fprintf(stderr, "\ncrc = %u\n\n", c);
+//
+// return (c & 7);
+//}
+
+static const unsigned char crc8_table[] = {
+	0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0x95, 0xab, 0xe9, 0xd7,
+	0x6d, 0x53, 0x11, 0x2f, 0x4f, 0x71, 0x33, 0x0d, 0xb7, 0x89, 0xcb, 0xf5,
+	0xda, 0xe4, 0xa6, 0x98, 0x22, 0x1c, 0x5e, 0x60, 0x9e, 0xa0, 0xe2, 0xdc,
+	0x66, 0x58, 0x1a, 0x24, 0x0b, 0x35, 0x77, 0x49, 0xf3, 0xcd, 0x8f, 0xb1,
+	0xd1, 0xef, 0xad, 0x93, 0x29, 0x17, 0x55, 0x6b, 0x44, 0x7a, 0x38, 0x06,
+	0xbc, 0x82, 0xc0, 0xfe, 0x59, 0x67, 0x25, 0x1b, 0xa1, 0x9f, 0xdd, 0xe3,
+	0xcc, 0xf2, 0xb0, 0x8e, 0x34, 0x0a, 0x48, 0x76, 0x16, 0x28, 0x6a, 0x54,
+	0xee, 0xd0, 0x92, 0xac, 0x83, 0xbd, 0xff, 0xc1, 0x7b, 0x45, 0x07, 0x39,
+	0xc7, 0xf9, 0xbb, 0x85, 0x3f, 0x01, 0x43, 0x7d, 0x52, 0x6c, 0x2e, 0x10,
+	0xaa, 0x94, 0xd6, 0xe8, 0x88, 0xb6, 0xf4, 0xca, 0x70, 0x4e, 0x0c, 0x32,
+	0x1d, 0x23, 0x61, 0x5f, 0xe5, 0xdb, 0x99, 0xa7, 0xb2, 0x8c, 0xce, 0xf0,
+	0x4a, 0x74, 0x36, 0x08, 0x27, 0x19, 0x5b, 0x65, 0xdf, 0xe1, 0xa3, 0x9d,
+	0xfd, 0xc3, 0x81, 0xbf, 0x05, 0x3b, 0x79, 0x47, 0x68, 0x56, 0x14, 0x2a,
+	0x90, 0xae, 0xec, 0xd2, 0x2c, 0x12, 0x50, 0x6e, 0xd4, 0xea, 0xa8, 0x96,
+	0xb9, 0x87, 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x03, 0x63, 0x5d, 0x1f, 0x21,
+	0x9b, 0xa5, 0xe7, 0xd9, 0xf6, 0xc8, 0x8a, 0xb4, 0x0e, 0x30, 0x72, 0x4c,
+	0xeb, 0xd5, 0x97, 0xa9, 0x13, 0x2d, 0x6f, 0x51, 0x7e, 0x40, 0x02, 0x3c,
+	0x86, 0xb8, 0xfa, 0xc4, 0xa4, 0x9a, 0xd8, 0xe6, 0x5c, 0x62, 0x20, 0x1e,
+	0x31, 0x0f, 0x4d, 0x73, 0xc9, 0xf7, 0xb5, 0x8b, 0x75, 0x4b, 0x09, 0x37,
+	0x8d, 0xb3, 0xf1, 0xcf, 0xe0, 0xde, 0x9c, 0xa2, 0x18, 0x26, 0x64, 0x5a,
+	0x3a, 0x04, 0x46, 0x78, 0xc2, 0xfc, 0xbe, 0x80, 0xaf, 0x91, 0xd3, 0xed,
+	0x57, 0x69, 0x2b, 0x15};
+
+/**
+ * Table-driven CRC-8 of the `len` bytes starting at `data`.
+ * The register is inverted before and after the table walk; an empty
+ * buffer (len == 0) is never dereferenced and yields 0.
+ */
+unsigned crc8(const unsigned char *data, size_t len)
+{
+	unsigned crc = 0xff;	// inverted initial value (0 ^ 0xff)
+
+	// Counting down (rather than do/while) leaves an empty buffer untouched
+	while (len-- > 0)
+		crc = crc8_table[crc ^ *data++];
+	return crc ^ 0xff;
+}
+
+/**
+ * Returns the three low-order bits of the CRC-8 of the NUL-terminated
+ * string `s`, i.e. a value in [0, 7].
+ * NOTE(review): callers index a 64-entry AVL array with this value, so
+ * only 8 of the 64 trees are reachable -- confirm whether `& 63` was meant.
+ */
+crc compute_crc(const char* s)
+{
+	// crc8() works on raw bytes, hence the cast from char to unsigned char
+	unsigned c = crc8((const unsigned char*) s, strlen(s));
+
+	return (c & 7);
+}
+
 /**************************************************************************
  *
  * D E C L A R A T I O N O F T H E P R I V A T E F U N C T I O N S
@@ -605,6 +758,8 @@ int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed
 
 	// Set new data size
 	((avl->data)->header)->data_size_max = new_data_size;
+	//fprintf(stderr, "\nGrowing AVL, new data size = %lld, count = %ld\n", new_data_size, (avl->header)->nb_items);
+
 	// Initialize new data to 0
 	memset(((avl->data)->data)+old_data_size, 0, new_data_size - old_data_size);
 
@@ -976,6 +1131,52 @@ OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name)
 }
 
 
+/**
+ * Creates 64 AVL trees with obi_create_avl(), named avl_name + "_0" to avl_name + "_63".
+ *
+ * @param dms Passed unchanged to obi_create_avl().
+ * @param avl_name Prefix shared by the names of the 64 trees.
+ *
+ * @returns A malloc'ed array of 64 AVL tree pointers that the caller must free,
+ *          or NULL if the allocation, the formatting of a name or the creation
+ *          of a tree failed (the trees created so far are then closed again).
+ */
+OBIDMS_avl_p* obi_create_avl_in_64_parts(OBIDMS_p dms, const char* avl_name)
+{
+	OBIDMS_avl_p*	avls;
+	char*			avl_name_with_idx;
+	unsigned int	i;
+
+	avls = (OBIDMS_avl_p*) malloc(64*sizeof(OBIDMS_avl_p));
+	if (avls == NULL)
+		return NULL;
+
+	for (i=0; i < 64; i++)
+	{
+		avls[i] = NULL;
+
+		// asprintf() leaves its output pointer undefined on failure, so only use it on success
+		if (asprintf(&avl_name_with_idx, "%s_%u", avl_name, i) >= 0)
+		{
+			avls[i] = obi_create_avl(dms, avl_name_with_idx);
+			// NOTE(review): assumes obi_create_avl() copies the name instead of keeping the pointer -- confirm
+			free(avl_name_with_idx);
+		}
+
+		if (avls[i] == NULL)	// presumably how obi_create_avl() reports an error -- verify
+		{
+			// All or nothing: close the trees already created before giving up
+			while (i-- > 0)
+				obi_close_avl(avls[i]);
+			free(avls);
+			return NULL;
+		}
+	}
+
+	return avls;
+}
+
+
 OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name)
 {
 	char* avl_file_name;
@@ -1446,7 +1647,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
 	index_t node_idx;
 	byte_t* to_compare;
 	int comp;
-	uint8_t n = 0;
+	int n = 0;
 	int depth = 0;
 
 	// Check if first node
@@ -1499,7 +1700,10 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
 			next = current_node->right_child;
 		else if (comp == 0)
 			// Value already stored
+		{
+			//fprintf(stderr, 
"\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items); return current_node->value; + } depth++; } @@ -1612,7 +1788,7 @@ byte_t* obi_str_to_obibytes(char* value) *((int32_t*)(value_b+1)) = length; // Store the initial length (in bytes) of the decoded value (same as encoded for character strings) - *((int32_t*)(value_b+5)) = length; + *((int64_t*)(value_b+5)) = length; // Store the character string strcpy(value_b+BYTE_ARRAY_HEADER_SIZE, value); diff --git a/src/obiavl.h b/src/obiavl.h index bc4e918..1369b8f 100644 --- a/src/obiavl.h +++ b/src/obiavl.h @@ -340,5 +340,12 @@ byte_t* obi_seq_to_obibytes(char* seq); const char* obi_obibytes_to_seq(byte_t* value_b); +OBIDMS_avl_p* obi_create_avl_in_64_parts(OBIDMS_p dms, const char* avl_name); + +typedef uint8_t crc; + +crc compute_crc(const char* s); + + #endif /* OBIAVL_H_ */ diff --git a/src/obidmscolumn.c b/src/obidmscolumn.c index 4f5f4bc..bfa8579 100644 --- a/src/obidmscolumn.c +++ b/src/obidmscolumn.c @@ -521,7 +521,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, OBIDMS_column_p new_column; OBIDMS_column_directory_p column_directory; OBIDMS_column_header_p header; - OBIDMS_avl_p avl; + OBIDMS_avl_p* avl; size_t file_size; obiversion_t version_number; char* column_file_name; @@ -723,7 +723,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, // If the data type is OBI_STR or OBI_SEQ, the associated obi_avl is opened or created if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ)) { - avl = obi_avl(dms, avl_name); + avl = obi_create_avl_in_64_parts(dms, avl_name); if (avl == NULL) { obidebug(1, "\nError opening or creating the aVL tree associated with a column"); @@ -732,7 +732,7 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms, free(new_column); return NULL; } - new_column->avl = avl; + memcpy(new_column->avl, avl, 64*sizeof(OBIDMS_avl_p)); strncpy(header->avl_name, avl_name, AVL_MAX_NAME); } @@ -886,7 +886,7 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, 
free(column); return NULL; } - column->avl = avl; + //column->avl = avl; TODO } close(column_file_descriptor); @@ -1020,12 +1020,12 @@ int obi_close_column(OBIDMS_column_p column) } } - // If the data type is OBI_STR or OBI_SEQ, the associated AVL tree is closed - if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ)) - { - if (obi_close_avl(column->avl) < 0) - return -1; - } + // If the data type is OBI_STR or OBI_SEQ, the associated AVL tree is closed TODO +// if (((column->header)->returned_data_type == OBI_STR) || ((column->header)->returned_data_type == OBI_SEQ)) +// { +// if (obi_close_avl(column->avl) < 0) +// return -1; +// } // Munmap data if (munmap(column->data, (column->header)->data_size) < 0) diff --git a/src/obidmscolumn.h b/src/obidmscolumn.h index c5d355b..00db5f7 100644 --- a/src/obidmscolumn.h +++ b/src/obidmscolumn.h @@ -98,7 +98,7 @@ typedef struct OBIDMS_column { */ OBIDMS_column_header_p header; /**< A pointer to the header of the column. */ - OBIDMS_avl_p avl; /**< A pointer to the AVL tree associated with the column if there is one. + OBIDMS_avl_p avl[64]; /**< A pointer to the group of AVL trees associated with the column if there is one. */ void* data; /**< A `void` pointer to the beginning of the data. 
* diff --git a/src/obidmscolumn_seq.c b/src/obidmscolumn_seq.c index 789b859..8d329d7 100644 --- a/src/obidmscolumn_seq.c +++ b/src/obidmscolumn_seq.c @@ -61,8 +61,13 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, if (value_b == NULL) return -1; + if (strlen(value_b) == 0) + fprintf(stderr, "\nPOUIC"); + + //fprintf(stderr, "\n>%s||%s", value, obi_obibytes_to_seq(value_b)); + // Add in the AVL tree - idx = obi_avl_add(column->avl, value_b); + idx = obi_avl_add((column->avl)[compute_crc(value)], value_b); if (idx == -1) return -1; @@ -130,7 +135,7 @@ const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t l if (idx == OBIIdx_NA) return OBISeq_NA; - value_b = obi_avl_get(column->avl, idx); + //value_b = obi_avl_get((column->avl)[crc(value)], idx); return obi_obibytes_to_seq(value_b); } diff --git a/src/obidmscolumn_str.c b/src/obidmscolumn_str.c index c92ca1d..d4a15f5 100644 --- a/src/obidmscolumn_str.c +++ b/src/obidmscolumn_str.c @@ -61,7 +61,7 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, return -1; // Add in the AVL tree - idx = obi_avl_add(column->avl, value_b); + idx = obi_avl_add((column->avl)[compute_crc(value)], value_b); if (idx == -1) return -1; @@ -129,7 +129,7 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l if (idx == OBIIdx_NA) return OBIStr_NA; - value_b = obi_avl_get(column->avl, idx); + //value_b = obi_avl_get(column->avl, idx); return obi_obibytes_to_str(value_b); }