Multiple AVLs with bloom filters (very raw test version)
This commit is contained in:
@ -22,3 +22,7 @@
|
|||||||
../../../src/obidmscolumn_idx.c
|
../../../src/obidmscolumn_idx.c
|
||||||
../../../src/obidms_taxonomy.c
|
../../../src/obidms_taxonomy.c
|
||||||
../../../src/obidms_taxonomy.h
|
../../../src/obidms_taxonomy.h
|
||||||
|
../../../src/bloom.c
|
||||||
|
../../../src/bloom.h
|
||||||
|
../../../src/MurmurHash2.c
|
||||||
|
../../../src/murmurhash2.h
|
64
src/MurmurHash2.c
Executable file
64
src/MurmurHash2.c
Executable file
@ -0,0 +1,64 @@
|
|||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
// MurmurHash2, by Austin Appleby
|
||||||
|
|
||||||
|
// Note - This code makes a few assumptions about how your machine behaves -
|
||||||
|
|
||||||
|
// 1. We can read a 4-byte value from any address without crashing
|
||||||
|
// 2. sizeof(int) == 4
|
||||||
|
|
||||||
|
// And it has a few limitations -
|
||||||
|
|
||||||
|
// 1. It will not work incrementally.
|
||||||
|
// 2. It will not produce the same results on little-endian and big-endian
|
||||||
|
// machines.
|
||||||
|
|
||||||
|
unsigned int murmurhash2(const void * key, int len, const unsigned int seed)
|
||||||
|
{
|
||||||
|
// 'm' and 'r' are mixing constants generated offline.
|
||||||
|
// They're not really 'magic', they just happen to work well.
|
||||||
|
|
||||||
|
const unsigned int m = 0x5bd1e995;
|
||||||
|
const int r = 24;
|
||||||
|
|
||||||
|
// Initialize the hash to a 'random' value
|
||||||
|
|
||||||
|
unsigned int h = seed ^ len;
|
||||||
|
|
||||||
|
// Mix 4 bytes at a time into the hash
|
||||||
|
|
||||||
|
const unsigned char * data = (const unsigned char *)key;
|
||||||
|
|
||||||
|
while(len >= 4)
|
||||||
|
{
|
||||||
|
unsigned int k = *(unsigned int *)data;
|
||||||
|
|
||||||
|
k *= m;
|
||||||
|
k ^= k >> r;
|
||||||
|
k *= m;
|
||||||
|
|
||||||
|
h *= m;
|
||||||
|
h ^= k;
|
||||||
|
|
||||||
|
data += 4;
|
||||||
|
len -= 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle the last few bytes of the input array
|
||||||
|
|
||||||
|
switch(len)
|
||||||
|
{
|
||||||
|
case 3: h ^= data[2] << 16;
|
||||||
|
case 2: h ^= data[1] << 8;
|
||||||
|
case 1: h ^= data[0];
|
||||||
|
h *= m;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Do a few final mixes of the hash to ensure the last few
|
||||||
|
// bytes are well-incorporated.
|
||||||
|
|
||||||
|
h ^= h >> 13;
|
||||||
|
h *= m;
|
||||||
|
h ^= h >> 15;
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
212
src/bloom.c
Executable file
212
src/bloom.c
Executable file
@ -0,0 +1,212 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2012-2015, Jyri J. Virkki
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This file is under BSD license. See LICENSE file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Refer to bloom.h for documentation on the public interfaces.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "bloom.h"
|
||||||
|
#include "murmurhash2.h"
|
||||||
|
|
||||||
|
#define MAKESTRING(n) STRING(n)
|
||||||
|
#define STRING(n) #n
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
unsigned detect_bucket_size(unsigned fallback_size);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
static int test_bit_set_bit(unsigned char * buf, unsigned int x, int set_bit)
|
||||||
|
{
|
||||||
|
register uint32_t * word_buf = (uint32_t *)buf;
|
||||||
|
register unsigned int offset = x >> 5;
|
||||||
|
register uint32_t word = word_buf[offset];
|
||||||
|
register unsigned int mask = 1 << (x % 32);
|
||||||
|
|
||||||
|
if (word & mask) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
if (set_bit) {
|
||||||
|
word_buf[offset] = word | mask;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int bloom_check_add(struct bloom * bloom,
|
||||||
|
const void * buffer, int len, int add)
|
||||||
|
{
|
||||||
|
if (bloom->ready == 0) {
|
||||||
|
(void)printf("bloom at %p not initialized!\n", (void *)bloom);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int hits = 0;
|
||||||
|
register unsigned int a = murmurhash2(buffer, len, 0x9747b28c);
|
||||||
|
register unsigned int b = murmurhash2(buffer, len, a);
|
||||||
|
register unsigned int x;
|
||||||
|
register unsigned int i;
|
||||||
|
|
||||||
|
unsigned bucket_index = (a % bloom->buckets);
|
||||||
|
|
||||||
|
unsigned char * bucket_ptr =
|
||||||
|
(bloom->bf + (bucket_index << bloom->bucket_bytes_exponent));
|
||||||
|
|
||||||
|
for (i = 0; i < bloom->hashes; i++) {
|
||||||
|
x = (a + i*b) & bloom->bucket_bits_fast_mod_operand;
|
||||||
|
if (test_bit_set_bit(bucket_ptr, x, add)) {
|
||||||
|
hits++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hits == bloom->hashes) {
|
||||||
|
return 1; // 1 == element already in (or collision)
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void setup_buckets(struct bloom * bloom, unsigned int cache_size)
|
||||||
|
{
|
||||||
|
// If caller passed a non-zero cache_size, use it as given, otherwise
|
||||||
|
// either compute it or use built-in default
|
||||||
|
|
||||||
|
if (cache_size == 0) {
|
||||||
|
#ifdef __linux__
|
||||||
|
cache_size = detect_bucket_size(BLOOM_BUCKET_SIZE_FALLBACK);
|
||||||
|
#else
|
||||||
|
cache_size = BLOOM_BUCKET_SIZE_FALLBACK;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom->buckets = (bloom->bytes / cache_size);
|
||||||
|
bloom->bucket_bytes = cache_size;
|
||||||
|
|
||||||
|
// make sure bloom buffer bytes and bucket_bytes are even
|
||||||
|
int not_even_by = (bloom->bytes % bloom->bucket_bytes);
|
||||||
|
|
||||||
|
if (not_even_by) {
|
||||||
|
// adjust bytes
|
||||||
|
bloom->bytes += (bloom->bucket_bytes - not_even_by);
|
||||||
|
assert((bloom->bytes % bloom->bucket_bytes) == 0); // Should get even
|
||||||
|
|
||||||
|
// adjust bits
|
||||||
|
bloom->bits = bloom->bytes * 8;
|
||||||
|
|
||||||
|
// adjust bits per element
|
||||||
|
bloom->bpe = bloom->bits*1. / bloom->entries;
|
||||||
|
|
||||||
|
// adjust buckets
|
||||||
|
bloom->buckets++;
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom->bucket_bytes_exponent = __builtin_ctz(cache_size);
|
||||||
|
bloom->bucket_bits_fast_mod_operand = (cache_size * 8 - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int bloom_init_size(struct bloom * bloom, int entries, double error,
|
||||||
|
unsigned int cache_size)
|
||||||
|
{
|
||||||
|
bloom->ready = 0;
|
||||||
|
|
||||||
|
if (entries < 1 || error == 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom->entries = entries;
|
||||||
|
bloom->error = error;
|
||||||
|
|
||||||
|
double num = log(bloom->error);
|
||||||
|
double denom = 0.480453013918201; // ln(2)^2
|
||||||
|
bloom->bpe = -(num / denom);
|
||||||
|
|
||||||
|
double dentries = (double)entries;
|
||||||
|
bloom->bits = (int)(dentries * bloom->bpe);
|
||||||
|
|
||||||
|
if (bloom->bits % 8) {
|
||||||
|
bloom->bytes = (bloom->bits / 8) + 1;
|
||||||
|
} else {
|
||||||
|
bloom->bytes = bloom->bits / 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom->hashes = (int)ceil(0.693147180559945 * bloom->bpe); // ln(2)
|
||||||
|
|
||||||
|
setup_buckets(bloom, cache_size);
|
||||||
|
|
||||||
|
bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
|
||||||
|
if (bloom->bf == NULL) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bloom->ready = 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int bloom_init(struct bloom * bloom, int entries, double error)
|
||||||
|
{
|
||||||
|
return bloom_init_size(bloom, entries, error, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int bloom_check(struct bloom * bloom, const void * buffer, int len)
|
||||||
|
{
|
||||||
|
return bloom_check_add(bloom, buffer, len, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int bloom_add(struct bloom * bloom, const void * buffer, int len)
|
||||||
|
{
|
||||||
|
return bloom_check_add(bloom, buffer, len, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void bloom_print(struct bloom * bloom)
|
||||||
|
{
|
||||||
|
(void)printf("bloom at %p\n", (void *)bloom);
|
||||||
|
(void)printf(" ->entries = %d\n", bloom->entries);
|
||||||
|
(void)printf(" ->error = %f\n", bloom->error);
|
||||||
|
(void)printf(" ->bits = %d\n", bloom->bits);
|
||||||
|
(void)printf(" ->bits per elem = %f\n", bloom->bpe);
|
||||||
|
(void)printf(" ->bytes = %d\n", bloom->bytes);
|
||||||
|
(void)printf(" ->buckets = %u\n", bloom->buckets);
|
||||||
|
(void)printf(" ->bucket_bytes = %u\n", bloom->bucket_bytes);
|
||||||
|
(void)printf(" ->bucket_bytes_exponent = %u\n",
|
||||||
|
bloom->bucket_bytes_exponent);
|
||||||
|
(void)printf(" ->bucket_bits_fast_mod_operand = 0%o\n",
|
||||||
|
bloom->bucket_bits_fast_mod_operand);
|
||||||
|
(void)printf(" ->hash functions = %d\n", bloom->hashes);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void bloom_free(struct bloom * bloom)
|
||||||
|
{
|
||||||
|
if (bloom->ready) {
|
||||||
|
free(bloom->bf);
|
||||||
|
}
|
||||||
|
bloom->ready = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
const char * bloom_version()
|
||||||
|
{
|
||||||
|
return MAKESTRING(BLOOM_VERSION);
|
||||||
|
}
|
188
src/bloom.h
Executable file
188
src/bloom.h
Executable file
@ -0,0 +1,188 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2012-2015, Jyri J. Virkki
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This file is under BSD license. See LICENSE file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _BLOOM_H
|
||||||
|
#define _BLOOM_H
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* On Linux, the code attempts to compute a bucket size based on CPU cache
|
||||||
|
* size info, if available. If that fails for any reason, this fallback size
|
||||||
|
* is used instead.
|
||||||
|
*
|
||||||
|
* On non-Linux systems, this is the bucket size always used unless the
|
||||||
|
* caller overrides it (see bloom_init_size()).
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#define BLOOM_BUCKET_SIZE_FALLBACK (32 * 1024)
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* It was found that using multiplier x0.5 for CPU L1 cache size is
|
||||||
|
* more effective in terms of CPU usage and, surprisingly, collisions
|
||||||
|
* number.
|
||||||
|
*
|
||||||
|
* Feel free to tune this constant the way it will work for you.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#define BLOOM_L1_CACHE_SIZE_DIV 1
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Structure to keep track of one bloom filter. Caller needs to
|
||||||
|
* allocate this and pass it to the functions below. First call for
|
||||||
|
* every struct must be to bloom_init().
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
struct bloom
|
||||||
|
{
|
||||||
|
// These fields are part of the public interface of this structure.
|
||||||
|
// Client code may read these values if desired. Client code MUST NOT
|
||||||
|
// modify any of these.
|
||||||
|
int entries;
|
||||||
|
double error;
|
||||||
|
int bits;
|
||||||
|
int bytes;
|
||||||
|
int hashes;
|
||||||
|
|
||||||
|
// Fields below are private to the implementation. These may go away or
|
||||||
|
// change incompatibly at any moment. Client code MUST NOT access or rely
|
||||||
|
// on these.
|
||||||
|
unsigned buckets;
|
||||||
|
unsigned bucket_bytes;
|
||||||
|
|
||||||
|
// x86 CPU divide by/multiply by operation optimization helpers
|
||||||
|
unsigned bucket_bytes_exponent;
|
||||||
|
unsigned bucket_bits_fast_mod_operand;
|
||||||
|
|
||||||
|
double bpe;
|
||||||
|
unsigned char * bf;
|
||||||
|
int ready;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Initialize the bloom filter for use.
|
||||||
|
*
|
||||||
|
* The filter is initialized with a bit field and number of hash functions
|
||||||
|
* according to the computations from the wikipedia entry:
|
||||||
|
* http://en.wikipedia.org/wiki/Bloom_filter
|
||||||
|
*
|
||||||
|
* Optimal number of bits is:
|
||||||
|
* bits = (entries * ln(error)) / ln(2)^2
|
||||||
|
*
|
||||||
|
* Optimal number of hash functions is:
|
||||||
|
* hashes = bpe * ln(2)
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
* entries - The expected number of entries which will be inserted.
|
||||||
|
* error - Probability of collision (as long as entries are not
|
||||||
|
* exceeded).
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* -------
|
||||||
|
* 0 - on success
|
||||||
|
* 1 - on failure
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_init(struct bloom * bloom, int entries, double error);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Initialize the bloom filter for use.
|
||||||
|
*
|
||||||
|
* See comments above for general information.
|
||||||
|
*
|
||||||
|
* This is the same as bloom_init() but allows the caller to pass in a
|
||||||
|
* cache_size to override the internal value (which is either computed
|
||||||
|
* or the default of BLOOM_BUCKET_SIZE_FALLBACK). Mostly useful for
|
||||||
|
* experimenting.
|
||||||
|
*
|
||||||
|
* See misc/bucketsize for a script which can help identify a good value
|
||||||
|
* for cache_size.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_init_size(struct bloom * bloom, int entries, double error,
|
||||||
|
unsigned int cache_size);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Check if the given element is in the bloom filter. Remember this may
|
||||||
|
* return false positive if a collision occured.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
* buffer - Pointer to buffer containing element to check.
|
||||||
|
* len - Size of 'buffer'.
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* -------
|
||||||
|
* 0 - element is not present
|
||||||
|
* 1 - element is present (or false positive due to collision)
|
||||||
|
* -1 - bloom not initialized
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_check(struct bloom * bloom, const void * buffer, int len);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Add the given element to the bloom filter.
|
||||||
|
* The return code indicates if the element (or a collision) was already in,
|
||||||
|
* so for the common check+add use case, no need to call check separately.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
* buffer - Pointer to buffer containing element to add.
|
||||||
|
* len - Size of 'buffer'.
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* -------
|
||||||
|
* 0 - element was not present and was added
|
||||||
|
* 1 - element (or a collision) had already been added previously
|
||||||
|
* -1 - bloom not initialized
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int bloom_add(struct bloom * bloom, const void * buffer, int len);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Print (to stdout) info about this bloom filter. Debugging aid.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void bloom_print(struct bloom * bloom);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Deallocate internal storage.
|
||||||
|
*
|
||||||
|
* Upon return, the bloom struct is no longer usable. You may call bloom_init
|
||||||
|
* again on the same struct to reinitialize it again.
|
||||||
|
*
|
||||||
|
* Parameters:
|
||||||
|
* -----------
|
||||||
|
* bloom - Pointer to an allocated struct bloom (see above).
|
||||||
|
*
|
||||||
|
* Return: none
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void bloom_free(struct bloom * bloom);
|
||||||
|
|
||||||
|
|
||||||
|
/** ***************************************************************************
|
||||||
|
* Returns version string compiled into library.
|
||||||
|
*
|
||||||
|
* Return: version string
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
const char * bloom_version();
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
7
src/murmurhash2.h
Executable file
7
src/murmurhash2.h
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
|
||||||
|
#ifndef _BLOOM_MURMURHASH2
|
||||||
|
#define _BLOOM_MURMURHASH2
|
||||||
|
|
||||||
|
unsigned int murmurhash2(const void * key, int len, const unsigned int seed);
|
||||||
|
|
||||||
|
#endif
|
357
src/obiavl.c
357
src/obiavl.c
@ -19,6 +19,9 @@
|
|||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
|
//#include <libbloom.h>
|
||||||
|
#include "bloom.h"
|
||||||
|
|
||||||
#include "obiavl.h"
|
#include "obiavl.h"
|
||||||
#include "obierrno.h"
|
#include "obierrno.h"
|
||||||
#include "obitypes.h"
|
#include "obitypes.h"
|
||||||
@ -30,158 +33,6 @@
|
|||||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||||
|
|
||||||
|
|
||||||
////crc crcTable[256];
|
|
||||||
//static crc crcTable[] = {
|
|
||||||
//0x00, 0xd8, 0x68, 0xb0, 0xd0, 0x8, 0xb8, 0x60, 0x78, 0xa0, 0x10, 0xc8, 0xa8, 0x70, 0xc0, 0x18, 0xf0, 0x28, 0x98, 0x40, 0x20, 0xf8, 0x48, 0x90, 0x88, 0x50, 0xe0, 0x38, 0x58, 0x80, 0x30, 0xe8, 0x38, 0xe0, 0x50, 0x88, 0xe8, 0x30, 0x80, 0x58, 0x40, 0x98, 0x28, 0xf0, 0x90, 0x48, 0xf8, 0x20, 0xc8, 0x10, 0xa0, 0x78, 0x18, 0xc0, 0x70, 0xa8, 0xb0, 0x68, 0xd8, 0, 0x60, 0xb8, 0x8, 0xd0, 0x70, 0xa8, 0x18, 0xc0, 0xa0, 0x78, 0xc8, 0x10, 0x8, 0xd0, 0x60, 0xb8, 0xd8, 0, 0xb0, 0x68, 0x80, 0x58, 0xe8, 0x30, 0x50, 0x88, 0x38, 0xe0, 0xf8, 0x20, 0x90, 0x48, 0x28, 0xf0, 0x40, 0x98, 0x48, 0x90, 0x20, 0xf8, 0x98, 0x40, 0xf0, 0x28, 0x30, 0xe8, 0x58, 0x80, 0xe0, 0x38, 0x88, 0x50, 0xb8, 0x60, 0xd0, 0x8, 0x68, 0xb0, 0, 0xd8, 0xc0, 0x18, 0xa8, 0x70, 0x10, 0xc8, 0x78, 0xa0, 0xe0, 0x38, 0x88, 0x50, 0x30, 0xe8, 0x58, 0x80, 0x98, 0x40, 0xf0, 0x28, 0x48, 0x90, 0x20, 0xf8, 0x10, 0xc8, 0x78, 0xa0, 0xc0, 0x18, 0xa8, 0x70, 0x68, 0xb0, 0, 0xd8, 0xb8, 0x60, 0xd0, 0x8, 0xd8, 0, 0xb0, 0x68, 0x8, 0xd0, 0x60, 0xb8, 0xa0, 0x78, 0xc8, 0x10, 0x70, 0xa8, 0x18, 0xc0, 0x28, 0xf0, 0x40, 0x98, 0xf8, 0x20, 0x90, 0x48, 0x50, 0x88, 0x38, 0xe0, 0x80, 0x58, 0xe8, 0x30, 0x90, 0x48, 0xf8, 0x20, 0x40, 0x98, 0x28, 0xf0, 0xe8, 0x30, 0x80, 0x58, 0x38, 0xe0, 0x50, 0x88, 0x60, 0xb8, 0x8, 0xd0, 0xb0, 0x68, 0xd8, 0, 0x18, 0xc0, 0x70, 0xa8, 0xc8, 0x10, 0xa0, 0x78, 0xa8, 0x70, 0xc0, 0x18, 0x78, 0xa0, 0x10, 0xc8, 0xd0, 0x8, 0xb8, 0x60, 0, 0xd8, 0x68, 0xb0, 0x58, 0x80, 0x30, 0xe8, 0x88, 0x50, 0xe0, 0x38, 0x20, 0xf8, 0x48, 0x90, 0xf0, 0x28, 0x98, 0x40
|
|
||||||
//};
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//void crcInit(void)
|
|
||||||
//{
|
|
||||||
// crc remainder;
|
|
||||||
//
|
|
||||||
// fprintf(stderr, "\n");
|
|
||||||
//
|
|
||||||
// /*
|
|
||||||
// * Compute the remainder of each possible dividend.
|
|
||||||
// */
|
|
||||||
// for (int dividend = 0; dividend < 256; ++dividend)
|
|
||||||
// {
|
|
||||||
// /*
|
|
||||||
// * Start with the dividend followed by zeros.
|
|
||||||
// */
|
|
||||||
// remainder = dividend << (WIDTH - 8);
|
|
||||||
//
|
|
||||||
// /*
|
|
||||||
// * Perform modulo-2 division, a bit at a time.
|
|
||||||
// */
|
|
||||||
// for (uint8_t bit = 8; bit > 0; --bit)
|
|
||||||
// {
|
|
||||||
// /*
|
|
||||||
// * Try to divide the current data bit.
|
|
||||||
// */
|
|
||||||
// if (remainder & TOPBIT)
|
|
||||||
// {
|
|
||||||
// remainder = (remainder << 1) ^ POLYNOMIAL;
|
|
||||||
// }
|
|
||||||
// else
|
|
||||||
// {
|
|
||||||
// remainder = (remainder << 1);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// /*
|
|
||||||
// * Store the result into the table.
|
|
||||||
// */
|
|
||||||
// crcTable[dividend] = remainder;
|
|
||||||
// fprintf(stderr, "%#x, ", remainder);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//} /* crcInit() */
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//crc crcFast(uint8_t const message[], int nBytes)
|
|
||||||
//{
|
|
||||||
// uint8_t data;
|
|
||||||
// crc remainder = 0;
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// /*
|
|
||||||
// * Divide the message by the polynomial, a byte at a time.
|
|
||||||
// */
|
|
||||||
// for (int byte = 0; byte < nBytes; ++byte)
|
|
||||||
// {
|
|
||||||
// data = message[byte] ^ (remainder >> (WIDTH - 8));
|
|
||||||
// remainder = crcTable[data] ^ (remainder << 8);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// /*
|
|
||||||
// * The final remainder is the CRC.
|
|
||||||
// */
|
|
||||||
// return (remainder);
|
|
||||||
//
|
|
||||||
//} /* crcFast() */
|
|
||||||
//
|
|
||||||
//
|
|
||||||
//crc compute_crc(const char* s)
|
|
||||||
//{
|
|
||||||
// crc c;
|
|
||||||
// //uint8_t cache;
|
|
||||||
//
|
|
||||||
// //cache = 15;
|
|
||||||
//
|
|
||||||
//// crcInit();
|
|
||||||
//
|
|
||||||
// c = crcFast(s, strlen(s));
|
|
||||||
//
|
|
||||||
// //fprintf(stderr, "\nlen = %d", strlen(argv[1]));
|
|
||||||
//
|
|
||||||
// //fprintf(stderr, "\ncrc = %u\n\n", c);
|
|
||||||
// //fprintf(stderr, "\ncrc mod 8 = %u\n\n", c%8);
|
|
||||||
//
|
|
||||||
// c = c >> 3;
|
|
||||||
// //fprintf(stderr, "\nshifted crc = %u\n\n", c);
|
|
||||||
//
|
|
||||||
// //c = c & cache;
|
|
||||||
// //c = c % 32;
|
|
||||||
//
|
|
||||||
// //fprintf(stderr, "\ncrc = %u\n\n", c);
|
|
||||||
//
|
|
||||||
// return (c & 7);
|
|
||||||
//}
|
|
||||||
|
|
||||||
static unsigned char crc8_table[] = {
|
|
||||||
0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0x95, 0xab, 0xe9, 0xd7,
|
|
||||||
0x6d, 0x53, 0x11, 0x2f, 0x4f, 0x71, 0x33, 0x0d, 0xb7, 0x89, 0xcb, 0xf5,
|
|
||||||
0xda, 0xe4, 0xa6, 0x98, 0x22, 0x1c, 0x5e, 0x60, 0x9e, 0xa0, 0xe2, 0xdc,
|
|
||||||
0x66, 0x58, 0x1a, 0x24, 0x0b, 0x35, 0x77, 0x49, 0xf3, 0xcd, 0x8f, 0xb1,
|
|
||||||
0xd1, 0xef, 0xad, 0x93, 0x29, 0x17, 0x55, 0x6b, 0x44, 0x7a, 0x38, 0x06,
|
|
||||||
0xbc, 0x82, 0xc0, 0xfe, 0x59, 0x67, 0x25, 0x1b, 0xa1, 0x9f, 0xdd, 0xe3,
|
|
||||||
0xcc, 0xf2, 0xb0, 0x8e, 0x34, 0x0a, 0x48, 0x76, 0x16, 0x28, 0x6a, 0x54,
|
|
||||||
0xee, 0xd0, 0x92, 0xac, 0x83, 0xbd, 0xff, 0xc1, 0x7b, 0x45, 0x07, 0x39,
|
|
||||||
0xc7, 0xf9, 0xbb, 0x85, 0x3f, 0x01, 0x43, 0x7d, 0x52, 0x6c, 0x2e, 0x10,
|
|
||||||
0xaa, 0x94, 0xd6, 0xe8, 0x88, 0xb6, 0xf4, 0xca, 0x70, 0x4e, 0x0c, 0x32,
|
|
||||||
0x1d, 0x23, 0x61, 0x5f, 0xe5, 0xdb, 0x99, 0xa7, 0xb2, 0x8c, 0xce, 0xf0,
|
|
||||||
0x4a, 0x74, 0x36, 0x08, 0x27, 0x19, 0x5b, 0x65, 0xdf, 0xe1, 0xa3, 0x9d,
|
|
||||||
0xfd, 0xc3, 0x81, 0xbf, 0x05, 0x3b, 0x79, 0x47, 0x68, 0x56, 0x14, 0x2a,
|
|
||||||
0x90, 0xae, 0xec, 0xd2, 0x2c, 0x12, 0x50, 0x6e, 0xd4, 0xea, 0xa8, 0x96,
|
|
||||||
0xb9, 0x87, 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x03, 0x63, 0x5d, 0x1f, 0x21,
|
|
||||||
0x9b, 0xa5, 0xe7, 0xd9, 0xf6, 0xc8, 0x8a, 0xb4, 0x0e, 0x30, 0x72, 0x4c,
|
|
||||||
0xeb, 0xd5, 0x97, 0xa9, 0x13, 0x2d, 0x6f, 0x51, 0x7e, 0x40, 0x02, 0x3c,
|
|
||||||
0x86, 0xb8, 0xfa, 0xc4, 0xa4, 0x9a, 0xd8, 0xe6, 0x5c, 0x62, 0x20, 0x1e,
|
|
||||||
0x31, 0x0f, 0x4d, 0x73, 0xc9, 0xf7, 0xb5, 0x8b, 0x75, 0x4b, 0x09, 0x37,
|
|
||||||
0x8d, 0xb3, 0xf1, 0xcf, 0xe0, 0xde, 0x9c, 0xa2, 0x18, 0x26, 0x64, 0x5a,
|
|
||||||
0x3a, 0x04, 0x46, 0x78, 0xc2, 0xfc, 0xbe, 0x80, 0xaf, 0x91, 0xd3, 0xed,
|
|
||||||
0x57, 0x69, 0x2b, 0x15};
|
|
||||||
|
|
||||||
|
|
||||||
unsigned crc8(unsigned char *data, size_t len)
|
|
||||||
{
|
|
||||||
unsigned char *end;
|
|
||||||
unsigned crc;
|
|
||||||
|
|
||||||
crc = 0;
|
|
||||||
|
|
||||||
crc ^= 0xff;
|
|
||||||
end = data + len;
|
|
||||||
do {
|
|
||||||
crc = crc8_table[crc ^ *data++];
|
|
||||||
} while (data < end);
|
|
||||||
return crc ^ 0xff;
|
|
||||||
}
|
|
||||||
|
|
||||||
crc compute_crc(const char* s)
|
|
||||||
{
|
|
||||||
unsigned c;
|
|
||||||
|
|
||||||
c = crc8(s, strlen(s));
|
|
||||||
//fprintf(stderr, "%02x\n", c);
|
|
||||||
return (c & 7);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**************************************************************************
|
/**************************************************************************
|
||||||
*
|
*
|
||||||
@ -620,21 +471,23 @@ int grow_avl(OBIDMS_avl_p avl) // TODO Lock when needed
|
|||||||
int avl_file_descriptor;
|
int avl_file_descriptor;
|
||||||
char* avl_file_name;
|
char* avl_file_name;
|
||||||
|
|
||||||
// Get the avl file name
|
avl_file_descriptor = avl->avl_fd;
|
||||||
avl_file_name = build_avl_file_name((avl->header)->avl_name);
|
|
||||||
if (avl_file_name == NULL)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
// Open the avl file
|
// // Get the avl file name
|
||||||
avl_file_descriptor = openat(avl->dir_fd, avl_file_name, O_RDWR);
|
// avl_file_name = build_avl_file_name((avl->header)->avl_name);
|
||||||
if (avl_file_descriptor < 0)
|
// if (avl_file_name == NULL)
|
||||||
{
|
// return -1;
|
||||||
obi_set_errno(OBI_AVL_ERROR);
|
//
|
||||||
obidebug(1, "\nError opening an AVL tree file");
|
// // Open the avl file
|
||||||
free(avl_file_name);
|
// avl_file_descriptor = openat(avl->dir_fd, avl_file_name, O_RDWR);
|
||||||
return -1;
|
// if (avl_file_descriptor < 0)
|
||||||
}
|
// {
|
||||||
free(avl_file_name);
|
// obi_set_errno(OBI_AVL_ERROR);
|
||||||
|
// obidebug(1, "\nError opening an AVL tree file");
|
||||||
|
// free(avl_file_name);
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
// free(avl_file_name);
|
||||||
|
|
||||||
// Calculate the new file size
|
// Calculate the new file size
|
||||||
old_data_size = (avl->header)->avl_size;
|
old_data_size = (avl->header)->avl_size;
|
||||||
@ -683,7 +536,7 @@ int grow_avl(OBIDMS_avl_p avl) // TODO Lock when needed
|
|||||||
// Set the new avl size
|
// Set the new avl size
|
||||||
(avl->header)->avl_size = new_data_size;
|
(avl->header)->avl_size = new_data_size;
|
||||||
|
|
||||||
close(avl_file_descriptor);
|
//close(avl_file_descriptor);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -698,21 +551,23 @@ int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed
|
|||||||
int avl_data_file_descriptor;
|
int avl_data_file_descriptor;
|
||||||
char* avl_data_file_name;
|
char* avl_data_file_name;
|
||||||
|
|
||||||
// Get the avl data file name
|
avl_data_file_descriptor = avl->data_fd;
|
||||||
avl_data_file_name = build_avl_data_file_name((avl->header)->avl_name);
|
|
||||||
if (avl_data_file_name == NULL)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
// Open the avl data file
|
// // Get the avl data file name
|
||||||
avl_data_file_descriptor = openat(avl->dir_fd, avl_data_file_name, O_RDWR);
|
// avl_data_file_name = build_avl_data_file_name((avl->header)->avl_name);
|
||||||
if (avl_data_file_descriptor < 0)
|
// if (avl_data_file_name == NULL)
|
||||||
{
|
// return -1;
|
||||||
obi_set_errno(OBI_AVL_ERROR);
|
//
|
||||||
obidebug(1, "\nError opening an AVL tree data file");
|
// // Open the avl data file
|
||||||
free(avl_data_file_name);
|
// avl_data_file_descriptor = openat(avl->dir_fd, avl_data_file_name, O_RDWR);
|
||||||
return -1;
|
// if (avl_data_file_descriptor < 0)
|
||||||
}
|
// {
|
||||||
free(avl_data_file_name);
|
// obi_set_errno(OBI_AVL_ERROR);
|
||||||
|
// obidebug(1, "\nError opening an AVL tree data file");
|
||||||
|
// free(avl_data_file_name);
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
// free(avl_data_file_name);
|
||||||
|
|
||||||
// Calculate the new file size
|
// Calculate the new file size
|
||||||
old_data_size = ((avl->data)->header)->data_size_max;
|
old_data_size = ((avl->data)->header)->data_size_max;
|
||||||
@ -763,7 +618,7 @@ int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed
|
|||||||
// Initialize new data to 0
|
// Initialize new data to 0
|
||||||
memset(((avl->data)->data)+old_data_size, 0, new_data_size - old_data_size);
|
memset(((avl->data)->data)+old_data_size, 0, new_data_size - old_data_size);
|
||||||
|
|
||||||
close(avl_data_file_descriptor);
|
//close(avl_data_file_descriptor);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -1131,21 +986,71 @@ OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
OBIDMS_avl_p* obi_create_avl_in_64_parts(OBIDMS_p dms, const char* avl_name)
|
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name)
|
||||||
{
|
{
|
||||||
OBIDMS_avl_p* avls;
|
OBIDMS_avl_group_p avl_group;
|
||||||
char* avl_name_with_idx;
|
char* avl_name_with_idx;
|
||||||
uint8_t i;
|
|
||||||
|
|
||||||
avls = (OBIDMS_avl_p*) malloc(64*sizeof(OBIDMS_avl_p));
|
avl_group = (OBIDMS_avl_group_p) malloc(sizeof(OBIDMS_avl_group_t));
|
||||||
|
|
||||||
for (i=0; i < 64; i++)
|
// Create 1st avl
|
||||||
{
|
asprintf(&avl_name_with_idx,"%s_%u", avl_name, 0);
|
||||||
asprintf(&avl_name_with_idx,"%s_%u", avl_name, i);
|
(avl_group->sub_avls)[0] = obi_create_avl(dms, avl_name_with_idx);
|
||||||
avls[i] = obi_create_avl(dms, avl_name_with_idx);
|
avl_group->current_avl_idx = 0;
|
||||||
}
|
strcpy(avl_group->avl_name, avl_name);
|
||||||
|
|
||||||
return avls;
|
avl_group->dms = dms;
|
||||||
|
|
||||||
|
return avl_group;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int unmap_an_avl(OBIDMS_avl_p avl)
|
||||||
|
{
|
||||||
|
if (munmap((avl->data)->data, ((avl->data)->header)->data_size_max) < 0)
|
||||||
|
return -1;
|
||||||
|
if (munmap(avl->tree, (((avl->header)->nb_items_max) * sizeof(AVL_node_t))) < 0)
|
||||||
|
return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int remap_an_avl(OBIDMS_avl_p avl)
|
||||||
|
{
|
||||||
|
(avl->data)->data = mmap(NULL,
|
||||||
|
((avl->data)->header)->data_size_max,
|
||||||
|
PROT_READ | PROT_WRITE,
|
||||||
|
MAP_SHARED,
|
||||||
|
avl->data_fd,
|
||||||
|
((avl->data)->header)->header_size);
|
||||||
|
if ((avl->data)->data == NULL)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
avl->tree = mmap(NULL,
|
||||||
|
((avl->header)->nb_items_max) * sizeof(AVL_node_t),
|
||||||
|
PROT_READ | PROT_WRITE,
|
||||||
|
MAP_SHARED,
|
||||||
|
avl->avl_fd,
|
||||||
|
(avl->header)->header_size);
|
||||||
|
if (avl->tree == NULL)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int obi_add_new_avl_in_group(OBIDMS_avl_group_p avl_group) // TODO check for errors
|
||||||
|
{
|
||||||
|
char* avl_name_with_idx;
|
||||||
|
|
||||||
|
// unmap older
|
||||||
|
unmap_an_avl((avl_group->sub_avls)[avl_group->current_avl_idx]);
|
||||||
|
|
||||||
|
(avl_group->current_avl_idx)++;
|
||||||
|
asprintf(&avl_name_with_idx,"%s_%u", avl_group->avl_name, avl_group->current_avl_idx);
|
||||||
|
(avl_group->sub_avls)[avl_group->current_avl_idx] = obi_create_avl(avl_group->dms, avl_name_with_idx);
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1251,7 +1156,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name)
|
|||||||
// Initialize all bits to 0
|
// Initialize all bits to 0
|
||||||
memset(avl_data->data, 0, (avl_data->header)->data_size_max);
|
memset(avl_data->data, 0, (avl_data->header)->data_size_max);
|
||||||
|
|
||||||
close(avl_data_file_descriptor);
|
//close(avl_data_file_descriptor);
|
||||||
|
|
||||||
|
|
||||||
// Create the AVL tree file
|
// Create the AVL tree file
|
||||||
@ -1351,7 +1256,13 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name)
|
|||||||
(avl->header)->creation_date = time(NULL);
|
(avl->header)->creation_date = time(NULL);
|
||||||
strcpy((avl->header)->avl_name, avl_name);
|
strcpy((avl->header)->avl_name, avl_name);
|
||||||
|
|
||||||
close(avl_file_descriptor);
|
avl->avl_fd = avl_file_descriptor;
|
||||||
|
avl->data_fd = avl_data_file_descriptor;
|
||||||
|
|
||||||
|
// Bloom filter
|
||||||
|
bloom_init(&((avl->header)->bloom_filter), 2000000, 0.001); // TODO use macros
|
||||||
|
|
||||||
|
//close(avl_file_descriptor);
|
||||||
|
|
||||||
// Add in the list of opened AVL trees
|
// Add in the list of opened AVL trees
|
||||||
*(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl;
|
*(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl;
|
||||||
@ -1458,7 +1369,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
close(avl_data_file_descriptor);
|
//close(avl_data_file_descriptor);
|
||||||
|
|
||||||
|
|
||||||
// Open the AVL tree file
|
// Open the AVL tree file
|
||||||
@ -1544,7 +1455,10 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name)
|
|||||||
avl->directory = dms->avl_directory;
|
avl->directory = dms->avl_directory;
|
||||||
avl->dir_fd = avl_dir_file_descriptor;
|
avl->dir_fd = avl_dir_file_descriptor;
|
||||||
|
|
||||||
close(avl_file_descriptor);
|
avl->avl_fd = avl_file_descriptor;
|
||||||
|
avl->data_fd = avl_data_file_descriptor;
|
||||||
|
|
||||||
|
//close(avl_file_descriptor);
|
||||||
|
|
||||||
// Add in the list of opened AVL trees
|
// Add in the list of opened AVL trees
|
||||||
*(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl;
|
*(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl;
|
||||||
@ -1609,6 +1523,53 @@ byte_t* obi_avl_get(OBIDMS_avl_p avl, index_t idx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int maybe_in_avl(OBIDMS_avl_p avl, byte_t* value)
|
||||||
|
{
|
||||||
|
return (bloom_check(&((avl->header)->bloom_filter), value, (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)))));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value) // TODO won't be index_t
|
||||||
|
{
|
||||||
|
index_t index_if_already_in;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (maybe_in_avl((avl_group->sub_avls)[avl_group->current_avl_idx], value))
|
||||||
|
{
|
||||||
|
//fprintf(stderr, "\nyah maybe");
|
||||||
|
index_if_already_in = obi_avl_find((avl_group->sub_avls)[avl_group->current_avl_idx], value);
|
||||||
|
if (index_if_already_in >= 0)
|
||||||
|
return index_if_already_in;
|
||||||
|
}
|
||||||
|
// else
|
||||||
|
// fprintf(stderr, "\nnah");
|
||||||
|
for (i=0; i < (avl_group->current_avl_idx); i++)
|
||||||
|
{
|
||||||
|
if (maybe_in_avl((avl_group->sub_avls)[i], value))
|
||||||
|
{
|
||||||
|
//fprintf(stderr, "\nyah maybe");
|
||||||
|
if (remap_an_avl((avl_group->sub_avls)[i]) < 0)
|
||||||
|
return -1;
|
||||||
|
index_if_already_in = obi_avl_find((avl_group->sub_avls)[i], value);
|
||||||
|
if (unmap_an_avl((avl_group->sub_avls)[i]) < 0)
|
||||||
|
return -1;
|
||||||
|
if (index_if_already_in >= 0)
|
||||||
|
return index_if_already_in;
|
||||||
|
}
|
||||||
|
// else
|
||||||
|
// fprintf(stderr, "\nnah");
|
||||||
|
}
|
||||||
|
|
||||||
|
// not found in any avl: add in current
|
||||||
|
// first, check if make new one
|
||||||
|
if ((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->nb_items == 2000000) // TODO add condition with data size + use macro
|
||||||
|
obi_add_new_avl_in_group(avl_group);
|
||||||
|
|
||||||
|
bloom_add(&((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->bloom_filter), value, (BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1))));
|
||||||
|
return obi_avl_add((avl_group->sub_avls)[avl_group->current_avl_idx], value);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Insert a new node
|
// Insert a new node
|
||||||
index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
||||||
{
|
{
|
||||||
@ -1674,7 +1635,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
|||||||
// Value already stored
|
// Value already stored
|
||||||
{
|
{
|
||||||
//fprintf(stderr, "\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
|
//fprintf(stderr, "\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
|
||||||
return current_node->value;
|
return current_node->value; // TODO should trigger error if using bloom filters
|
||||||
}
|
}
|
||||||
|
|
||||||
depth++;
|
depth++;
|
||||||
@ -1732,7 +1693,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Find if a value is already in an AVL tree
|
// Find if a value is already in an AVL tree TODO use bloom
|
||||||
index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
|
index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
|
||||||
{
|
{
|
||||||
int comp;
|
int comp;
|
||||||
|
35
src/obiavl.h
35
src/obiavl.h
@ -25,6 +25,10 @@
|
|||||||
#include "obidms.h"
|
#include "obidms.h"
|
||||||
#include "obitypes.h"
|
#include "obitypes.h"
|
||||||
|
|
||||||
|
#include "bloom.h"
|
||||||
|
|
||||||
|
#define NB_OF_AVLS (64)
|
||||||
|
#define MASK (63)
|
||||||
|
|
||||||
#define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name.
|
#define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name.
|
||||||
*/
|
*/
|
||||||
@ -39,6 +43,8 @@
|
|||||||
#define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array.
|
#define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
typedef struct bloom bloom_t;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief AVL tree node structure.
|
* @brief AVL tree node structure.
|
||||||
@ -48,7 +54,7 @@ typedef struct AVL_node {
|
|||||||
*/
|
*/
|
||||||
index_t right_child; /**< Index of right greater child node.
|
index_t right_child; /**< Index of right greater child node.
|
||||||
*/
|
*/
|
||||||
int8_t balance_factor; /**< Balance factor of the node.
|
int8_t balance_factor; /**< Balance factor of the node.
|
||||||
*/
|
*/
|
||||||
index_t value; /**< Index of the value associated with the node in the data array.
|
index_t value; /**< Index of the value associated with the node in the data array.
|
||||||
*/
|
*/
|
||||||
@ -103,6 +109,7 @@ typedef struct OBIDMS_avl_header {
|
|||||||
*/
|
*/
|
||||||
time_t creation_date; /**< Date of creation of the file.
|
time_t creation_date; /**< Date of creation of the file.
|
||||||
*/
|
*/
|
||||||
|
bloom_t bloom_filter;
|
||||||
} OBIDMS_avl_header_t, *OBIDMS_avl_header_p;
|
} OBIDMS_avl_header_t, *OBIDMS_avl_header_p;
|
||||||
|
|
||||||
|
|
||||||
@ -132,9 +139,28 @@ typedef struct OBIDMS_avl {
|
|||||||
*/
|
*/
|
||||||
size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL tree is used.
|
size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL tree is used.
|
||||||
*/
|
*/
|
||||||
|
int avl_fd;
|
||||||
|
int data_fd;
|
||||||
} OBIDMS_avl_t, *OBIDMS_avl_p;
|
} OBIDMS_avl_t, *OBIDMS_avl_p;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief OBIDMS AVL tree group structure.
|
||||||
|
*/
|
||||||
|
typedef struct OBIDMS_avl_group {
|
||||||
|
// TODO put each group in a directory later
|
||||||
|
OBIDMS_avl_p sub_avls[64]; // TODO macro for max
|
||||||
|
int current_avl_idx;
|
||||||
|
char avl_name[AVL_MAX_NAME+1];
|
||||||
|
OBIDMS_p dms;
|
||||||
|
} OBIDMS_avl_group_t, *OBIDMS_avl_group_p;
|
||||||
|
|
||||||
|
|
||||||
|
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name);
|
||||||
|
index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Checks if an AVL tree already exists or not.
|
* @brief Checks if an AVL tree already exists or not.
|
||||||
*
|
*
|
||||||
@ -340,12 +366,5 @@ byte_t* obi_seq_to_obibytes(char* seq);
|
|||||||
const char* obi_obibytes_to_seq(byte_t* value_b);
|
const char* obi_obibytes_to_seq(byte_t* value_b);
|
||||||
|
|
||||||
|
|
||||||
OBIDMS_avl_p* obi_create_avl_in_64_parts(OBIDMS_p dms, const char* avl_name);
|
|
||||||
|
|
||||||
typedef uint8_t crc;
|
|
||||||
|
|
||||||
crc compute_crc(const char* s);
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* OBIAVL_H_ */
|
#endif /* OBIAVL_H_ */
|
||||||
|
|
||||||
|
@ -521,7 +521,6 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
OBIDMS_column_p new_column;
|
OBIDMS_column_p new_column;
|
||||||
OBIDMS_column_directory_p column_directory;
|
OBIDMS_column_directory_p column_directory;
|
||||||
OBIDMS_column_header_p header;
|
OBIDMS_column_header_p header;
|
||||||
OBIDMS_avl_p* avl;
|
|
||||||
size_t file_size;
|
size_t file_size;
|
||||||
obiversion_t version_number;
|
obiversion_t version_number;
|
||||||
char* column_file_name;
|
char* column_file_name;
|
||||||
@ -723,16 +722,15 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
|||||||
// If the data type is OBI_STR or OBI_SEQ, the associated obi_avl is opened or created
|
// If the data type is OBI_STR or OBI_SEQ, the associated obi_avl is opened or created
|
||||||
if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ))
|
if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ))
|
||||||
{
|
{
|
||||||
avl = obi_create_avl_in_64_parts(dms, avl_name);
|
new_column->avl = obi_create_avl_group(dms, avl_name);
|
||||||
if (avl == NULL)
|
// if (avl == NULL) TODO
|
||||||
{
|
// {
|
||||||
obidebug(1, "\nError opening or creating the aVL tree associated with a column");
|
// obidebug(1, "\nError opening or creating the aVL tree associated with a column");
|
||||||
munmap(new_column->header, header_size);
|
// munmap(new_column->header, header_size);
|
||||||
close(column_file_descriptor);
|
// close(column_file_descriptor);
|
||||||
free(new_column);
|
// free(new_column);
|
||||||
return NULL;
|
// return NULL;
|
||||||
}
|
// }
|
||||||
memcpy(new_column->avl, avl, 64*sizeof(OBIDMS_avl_p));
|
|
||||||
strncpy(header->avl_name, avl_name, AVL_MAX_NAME);
|
strncpy(header->avl_name, avl_name, AVL_MAX_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -756,11 +754,11 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
|||||||
{
|
{
|
||||||
OBIDMS_column_p column;
|
OBIDMS_column_p column;
|
||||||
OBIDMS_column_directory_p column_directory;
|
OBIDMS_column_directory_p column_directory;
|
||||||
OBIDMS_avl_p avl;
|
|
||||||
char* column_file_name;
|
char* column_file_name;
|
||||||
int column_file_descriptor;
|
int column_file_descriptor;
|
||||||
size_t header_size;
|
size_t header_size;
|
||||||
size_t i;
|
size_t i;
|
||||||
|
OBIDMS_avl_p avl;
|
||||||
|
|
||||||
column = NULL;
|
column = NULL;
|
||||||
|
|
||||||
|
@ -98,7 +98,7 @@ typedef struct OBIDMS_column {
|
|||||||
*/
|
*/
|
||||||
OBIDMS_column_header_p header; /**< A pointer to the header of the column.
|
OBIDMS_column_header_p header; /**< A pointer to the header of the column.
|
||||||
*/
|
*/
|
||||||
OBIDMS_avl_p avl[64]; /**< A pointer to the group of AVL trees associated with the column if there is one.
|
OBIDMS_avl_group_p avl; /**< TODO A pointer to the group of AVL trees associated with the column if there is one.
|
||||||
*/
|
*/
|
||||||
void* data; /**< A `void` pointer to the beginning of the data.
|
void* data; /**< A `void` pointer to the beginning of the data.
|
||||||
*
|
*
|
||||||
|
@ -61,13 +61,13 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
|
|||||||
if (value_b == NULL)
|
if (value_b == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
if (strlen(value_b) == 0)
|
//if (strlen(value_b) == 0)
|
||||||
fprintf(stderr, "\nPOUIC");
|
// fprintf(stderr, "\nPOUIC");
|
||||||
|
|
||||||
//fprintf(stderr, "\n>%s||%s", value, obi_obibytes_to_seq(value_b));
|
//fprintf(stderr, "\n>%s||%s", value, obi_obibytes_to_seq(value_b));
|
||||||
|
|
||||||
// Add in the AVL tree
|
// Add in the AVL tree
|
||||||
idx = obi_avl_add((column->avl)[compute_crc(value)], value_b);
|
idx = insert_in_avl_group(column->avl, value_b);
|
||||||
if (idx == -1)
|
if (idx == -1)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
|
|||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
// Add in the AVL tree
|
// Add in the AVL tree
|
||||||
idx = obi_avl_add((column->avl)[compute_crc(value)], value_b);
|
idx = insert_in_avl_group(column->avl, value_b);
|
||||||
if (idx == -1)
|
if (idx == -1)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
@ -227,6 +227,7 @@ Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p v
|
|||||||
if (view== NULL)
|
if (view== NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
fprintf(stderr, "\nmmmm\n");
|
||||||
strcpy(view->view_type, VIEW_TYPE_NUC_SEQS);
|
strcpy(view->view_type, VIEW_TYPE_NUC_SEQS);
|
||||||
|
|
||||||
if (view_to_clone == NULL)
|
if (view_to_clone == NULL)
|
||||||
|
Reference in New Issue
Block a user