Compare commits
11 Commits
multiple_a
...
check_avl_
Author | SHA1 | Date | |
---|---|---|---|
ff6c27acf2 | |||
69856f18dd | |||
58ac860cc7 | |||
d44117d625 | |||
6bd42132c4 | |||
4085904362 | |||
b04b4b5902 | |||
383e738ab7 | |||
3681cecb4d | |||
86071d30c9 | |||
6157633137 |
228
python/obi.py
Normal file
228
python/obi.py
Normal file
@ -0,0 +1,228 @@
|
||||
#!/usr/local/bin/python3.4
|
||||
'''
|
||||
obi -- shortdesc
|
||||
|
||||
obi is a description
|
||||
|
||||
It defines classes_and_methods
|
||||
|
||||
@author: user_name
|
||||
|
||||
@copyright: 2014 organization_name. All rights reserved.
|
||||
|
||||
@license: license
|
||||
|
||||
@contact: user_email
|
||||
@deffield updated: Updated
|
||||
'''
|
||||
|
||||
import sys
|
||||
import pkgutil
|
||||
import argparse
|
||||
import logging
|
||||
import json
|
||||
|
||||
default_config = {
|
||||
|
||||
'obi' : { 'log' : True,
|
||||
'loglevel' : 'INFO',
|
||||
'version' : False,
|
||||
'progress' : True
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
from obitools3 import command
|
||||
from obitools3.version import version
|
||||
|
||||
__all__ = []
|
||||
__version__ = version
|
||||
__date__ = '2014-09-28'
|
||||
__updated__ = '2014-09-28'
|
||||
|
||||
DEBUG = 1
|
||||
TESTRUN = 0
|
||||
PROFILE = 0
|
||||
|
||||
|
||||
|
||||
def loadCommand(name,loader):
    '''
    Load a command module from its name and the finder that located it.

    This function is for internal use.

    The modern import protocol (``find_spec`` / ``exec_module``) is used
    when the finder and the running interpreter provide it. The older
    ``find_module`` / ``load_module`` protocol is kept as a fallback for
    finders that only implement that one.

    @param name: name of the module
    @type name: str
    @param loader: the finder object yielded by `pkgutil.iter_modules`
    @type loader: a finder exposing `find_spec` or `find_module`

    @return: the loaded module
    @rtype: module

    @raise ImportError: if the finder cannot locate a module named `name`
    '''
    import importlib.util

    find_spec = getattr(loader, 'find_spec', None)
    module_from_spec = getattr(importlib.util, 'module_from_spec', None)

    if find_spec is None or module_from_spec is None:
        # Legacy protocol: the only one available on this finder/interpreter.
        return loader.find_module(name).load_module(name)

    spec = find_spec(name)
    if spec is None or spec.loader is None:
        raise ImportError("No command module named %r" % name)

    module = module_from_spec(spec)

    # load_module() used to register the module in sys.modules; keep doing
    # so, and undo the registration if executing the module body fails.
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        sys.modules.pop(name, None)
        raise

    return module
|
||||
|
||||
def getCommandsList():
    '''
    Collect the sub-commands available to the main `obi` command.

    Every entry found in the `obitools3.command` package that is not itself
    a package is loaded with `loadCommand`.

    @return: a dict mapping each command name to its loaded module
    @rtype: dict
    '''
    return {
        name: loadCommand(name, finder)
        for finder, name, is_package in pkgutil.iter_modules(command.__path__)
        if not is_package
    }
|
||||
|
||||
def getLogger(config):
    '''
    Configure and return the root logger as defined by the command line
    options or by the config file.

    A handler writing to stderr is always added. When
    config['obi']['log'] is true, a second handler writing to
    "<config['obi']['outputfilename']>.log" is added as well. The logger is
    also stored in config['obi']['logger'].

    :param config: the configuration dictionary; its 'obi' section must
                   provide 'log' and 'loglevel', plus 'outputfilename'
                   when 'log' is true
    :return: the configured root logger
    '''
    settings = config['obi']

    rootlogger = logging.getLogger()
    logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")

    stderrHandler = logging.StreamHandler(sys.stderr)
    stderrHandler.setFormatter(logFormatter)
    rootlogger.addHandler(stderrHandler)

    if settings['log']:
        # The output file name is only needed (and only read) when a log
        # file is actually requested.
        fileHandler = logging.FileHandler("%s.log" % settings['outputfilename'])
        fileHandler.setFormatter(logFormatter)
        rootlogger.addHandler(fileHandler)

    # Resolve the level name against the logging module. Anything that is
    # not a real numeric level (unknown name, or a name that resolves to a
    # function/class/flag of the logging module) falls back to INFO.
    loglevel = getattr(logging, str(settings['loglevel']), None)
    if isinstance(loglevel, bool) or not isinstance(loglevel, int):
        loglevel = logging.INFO

    rootlogger.setLevel(loglevel)

    settings['logger'] = rootlogger

    return rootlogger
|
||||
|
||||
|
||||
class ObiParser(argparse.ArgumentParser):
    '''
    Argument parser that prints the full help after a usage error.
    '''

    def error(self, message):
        '''
        Report `message` on stderr, print the help and exit with status 2.
        '''
        sys.stderr.write('error: %s\n' % message)
        self.print_help()
        raise SystemExit(2)
|
||||
|
||||
def buildArgumentParser():
    '''
    Build the command line parser of the `obi` command.

    The parser holds the global options (stored under 'obi:<key>'
    destinations) and one sub-parser per command module exposing a `run`
    attribute. A module may contribute its own options through an
    `addOptions(parser)` function and a help line through `__title__`.
    The selected module is stored under the 'obi:module' destination.

    @return: the configured parser
    @rtype: ObiParser
    '''
    parser = ObiParser()

    # (flag, destination, action, default, help text)
    global_flags = (
        ('--version', 'obi:version', 'store_true', False,
         'Print the version of the OBITools'),
        ('--no-log', 'obi:log', 'store_false', None,
         'Do not create a logfile for the data analyze'),
        ('--no-progress', 'obi:progress', 'store_false', None,
         'Do not print the progress bar during analyzes'),
    )
    for flag, destination, action, default, text in global_flags:
        parser.add_argument(flag, dest=destination,
                            action=action,
                            default=default,
                            help=text)

    subparsers = parser.add_subparsers(title='subcommands',
                                       description='valid subcommands',
                                       help='additional help')

    for name, module in getCommandsList().items():
        if not hasattr(module, "run"):
            continue

        # Only pass `help` when the module provides a title: argparse lists
        # a sub-command in the help output only if `help` was given.
        extra = {}
        if hasattr(module, "__title__"):
            extra['help'] = module.__title__
        sub = subparsers.add_parser(name, **extra)

        if hasattr(module, "addOptions"):
            module.addOptions(sub)

        sub.set_defaults(**{'obi:module' : module})

    return parser
|
||||
|
||||
def buildDefaultConfiguration():
    '''
    Add one section per command to the module level `default_config`.

    Each section is the `default_config` attribute of the command module
    when it defines one, and an empty dict otherwise.

    @return: the updated `default_config` dictionary
    @rtype: dict
    '''
    global default_config

    for name, module in getCommandsList().items():
        assert hasattr(module, "run")
        default_config[name] = getattr(module, 'default_config', {})

    return default_config
|
||||
|
||||
|
||||
def getConfiguration():
    '''
    Build the configuration of the running `obi` command (once).

    The defaults of `obi` and of every command are overridden by the
    command line options that were actually given (options left to None
    are ignored). Option destinations have the form '<section>:<key>'.

    The function exits the program after printing the version when
    `--version` is given, and with status 2 when no sub-command is
    selected. Otherwise it sets up logging through `getLogger` and marks
    the configuration as done, so later calls return it directly.

    @return: the configuration dictionary
    @rtype: dict
    '''
    if '__done__' in default_config:
        return default_config

    parser = buildArgumentParser()
    options = vars(parser.parse_args())

    config = buildDefaultConfiguration()

    for name, value in options.items():
        if value is None:
            # Option not given on the command line: keep the default.
            continue
        section, key = name.split(':', 1)
        # A command may declare options without shipping a default_config
        # section; create the section on demand instead of failing.
        config.setdefault(section, {})[key] = value

    obi = config['obi']

    if obi['version']:
        print("The OBITools - Version %s" % __version__)
        sys.exit(0)

    if 'module' not in obi:
        print('\nError: No obi command specified',file=sys.stderr)
        parser.print_help()
        sys.exit(2)

    # Neither 'outputfilename' nor 'indexfilename' is guaranteed to be
    # defined by the defaults or by the selected command: read them with
    # .get() so that their absence is not a KeyError.
    if obi.get('outputfilename') is None:
        obi['outputfilename'] = obi.get('indexfilename')

    if obi['outputfilename'] is None:
        # No name to derive "<name>.log" from: log to stderr only.
        obi['log'] = False

    getLogger(config)

    config['__done__']=True

    return config
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # Parse the command line, then hand the configuration over to the
    # module of the selected sub-command.
    config = getConfiguration()
    selected_command = config['obi']['module']
    selected_command.run(config)
|
||||
|
||||
|
||||
|
0
python/obitools3/command/__init__.py
Normal file
0
python/obitools3/command/__init__.py
Normal file
36
python/obitools3/command/count.py
Normal file
36
python/obitools3/command/count.py
Normal file
@ -0,0 +1,36 @@
|
||||
'''
|
||||
Created on 8 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
__title__="Counts sequences in a sequence set"
|
||||
|
||||
|
||||
default_config = { 'countmode' : None
|
||||
}
|
||||
|
||||
def addOptions(parser):
    '''
    Declare the options of the `count` command on its argparse sub-parser.

    @param parser: the sub-parser created for this command
    @type parser: argparse.ArgumentParser
    '''
    parser.add_argument(dest='obi:input', metavar='obi:input',
                        nargs='?',
                        default=None,
                        help='input data set' )

    # `parser` is an argparse parser: groups and options are declared with
    # add_argument_group()/add_argument(), not the optparse equivalents.
    group=parser.add_argument_group('Obicount specific options')
    group.add_argument('-s','--sequence',
                       action="store_true", dest="count:sequence",
                       default=False,
                       help="Prints only the number of sequence records."
                       )

    group.add_argument('-a','--all',
                       action="store_true", dest="count:all",
                       default=False,
                       help="Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False)."
                       )
|
||||
|
||||
|
||||
|
||||
def run(config):
    '''
    Entry point of the `count` command (not implemented yet).

    @param config: the configuration dictionary passed by the caller
    '''
    # The code of my command
    return None
|
@ -22,3 +22,9 @@
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/bloom.c
|
||||
../../../src/bloom.h
|
||||
../../../src/MurmurHash2.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/crc64.c
|
||||
../../../src/crc64.h
|
||||
|
197
python/obitools3/obiimport.py
Normal file
197
python/obitools3/obiimport.py
Normal file
@ -0,0 +1,197 @@
|
||||
import sys
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from obitools3.obidms._obidms import OBIDMS
|
||||
|
||||
|
||||
def bufferedRead(fileobj,size=100000000):
    '''
    Yield the lines of `fileobj`, reading them in batches.

    Lines are fetched with `fileobj.readlines(size)` until a call returns
    an empty batch.

    @param fileobj: an object providing `readlines(hint)`
    @param size: the hint passed to each `readlines` call
    '''
    while True:
        batch = fileobj.readlines(size)
        if not batch:
            return
        yield from batch
|
||||
|
||||
|
||||
if __name__ == '__main__':

    # Import the identifiers of a sequence file into a new view of the
    # 'tdms' OBIDMS. The file is read as 4-line records: the first line of
    # each record is the header, and the identifier is the header up to
    # the first space, without its leading marker character.
    #
    # TODO: only identifiers are imported so far. Sequences, descriptions
    # and `key=value` annotations are not stored yet (earlier drafts of
    # that code, and of a FASTA reader, were disabled and have been
    # removed from this script).

    parser = argparse.ArgumentParser(description='Convert a fasta file in an OBIDMS.')

    parser.add_argument('-i', '--input', dest='input_file', type=str,
                        required=True,
                        help='Name of the file containing the sequences')

    args = parser.parse_args()

    d = OBIDMS('tdms')

    view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW")

    i = 0   # number of records imported so far

    # The context manager closes the input file even if the import fails.
    with open(args.input_file, 'r') as input_file:

        for line_number, line in enumerate(bufferedRead(input_file)):

            if line_number % 4 != 0:
                # Not a header line.
                continue

            if not (i % 500000):
                print(str(time.time())+'\t'+str(i))

            # Strip the end of line first: a header without description
            # has no space, and would otherwise keep its newline in the id.
            seq_id = line.rstrip('\r\n').split(" ", 1)[0][1:]
            print(seq_id)
            view[i].set_id(seq_id)

            i += 1

    print(repr(view))

    view.save_and_close()
    d.close()

    print("Done.")
|
64
src/MurmurHash2.c
Executable file
64
src/MurmurHash2.c
Executable file
@ -0,0 +1,64 @@
|
||||
//-----------------------------------------------------------------------------
|
||||
// MurmurHash2, by Austin Appleby
|
||||
|
||||
// Note - This code makes a few assumptions about how your machine behaves -
|
||||
|
||||
// 1. We can read a 4-byte value from any address without crashing
|
||||
// 2. sizeof(int) == 4
|
||||
|
||||
// And it has a few limitations -
|
||||
|
||||
// 1. It will not work incrementally.
|
||||
// 2. It will not produce the same results on little-endian and big-endian
|
||||
// machines.
|
||||
|
||||
/*
 * Compute the 32-bit MurmurHash2 of the `len` bytes starting at `key`,
 * seeded with `seed`.
 *
 * The input is consumed four bytes at a time through an unsigned int read
 * (hence the assumptions listed at the top of this file), then the one to
 * three remaining bytes are folded in, and the hash is finalized.
 */
unsigned int murmurhash2(const void * key, int len, const unsigned int seed)
{
    /* Mixing constants generated offline. */
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    const unsigned char * cursor = (const unsigned char *)key;
    unsigned int hash = seed ^ len;
    int remaining;

    /* Body: mix one 4-byte chunk per iteration. */
    for (remaining = len; remaining >= 4; remaining -= 4, cursor += 4)
    {
        unsigned int chunk = *(unsigned int *)cursor;

        chunk *= m;
        chunk ^= chunk >> r;
        chunk *= m;

        hash = (hash * m) ^ chunk;
    }

    /* Tail: the cases deliberately fall through to each other. */
    switch (remaining)
    {
    case 3:
        hash ^= cursor[2] << 16;
        /* fall through */
    case 2:
        hash ^= cursor[1] << 8;
        /* fall through */
    case 1:
        hash ^= cursor[0];
        hash *= m;
    }

    /* Final avalanche so the last bytes are well incorporated. */
    hash ^= hash >> 13;
    hash *= m;
    hash ^= hash >> 15;

    return hash;
}
|
212
src/bloom.c
Executable file
212
src/bloom.c
Executable file
@ -0,0 +1,212 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015, Jyri J. Virkki
|
||||
* All rights reserved.
|
||||
*
|
||||
* This file is under BSD license. See LICENSE file.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Refer to bloom.h for documentation on the public interfaces.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <fcntl.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "bloom.h"
|
||||
#include "murmurhash2.h"
|
||||
|
||||
#define MAKESTRING(n) STRING(n)
|
||||
#define STRING(n) #n
|
||||
|
||||
#ifdef __linux__
|
||||
unsigned detect_bucket_size(unsigned fallback_size);
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Test bit number `x` of the bit field stored at `buf`, and optionally
 * set it.
 *
 * The buffer is accessed as an array of 32-bit words: bit `x` is bit
 * (x % 32) of word (x / 32).
 *
 * Returns 1 if the bit was already set. Otherwise returns 0, after
 * setting the bit when `set_bit` is non-zero.
 */
static int test_bit_set_bit(unsigned char * buf, unsigned int x, int set_bit)
{
    uint32_t * words = (uint32_t *)buf;
    const unsigned int index = x >> 5;

    /* Build the mask from an unsigned 1: shifting a signed int by 31 bits
     * (x % 32 == 31) is undefined behaviour. */
    const uint32_t mask = (uint32_t)1 << (x & 31u);
    const uint32_t word = words[index];

    if (word & mask) {
        return 1;
    }

    if (set_bit) {
        words[index] = word | mask;
    }
    return 0;
}
|
||||
|
||||
|
||||
static int bloom_check_add(struct bloom * bloom,
|
||||
const void * buffer, int len, int add)
|
||||
{
|
||||
if (bloom->ready == 0) {
|
||||
(void)printf("bloom at %p not initialized!\n", (void *)bloom);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int hits = 0;
|
||||
register unsigned int a = murmurhash2(buffer, len, 0x9747b28c);
|
||||
register unsigned int b = murmurhash2(buffer, len, a);
|
||||
register unsigned int x;
|
||||
register int i; // TODO why was it unsigned?
|
||||
|
||||
unsigned bucket_index = (a % bloom->buckets);
|
||||
|
||||
unsigned char * bucket_ptr =
|
||||
(bloom->bf + (bucket_index << bloom->bucket_bytes_exponent));
|
||||
|
||||
for (i = 0; i < bloom->hashes; i++) {
|
||||
x = (a + i*b) & bloom->bucket_bits_fast_mod_operand;
|
||||
if (test_bit_set_bit(bucket_ptr, x, add)) {
|
||||
hits++;
|
||||
}
|
||||
}
|
||||
|
||||
if (hits == bloom->hashes) {
|
||||
return 1; // 1 == element already in (or collision)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void setup_buckets(struct bloom * bloom, unsigned int cache_size)
|
||||
{
|
||||
// If caller passed a non-zero cache_size, use it as given, otherwise
|
||||
// either compute it or use built-in default
|
||||
|
||||
if (cache_size == 0) {
|
||||
#ifdef __linux__
|
||||
cache_size = detect_bucket_size(BLOOM_BUCKET_SIZE_FALLBACK);
|
||||
#else
|
||||
cache_size = BLOOM_BUCKET_SIZE_FALLBACK;
|
||||
#endif
|
||||
}
|
||||
|
||||
bloom->buckets = (bloom->bytes / cache_size);
|
||||
bloom->bucket_bytes = cache_size;
|
||||
|
||||
// make sure bloom buffer bytes and bucket_bytes are even
|
||||
int not_even_by = (bloom->bytes % bloom->bucket_bytes);
|
||||
|
||||
if (not_even_by) {
|
||||
// adjust bytes
|
||||
bloom->bytes += (bloom->bucket_bytes - not_even_by);
|
||||
assert((bloom->bytes % bloom->bucket_bytes) == 0); // Should get even
|
||||
|
||||
// adjust bits
|
||||
bloom->bits = bloom->bytes * 8;
|
||||
|
||||
// adjust bits per element
|
||||
bloom->bpe = bloom->bits*1. / bloom->entries;
|
||||
|
||||
// adjust buckets
|
||||
bloom->buckets++;
|
||||
}
|
||||
|
||||
bloom->bucket_bytes_exponent = __builtin_ctz(cache_size);
|
||||
bloom->bucket_bits_fast_mod_operand = (cache_size * 8 - 1);
|
||||
}
|
||||
|
||||
|
||||
int bloom_init_size(struct bloom * bloom, int entries, double error,
|
||||
unsigned int cache_size)
|
||||
{
|
||||
bloom->ready = 0;
|
||||
|
||||
if (entries < 1 || error == 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
bloom->entries = entries;
|
||||
bloom->error = error;
|
||||
|
||||
double num = log(bloom->error);
|
||||
double denom = 0.480453013918201; // ln(2)^2
|
||||
bloom->bpe = -(num / denom);
|
||||
|
||||
double dentries = (double)entries;
|
||||
bloom->bits = (int)(dentries * bloom->bpe);
|
||||
|
||||
if (bloom->bits % 8) {
|
||||
bloom->bytes = (bloom->bits / 8) + 1;
|
||||
} else {
|
||||
bloom->bytes = bloom->bits / 8;
|
||||
}
|
||||
|
||||
bloom->hashes = (int)ceil(0.693147180559945 * bloom->bpe); // ln(2)
|
||||
|
||||
setup_buckets(bloom, cache_size);
|
||||
|
||||
bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
|
||||
if (bloom->bf == NULL) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
bloom->ready = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Initialize `bloom` with the default bucket size.
 * Same return values as bloom_init_size().
 */
int bloom_init(struct bloom * bloom, int entries, double error)
{
    /* 0 asks bloom_init_size() to choose the bucket size itself. */
    const unsigned int default_bucket_size = 0;

    return bloom_init_size(bloom, entries, error, default_bucket_size);
}
|
||||
|
||||
|
||||
/*
 * Look the element up without modifying the filter.
 * Returns the result of bloom_check_add() called with add == 0.
 */
int bloom_check(struct bloom * bloom, const void * buffer, int len)
{
    const int add = 0;

    return bloom_check_add(bloom, buffer, len, add);
}
|
||||
|
||||
|
||||
/*
 * Look the element up and set its bits in the filter.
 * Returns the result of bloom_check_add() called with add == 1.
 */
int bloom_add(struct bloom * bloom, const void * buffer, int len)
{
    const int add = 1;

    return bloom_check_add(bloom, buffer, len, add);
}
|
||||
|
||||
|
||||
void bloom_print(struct bloom * bloom)
|
||||
{
|
||||
(void)printf("bloom at %p\n", (void *)bloom);
|
||||
(void)printf(" ->entries = %d\n", bloom->entries);
|
||||
(void)printf(" ->error = %f\n", bloom->error);
|
||||
(void)printf(" ->bits = %d\n", bloom->bits);
|
||||
(void)printf(" ->bits per elem = %f\n", bloom->bpe);
|
||||
(void)printf(" ->bytes = %d\n", bloom->bytes);
|
||||
(void)printf(" ->buckets = %u\n", bloom->buckets);
|
||||
(void)printf(" ->bucket_bytes = %u\n", bloom->bucket_bytes);
|
||||
(void)printf(" ->bucket_bytes_exponent = %u\n",
|
||||
bloom->bucket_bytes_exponent);
|
||||
(void)printf(" ->bucket_bits_fast_mod_operand = 0%o\n",
|
||||
bloom->bucket_bits_fast_mod_operand);
|
||||
(void)printf(" ->hash functions = %d\n", bloom->hashes);
|
||||
}
|
||||
|
||||
|
||||
void bloom_free(struct bloom * bloom)
|
||||
{
|
||||
if (bloom->ready) {
|
||||
free(bloom->bf);
|
||||
}
|
||||
bloom->ready = 0;
|
||||
}
|
||||
|
||||
|
||||
const char * bloom_version()
|
||||
{
|
||||
return MAKESTRING(BLOOM_VERSION);
|
||||
}
|
188
src/bloom.h
Executable file
188
src/bloom.h
Executable file
@ -0,0 +1,188 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015, Jyri J. Virkki
|
||||
* All rights reserved.
|
||||
*
|
||||
* This file is under BSD license. See LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef _BLOOM_H
|
||||
#define _BLOOM_H
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* On Linux, the code attempts to compute a bucket size based on CPU cache
|
||||
* size info, if available. If that fails for any reason, this fallback size
|
||||
* is used instead.
|
||||
*
|
||||
* On non-Linux systems, this is the bucket size always used unless the
|
||||
* caller overrides it (see bloom_init_size()).
|
||||
*
|
||||
*/
|
||||
#define BLOOM_BUCKET_SIZE_FALLBACK (32 * 1024)
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* It was found that using multiplier x0.5 for CPU L1 cache size is
|
||||
* more effective in terms of CPU usage and, surprisingly, collisions
|
||||
* number.
|
||||
*
|
||||
* Feel free to tune this constant the way it will work for you.
|
||||
*
|
||||
*/
|
||||
#define BLOOM_L1_CACHE_SIZE_DIV 1
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Structure to keep track of one bloom filter. Caller needs to
|
||||
* allocate this and pass it to the functions below. First call for
|
||||
* every struct must be to bloom_init().
|
||||
*
|
||||
*/
|
||||
struct bloom
{
    /* ---- Public, read-only part ----
     * Client code may read these values if desired, but MUST NOT modify
     * any of them. */
    int entries;     /* expected number of entries, as given at init */
    double error;    /* collision probability, as given at init */
    int bits;        /* size of the bit field, in bits */
    int bytes;       /* size of the bit field, in bytes */
    int hashes;      /* number of bit positions derived per element */

    /* ---- Private part ----
     * These fields may go away or change incompatibly at any moment.
     * Client code MUST NOT access or rely on them. */
    unsigned buckets;         /* number of buckets in the bit field */
    unsigned bucket_bytes;    /* size of one bucket, in bytes */

    /* x86 CPU divide by/multiply by operation optimization helpers */
    unsigned bucket_bytes_exponent;         /* shift turning a bucket index
                                               into a byte offset */
    unsigned bucket_bits_fast_mod_operand;  /* mask reducing a hash to a
                                               bit position in a bucket */

    double bpe;            /* bits per element */
    unsigned char * bf;    /* the bit field itself */
    int ready;             /* non-zero once the filter is initialized */
};
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Initialize the bloom filter for use.
|
||||
*
|
||||
* The filter is initialized with a bit field and number of hash functions
|
||||
* according to the computations from the wikipedia entry:
|
||||
* http://en.wikipedia.org/wiki/Bloom_filter
|
||||
*
|
||||
* Optimal number of bits is:
|
||||
* bits = -(entries * ln(error)) / ln(2)^2
|
||||
*
|
||||
* Optimal number of hash functions is:
|
||||
* hashes = bpe * ln(2)
|
||||
*
|
||||
* Parameters:
|
||||
* -----------
|
||||
* bloom - Pointer to an allocated struct bloom (see above).
|
||||
* entries - The expected number of entries which will be inserted.
|
||||
* error - Probability of collision (as long as entries are not
|
||||
* exceeded).
|
||||
*
|
||||
* Return:
|
||||
* -------
|
||||
* 0 - on success
|
||||
* 1 - on failure
|
||||
*
|
||||
*/
|
||||
int bloom_init(struct bloom * bloom, int entries, double error);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Initialize the bloom filter for use.
|
||||
*
|
||||
* See comments above for general information.
|
||||
*
|
||||
* This is the same as bloom_init() but allows the caller to pass in a
|
||||
* cache_size to override the internal value (which is either computed
|
||||
* or the default of BLOOM_BUCKET_SIZE_FALLBACK). Mostly useful for
|
||||
* experimenting.
|
||||
*
|
||||
* See misc/bucketsize for a script which can help identify a good value
|
||||
* for cache_size.
|
||||
*
|
||||
*/
|
||||
int bloom_init_size(struct bloom * bloom, int entries, double error,
|
||||
unsigned int cache_size);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Check if the given element is in the bloom filter. Remember this may
|
||||
* return false positive if a collision occurred.
|
||||
*
|
||||
* Parameters:
|
||||
* -----------
|
||||
* bloom - Pointer to an allocated struct bloom (see above).
|
||||
* buffer - Pointer to buffer containing element to check.
|
||||
* len - Size of 'buffer'.
|
||||
*
|
||||
* Return:
|
||||
* -------
|
||||
* 0 - element is not present
|
||||
* 1 - element is present (or false positive due to collision)
|
||||
* -1 - bloom not initialized
|
||||
*
|
||||
*/
|
||||
int bloom_check(struct bloom * bloom, const void * buffer, int len);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Add the given element to the bloom filter.
|
||||
* The return code indicates if the element (or a collision) was already in,
|
||||
* so for the common check+add use case, no need to call check separately.
|
||||
*
|
||||
* Parameters:
|
||||
* -----------
|
||||
* bloom - Pointer to an allocated struct bloom (see above).
|
||||
* buffer - Pointer to buffer containing element to add.
|
||||
* len - Size of 'buffer'.
|
||||
*
|
||||
* Return:
|
||||
* -------
|
||||
* 0 - element was not present and was added
|
||||
* 1 - element (or a collision) had already been added previously
|
||||
* -1 - bloom not initialized
|
||||
*
|
||||
*/
|
||||
int bloom_add(struct bloom * bloom, const void * buffer, int len);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Print (to stdout) info about this bloom filter. Debugging aid.
|
||||
*
|
||||
*/
|
||||
void bloom_print(struct bloom * bloom);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Deallocate internal storage.
|
||||
*
|
||||
* Upon return, the bloom struct is no longer usable. You may call bloom_init
|
||||
* again on the same struct to reinitialize it again.
|
||||
*
|
||||
* Parameters:
|
||||
* -----------
|
||||
* bloom - Pointer to an allocated struct bloom (see above).
|
||||
*
|
||||
* Return: none
|
||||
*
|
||||
*/
|
||||
void bloom_free(struct bloom * bloom);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Returns version string compiled into library.
|
||||
*
|
||||
* Return: version string
|
||||
*
|
||||
*/
|
||||
const char * bloom_version();
|
||||
|
||||
|
||||
#endif
|
198
src/crc64.c
Normal file
198
src/crc64.c
Normal file
@ -0,0 +1,198 @@
|
||||
/* Redis uses the CRC64 variant with "Jones" coefficients and init value of 0.
|
||||
*
|
||||
* Specification of this CRC64 variant follows:
|
||||
* Name: crc-64-jones
|
||||
* Width: 64 bits
|
||||
* Poly: 0xad93d23594c935a9
|
||||
* Reflected In: True
|
||||
* Xor_In: 0xffffffffffffffff
|
||||
* Reflected_Out: True
|
||||
* Xor_Out: 0x0
|
||||
* Check("123456789"): 0xe9c6d914c4b8d9ca
|
||||
*
|
||||
* Copyright (c) 2012, Salvatore Sanfilippo <antirez at gmail dot com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Redis nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
static const uint64_t crc64_tab[256] = {
|
||||
UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979),
|
||||
UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b),
|
||||
UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6),
|
||||
UINT64_C(0x358804e3f82aa47d), UINT64_C(0x4f50742bc81f2d04),
|
||||
UINT64_C(0xab28ecb46814fe75), UINT64_C(0xd1f09c7c5821770c),
|
||||
UINT64_C(0x5e980d24087fec87), UINT64_C(0x24407dec384a65fe),
|
||||
UINT64_C(0x6b1009c7f05548fa), UINT64_C(0x11c8790fc060c183),
|
||||
UINT64_C(0x9ea0e857903e5a08), UINT64_C(0xe478989fa00bd371),
|
||||
UINT64_C(0x7d08ff3b88be6f81), UINT64_C(0x07d08ff3b88be6f8),
|
||||
UINT64_C(0x88b81eabe8d57d73), UINT64_C(0xf2606e63d8e0f40a),
|
||||
UINT64_C(0xbd301a4810ffd90e), UINT64_C(0xc7e86a8020ca5077),
|
||||
UINT64_C(0x4880fbd87094cbfc), UINT64_C(0x32588b1040a14285),
|
||||
UINT64_C(0xd620138fe0aa91f4), UINT64_C(0xacf86347d09f188d),
|
||||
UINT64_C(0x2390f21f80c18306), UINT64_C(0x594882d7b0f40a7f),
|
||||
UINT64_C(0x1618f6fc78eb277b), UINT64_C(0x6cc0863448deae02),
|
||||
UINT64_C(0xe3a8176c18803589), UINT64_C(0x997067a428b5bcf0),
|
||||
UINT64_C(0xfa11fe77117cdf02), UINT64_C(0x80c98ebf2149567b),
|
||||
UINT64_C(0x0fa11fe77117cdf0), UINT64_C(0x75796f2f41224489),
|
||||
UINT64_C(0x3a291b04893d698d), UINT64_C(0x40f16bccb908e0f4),
|
||||
UINT64_C(0xcf99fa94e9567b7f), UINT64_C(0xb5418a5cd963f206),
|
||||
UINT64_C(0x513912c379682177), UINT64_C(0x2be1620b495da80e),
|
||||
UINT64_C(0xa489f35319033385), UINT64_C(0xde51839b2936bafc),
|
||||
UINT64_C(0x9101f7b0e12997f8), UINT64_C(0xebd98778d11c1e81),
|
||||
UINT64_C(0x64b116208142850a), UINT64_C(0x1e6966e8b1770c73),
|
||||
UINT64_C(0x8719014c99c2b083), UINT64_C(0xfdc17184a9f739fa),
|
||||
UINT64_C(0x72a9e0dcf9a9a271), UINT64_C(0x08719014c99c2b08),
|
||||
UINT64_C(0x4721e43f0183060c), UINT64_C(0x3df994f731b68f75),
|
||||
UINT64_C(0xb29105af61e814fe), UINT64_C(0xc849756751dd9d87),
|
||||
UINT64_C(0x2c31edf8f1d64ef6), UINT64_C(0x56e99d30c1e3c78f),
|
||||
UINT64_C(0xd9810c6891bd5c04), UINT64_C(0xa3597ca0a188d57d),
|
||||
UINT64_C(0xec09088b6997f879), UINT64_C(0x96d1784359a27100),
|
||||
UINT64_C(0x19b9e91b09fcea8b), UINT64_C(0x636199d339c963f2),
|
||||
UINT64_C(0xdf7adabd7a6e2d6f), UINT64_C(0xa5a2aa754a5ba416),
|
||||
UINT64_C(0x2aca3b2d1a053f9d), UINT64_C(0x50124be52a30b6e4),
|
||||
UINT64_C(0x1f423fcee22f9be0), UINT64_C(0x659a4f06d21a1299),
|
||||
UINT64_C(0xeaf2de5e82448912), UINT64_C(0x902aae96b271006b),
|
||||
UINT64_C(0x74523609127ad31a), UINT64_C(0x0e8a46c1224f5a63),
|
||||
UINT64_C(0x81e2d7997211c1e8), UINT64_C(0xfb3aa75142244891),
|
||||
UINT64_C(0xb46ad37a8a3b6595), UINT64_C(0xceb2a3b2ba0eecec),
|
||||
UINT64_C(0x41da32eaea507767), UINT64_C(0x3b024222da65fe1e),
|
||||
UINT64_C(0xa2722586f2d042ee), UINT64_C(0xd8aa554ec2e5cb97),
|
||||
UINT64_C(0x57c2c41692bb501c), UINT64_C(0x2d1ab4dea28ed965),
|
||||
UINT64_C(0x624ac0f56a91f461), UINT64_C(0x1892b03d5aa47d18),
|
||||
UINT64_C(0x97fa21650afae693), UINT64_C(0xed2251ad3acf6fea),
|
||||
UINT64_C(0x095ac9329ac4bc9b), UINT64_C(0x7382b9faaaf135e2),
|
||||
UINT64_C(0xfcea28a2faafae69), UINT64_C(0x8632586aca9a2710),
|
||||
UINT64_C(0xc9622c4102850a14), UINT64_C(0xb3ba5c8932b0836d),
|
||||
UINT64_C(0x3cd2cdd162ee18e6), UINT64_C(0x460abd1952db919f),
|
||||
UINT64_C(0x256b24ca6b12f26d), UINT64_C(0x5fb354025b277b14),
|
||||
UINT64_C(0xd0dbc55a0b79e09f), UINT64_C(0xaa03b5923b4c69e6),
|
||||
UINT64_C(0xe553c1b9f35344e2), UINT64_C(0x9f8bb171c366cd9b),
|
||||
UINT64_C(0x10e3202993385610), UINT64_C(0x6a3b50e1a30ddf69),
|
||||
UINT64_C(0x8e43c87e03060c18), UINT64_C(0xf49bb8b633338561),
|
||||
UINT64_C(0x7bf329ee636d1eea), UINT64_C(0x012b592653589793),
|
||||
UINT64_C(0x4e7b2d0d9b47ba97), UINT64_C(0x34a35dc5ab7233ee),
|
||||
UINT64_C(0xbbcbcc9dfb2ca865), UINT64_C(0xc113bc55cb19211c),
|
||||
UINT64_C(0x5863dbf1e3ac9dec), UINT64_C(0x22bbab39d3991495),
|
||||
UINT64_C(0xadd33a6183c78f1e), UINT64_C(0xd70b4aa9b3f20667),
|
||||
UINT64_C(0x985b3e827bed2b63), UINT64_C(0xe2834e4a4bd8a21a),
|
||||
UINT64_C(0x6debdf121b863991), UINT64_C(0x1733afda2bb3b0e8),
|
||||
UINT64_C(0xf34b37458bb86399), UINT64_C(0x8993478dbb8deae0),
|
||||
UINT64_C(0x06fbd6d5ebd3716b), UINT64_C(0x7c23a61ddbe6f812),
|
||||
UINT64_C(0x3373d23613f9d516), UINT64_C(0x49aba2fe23cc5c6f),
|
||||
UINT64_C(0xc6c333a67392c7e4), UINT64_C(0xbc1b436e43a74e9d),
|
||||
UINT64_C(0x95ac9329ac4bc9b5), UINT64_C(0xef74e3e19c7e40cc),
|
||||
UINT64_C(0x601c72b9cc20db47), UINT64_C(0x1ac40271fc15523e),
|
||||
UINT64_C(0x5594765a340a7f3a), UINT64_C(0x2f4c0692043ff643),
|
||||
UINT64_C(0xa02497ca54616dc8), UINT64_C(0xdafce7026454e4b1),
|
||||
UINT64_C(0x3e847f9dc45f37c0), UINT64_C(0x445c0f55f46abeb9),
|
||||
UINT64_C(0xcb349e0da4342532), UINT64_C(0xb1eceec59401ac4b),
|
||||
UINT64_C(0xfebc9aee5c1e814f), UINT64_C(0x8464ea266c2b0836),
|
||||
UINT64_C(0x0b0c7b7e3c7593bd), UINT64_C(0x71d40bb60c401ac4),
|
||||
UINT64_C(0xe8a46c1224f5a634), UINT64_C(0x927c1cda14c02f4d),
|
||||
UINT64_C(0x1d148d82449eb4c6), UINT64_C(0x67ccfd4a74ab3dbf),
|
||||
UINT64_C(0x289c8961bcb410bb), UINT64_C(0x5244f9a98c8199c2),
|
||||
UINT64_C(0xdd2c68f1dcdf0249), UINT64_C(0xa7f41839ecea8b30),
|
||||
UINT64_C(0x438c80a64ce15841), UINT64_C(0x3954f06e7cd4d138),
|
||||
UINT64_C(0xb63c61362c8a4ab3), UINT64_C(0xcce411fe1cbfc3ca),
|
||||
UINT64_C(0x83b465d5d4a0eece), UINT64_C(0xf96c151de49567b7),
|
||||
UINT64_C(0x76048445b4cbfc3c), UINT64_C(0x0cdcf48d84fe7545),
|
||||
UINT64_C(0x6fbd6d5ebd3716b7), UINT64_C(0x15651d968d029fce),
|
||||
UINT64_C(0x9a0d8ccedd5c0445), UINT64_C(0xe0d5fc06ed698d3c),
|
||||
UINT64_C(0xaf85882d2576a038), UINT64_C(0xd55df8e515432941),
|
||||
UINT64_C(0x5a3569bd451db2ca), UINT64_C(0x20ed197575283bb3),
|
||||
UINT64_C(0xc49581ead523e8c2), UINT64_C(0xbe4df122e51661bb),
|
||||
UINT64_C(0x3125607ab548fa30), UINT64_C(0x4bfd10b2857d7349),
|
||||
UINT64_C(0x04ad64994d625e4d), UINT64_C(0x7e7514517d57d734),
|
||||
UINT64_C(0xf11d85092d094cbf), UINT64_C(0x8bc5f5c11d3cc5c6),
|
||||
UINT64_C(0x12b5926535897936), UINT64_C(0x686de2ad05bcf04f),
|
||||
UINT64_C(0xe70573f555e26bc4), UINT64_C(0x9ddd033d65d7e2bd),
|
||||
UINT64_C(0xd28d7716adc8cfb9), UINT64_C(0xa85507de9dfd46c0),
|
||||
UINT64_C(0x273d9686cda3dd4b), UINT64_C(0x5de5e64efd965432),
|
||||
UINT64_C(0xb99d7ed15d9d8743), UINT64_C(0xc3450e196da80e3a),
|
||||
UINT64_C(0x4c2d9f413df695b1), UINT64_C(0x36f5ef890dc31cc8),
|
||||
UINT64_C(0x79a59ba2c5dc31cc), UINT64_C(0x037deb6af5e9b8b5),
|
||||
UINT64_C(0x8c157a32a5b7233e), UINT64_C(0xf6cd0afa9582aa47),
|
||||
UINT64_C(0x4ad64994d625e4da), UINT64_C(0x300e395ce6106da3),
|
||||
UINT64_C(0xbf66a804b64ef628), UINT64_C(0xc5bed8cc867b7f51),
|
||||
UINT64_C(0x8aeeace74e645255), UINT64_C(0xf036dc2f7e51db2c),
|
||||
UINT64_C(0x7f5e4d772e0f40a7), UINT64_C(0x05863dbf1e3ac9de),
|
||||
UINT64_C(0xe1fea520be311aaf), UINT64_C(0x9b26d5e88e0493d6),
|
||||
UINT64_C(0x144e44b0de5a085d), UINT64_C(0x6e963478ee6f8124),
|
||||
UINT64_C(0x21c640532670ac20), UINT64_C(0x5b1e309b16452559),
|
||||
UINT64_C(0xd476a1c3461bbed2), UINT64_C(0xaeaed10b762e37ab),
|
||||
UINT64_C(0x37deb6af5e9b8b5b), UINT64_C(0x4d06c6676eae0222),
|
||||
UINT64_C(0xc26e573f3ef099a9), UINT64_C(0xb8b627f70ec510d0),
|
||||
UINT64_C(0xf7e653dcc6da3dd4), UINT64_C(0x8d3e2314f6efb4ad),
|
||||
UINT64_C(0x0256b24ca6b12f26), UINT64_C(0x788ec2849684a65f),
|
||||
UINT64_C(0x9cf65a1b368f752e), UINT64_C(0xe62e2ad306bafc57),
|
||||
UINT64_C(0x6946bb8b56e467dc), UINT64_C(0x139ecb4366d1eea5),
|
||||
UINT64_C(0x5ccebf68aecec3a1), UINT64_C(0x2616cfa09efb4ad8),
|
||||
UINT64_C(0xa97e5ef8cea5d153), UINT64_C(0xd3a62e30fe90582a),
|
||||
UINT64_C(0xb0c7b7e3c7593bd8), UINT64_C(0xca1fc72bf76cb2a1),
|
||||
UINT64_C(0x45775673a732292a), UINT64_C(0x3faf26bb9707a053),
|
||||
UINT64_C(0x70ff52905f188d57), UINT64_C(0x0a2722586f2d042e),
|
||||
UINT64_C(0x854fb3003f739fa5), UINT64_C(0xff97c3c80f4616dc),
|
||||
UINT64_C(0x1bef5b57af4dc5ad), UINT64_C(0x61372b9f9f784cd4),
|
||||
UINT64_C(0xee5fbac7cf26d75f), UINT64_C(0x9487ca0fff135e26),
|
||||
UINT64_C(0xdbd7be24370c7322), UINT64_C(0xa10fceec0739fa5b),
|
||||
UINT64_C(0x2e675fb4576761d0), UINT64_C(0x54bf2f7c6752e8a9),
|
||||
UINT64_C(0xcdcf48d84fe75459), UINT64_C(0xb71738107fd2dd20),
|
||||
UINT64_C(0x387fa9482f8c46ab), UINT64_C(0x42a7d9801fb9cfd2),
|
||||
UINT64_C(0x0df7adabd7a6e2d6), UINT64_C(0x772fdd63e7936baf),
|
||||
UINT64_C(0xf8474c3bb7cdf024), UINT64_C(0x829f3cf387f8795d),
|
||||
UINT64_C(0x66e7a46c27f3aa2c), UINT64_C(0x1c3fd4a417c62355),
|
||||
UINT64_C(0x935745fc4798b8de), UINT64_C(0xe98f353477ad31a7),
|
||||
UINT64_C(0xa6df411fbfb21ca3), UINT64_C(0xdc0731d78f8795da),
|
||||
UINT64_C(0x536fa08fdfd90e51), UINT64_C(0x29b7d047efec8728),
|
||||
};
|
||||
|
||||
|
||||
uint64_t crc64(const unsigned char* s, uint64_t l)
|
||||
{
|
||||
uint64_t j;
|
||||
uint64_t crc = 0;
|
||||
|
||||
for (j = 0; j < l; j++)
|
||||
{
|
||||
uint8_t byte = s[j];
|
||||
crc = crc64_tab[(uint8_t)crc ^ byte] ^ (crc >> 8);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
|
||||
/* Test main */
|
||||
//#ifdef TEST_MAIN
|
||||
//#include <stdio.h>
|
||||
//int main(void) {
|
||||
// printf("e9c6d914c4b8d9ca == %016llx\n",
|
||||
//                 (unsigned long long) crc64((unsigned char*)"123456789",9));
|
||||
// return 0;
|
||||
//}
|
||||
//#endif
|
||||
|
||||
|
9
src/crc64.h
Normal file
9
src/crc64.h
Normal file
@ -0,0 +1,9 @@
|
||||
/**
 * @file crc64.h
 * @date March 24th 2016
 * @brief Header file for CRC64 function.
 */

#ifndef CRC64_H_
#define CRC64_H_

#include <stdint.h>

/**
 * @brief Computes the CRC-64 checksum of a byte buffer.
 *
 * @param s Pointer to the bytes to checksum.
 * @param l Number of bytes to read from s.
 *
 * @returns The 64-bit checksum.
 */
uint64_t crc64(const unsigned char* s, uint64_t l);

#endif /* CRC64_H_ */
|
@ -28,9 +28,9 @@
|
||||
|
||||
|
||||
|
||||
bool only_ATGC(char* seq)
|
||||
bool only_ATGC(const char* seq)
|
||||
{
|
||||
char* c = seq;
|
||||
const char* c = seq;
|
||||
|
||||
while (*c)
|
||||
{
|
||||
|
@ -69,7 +69,7 @@ enum
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
bool only_ATGC(char* seq);
|
||||
bool only_ATGC(const char* seq);
|
||||
|
||||
|
||||
/**
|
||||
|
7
src/murmurhash2.h
Executable file
7
src/murmurhash2.h
Executable file
@ -0,0 +1,7 @@
|
||||
|
||||
#ifndef _BLOOM_MURMURHASH2
|
||||
#define _BLOOM_MURMURHASH2
|
||||
|
||||
unsigned int murmurhash2(const void * key, int len, const unsigned int seed);
|
||||
|
||||
#endif
|
449
src/obiavl.c
449
src/obiavl.c
@ -19,6 +19,8 @@
|
||||
#include <fcntl.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "bloom.h"
|
||||
#include "crc64.h"
|
||||
#include "obiavl.h"
|
||||
#include "obierrno.h"
|
||||
#include "obitypes.h"
|
||||
@ -30,158 +32,6 @@
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
////crc crcTable[256];
|
||||
//static crc crcTable[] = {
|
||||
//0x00, 0xd8, 0x68, 0xb0, 0xd0, 0x8, 0xb8, 0x60, 0x78, 0xa0, 0x10, 0xc8, 0xa8, 0x70, 0xc0, 0x18, 0xf0, 0x28, 0x98, 0x40, 0x20, 0xf8, 0x48, 0x90, 0x88, 0x50, 0xe0, 0x38, 0x58, 0x80, 0x30, 0xe8, 0x38, 0xe0, 0x50, 0x88, 0xe8, 0x30, 0x80, 0x58, 0x40, 0x98, 0x28, 0xf0, 0x90, 0x48, 0xf8, 0x20, 0xc8, 0x10, 0xa0, 0x78, 0x18, 0xc0, 0x70, 0xa8, 0xb0, 0x68, 0xd8, 0, 0x60, 0xb8, 0x8, 0xd0, 0x70, 0xa8, 0x18, 0xc0, 0xa0, 0x78, 0xc8, 0x10, 0x8, 0xd0, 0x60, 0xb8, 0xd8, 0, 0xb0, 0x68, 0x80, 0x58, 0xe8, 0x30, 0x50, 0x88, 0x38, 0xe0, 0xf8, 0x20, 0x90, 0x48, 0x28, 0xf0, 0x40, 0x98, 0x48, 0x90, 0x20, 0xf8, 0x98, 0x40, 0xf0, 0x28, 0x30, 0xe8, 0x58, 0x80, 0xe0, 0x38, 0x88, 0x50, 0xb8, 0x60, 0xd0, 0x8, 0x68, 0xb0, 0, 0xd8, 0xc0, 0x18, 0xa8, 0x70, 0x10, 0xc8, 0x78, 0xa0, 0xe0, 0x38, 0x88, 0x50, 0x30, 0xe8, 0x58, 0x80, 0x98, 0x40, 0xf0, 0x28, 0x48, 0x90, 0x20, 0xf8, 0x10, 0xc8, 0x78, 0xa0, 0xc0, 0x18, 0xa8, 0x70, 0x68, 0xb0, 0, 0xd8, 0xb8, 0x60, 0xd0, 0x8, 0xd8, 0, 0xb0, 0x68, 0x8, 0xd0, 0x60, 0xb8, 0xa0, 0x78, 0xc8, 0x10, 0x70, 0xa8, 0x18, 0xc0, 0x28, 0xf0, 0x40, 0x98, 0xf8, 0x20, 0x90, 0x48, 0x50, 0x88, 0x38, 0xe0, 0x80, 0x58, 0xe8, 0x30, 0x90, 0x48, 0xf8, 0x20, 0x40, 0x98, 0x28, 0xf0, 0xe8, 0x30, 0x80, 0x58, 0x38, 0xe0, 0x50, 0x88, 0x60, 0xb8, 0x8, 0xd0, 0xb0, 0x68, 0xd8, 0, 0x18, 0xc0, 0x70, 0xa8, 0xc8, 0x10, 0xa0, 0x78, 0xa8, 0x70, 0xc0, 0x18, 0x78, 0xa0, 0x10, 0xc8, 0xd0, 0x8, 0xb8, 0x60, 0, 0xd8, 0x68, 0xb0, 0x58, 0x80, 0x30, 0xe8, 0x88, 0x50, 0xe0, 0x38, 0x20, 0xf8, 0x48, 0x90, 0xf0, 0x28, 0x98, 0x40
|
||||
//};
|
||||
//
|
||||
//
|
||||
//void crcInit(void)
|
||||
//{
|
||||
// crc remainder;
|
||||
//
|
||||
// fprintf(stderr, "\n");
|
||||
//
|
||||
// /*
|
||||
// * Compute the remainder of each possible dividend.
|
||||
// */
|
||||
// for (int dividend = 0; dividend < 256; ++dividend)
|
||||
// {
|
||||
// /*
|
||||
// * Start with the dividend followed by zeros.
|
||||
// */
|
||||
// remainder = dividend << (WIDTH - 8);
|
||||
//
|
||||
// /*
|
||||
// * Perform modulo-2 division, a bit at a time.
|
||||
// */
|
||||
// for (uint8_t bit = 8; bit > 0; --bit)
|
||||
// {
|
||||
// /*
|
||||
// * Try to divide the current data bit.
|
||||
// */
|
||||
// if (remainder & TOPBIT)
|
||||
// {
|
||||
// remainder = (remainder << 1) ^ POLYNOMIAL;
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// remainder = (remainder << 1);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// /*
|
||||
// * Store the result into the table.
|
||||
// */
|
||||
// crcTable[dividend] = remainder;
|
||||
// fprintf(stderr, "%#x, ", remainder);
|
||||
// }
|
||||
//
|
||||
//} /* crcInit() */
|
||||
//
|
||||
//
|
||||
//crc crcFast(uint8_t const message[], int nBytes)
|
||||
//{
|
||||
// uint8_t data;
|
||||
// crc remainder = 0;
|
||||
//
|
||||
//
|
||||
// /*
|
||||
// * Divide the message by the polynomial, a byte at a time.
|
||||
// */
|
||||
// for (int byte = 0; byte < nBytes; ++byte)
|
||||
// {
|
||||
// data = message[byte] ^ (remainder >> (WIDTH - 8));
|
||||
// remainder = crcTable[data] ^ (remainder << 8);
|
||||
// }
|
||||
//
|
||||
// /*
|
||||
// * The final remainder is the CRC.
|
||||
// */
|
||||
// return (remainder);
|
||||
//
|
||||
//} /* crcFast() */
|
||||
//
|
||||
//
|
||||
//crc compute_crc(const char* s)
|
||||
//{
|
||||
// crc c;
|
||||
// //uint8_t cache;
|
||||
//
|
||||
// //cache = 15;
|
||||
//
|
||||
//// crcInit();
|
||||
//
|
||||
// c = crcFast(s, strlen(s));
|
||||
//
|
||||
// //fprintf(stderr, "\nlen = %d", strlen(argv[1]));
|
||||
//
|
||||
// //fprintf(stderr, "\ncrc = %u\n\n", c);
|
||||
// //fprintf(stderr, "\ncrc mod 8 = %u\n\n", c%8);
|
||||
//
|
||||
// c = c >> 3;
|
||||
// //fprintf(stderr, "\nshifted crc = %u\n\n", c);
|
||||
//
|
||||
// //c = c & cache;
|
||||
// //c = c % 32;
|
||||
//
|
||||
// //fprintf(stderr, "\ncrc = %u\n\n", c);
|
||||
//
|
||||
// return (c & 7);
|
||||
//}
|
||||
|
||||
static unsigned char crc8_table[] = {
|
||||
0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0x95, 0xab, 0xe9, 0xd7,
|
||||
0x6d, 0x53, 0x11, 0x2f, 0x4f, 0x71, 0x33, 0x0d, 0xb7, 0x89, 0xcb, 0xf5,
|
||||
0xda, 0xe4, 0xa6, 0x98, 0x22, 0x1c, 0x5e, 0x60, 0x9e, 0xa0, 0xe2, 0xdc,
|
||||
0x66, 0x58, 0x1a, 0x24, 0x0b, 0x35, 0x77, 0x49, 0xf3, 0xcd, 0x8f, 0xb1,
|
||||
0xd1, 0xef, 0xad, 0x93, 0x29, 0x17, 0x55, 0x6b, 0x44, 0x7a, 0x38, 0x06,
|
||||
0xbc, 0x82, 0xc0, 0xfe, 0x59, 0x67, 0x25, 0x1b, 0xa1, 0x9f, 0xdd, 0xe3,
|
||||
0xcc, 0xf2, 0xb0, 0x8e, 0x34, 0x0a, 0x48, 0x76, 0x16, 0x28, 0x6a, 0x54,
|
||||
0xee, 0xd0, 0x92, 0xac, 0x83, 0xbd, 0xff, 0xc1, 0x7b, 0x45, 0x07, 0x39,
|
||||
0xc7, 0xf9, 0xbb, 0x85, 0x3f, 0x01, 0x43, 0x7d, 0x52, 0x6c, 0x2e, 0x10,
|
||||
0xaa, 0x94, 0xd6, 0xe8, 0x88, 0xb6, 0xf4, 0xca, 0x70, 0x4e, 0x0c, 0x32,
|
||||
0x1d, 0x23, 0x61, 0x5f, 0xe5, 0xdb, 0x99, 0xa7, 0xb2, 0x8c, 0xce, 0xf0,
|
||||
0x4a, 0x74, 0x36, 0x08, 0x27, 0x19, 0x5b, 0x65, 0xdf, 0xe1, 0xa3, 0x9d,
|
||||
0xfd, 0xc3, 0x81, 0xbf, 0x05, 0x3b, 0x79, 0x47, 0x68, 0x56, 0x14, 0x2a,
|
||||
0x90, 0xae, 0xec, 0xd2, 0x2c, 0x12, 0x50, 0x6e, 0xd4, 0xea, 0xa8, 0x96,
|
||||
0xb9, 0x87, 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x03, 0x63, 0x5d, 0x1f, 0x21,
|
||||
0x9b, 0xa5, 0xe7, 0xd9, 0xf6, 0xc8, 0x8a, 0xb4, 0x0e, 0x30, 0x72, 0x4c,
|
||||
0xeb, 0xd5, 0x97, 0xa9, 0x13, 0x2d, 0x6f, 0x51, 0x7e, 0x40, 0x02, 0x3c,
|
||||
0x86, 0xb8, 0xfa, 0xc4, 0xa4, 0x9a, 0xd8, 0xe6, 0x5c, 0x62, 0x20, 0x1e,
|
||||
0x31, 0x0f, 0x4d, 0x73, 0xc9, 0xf7, 0xb5, 0x8b, 0x75, 0x4b, 0x09, 0x37,
|
||||
0x8d, 0xb3, 0xf1, 0xcf, 0xe0, 0xde, 0x9c, 0xa2, 0x18, 0x26, 0x64, 0x5a,
|
||||
0x3a, 0x04, 0x46, 0x78, 0xc2, 0xfc, 0xbe, 0x80, 0xaf, 0x91, 0xd3, 0xed,
|
||||
0x57, 0x69, 0x2b, 0x15};
|
||||
|
||||
|
||||
unsigned crc8(unsigned char *data, size_t len)
|
||||
{
|
||||
unsigned char *end;
|
||||
unsigned crc;
|
||||
|
||||
crc = 0;
|
||||
|
||||
crc ^= 0xff;
|
||||
end = data + len;
|
||||
do {
|
||||
crc = crc8_table[crc ^ *data++];
|
||||
} while (data < end);
|
||||
return crc ^ 0xff;
|
||||
}
|
||||
|
||||
/**
 * Hashes a NUL-terminated string with crc8() and keeps only the three
 * low-order bits of the checksum.
 *
 * @param s The NUL-terminated string to hash.
 *
 * @returns A value between 0 and 7.
 */
crc compute_crc(const char* s)
{
    unsigned checksum = crc8(s, strlen(s));

    // Mask down to 3 bits
    return (checksum & 7);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**************************************************************************
|
||||
*
|
||||
@ -507,7 +357,8 @@ static char* build_avl_file_name(const char* avl_name)
|
||||
char* file_name;
|
||||
|
||||
// Build the file name
|
||||
if (asprintf(&file_name,"%s.oda", avl_name) < 0)
|
||||
file_name = (char*) malloc((strlen(avl_name) + 5)*sizeof(char));
|
||||
if (sprintf(file_name,"%s.oda", avl_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBI_AVL_ERROR);
|
||||
obidebug(1, "\nError building an AVL tree file name");
|
||||
@ -532,7 +383,8 @@ static char* build_avl_data_file_name(const char* avl_name)
|
||||
char* file_name;
|
||||
|
||||
// Build the file name
|
||||
if (asprintf(&file_name,"%s.odd", avl_name) < 0)
|
||||
file_name = (char*) malloc((strlen(avl_name) + 5)*sizeof(char));
|
||||
if (sprintf(file_name,"%s.odd", avl_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBI_AVL_ERROR);
|
||||
obidebug(1, "\nError building an AVL tree data file name");
|
||||
@ -561,7 +413,13 @@ size_t get_avl_header_size()
|
||||
|
||||
size_t get_initial_avl_size()
|
||||
{
|
||||
return getpagesize() * 1;
|
||||
size_t s;
|
||||
size_t m;
|
||||
|
||||
m = 1;
|
||||
s = getpagesize() * m;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
@ -583,7 +441,13 @@ size_t get_avl_data_header_size()
|
||||
|
||||
size_t get_initial_avl_data_size()
|
||||
{
|
||||
return getpagesize() * 1;
|
||||
size_t s;
|
||||
size_t m;
|
||||
|
||||
m = 1;
|
||||
s = getpagesize() * m;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
@ -618,23 +482,8 @@ int grow_avl(OBIDMS_avl_p avl) // TODO Lock when needed
|
||||
size_t new_data_size;
|
||||
size_t header_size;
|
||||
int avl_file_descriptor;
|
||||
char* avl_file_name;
|
||||
|
||||
// Get the avl file name
|
||||
avl_file_name = build_avl_file_name((avl->header)->avl_name);
|
||||
if (avl_file_name == NULL)
|
||||
return -1;
|
||||
|
||||
// Open the avl file
|
||||
avl_file_descriptor = openat(avl->dir_fd, avl_file_name, O_RDWR);
|
||||
if (avl_file_descriptor < 0)
|
||||
{
|
||||
obi_set_errno(OBI_AVL_ERROR);
|
||||
obidebug(1, "\nError opening an AVL tree file");
|
||||
free(avl_file_name);
|
||||
return -1;
|
||||
}
|
||||
free(avl_file_name);
|
||||
avl_file_descriptor = avl->avl_fd;
|
||||
|
||||
// Calculate the new file size
|
||||
old_data_size = (avl->header)->avl_size;
|
||||
@ -683,7 +532,7 @@ int grow_avl(OBIDMS_avl_p avl) // TODO Lock when needed
|
||||
// Set the new avl size
|
||||
(avl->header)->avl_size = new_data_size;
|
||||
|
||||
close(avl_file_descriptor);
|
||||
//close(avl_file_descriptor);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -696,23 +545,8 @@ int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed
|
||||
index_t new_data_size;
|
||||
size_t header_size;
|
||||
int avl_data_file_descriptor;
|
||||
char* avl_data_file_name;
|
||||
|
||||
// Get the avl data file name
|
||||
avl_data_file_name = build_avl_data_file_name((avl->header)->avl_name);
|
||||
if (avl_data_file_name == NULL)
|
||||
return -1;
|
||||
|
||||
// Open the avl data file
|
||||
avl_data_file_descriptor = openat(avl->dir_fd, avl_data_file_name, O_RDWR);
|
||||
if (avl_data_file_descriptor < 0)
|
||||
{
|
||||
obi_set_errno(OBI_AVL_ERROR);
|
||||
obidebug(1, "\nError opening an AVL tree data file");
|
||||
free(avl_data_file_name);
|
||||
return -1;
|
||||
}
|
||||
free(avl_data_file_name);
|
||||
avl_data_file_descriptor = avl->data_fd;
|
||||
|
||||
// Calculate the new file size
|
||||
old_data_size = ((avl->data)->header)->data_size_max;
|
||||
@ -763,7 +597,7 @@ int grow_avl_data(OBIDMS_avl_p avl) // TODO Lock when needed
|
||||
// Initialize new data to 0
|
||||
memset(((avl->data)->data)+old_data_size, 0, new_data_size - old_data_size);
|
||||
|
||||
close(avl_data_file_descriptor);
|
||||
//close(avl_data_file_descriptor);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -857,6 +691,7 @@ AVL_node_p avl_create_node(OBIDMS_avl_p avl, index_t node_idx)
|
||||
node->right_child = -1;
|
||||
node->balance_factor = 0;
|
||||
node->value = -1;
|
||||
node->crc64 = 0; // TODO
|
||||
|
||||
return node;
|
||||
}
|
||||
@ -1085,15 +920,20 @@ int obi_avl_exists(OBIDMS_p dms, const char* avl_name)
|
||||
struct stat buffer;
|
||||
char* avl_file_path;
|
||||
char* avl_file_name;
|
||||
char* avl_file_relative_path;
|
||||
int relative_path_size;
|
||||
int check_dir;
|
||||
|
||||
// Build file name
|
||||
// Build the AVL tree file path
|
||||
avl_file_name = build_avl_file_name(avl_name);
|
||||
if (avl_file_name == NULL)
|
||||
return -1;
|
||||
|
||||
// Build the AVL tree file path
|
||||
avl_file_path = get_full_path(dms->avl_dir_fd, avl_file_name);
|
||||
relative_path_size = strlen(avl_file_name) + strlen(AVL_TREES_DIR_NAME) + 2;
|
||||
avl_file_relative_path = (char*) malloc(relative_path_size*sizeof(char));
|
||||
strcpy(avl_file_relative_path, AVL_TREES_DIR_NAME);
|
||||
strcat(avl_file_relative_path, "/");
|
||||
strcat(avl_file_relative_path, avl_file_name);
|
||||
avl_file_path = get_full_path(dms, avl_file_relative_path);
|
||||
if (avl_file_path == NULL)
|
||||
{
|
||||
obidebug(1, "\nError getting the file path for an AVL tree file");
|
||||
@ -1104,6 +944,7 @@ int obi_avl_exists(OBIDMS_p dms, const char* avl_name)
|
||||
|
||||
free(avl_file_path);
|
||||
free(avl_file_name);
|
||||
free(avl_file_relative_path);
|
||||
|
||||
if (check_dir == 0)
|
||||
return 1;
|
||||
@ -1131,21 +972,96 @@ OBIDMS_avl_p obi_avl(OBIDMS_p dms, const char* avl_name)
|
||||
}
|
||||
|
||||
|
||||
OBIDMS_avl_p* obi_create_avl_in_64_parts(OBIDMS_p dms, const char* avl_name)
|
||||
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name)
|
||||
{
|
||||
OBIDMS_avl_p* avls;
|
||||
OBIDMS_avl_group_p avl_group;
|
||||
char* avl_name_with_idx;
|
||||
uint8_t i;
|
||||
|
||||
avls = (OBIDMS_avl_p*) malloc(64*sizeof(OBIDMS_avl_p));
|
||||
avl_group = (OBIDMS_avl_group_p) malloc(sizeof(OBIDMS_avl_group_t));
|
||||
|
||||
for (i=0; i < 64; i++)
|
||||
// Create 1st avl
|
||||
avl_name_with_idx = malloc((strlen(avl_name) + 3)*sizeof(char));
|
||||
if (sprintf(avl_name_with_idx, "%s_%u", avl_name, 0) < 0)
|
||||
{
|
||||
asprintf(&avl_name_with_idx,"%s_%u", avl_name, i);
|
||||
avls[i] = obi_create_avl(dms, avl_name_with_idx);
|
||||
obi_set_errno(OBI_AVL_ERROR);
|
||||
obidebug(1, "\nError building an AVL tree file name");
|
||||
return NULL;
|
||||
}
|
||||
(avl_group->sub_avls)[0] = obi_create_avl(dms, avl_name_with_idx);
|
||||
if ((avl_group->sub_avls)[0] == NULL)
|
||||
{
|
||||
obidebug(1, "\nError creating the first AVL of an AVL group");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return avls;
|
||||
avl_group->current_avl_idx = 0;
|
||||
strcpy(avl_group->avl_name, avl_name);
|
||||
|
||||
avl_group->dms = dms;
|
||||
|
||||
return avl_group;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Unmaps the data region and then the tree region of an AVL tree.
 *
 * The sizes are taken from the two headers (data_size_max for the data,
 * nb_items_max nodes for the tree). The function stops at the first
 * munmap() failure, so the tree stays mapped if unmapping the data fails.
 *
 * @param avl The AVL tree structure whose regions are unmapped.
 *
 * @retval 0 if both regions were unmapped.
 * @retval -1 if a munmap() call failed.
 */
int unmap_an_avl(OBIDMS_avl_p avl)
{
    int status;

    // Data region first
    status = munmap(avl->data->data, avl->data->header->data_size_max);
    if (status < 0)
        return -1;

    // Then the array of nodes
    status = munmap(avl->tree, avl->header->nb_items_max * sizeof(AVL_node_t));
    if (status < 0)
        return -1;

    return 0;
}
|
||||
|
||||
|
||||
/**
 * Maps the data region and the tree region of an AVL tree again,
 * read-only, from the file descriptors kept in the AVL structure.
 *
 * Both mappings are created before the structure is modified: on failure
 * the pointers stored in the structure are left as they were and any
 * mapping created by this call is released.
 *
 * @param avl The AVL tree structure whose regions are mapped.
 *
 * @retval 0 if both regions were mapped.
 * @retval -1 if an mmap() call failed.
 */
int remap_an_avl(OBIDMS_avl_p avl)
{
    void*  data_map;
    void*  tree_map;
    size_t data_size;
    size_t tree_size;

    data_size = ((avl->data)->header)->data_size_max;
    tree_size = ((avl->header)->nb_items_max) * sizeof(AVL_node_t);

    data_map = mmap(NULL,
                    data_size,
                    PROT_READ,
                    MAP_SHARED,    // TODO test MAP_PRIVATE?
                    avl->data_fd,
                    ((avl->data)->header)->header_size);
    // mmap() reports failure with MAP_FAILED, never with NULL
    if (data_map == MAP_FAILED)
        return -1;

    tree_map = mmap(NULL,
                    tree_size,
                    PROT_READ,
                    MAP_SHARED,    // TODO test MAP_PRIVATE?
                    avl->avl_fd,
                    (avl->header)->header_size);
    if (tree_map == MAP_FAILED)
    {
        // Do not leak the data mapping obtained just above
        munmap(data_map, data_size);
        return -1;
    }

    (avl->data)->data = data_map;
    avl->tree = tree_map;

    return 0;
}
|
||||
|
||||
|
||||
/**
 * Appends a new AVL tree to an AVL group and makes it the current one.
 *
 * The AVL that was current so far is unmapped, the current index is
 * incremented, and a new AVL named "<group name>_<index>" is created and
 * stored at that index.
 *
 * @warning If the name cannot be built or the creation fails, the current
 *          index has already been advanced and the previous AVL is already
 *          unmapped.
 *
 * @param avl_group The group to which a new AVL is added.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
int obi_add_new_avl_in_group(OBIDMS_avl_group_p avl_group)
{
    OBIDMS_avl_p new_avl;
    char*        avl_name_with_idx;
    int          name_length;

    // Unmap the AVL that was current until now
    if (unmap_an_avl((avl_group->sub_avls)[avl_group->current_avl_idx]) < 0)
    {
        obi_set_errno(OBI_AVL_ERROR);
        obidebug(1, "\nError unmapping the current AVL tree of a group");
        return -1;
    }

    (avl_group->current_avl_idx)++;

    // Let snprintf compute the exact length of the name instead of
    // estimating the number of digits of the index
    name_length = snprintf(NULL, 0, "%s_%u", avl_group->avl_name, avl_group->current_avl_idx);
    if (name_length < 0)
    {
        obi_set_errno(OBI_AVL_ERROR);
        obidebug(1, "\nError building an AVL tree file name");
        return -1;
    }

    avl_name_with_idx = (char*) malloc((((size_t) name_length) + 1)*sizeof(char));
    if (avl_name_with_idx == NULL)
    {
        obi_set_errno(OBI_AVL_ERROR);
        obidebug(1, "\nError allocating memory for an AVL tree file name");
        return -1;
    }

    if (snprintf(avl_name_with_idx, ((size_t) name_length) + 1, "%s_%u", avl_group->avl_name, avl_group->current_avl_idx) < 0)
    {
        obi_set_errno(OBI_AVL_ERROR);
        obidebug(1, "\nError building an AVL tree file name");
        free(avl_name_with_idx);
        return -1;
    }

    new_avl = obi_create_avl(avl_group->dms, avl_name_with_idx);

    // obi_create_avl() copies the name into the AVL header, so the
    // temporary buffer is released here instead of being leaked
    // NOTE(review): confirm obi_create_avl() keeps no other reference to it
    free(avl_name_with_idx);

    (avl_group->sub_avls)[avl_group->current_avl_idx] = new_avl;
    if (new_avl == NULL)
    {
        obidebug(1, "\nError creating a new AVL tree in a group");
        return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
@ -1251,7 +1167,7 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name)
|
||||
// Initialize all bits to 0
|
||||
memset(avl_data->data, 0, (avl_data->header)->data_size_max);
|
||||
|
||||
close(avl_data_file_descriptor);
|
||||
//close(avl_data_file_descriptor);
|
||||
|
||||
|
||||
// Create the AVL tree file
|
||||
@ -1351,7 +1267,13 @@ OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name)
|
||||
(avl->header)->creation_date = time(NULL);
|
||||
strcpy((avl->header)->avl_name, avl_name);
|
||||
|
||||
close(avl_file_descriptor);
|
||||
avl->avl_fd = avl_file_descriptor;
|
||||
avl->data_fd = avl_data_file_descriptor;
|
||||
|
||||
// Bloom filter
|
||||
bloom_init(&((avl->header)->bloom_filter), NODE_COUNT_PER_AVL, BLOOM_FILTER_ERROR_RATE);
|
||||
|
||||
//close(avl_file_descriptor);
|
||||
|
||||
// Add in the list of opened AVL trees
|
||||
*(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl;
|
||||
@ -1458,7 +1380,7 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
close(avl_data_file_descriptor);
|
||||
//close(avl_data_file_descriptor);
|
||||
|
||||
|
||||
// Open the AVL tree file
|
||||
@ -1544,7 +1466,10 @@ OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name)
|
||||
avl->directory = dms->avl_directory;
|
||||
avl->dir_fd = avl_dir_file_descriptor;
|
||||
|
||||
close(avl_file_descriptor);
|
||||
avl->avl_fd = avl_file_descriptor;
|
||||
avl->data_fd = avl_data_file_descriptor;
|
||||
|
||||
//close(avl_file_descriptor);
|
||||
|
||||
// Add in the list of opened AVL trees
|
||||
*(((dms->opened_avls)->avls)+((dms->opened_avls)->nb_opened_avls)) = avl;
|
||||
@ -1603,12 +1528,84 @@ int obi_close_avl(OBIDMS_avl_p avl)
|
||||
}
|
||||
|
||||
|
||||
/**
 * Retrieves a value from an AVL group using a combined index.
 *
 * The upper 32 bits of the index select the AVL within the group and the
 * lower 32 bits are the index passed to obi_avl_get() for that AVL.
 *
 * @param avl_group The group holding the AVL trees.
 * @param idx The combined index of the value.
 *
 * @returns A pointer to the value, as returned by obi_avl_get().
 */
byte_t* obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx)
{
    const int32_t sub_avl_number = (int32_t) (idx >> 32);
    const index_t idx_in_sub_avl = idx & 0x00000000FFFFFFFF;

    return obi_avl_get(avl_group->sub_avls[sub_avl_number], idx_in_sub_avl);
}
|
||||
|
||||
|
||||
/**
 * Returns the address of the value stored at a given index in the data
 * array of an AVL tree. No bounds check is performed.
 *
 * @param avl The AVL tree structure.
 * @param idx The index of the value in the data array.
 *
 * @returns A pointer into the data array, at offset idx.
 */
byte_t* obi_avl_get(OBIDMS_avl_p avl, index_t idx)
{
    byte_t* data_start = avl->data->data;

    return &(data_start[idx]);
}
|
||||
|
||||
|
||||
/**
 * Queries the bloom filter stored in the header of an AVL tree for a value.
 *
 * The number of bytes checked is BYTE_ARRAY_HEADER_SIZE plus the int32_t
 * read at offset 1 of the value (presumably the payload length kept in
 * the byte array header -- TODO confirm).
 *
 * @param avl The AVL tree whose bloom filter is queried.
 * @param value The byte array to look for.
 *
 * @returns The result of bloom_check().
 */
int maybe_in_avl(OBIDMS_avl_p avl, byte_t* value)
{
    int32_t stored_length = *((int32_t*)(value+1));

    return bloom_check(&(avl->header->bloom_filter), value, (BYTE_ARRAY_HEADER_SIZE + stored_length));
}
|
||||
|
||||
|
||||
/**
 * Internal helper packing the index of an AVL within its group (upper
 * 32 bits) and the index of a value within that AVL (lower 32 bits) into
 * one 64-bit index.
 */
static int64_t avl_group_pack_index(int64_t avl_idx, int32_t index_in_avl)
{
    return (avl_idx << 32) + index_in_avl;
}


/**
 * Finds a value in an AVL group, or adds it to the current AVL of the
 * group if it is not stored in any of them.
 *
 * The current AVL is searched first, then each older AVL whose bloom
 * filter reports a possible match; older AVLs are remapped for the search
 * and unmapped again afterwards. When the value is not found and the
 * current AVL already holds NODE_COUNT_PER_AVL items, a new AVL is added
 * to the group before the insertion.
 *
 * @param avl_group The group in which the value is looked up or inserted.
 * @param value The byte array to look up or insert.
 *
 * @returns The combined index of the value: index of the AVL in the upper
 *          32 bits, index of the value within that AVL in the lower 32 bits.
 * @retval -1 if an error occurred.
 */
int64_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value) // TODO won't be index_t
{
    OBIDMS_avl_p sub_avl;
    int32_t      index_in_avl;
    int          i;

    // Look in the current AVL first
    sub_avl = (avl_group->sub_avls)[avl_group->current_avl_idx];
    if (maybe_in_avl(sub_avl, value))
    {
        index_in_avl = (int32_t) obi_avl_find(sub_avl, value);
        if (index_in_avl >= 0)
            return avl_group_pack_index(avl_group->current_avl_idx, index_in_avl);
    }

    // Then look in the older AVLs, which are unmapped when not in use
    for (i=0; i < (avl_group->current_avl_idx); i++)
    {
        sub_avl = (avl_group->sub_avls)[i];
        if (maybe_in_avl(sub_avl, value))
        {
            if (remap_an_avl(sub_avl) < 0)
                return -1;
            index_in_avl = (int32_t) obi_avl_find(sub_avl, value);
            if (unmap_an_avl(sub_avl) < 0)
                return -1;
            if (index_in_avl >= 0)
                return avl_group_pack_index(i, index_in_avl);
        }
    }

    // Not found in any AVL: add in current
    // First, check if make new AVL
    if ((((avl_group->sub_avls)[avl_group->current_avl_idx])->header)->nb_items == NODE_COUNT_PER_AVL) // TODO add condition with data size
    {
        // On failure the new slot holds NULL: stop here rather than
        // dereferencing it below
        if (obi_add_new_avl_in_group(avl_group) < 0)
            return -1;
    }

    sub_avl = (avl_group->sub_avls)[avl_group->current_avl_idx];

    // Add in the current AVL
    bloom_add(&((sub_avl->header)->bloom_filter), value, BYTE_ARRAY_HEADER_SIZE + *((int32_t*)(value+1)));

    // Build the index containing the AVL index
    index_in_avl = (int32_t) obi_avl_add(sub_avl, value);
    // A negative index cannot be packed into a valid combined index
    // NOTE(review): assumes obi_avl_add() reports errors with a negative value -- confirm
    if (index_in_avl < 0)
        return -1;

    return avl_group_pack_index(avl_group->current_avl_idx, index_in_avl);
}
|
||||
|
||||
|
||||
// Insert a new node
|
||||
index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
||||
{
|
||||
@ -1622,6 +1619,10 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
||||
int n = 0;
|
||||
int depth = 0;
|
||||
|
||||
uint64_t crc;
|
||||
|
||||
crc = crc64(value, BYTE_ARRAY_HEADER_SIZE + ((uint64_t) (*((int32_t*)(value+1))))); // TODO warning
|
||||
|
||||
// Check if first node
|
||||
if (!((avl->header)->nb_items))
|
||||
{
|
||||
@ -1630,6 +1631,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
||||
// Add the value in the data array and store its index
|
||||
value_data_idx = avl_add_value_in_data_array(avl, value);
|
||||
node_to_add->value = value_data_idx;
|
||||
node_to_add->crc64 = crc;
|
||||
|
||||
// Update the number of items
|
||||
((avl->header)->nb_items)++;
|
||||
@ -1661,8 +1663,15 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
||||
parent = next;
|
||||
|
||||
// Compare value with value of current node
|
||||
to_compare = obi_avl_get(avl, current_node->value);
|
||||
comp = byte_array_compare(to_compare, value);
|
||||
//to_compare = obi_avl_get(avl, current_node->value);
|
||||
//comp = byte_array_compare(to_compare, value);
|
||||
comp = (current_node->crc64) - crc;
|
||||
|
||||
if (comp == 0)
|
||||
{ // check if really same value
|
||||
to_compare = obi_avl_get(avl, current_node->value);
|
||||
comp = byte_array_compare(to_compare, value);
|
||||
}
|
||||
|
||||
if (comp > 0)
|
||||
// Go to left child
|
||||
@ -1673,8 +1682,8 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
||||
else if (comp == 0)
|
||||
// Value already stored
|
||||
{
|
||||
//fprintf(stderr, "\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
|
||||
return current_node->value;
|
||||
fprintf(stderr, "\n>>>ALREADY IN, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
|
||||
return current_node->value; // TODO should trigger error if using bloom filters
|
||||
}
|
||||
|
||||
depth++;
|
||||
@ -1702,6 +1711,7 @@ index_t obi_avl_add(OBIDMS_avl_p avl, byte_t* value)
|
||||
// Add the value in the data array and store its index
|
||||
value_data_idx = avl_add_value_in_data_array(avl, value);
|
||||
node_to_add->value = value_data_idx;
|
||||
node_to_add->crc64 = crc;
|
||||
|
||||
// Update the number of items
|
||||
((avl->header)->nb_items)++;
|
||||
@ -1740,14 +1750,25 @@ index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
|
||||
byte_t* to_compare;
|
||||
AVL_node_p current_node;
|
||||
|
||||
uint64_t crc;
|
||||
crc = crc64(value, BYTE_ARRAY_HEADER_SIZE + ((uint64_t) (*((int32_t*)(value+1))))); // TODO warning
|
||||
|
||||
next = (avl->header)->root_idx;
|
||||
while (next != -1)
|
||||
{
|
||||
current_node = (avl->tree)+next;
|
||||
|
||||
// Compare value with value of current node
|
||||
to_compare = obi_avl_get(avl, current_node->value);
|
||||
comp = byte_array_compare(to_compare, value);
|
||||
//to_compare = obi_avl_get(avl, current_node->value);
|
||||
//comp = byte_array_compare(to_compare, value);
|
||||
|
||||
comp = (current_node->crc64) - crc;
|
||||
|
||||
if (comp == 0)
|
||||
{ // check if really same value
|
||||
to_compare = obi_avl_get(avl, current_node->value);
|
||||
comp = byte_array_compare(to_compare, value);
|
||||
}
|
||||
|
||||
if (comp > 0)
|
||||
// Go to left child
|
||||
@ -1756,8 +1777,10 @@ index_t obi_avl_find(OBIDMS_avl_p avl, byte_t* value)
|
||||
// Go to right child
|
||||
next = current_node->right_child;
|
||||
else if (comp == 0)
|
||||
// Value found
|
||||
{ // Value found
|
||||
fprintf(stderr, "\n>>>ALREADY IN in find, %s, %lld\n", obi_obibytes_to_seq(value), (avl->header)->nb_items);
|
||||
return current_node->value;
|
||||
}
|
||||
}
|
||||
// Value not found
|
||||
return -1;
|
||||
|
39
src/obiavl.h
39
src/obiavl.h
@ -24,8 +24,13 @@
|
||||
|
||||
#include "obidms.h"
|
||||
#include "obitypes.h"
|
||||
#include "bloom.h"
|
||||
|
||||
|
||||
#define NODE_COUNT_PER_AVL (10000000)
|
||||
|
||||
#define BLOOM_FILTER_ERROR_RATE (0.001)
|
||||
|
||||
#define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name.
|
||||
*/
|
||||
#define AVL_GROWTH_FACTOR (2) /**< The growth factor when an AVL tree is enlarged.
|
||||
@ -39,6 +44,8 @@
|
||||
#define BYTE_ARRAY_HEADER_SIZE (9) /**< The size of the header of a byte array.
|
||||
*/
|
||||
|
||||
typedef struct bloom bloom_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief AVL tree node structure.
|
||||
@ -48,10 +55,11 @@ typedef struct AVL_node {
|
||||
*/
|
||||
index_t right_child; /**< Index of right greater child node.
|
||||
*/
|
||||
int8_t balance_factor; /**< Balance factor of the node.
|
||||
int8_t balance_factor; /**< Balance factor of the node.
|
||||
*/
|
||||
index_t value; /**< Index of the value associated with the node in the data array.
|
||||
*/
|
||||
uint64_t crc64; // TODO
|
||||
} AVL_node_t, *AVL_node_p;
|
||||
|
||||
|
||||
@ -103,6 +111,7 @@ typedef struct OBIDMS_avl_header {
|
||||
*/
|
||||
time_t creation_date; /**< Date of creation of the file.
|
||||
*/
|
||||
bloom_t bloom_filter;
|
||||
} OBIDMS_avl_header_t, *OBIDMS_avl_header_p;
|
||||
|
||||
|
||||
@ -132,9 +141,28 @@ typedef struct OBIDMS_avl {
|
||||
*/
|
||||
size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL tree is used.
|
||||
*/
|
||||
int avl_fd;
|
||||
int data_fd;
|
||||
} OBIDMS_avl_t, *OBIDMS_avl_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS AVL tree group structure.
*
* Bundles up to 64 AVL trees (the "sub-AVLs") behind a single handle, together
* with the index of the sub-AVL currently in use.
|
||||
*/
|
||||
typedef struct OBIDMS_avl_group {
|
||||
// TODO put each group in a directory later
|
||||
/* Sub-AVL handles of the group; the capacity is hard-coded to 64. */
OBIDMS_avl_p sub_avls[64]; // TODO macro for max
|
||||
/* Index into sub_avls of the current sub-AVL. The insertion code adds new
 * values to sub_avls[current_avl_idx] and stores this index, shifted left
 * by 32 bits, in the index it returns. */
int current_avl_idx;
|
||||
/* Buffer of AVL_MAX_NAME + 1 characters; presumably the base name of the
 * group, with the extra byte for the terminating NUL -- TODO confirm. */
char avl_name[AVL_MAX_NAME+1];
|
||||
/* Presumably the DMS the group belongs to -- TODO confirm against
 * obi_create_avl_group(). */
OBIDMS_p dms;
|
||||
} OBIDMS_avl_group_t, *OBIDMS_avl_group_p;
|
||||
|
||||
|
||||
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name);
|
||||
index_t insert_in_avl_group(OBIDMS_avl_group_p avl_group, byte_t* value);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if an AVL tree already exists or not.
|
||||
*
|
||||
@ -337,14 +365,11 @@ byte_t* obi_seq_to_obibytes(char* seq);
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_obibytes_to_seq(byte_t* value_b);
|
||||
const char* obi_obibytes_to_seq(byte_t* value_b); // TODO move to encode source files
|
||||
|
||||
|
||||
OBIDMS_avl_p* obi_create_avl_in_64_parts(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
typedef uint8_t crc;
|
||||
|
||||
crc compute_crc(const char* s);
|
||||
// TODO
|
||||
byte_t* obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
|
||||
|
||||
|
||||
#endif /* OBIAVL_H_ */
|
||||
|
@ -105,7 +105,8 @@ static char* build_directory_name(const char* dms_name)
|
||||
char* directory_name;
|
||||
|
||||
// Build the database directory name
|
||||
if (asprintf(&directory_name, "%s.obidms", dms_name) < 0)
|
||||
directory_name = (char*) malloc((strlen(dms_name) + 8)*sizeof(char));
|
||||
if (sprintf(directory_name, "%s.obidms", dms_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_MEMORY_ERROR);
|
||||
obidebug(1, "\nProblem building an OBIDMS directory name");
|
||||
@ -130,7 +131,8 @@ static char* build_infos_file_name(const char* dms_name)
|
||||
char* file_name;
|
||||
|
||||
// Build file name
|
||||
if (asprintf(&file_name, "%s_infos", dms_name) < 0)
|
||||
file_name = (char*) malloc((strlen(dms_name) + 7)*sizeof(char));
|
||||
if (sprintf(file_name, "%s_infos", dms_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_MEMORY_ERROR);
|
||||
obidebug(1, "\nProblem building an informations file name");
|
||||
@ -391,7 +393,7 @@ OBIDMS_p obi_open_dms(const char* dms_name)
|
||||
dms->little_endian = little_endian_dms;
|
||||
|
||||
// Open the AVL trees directory
|
||||
dms->avl_directory = private_opendirat(dms->dir_fd, AVL_TREES_DIR_NAME);
|
||||
dms->avl_directory = opendir_in_dms(dms, AVL_TREES_DIR_NAME);
|
||||
if (dms->avl_directory == NULL)
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
|
@ -379,8 +379,9 @@ OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, boo
|
||||
|
||||
buffer_size = 2048; // TODO
|
||||
|
||||
main_taxonomy_dir_path = get_full_path(dms->dir_fd, TAXONOMY_DIR_NAME);
|
||||
if (asprintf(&taxonomy_path, "%s/%s/%s", main_taxonomy_dir_path, taxonomy_name, taxonomy_name) < 0)
|
||||
main_taxonomy_dir_path = get_full_path(dms, TAXONOMY_DIR_NAME);
|
||||
taxonomy_path = (char*) malloc((strlen(main_taxonomy_dir_path) + strlen(taxonomy_name) + strlen(taxonomy_name) + 3)*sizeof(char));
|
||||
if (sprintf(taxonomy_path, "%s/%s/%s", main_taxonomy_dir_path, taxonomy_name, taxonomy_name) < 0)
|
||||
{
|
||||
free(main_taxonomy_dir_path);
|
||||
obi_close_taxonomy(tax);
|
||||
|
@ -156,9 +156,12 @@ static index_t get_line_count_per_page(OBIType_t data_type, index_t nb_elements_
|
||||
static char* build_column_file_name(const char* column_name, obiversion_t version_number)
|
||||
{
|
||||
char* file_name;
|
||||
int version_number_length;
|
||||
|
||||
// Build the file name
|
||||
if (asprintf(&file_name,"%s@%d.odc", column_name, version_number) < 0)
|
||||
version_number_length = (version_number == 0 ? 1 : (int)(log10(version_number)+1));
|
||||
file_name = (char*) malloc((strlen(column_name) + version_number_length + 6)*sizeof(char)); // TODO check the mallocs...
|
||||
if (sprintf(file_name,"%s@%d.odc", column_name, version_number) < 0)
|
||||
{
|
||||
obi_set_errno(OBICOL_MEMORY_ERROR);
|
||||
obidebug(1, "\nError building a column file name");
|
||||
@ -174,7 +177,8 @@ static char* build_version_file_name(const char* column_name)
|
||||
char* file_name;
|
||||
|
||||
// Build the file name
|
||||
if (asprintf(&file_name,"%s.odv", column_name) < 0)
|
||||
file_name = (char*) malloc((strlen(column_name) + 5)*sizeof(char));
|
||||
if (sprintf(file_name,"%s.odv", column_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBICOL_MEMORY_ERROR);
|
||||
obidebug(1, "\nError building a version file name");
|
||||
@ -521,7 +525,6 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
OBIDMS_column_p new_column;
|
||||
OBIDMS_column_directory_p column_directory;
|
||||
OBIDMS_column_header_p header;
|
||||
OBIDMS_avl_p* avl;
|
||||
size_t file_size;
|
||||
obiversion_t version_number;
|
||||
char* column_file_name;
|
||||
@ -723,16 +726,15 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
// If the data type is OBI_STR or OBI_SEQ, the associated obi_avl is opened or created
|
||||
if ((returned_data_type == OBI_STR) || (returned_data_type == OBI_SEQ))
|
||||
{
|
||||
avl = obi_create_avl_in_64_parts(dms, avl_name);
|
||||
if (avl == NULL)
|
||||
{
|
||||
obidebug(1, "\nError opening or creating the aVL tree associated with a column");
|
||||
munmap(new_column->header, header_size);
|
||||
close(column_file_descriptor);
|
||||
free(new_column);
|
||||
return NULL;
|
||||
}
|
||||
memcpy(new_column->avl, avl, 64*sizeof(OBIDMS_avl_p));
|
||||
new_column->avl = obi_create_avl_group(dms, avl_name);
|
||||
// if (avl == NULL) TODO
|
||||
// {
|
||||
// obidebug(1, "\nError opening or creating the aVL tree associated with a column");
|
||||
// munmap(new_column->header, header_size);
|
||||
// close(column_file_descriptor);
|
||||
// free(new_column);
|
||||
// return NULL;
|
||||
// }
|
||||
strncpy(header->avl_name, avl_name, AVL_MAX_NAME);
|
||||
}
|
||||
|
||||
@ -756,11 +758,11 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||
{
|
||||
OBIDMS_column_p column;
|
||||
OBIDMS_column_directory_p column_directory;
|
||||
OBIDMS_avl_p avl;
|
||||
char* column_file_name;
|
||||
int column_file_descriptor;
|
||||
size_t header_size;
|
||||
size_t i;
|
||||
OBIDMS_avl_p avl;
|
||||
|
||||
column = NULL;
|
||||
|
||||
|
@ -98,7 +98,7 @@ typedef struct OBIDMS_column {
|
||||
*/
|
||||
OBIDMS_column_header_p header; /**< A pointer to the header of the column.
|
||||
*/
|
||||
OBIDMS_avl_p avl[64]; /**< A pointer to the group of AVL trees associated with the column if there is one.
|
||||
OBIDMS_avl_group_p avl; /**< TODO A pointer to the group of AVL trees associated with the column if there is one.
|
||||
*/
|
||||
void* data; /**< A `void` pointer to the beginning of the data.
|
||||
*
|
||||
|
@ -61,13 +61,13 @@ int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
|
||||
if (value_b == NULL)
|
||||
return -1;
|
||||
|
||||
if (strlen(value_b) == 0)
|
||||
fprintf(stderr, "\nPOUIC");
|
||||
//if (strlen(value_b) == 0)
|
||||
// fprintf(stderr, "\nPOUIC");
|
||||
|
||||
//fprintf(stderr, "\n>%s||%s", value, obi_obibytes_to_seq(value_b));
|
||||
|
||||
// Add in the AVL tree
|
||||
idx = obi_avl_add((column->avl)[compute_crc(value)], value_b);
|
||||
idx = insert_in_avl_group(column->avl, value_b);
|
||||
if (idx == -1)
|
||||
return -1;
|
||||
|
||||
@ -135,7 +135,8 @@ const char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t l
|
||||
if (idx == OBIIdx_NA)
|
||||
return OBISeq_NA;
|
||||
|
||||
//value_b = obi_avl_get((column->avl)[crc(value)], idx);
|
||||
value_b = obi_avl_group_get(column->avl, idx);
|
||||
|
||||
return obi_obibytes_to_seq(value_b);
|
||||
}
|
||||
|
||||
|
@ -61,7 +61,7 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
|
||||
return -1;
|
||||
|
||||
// Add in the AVL tree
|
||||
idx = obi_avl_add((column->avl)[compute_crc(value)], value_b);
|
||||
idx = insert_in_avl_group(column->avl, value_b);
|
||||
if (idx == -1)
|
||||
return -1;
|
||||
|
||||
@ -129,7 +129,8 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l
|
||||
if (idx == OBIIdx_NA)
|
||||
return OBIStr_NA;
|
||||
|
||||
//value_b = obi_avl_get(column->avl, idx);
|
||||
value_b = obi_avl_group_get(column->avl, idx);
|
||||
|
||||
return obi_obibytes_to_str(value_b);
|
||||
}
|
||||
|
||||
|
@ -65,7 +65,8 @@ static char* build_column_directory_name(const char* column_name)
|
||||
char* column_directory_name;
|
||||
|
||||
// Build the column directory name
|
||||
if (asprintf(&column_directory_name, "%s.obicol", column_name) < 0)
|
||||
column_directory_name = (char*) malloc((strlen(column_name) + 8)*sizeof(char));
|
||||
if (sprintf(column_directory_name, "%s.obicol", column_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBICOLDIR_MEMORY_ERROR);
|
||||
obidebug(1, "\nError building a column directory name");
|
||||
@ -104,7 +105,7 @@ int obi_column_directory_exists(OBIDMS_p dms, const char* column_name)
|
||||
return -1;
|
||||
|
||||
// Get the full path for the column directory
|
||||
full_path = get_full_path(dms->dir_fd, column_directory_name);
|
||||
full_path = get_full_path(dms, column_directory_name);
|
||||
if (full_path == NULL)
|
||||
{
|
||||
obi_set_errno(OBICOLDIR_UNKNOWN_ERROR);
|
||||
@ -169,7 +170,7 @@ OBIDMS_column_directory_p obi_open_column_directory(OBIDMS_p dms, const char* co
|
||||
return NULL;
|
||||
|
||||
// Try to open the column directory
|
||||
directory = private_opendirat(dms->dir_fd, column_directory_name);
|
||||
directory = opendir_in_dms(dms, column_directory_name);
|
||||
if (directory == NULL) {
|
||||
switch (errno)
|
||||
{
|
||||
|
@ -84,7 +84,8 @@ static char* build_obiview_file_name()
|
||||
char* file_name;
|
||||
|
||||
// Build file name
|
||||
if (asprintf(&file_name, OBIVIEW_FILE_NAME) < 0)
|
||||
file_name = (char*) malloc((strlen(OBIVIEW_FILE_NAME) + 1)*sizeof(char));
|
||||
if (sprintf(file_name, OBIVIEW_FILE_NAME) < 0)
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
obidebug(1, "\nProblem building an obiview file name");
|
||||
@ -984,7 +985,7 @@ int obi_save_view(Obiview_p view)
|
||||
return -1;
|
||||
|
||||
// Get the full path for the view file
|
||||
full_path = get_full_path((view->dms)->dir_fd, view_file_name);
|
||||
full_path = get_full_path(view->dms, view_file_name);
|
||||
if (full_path == NULL)
|
||||
{
|
||||
obi_set_errno(OBIVIEW_ERROR);
|
||||
|
@ -17,16 +17,18 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "private_at_functions.h"
|
||||
#include "obidebug.h"
|
||||
#include "obierrno.h"
|
||||
#include "obidms.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
char* get_full_path(int directory_file_descriptor, const char* path_name)
|
||||
char* get_full_path(OBIDMS_p dms, const char* path_name)
|
||||
{
|
||||
char* full_path;
|
||||
|
||||
@ -37,26 +39,28 @@ char* get_full_path(int directory_file_descriptor, const char* path_name)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (fcntl(directory_file_descriptor, F_GETPATH, full_path) < 0)
|
||||
if (getcwd(full_path, MAX_PATH_LEN) == NULL)
|
||||
{
|
||||
obidebug(1, "\nError getting the path to a file or directory");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// TODO check errors?
|
||||
strlcat(full_path, "/", MAX_PATH_LEN);
|
||||
strlcat(full_path, path_name, MAX_PATH_LEN);
|
||||
strcat(full_path, "/");
|
||||
strcat(full_path, dms->directory_name);
|
||||
strcat(full_path, "/");
|
||||
strcat(full_path, path_name);
|
||||
|
||||
return full_path;
|
||||
}
|
||||
|
||||
|
||||
DIR* private_opendirat(int directory_file_descriptor, const char* path_name)
|
||||
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name)
|
||||
{
|
||||
char* full_path;
|
||||
DIR* directory;
|
||||
|
||||
full_path = get_full_path(directory_file_descriptor, path_name);
|
||||
full_path = get_full_path(dms, path_name);
|
||||
if (full_path == NULL)
|
||||
return NULL;
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "obidms.h"
|
||||
|
||||
#define MAX_PATH_LEN 4096 /**< Maximum length for the character string defining a
|
||||
file or directory path */
|
||||
@ -37,7 +38,7 @@
|
||||
* @since June 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* get_full_path(int directory_file_descriptor, const char* path_name);
|
||||
char* get_full_path(OBIDMS_p dms, const char* path_name);
|
||||
|
||||
|
||||
/**
|
||||
@ -52,7 +53,7 @@ char* get_full_path(int directory_file_descriptor, const char* path_name);
|
||||
* @since June 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
DIR* private_opendirat(int directory_file_descriptor, const char* path_name);
|
||||
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name);
|
||||
|
||||
|
||||
#endif /* PRIVATEOPENAT_H_ */
|
||||
|
Reference in New Issue
Block a user