Compare commits

...

67 Commits

Author SHA1 Message Date
02d67c257f The default name of an AVL is now the column name + '_indexer', and when
an AVL is opened (as opposed to created), it is read-only
2016-04-15 12:55:26 +02:00
e04ea85d1e Fixed problematic __str__ method and useless declarations in the
OBI_Nuc_Seq_Stored class
2016-04-15 11:22:05 +02:00
527d3555f0 Moved the functions getting full paths for files and directories to
obidms.c/.h files
2016-04-15 11:11:13 +02:00
71492ad229 Made the handling of listing and unlisting opened columns and indexers
functions in the obidms files.
2016-04-15 10:49:12 +02:00
73d64e5aff Renamed 'unmap_header' function to 'close_header' 2016-04-14 15:19:27 +02:00
4cb52e1632 Made the truncating of columns automatic when closing them (note:
already the case for AVLs)
2016-04-14 15:13:30 +02:00
9d042f7bd0 Refactored and relocated the set and get functions of all column types,
both within and out of the context of a view
2016-04-13 15:10:24 +02:00
5ec2d8842e Character string indexer API 2016-04-12 17:21:01 +02:00
04c9470f7d Fixed and cleaned DNA_seq_indexer API 2016-04-12 17:20:24 +02:00
be05c889e2 DNA_seq_indexer API 2016-04-12 16:38:47 +02:00
04e3a7b5a9 Added more references in cython .cfiles files because it seems necessary
for linux distributions
2016-04-12 15:10:54 +02:00
d8107533d8 Obiblob_indexer API 2016-04-12 14:53:33 +02:00
cd4e65e190 Fixed typo and includes in obiblob files 2016-04-12 14:52:27 +02:00
375bfcce8a Renamed "Obi_byte_arrays" to "Obiblobs" and moved Obiblob functions to
separate obiblob.c and obiblob.h files
2016-04-12 11:21:14 +02:00
c225cfd8b6 Fixed bug with retrieval of values from AVLs (bad cast in byte array
structure)
2016-04-11 17:07:22 +02:00
966b1325ed Deleted declaration of obsolete public function 2016-04-11 11:14:20 +02:00
019dfc01b4 Branch to refactor and debug (AVLs bugged) 2016-04-08 15:38:57 +02:00
edc4fd7b3e Fixed minor warning 2016-03-25 16:11:52 +01:00
ff6c27acf2 Implemented the retrieval of values with groups of AVLs 2016-03-25 15:35:16 +01:00
69856f18dd untested (and no possible retrieval) of CRC used to represent data in
AVL trees
2016-03-24 16:38:11 +01:00
58ac860cc7 Added macro for the bloom filter parameters and deleted old unused
macros for crc
2016-03-23 13:33:40 +01:00
d44117d625 obiimport function for testing purposes 2016-03-23 13:00:02 +01:00
6bd42132c4 Minor fixes to silence warnings and replaced two asprintf uses 2016-03-23 12:58:53 +01:00
4085904362 Merge branch 'multiple_avls_bloom' 2016-03-22 14:14:10 +01:00
b04b4b5902 made POSIX compliant 2016-03-21 11:33:06 +01:00
383e738ab7 Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2016-03-18 15:49:53 +01:00
3681cecb4d Multiple AVLs with bloom filters (very raw test version) 2016-03-18 11:06:02 +01:00
545ed8111a Code for tests storing data in multiple AVLs.
(note: unretrievable data as implemented)
2016-03-11 15:34:55 +01:00
86071d30c9 Minor improvement in AVL initial size calculation 2016-03-11 14:07:40 +01:00
21d1b2ed3e First implementation of taxonomy reading 2016-03-11 13:56:38 +01:00
6157633137 prototype for the obi unix command and the count sub command 2016-03-08 16:06:00 +01:00
a08def47e6 It is now impossible to create a view with a name identical to one of an
existing written view
2016-03-01 13:36:54 +01:00
fc5a12bad7 Closes #34 2016-02-29 17:56:55 +01:00
e323d8e702 Cython classes for nucleotide sequences (outside or in the context of a
view)
2016-02-29 16:33:30 +01:00
b350ea0393 Fixed minor error 2016-02-29 16:28:34 +01:00
8e9e21a02e Increased the maximum depth of AVL trees 2016-02-29 16:27:23 +01:00
4df313c54a Added Obiviews specialized for the handling of nucleotide sequences 2016-02-25 09:43:27 +01:00
ffc68d448f Deleted a forgotten print statement 2016-02-18 15:15:42 +01:00
a8f03248a8 Major update : views 2016-02-18 10:38:51 +01:00
cfaf069095 Fixed more typos and formatting imperfections. 2015-12-11 17:37:25 +01:00
a6144eabe2 Fixed typos 2015-12-11 17:26:20 +01:00
c139367555 DNA sequences and character strings are now handled using AVL trees. 2015-12-11 17:24:44 +01:00
1586956d57 Added the lists of opened columns and arrays in the OBIDMS structure,
and a counter in the OBIDMS column structure; fixed some bugs and
created tests for referring columns that are bound to disappear anyway.
2015-12-02 17:32:07 +01:00
b45b496b0e Major update: new type of columns containing indices referring to lines
in other columns
2015-11-29 11:57:07 +01:00
2cf10cb6f0 Column type is now passed as a character string when creating the column
(either 'OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_STR' or
'OBI_SEQ')
2015-11-23 15:48:27 +01:00
5a5516303d deleting useless .pyc files 2015-11-23 14:43:34 +01:00
d6a99bafea Fixed a major bug with the versioning of columns that was introduced in
f6ec8ba9
2015-11-23 13:34:51 +01:00
08f2657e18 Increased maximum line count of columns to 1^9 2015-11-23 13:23:18 +01:00
6aa2f92930 DNA sequences are now encoded on 4 bits when they are in IUPAC 2015-11-20 15:32:09 +01:00
87044b41d8 modified the encoding function on 2 bits a little 2015-11-20 11:32:47 +01:00
6ab1c83302 New column type for DNA sequences. Only for those coded on 2 bits (only
'ATGCatgc') for now.
2015-11-19 18:12:48 +01:00
e371248567 changed version to 0.0.0 2015-11-19 18:11:21 +01:00
dbf9463238 The endianness of a DMS is now stored in the OBIDMS structure 2015-11-18 15:35:09 +01:00
eb12af4da4 Fixed minor error in the documentation of a function. 2015-11-16 15:38:01 +01:00
e8417b4f6f The endianness of an OBIDMS is now stored in an informations file that
is read when opening the OBIDMS.
2015-11-16 14:37:51 +01:00
6579566c6e Minor changes in code to improve readability and fix C compilation
warnings
2015-11-10 14:37:58 +01:00
410e2e02a0 When retrieving the header of a column, the version number of the column
wanted can now be provided.
2015-11-10 13:30:10 +01:00
8ce4f264aa When enlarging a column, the function doesn't try anymore to keep the
mapped region at the same pointer (never works), and unmap/remap
instead.
2015-11-10 13:18:36 +01:00
d885eb48ff The header size when creating a column is now calculated according to
the size of the header structure and the page size of the platform.
2015-11-10 13:09:30 +01:00
661fe3606a In OBI_CHAR columns, characters are now given and retrieved as decoded
(unicode) characters.
2015-11-10 11:24:08 +01:00
c4b7e579cf Comments in column headers are now working. 2015-11-10 10:56:45 +01:00
f6ec8ba963 The header size is now directly read in the file when a column or an
array is opened.
2015-11-09 17:50:32 +01:00
0e3d6ed2d7 Methods __len__ (number of lines used) and __sizeof__ (total size in
bytes) implemented for columns.
2015-11-09 15:56:20 +01:00
01bfc14503 The data size in bytes is now stored in the header of a column. 2015-11-09 15:55:00 +01:00
65c1b1e8b2 Minor changes to make the creation of files and directories cleaner 2015-11-09 15:22:01 +01:00
b37bd8f21c File descriptors for dms, column and array directories are now stored in
structures.
2015-11-09 15:06:02 +01:00
05e3956a0c Minor changes in code to improve readability (freeing some character
strings earlier)
2015-11-09 11:22:51 +01:00
90 changed files with 11281 additions and 3189 deletions

228
python/obi.py Normal file
View File

@ -0,0 +1,228 @@
#!/usr/local/bin/python3.4
'''
obi -- shortdesc
obi is a description
It defines classes_and_methods
@author: user_name
@copyright: 2014 organization_name. All rights reserved.
@license: license
@contact: user_email
@deffield updated: Updated
'''
import sys
import pkgutil
import argparse
import logging
import json
# Default configuration of the `obi` command.
# The top-level 'obi' section holds the global options; getConfiguration()
# overrides these values with any command-line option whose value is not None,
# and buildDefaultConfiguration() adds one extra section per sub-command.
#   'log'      : when true, getLogger() also writes the log to a "<output>.log" file
#   'loglevel' : name of the logging level applied to the root logger
#   'version'  : when true, getConfiguration() prints the version and exits
#   'progress' : switched to False by the --no-progress option
default_config = {
'obi' : { 'log' : True,
'loglevel' : 'INFO',
'version' : False,
'progress' : True
}
}
from obitools3 import command
from obitools3.version import version
__all__ = []
__version__ = version
__date__ = '2014-09-28'
__updated__ = '2014-09-28'
DEBUG = 1
TESTRUN = 0
PROFILE = 0
def loadCommand(name,loader):
    '''
    Load a command module from its name and the finder that located it.

    This function is for internal use.

    The spec-based import API (`find_spec` / `module_from_spec` /
    `exec_module`) is used when available, because the older
    `find_module().load_module()` chain is deprecated and is no longer
    provided by the finders of recent Python releases. The legacy chain is
    kept as a fallback for old interpreters and old-style importers.

    @param name: name of the module
    @type name: str

    @param loader: the module finder, as yielded by `pkgutil.iter_modules`
    @type loader: ImpLoader

    @return the loaded module
    @rtype: module

    @raise ImportError: if the finder cannot locate a module called `name`
    '''
    import importlib.util

    find_spec = getattr(loader, 'find_spec', None)
    if find_spec is None or not hasattr(importlib.util, 'module_from_spec'):
        # Old-style importer (or interpreter): only the legacy API exists.
        return loader.find_module(name).load_module(name)

    spec = find_spec(name)
    if spec is None or spec.loader is None:
        raise ImportError("No command module named %r" % name)

    if not hasattr(spec.loader, 'exec_module'):
        # Loader predating exec_module: let it do the whole job itself.
        return spec.loader.load_module(name)

    module = importlib.util.module_from_spec(spec)
    # load_module() registered the module in sys.modules; keep doing so, and
    # do it before execution so that the module can be found while it runs.
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind.
        sys.modules.pop(name, None)
        raise
    return module
def getCommandsList():
    '''
    Collect the sub-commands available to the main `obi` command.

    Every plain module (packages are ignored) found on the path of the
    `command` package is loaded with `loadCommand`.

    @return: a dict instance mapping each command name to its loaded module
    @rtype: dict
    '''
    cmds = {}
    for finder, module_name, is_package in pkgutil.iter_modules(command.__path__):
        if is_package:
            continue
        cmds[module_name] = loadCommand(module_name, finder)
    return cmds
def getLogger(config):
    '''
    Configure and return the root logger as defined by the command line
    options or by the configuration.

    A handler writing to stderr is always installed. A second handler
    writing to "<outputfilename>.log" is installed when the 'log' option is
    true and an output file name is known. The configured logger is also
    stored in config['obi']['logger'].

    :param config: the configuration dict; its 'obi' section is read for the
                   'outputfilename', 'loglevel' and 'log' entries. Missing
                   entries fall back to no output file, 'INFO' and True.
    :return: the configured root logger
    '''
    obi_config = config['obi']

    # These keys may legitimately be absent (the default configuration does
    # not define 'outputfilename'), so never index them directly.
    output = obi_config.get('outputfilename')
    level = obi_config.get('loglevel', 'INFO')
    logfile = obi_config.get('log', True)

    rootlogger = logging.getLogger()

    logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")

    stderrHandler = logging.StreamHandler(sys.stderr)
    stderrHandler.setFormatter(logFormatter)
    rootlogger.addHandler(stderrHandler)

    # Without an output name the log file would be called "None.log":
    # only log to a file when there is a real name to derive it from.
    if logfile and output is not None:
        fileHandler = logging.FileHandler("%s.log" % output)
        fileHandler.setFormatter(logFormatter)
        rootlogger.addHandler(fileHandler)

    # Resolve the level name; anything that is not a numeric logging level
    # (unknown name, or a name matching a logging function) means INFO.
    loglevel = getattr(logging, str(level).upper(), None)
    if not isinstance(loglevel, int):
        loglevel = logging.INFO

    rootlogger.setLevel(loglevel)

    obi_config['logger'] = rootlogger

    return rootlogger
class ObiParser(argparse.ArgumentParser):
    '''
    Argument parser that prints the complete help after a usage error.
    '''

    def error(self, message):
        '''
        Report `message` on stderr, print the parser help and terminate
        the program with exit status 2.
        '''
        print('error: %s\n' % message, end='', file=sys.stderr)
        self.print_help()
        raise SystemExit(2)
def buildArgumentParser():
    '''
    Build the command line parser of the `obi` command.

    The global options are declared first, then one sub-parser is added for
    every command module exposing a `run` attribute. A command module may
    provide a `__title__` (used as help text) and an `addOptions` function
    (called with its sub-parser). The selected module is stored under the
    'obi:module' destination.

    @return: the fully configured parser
    @rtype: ObiParser
    '''
    parser = ObiParser()

    # Global options; destinations follow the "section:key" convention.
    parser.add_argument('--version',
                        dest='obi:version',
                        action='store_true',
                        default=False,
                        help='Print the version of the OBITools')

    parser.add_argument('--no-log',
                        dest='obi:log',
                        action='store_false',
                        default=None,
                        help='Do not create a logfile for the data analyze')

    parser.add_argument('--no-progress',
                        dest='obi:progress',
                        action='store_false',
                        default=None,
                        help='Do not print the progress bar during analyzes')

    subparsers = parser.add_subparsers(title='subcommands',
                                       description='valid subcommands',
                                       help='additional help')

    for command_name, module in getCommandsList().items():
        # Modules without a `run` entry point are not commands.
        if not hasattr(module, "run"):
            continue

        parser_options = {}
        if hasattr(module, "__title__"):
            parser_options['help'] = module.__title__
        sub = subparsers.add_parser(command_name, **parser_options)

        if hasattr(module, "addOptions"):
            module.addOptions(sub)

        sub.set_defaults(**{'obi:module' : module})

    return parser
def buildDefaultConfiguration():
    '''
    Complete the global default configuration with one section per
    sub-command.

    The section of a command is the `default_config` attribute of its
    module when it has one, and an empty dict otherwise. Modules without a
    `run` attribute are not commands and are skipped, which is the same rule
    as the one applied by `buildArgumentParser` (an `assert` was used here
    before, which crashed on such modules and disappeared under `python -O`).

    @return: the updated global default configuration
    @rtype: dict
    '''
    global default_config

    for name, module in getCommandsList().items():
        if not hasattr(module, "run"):
            continue
        default_config[name] = getattr(module, 'default_config', {})

    return default_config
def getConfiguration():
    '''
    Build the configuration of the current run, once.

    The default configuration is completed with the sections of the
    sub-commands, then overridden by every command line option that was
    given a value (option destinations are "section:key" strings). The
    logger is then set up and the configuration is flagged with '__done__',
    so that later calls return it directly.

    The program exits with status 0 after printing the version when
    --version is given, and with status 2 when no sub-command is specified.

    @return: the configuration
    @rtype: dict
    '''
    global default_config

    if '__done__' in default_config:
        return default_config

    parser = buildArgumentParser()
    options = vars(parser.parse_args())

    config = buildDefaultConfiguration()

    for option_name, value in options.items():
        section, key = option_name.split(':', 1)
        # None means "not given on the command line": keep the default.
        if value is not None:
            config.setdefault(section, {})[key] = value

    obi_config = config['obi']

    if obi_config['version']:
        print("The OBITools - Version %s" % __version__)
        sys.exit(0)

    if 'module' not in obi_config:
        print('\nError: No obi command specified',file=sys.stderr)
        parser.print_help()
        sys.exit(2)

    # Neither key is guaranteed to exist (no default and no option defines
    # them here), so read them without raising a KeyError.
    if obi_config.get('outputfilename') is None:
        obi_config['outputfilename'] = obi_config.get('indexfilename')

    getLogger(config)

    config['__done__'] = True

    return config
if __name__ == "__main__":
    # Script entry point: build the configuration, then hand it over to the
    # `run` function of the module selected on the command line.
    config = getConfiguration()
    selected_module = config['obi']['module']
    selected_module.run(config)

Binary file not shown.

View File

View File

@ -0,0 +1,36 @@
'''
Created on 8 mars 2016
@author: coissac
'''
__title__="Counts sequences in a sequence set"
default_config = { 'countmode' : None
}
def addOptions(parser):
    '''
    Declare the command line options of the `count` sub-command.

    `parser` is an argparse parser, so the options are declared with the
    argparse API (`add_argument_group` / `add_argument`); the optparse
    methods `add_option_group` / `add_option` do not exist on it.

    @param parser: the sub-parser dedicated to this command
    @type parser: argparse.ArgumentParser
    '''
    parser.add_argument(dest='obi:input', metavar='obi:input',
                        nargs='?',
                        default=None,
                        help='input data set' )

    group=parser.add_argument_group('Obicount specific options')

    group.add_argument('-s','--sequence',
                       action="store_true", dest="count:sequence",
                       default=False,
                       help="Prints only the number of sequence records."
                       )

    group.add_argument('-a','--all',
                       action="store_true", dest="count:all",
                       default=False,
                       help="Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False)."
                       )
def run(config):
    '''
    Entry point of the command: currently a placeholder that does nothing.

    @param config: the configuration of the run (unused for now)
    '''
    # The code of my command
    return None

View File

@ -1,5 +1,28 @@
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
@ -10,7 +33,19 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiarray.h
../../../src/obiarray.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -2,10 +2,82 @@
from .capi.obidms cimport OBIDMS_p
from .capi.obidmscolumn cimport OBIDMS_column_p
from .capi.obiview cimport Obiview_p
from .capi.obitypes cimport obiversion_t, OBIType_t, index_t
from ._obitaxo cimport OBI_Taxonomy
cdef class OBIDMS_column:
cdef OBIDMS_column_p* pointer
cdef OBIDMS dms
cdef Obiview_p view
cdef str data_type
cdef str dms_name
cdef str column_name
cdef index_t nb_elements_per_line
cdef list elements_names
cpdef update_pointer(self)
cpdef list get_elements_names(self)
cpdef str get_data_type(self)
cpdef index_t get_nb_lines_used(self)
cpdef str get_creation_date(self)
cpdef str get_comments(self)
cpdef close(self)
@staticmethod
cdef object get_subclass_type(OBIDMS_column_p column_p)
cdef class OBIDMS_column
cdef class OBIDMS_column_multi_elts(OBIDMS_column):
cpdef set_line(self, index_t line_nb, dict values)
cdef class OBIDMS_column_line:
cdef OBIDMS_column column
cdef index_t index
cdef class OBIView:
cdef Obiview_p pointer
cdef str name
cdef str comments
cdef dict columns
cdef dict columns_pp # TODO this dict might be unnecessary
cdef OBIDMS dms
cpdef delete_column(self, str column_name)
cpdef add_column(self,
str column_name,
obiversion_t version_number=*,
str type=*,
index_t nb_lines=*,
index_t nb_elements_per_line=*,
list elements_names=*,
str indexer_name=*,
str comments=*,
bint create=*
)
cpdef select_line(self, index_t line_nb)
cpdef select_lines(self, list line_selection)
cpdef save_and_close(self)
cdef class OBIView_NUC_SEQS(OBIView):
cdef OBIDMS_column ids
cdef OBIDMS_column sequences
cdef OBIDMS_column descriptions
cpdef delete_column(self, str column_name)
cdef class OBIView_line :
cdef index_t index
cdef OBIView view
cdef class OBIDMS:
@ -13,35 +85,10 @@ cdef class OBIDMS:
cdef OBIDMS_p pointer
cdef str dms_name
cpdef dict list(self)
cpdef close(self)
cpdef OBIDMS_column open_column(self,
str column_name,
bint create=*,
bint clone=*, bint clone_data=*,
obiversion_t version_number=*,
OBIType_t data_type=*,
index_t nb_lines=*,
index_t nb_elements_per_line=*,
list elements_names=*,
str array_name=*)
cdef class OBIDMS_column:
cdef OBIDMS_column_p pointer
cdef OBIDMS dms
cdef str data_type # TODO keep as OBIType_t? both?
cdef str dms_name
cdef str column_name
cdef index_t nb_elements_per_line
cdef list elements_names
# cpdef object get_item(self, index_t line_nb, str element_name) TODO can't declare because not the same in all subclasses
# cpdef set_item(self, index_t line_nb, str element_name, object value) TODO can't declare because object value
cpdef list get_elements_names(self)
cpdef str get_data_type(self)
cpdef index_t get_nb_lines_used(self)
cpdef str get_creation_date(self)
cpdef close(self)
cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name)
cpdef OBIView open_view(self, str view_name)
cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*)
cpdef dict read_view_infos(self, str view_name)
cpdef dict read_views(self)

File diff suppressed because it is too large Load Diff

View File

@ -1,18 +1,41 @@
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiarray.h
../../../src/obiarray.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -1,25 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport obibool_t, index_t
from ._obidms cimport OBIDMS_column
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_bool(OBIDMS_column):
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_bool_writable(OBIDMS_column_bool):
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_bool_multi_elts(OBIDMS_column_bool):
cdef class OBIDMS_column_multi_elts_bool(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, obibool_t value)
cpdef set_line(self, index_t line_nb, object values)
cdef class OBIDMS_column_bool_multi_elts_writable(OBIDMS_column_bool_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, obibool_t value)
cpdef set_line(self, index_t line_nb, object values)
cpdef close(self)
cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -1,13 +1,11 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obibool_with_elt_name, \
obi_column_get_obibool_with_elt_idx, \
obi_column_set_obibool_with_elt_name, \
obi_column_set_obibool_with_elt_idx
from .capi.obiview cimport obi_column_get_obibool_with_elt_name_in_view, \
obi_column_get_obibool_with_elt_idx_in_view, \
obi_column_set_obibool_with_elt_name_in_view, \
obi_column_set_obibool_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIBool_NA
from .capi.obitypes cimport OBIBool_NA, obibool_t
from obitools3.utils cimport str2bytes
@ -19,7 +17,7 @@ cdef class OBIDMS_column_bool(OBIDMS_column):
cpdef object get_line(self, index_t line_nb):
cdef obibool_t value
cdef object result
value = obi_column_get_obibool_with_elt_idx(self.pointer, line_nb, 0)
value = obi_column_get_obibool_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIBool_NA :
@ -29,30 +27,18 @@ cdef class OBIDMS_column_bool(OBIDMS_column):
return result
cpdef set_line(self, index_t line_nb, object value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_bool_writable(OBIDMS_column_bool):
cpdef set_line(self, index_t line_nb, object value):
if obi_column_set_obibool_with_elt_idx(self.pointer, line_nb, 0, <obibool_t> value) < 0:
if value is None :
value = OBIBool_NA
if obi_column_set_obibool_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, <obibool_t> value) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_bool_multi_elts(OBIDMS_column_bool):
cdef class OBIDMS_column_multi_elts_bool(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef obibool_t value
cdef object result
value = obi_column_get_obibool_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obibool_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIBool_NA :
@ -63,42 +49,29 @@ cdef class OBIDMS_column_bool_multi_elts(OBIDMS_column_bool):
cpdef object get_line(self, index_t line_nb) :
cdef obibool_t value
cdef object result
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = obi_column_get_obibool_with_elt_idx(self.pointer, line_nb, i)
value = obi_column_get_obibool_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
result[self.elements_names[i]] = PyBool_FromLong(value)
if all_NA and (value != OBIBool_NA) :
if value == OBIBool_NA :
value_in_result = None
else :
value_in_result = PyBool_FromLong(value)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False
if all_NA :
result = None
return result
cpdef set_item(self, index_t line_nb, str element_name, obibool_t value):
raise Exception("Column is read-only")
cpdef set_line(self, index_t line_nb, object values):
raise Exception("Column is read-only")
cdef class OBIDMS_column_bool_multi_elts_writable(OBIDMS_column_bool_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, obibool_t value):
if obi_column_set_obibool_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
cpdef set_item(self, index_t line_nb, str element_name, object value):
if value is None :
value = OBIBool_NA
if obi_column_set_obibool_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), <obibool_t> value) < 0:
raise Exception("Problem setting a value in a column")
cpdef set_line(self, index_t line_nb, object values):
cdef obibool_t value
for element_name in values :
value = <obibool_t> values[element_name]
self.set_item(line_nb, element_name, value)
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -1,18 +1,41 @@
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiarray.h
../../../src/obiarray.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -1,25 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport obichar_t, index_t
from ._obidms cimport OBIDMS_column
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_char(OBIDMS_column):
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_char_writable(OBIDMS_column_char):
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_char_multi_elts(OBIDMS_column_char):
cdef class OBIDMS_column_multi_elts_char(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, bytes value)
cpdef set_line(self, index_t line_nb, object values)
cdef class OBIDMS_column_char_multi_elts_writable(OBIDMS_column_char_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, bytes value)
cpdef set_line(self, index_t line_nb, object values)
cpdef close(self)
cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -1,15 +1,13 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obichar_with_elt_name, \
obi_column_get_obichar_with_elt_idx, \
obi_column_set_obichar_with_elt_name, \
obi_column_set_obichar_with_elt_idx
from .capi.obiview cimport obi_column_get_obichar_with_elt_name_in_view, \
obi_column_get_obichar_with_elt_idx_in_view, \
obi_column_set_obichar_with_elt_name_in_view, \
obi_column_set_obichar_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIChar_NA
from .capi.obitypes cimport OBIChar_NA, obichar_t
from obitools3.utils cimport str2bytes
from obitools3.utils cimport str2bytes, bytes2str
cdef class OBIDMS_column_char(OBIDMS_column):
@ -17,122 +15,62 @@ cdef class OBIDMS_column_char(OBIDMS_column):
cpdef object get_line(self, index_t line_nb):
cdef obichar_t value
cdef object result
value = obi_column_get_obichar_with_elt_idx(self.pointer, line_nb, 0)
value = obi_column_get_obichar_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIChar_NA :
result = None
else :
result = <bytes> value
result = bytes2str(value)
return result
cpdef set_line(self, index_t line_nb, object value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_char_writable(OBIDMS_column_char):
cpdef set_line(self, index_t line_nb, object value):
if obi_column_set_obichar_with_elt_idx(self.pointer, line_nb, 0, <bytes> value[0]) < 0:
if value is None :
value = OBIChar_NA
if obi_column_set_obichar_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, str2bytes(value)[0]) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_char_multi_elts(OBIDMS_column_char):
cdef class OBIDMS_column_multi_elts_char(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef obichar_t value
cdef object result
value = obi_column_get_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obichar_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIChar_NA :
result = None
else :
result = <bytes> value
result = bytes2str(value)
return result
cpdef object get_line(self, index_t line_nb) :
cdef obichar_t value
cdef object result
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = obi_column_get_obichar_with_elt_idx(self.pointer, line_nb, i)
value = obi_column_get_obichar_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
result[self.elements_names[i]] = <bytes> value
if all_NA and (value != OBIChar_NA) :
if value == OBIChar_NA :
value_in_result = None
else :
value_in_result = bytes2str(value)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False
if all_NA :
result = None
return result
cpdef set_item(self, index_t line_nb, str element_name, bytes value):
raise Exception("Column is read-only")
cpdef set_line(self, index_t line_nb, object values):
raise Exception("Column is read-only")
cdef class OBIDMS_column_char_multi_elts_writable(OBIDMS_column_char_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, bytes value):
if obi_column_set_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value[0]) < 0:
cpdef set_item(self, index_t line_nb, str element_name, object value):
if value is None :
value = OBIChar_NA
if obi_column_set_obichar_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), str2bytes(value)[0]) < 0:
raise Exception("Problem setting a value in a column")
cpdef set_line(self, index_t line_nb, object values):
cdef bytes value
for element_name in values :
value = <bytes> values[element_name]
self.set_item(line_nb, element_name, value)
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
# cdef class OBIDMS_column_char(OBIDMS_column) :
#
# cpdef object get_item(self, index_t line_nb, str element_name):
# cdef char value
# cdef object result
# value = obi_column_get_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
# if obi_errno > 0 :
# raise IndexError(line_nb, element_name)
# if value == OBIChar_NA :
# result = None
# else :
# result = <bytes> value
# return result
#
# cpdef set_item(self, index_t line_nb, str element_name, bytes value):
# raise Exception("Column is read-only")
#
# cpdef close(self):
# if obi_close_column(self.pointer) < 0 :
# raise Exception("Problem closing a column")
#
#
# cdef class OBIDMS_column_char_writable(OBIDMS_column_char) :
#
# cpdef set_item(self, index_t line_nb, str element_name, bytes value):
# if obi_column_set_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value[0]) < 0:
# raise Exception("Problem setting a value in a column")
#
# cpdef close(self):
# if obi_truncate_and_close_column(self.pointer) < 0 :
# raise Exception("Problem closing a column")
#

View File

@ -1,18 +1,41 @@
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/bloom.h
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiarray.h
../../../src/obiarray.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -1,25 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport obifloat_t, index_t
from ._obidms cimport OBIDMS_column
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_float(OBIDMS_column):
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_float_writable(OBIDMS_column_float):
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_float_multi_elts(OBIDMS_column_float):
cdef class OBIDMS_column_multi_elts_float(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, obifloat_t value)
cpdef set_line(self, index_t line_nb, object values)
cdef class OBIDMS_column_float_multi_elts_writable(OBIDMS_column_float_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, obifloat_t value)
cpdef set_line(self, index_t line_nb, object values)
cpdef close(self)
cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -1,13 +1,11 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obifloat_with_elt_name, \
obi_column_get_obifloat_with_elt_idx, \
obi_column_set_obifloat_with_elt_name, \
obi_column_set_obifloat_with_elt_idx
from .capi.obiview cimport obi_column_get_obifloat_with_elt_name_in_view, \
obi_column_get_obifloat_with_elt_idx_in_view, \
obi_column_set_obifloat_with_elt_name_in_view, \
obi_column_set_obifloat_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIFloat_NA
from .capi.obitypes cimport OBIFloat_NA, obifloat_t
from obitools3.utils cimport str2bytes
@ -17,7 +15,7 @@ cdef class OBIDMS_column_float(OBIDMS_column):
cpdef object get_line(self, index_t line_nb):
cdef obifloat_t value
cdef object result
value = obi_column_get_obifloat_with_elt_idx(self.pointer, line_nb, 0)
value = obi_column_get_obifloat_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIFloat_NA :
@ -27,30 +25,18 @@ cdef class OBIDMS_column_float(OBIDMS_column):
return result
cpdef set_line(self, index_t line_nb, object value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_float_writable(OBIDMS_column_float):
cpdef set_line(self, index_t line_nb, object value):
if obi_column_set_obifloat_with_elt_idx(self.pointer, line_nb, 0, <obifloat_t> value) < 0:
if value is None :
value = OBIFloat_NA
if obi_column_set_obifloat_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, <obifloat_t> value) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_float_multi_elts(OBIDMS_column_float):
cdef class OBIDMS_column_multi_elts_float(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef obifloat_t value
cdef object result
value = obi_column_get_obifloat_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obifloat_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIFloat_NA :
@ -61,43 +47,30 @@ cdef class OBIDMS_column_float_multi_elts(OBIDMS_column_float):
cpdef object get_line(self, index_t line_nb) :
cdef obifloat_t value
cdef object result
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = obi_column_get_obifloat_with_elt_idx(self.pointer, line_nb, i)
value = obi_column_get_obifloat_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
result[self.elements_names[i]] = <double> value
if all_NA and (value != OBIFloat_NA) :
if value == OBIFloat_NA :
value_in_result = None
else :
value_in_result = <double> value
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False
if all_NA :
result = None
return result
cpdef set_item(self, index_t line_nb, str element_name, obifloat_t value):
raise Exception("Column is read-only")
cpdef set_line(self, index_t line_nb, object values):
raise Exception("Column is read-only")
cdef class OBIDMS_column_float_multi_elts_writable(OBIDMS_column_float_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, obifloat_t value):
if obi_column_set_obifloat_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
cpdef set_item(self, index_t line_nb, str element_name, object value):
if value is None :
value = OBIFloat_NA
if obi_column_set_obifloat_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), <obifloat_t> value) < 0:
raise Exception("Problem setting a value in a column")
cpdef set_line(self, index_t line_nb, object values):
cdef obifloat_t value
for element_name in values :
value = <obifloat_t> values[element_name]
self.set_item(line_nb, element_name, value)
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -1,18 +1,41 @@
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/bloom.h
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiarray.h
../../../src/obiarray.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -1,25 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport obiint_t, index_t
from ._obidms cimport OBIDMS_column
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_int(OBIDMS_column):
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_int_writable(OBIDMS_column_int):
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_int_multi_elts(OBIDMS_column_int):
cdef class OBIDMS_column_multi_elts_int(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, obiint_t value)
cpdef set_line(self, index_t line_nb, object values)
cdef class OBIDMS_column_int_multi_elts_writable(OBIDMS_column_int_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, obiint_t value)
cpdef set_line(self, index_t line_nb, object values)
cpdef close(self)
cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -1,13 +1,11 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obiint_with_elt_name, \
obi_column_get_obiint_with_elt_idx, \
obi_column_set_obiint_with_elt_name, \
obi_column_set_obiint_with_elt_idx
from .capi.obiview cimport obi_column_get_obiint_with_elt_name_in_view, \
obi_column_get_obiint_with_elt_idx_in_view, \
obi_column_set_obiint_with_elt_name_in_view, \
obi_column_set_obiint_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIInt_NA
from .capi.obitypes cimport OBIInt_NA, obiint_t
from obitools3.utils cimport str2bytes
@ -19,7 +17,7 @@ cdef class OBIDMS_column_int(OBIDMS_column):
cpdef object get_line(self, index_t line_nb):
cdef obiint_t value
cdef object result
value = obi_column_get_obiint_with_elt_idx(self.pointer, line_nb, 0)
value = obi_column_get_obiint_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIInt_NA :
@ -29,30 +27,18 @@ cdef class OBIDMS_column_int(OBIDMS_column):
return result
cpdef set_line(self, index_t line_nb, object value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_int_writable(OBIDMS_column_int):
cpdef set_line(self, index_t line_nb, object value):
if obi_column_set_obiint_with_elt_idx(self.pointer, line_nb, 0, <obiint_t> value) < 0:
if value is None :
value = OBIInt_NA
if obi_column_set_obiint_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, <obiint_t> value) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_int_multi_elts(OBIDMS_column_int):
cdef class OBIDMS_column_multi_elts_int(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef obiint_t value
cdef object result
value = obi_column_get_obiint_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obiint_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIInt_NA :
@ -63,42 +49,30 @@ cdef class OBIDMS_column_int_multi_elts(OBIDMS_column_int):
cpdef object get_line(self, index_t line_nb) :
cdef obiint_t value
cdef object result
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = obi_column_get_obiint_with_elt_idx(self.pointer, line_nb, i)
value = obi_column_get_obiint_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
result[self.elements_names[i]] = PyInt_FromLong(value)
if all_NA and (value != OBIInt_NA) :
if value == OBIInt_NA :
value_in_result = None
else :
value_in_result = PyInt_FromLong(value)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False
if all_NA :
result = None
result = None # TODO discuss
return result
cpdef set_item(self, index_t line_nb, str element_name, obiint_t value):
raise Exception("Column is read-only")
cpdef set_line(self, index_t line_nb, object values):
raise Exception("Column is read-only")
cdef class OBIDMS_column_int_multi_elts_writable(OBIDMS_column_int_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, obiint_t value):
if obi_column_set_obiint_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
cpdef set_item(self, index_t line_nb, str element_name, object value):
if value is None :
value = OBIInt_NA
if obi_column_set_obiint_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), <obiint_t> value) < 0:
raise Exception("Problem setting a value in a column")
cpdef set_line(self, index_t line_nb, object values):
cdef obiint_t value
for element_name in values :
value = <obiint_t> values[element_name]
self.set_item(line_nb, element_name, value)
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -0,0 +1,41 @@
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -0,0 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
# Declaration of the single-element sequence column type.
# Both accessors take a line number; the implementation file reads and
# writes element index 0 of that line.
cdef class OBIDMS_column_seq(OBIDMS_column):
    # Read the value stored at line `line_nb`.
    cpdef object get_line(self, index_t line_nb)
    # Write `value` at line `line_nb`.
    cpdef set_line(self, index_t line_nb, object value)
# Declaration of the sequence column type holding several named elements
# per line.
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
    # Read the element called `element_name` at line `line_nb`.
    cpdef object get_item(self, index_t line_nb, str element_name)
    # Read every element of line `line_nb`.
    cpdef object get_line(self, index_t line_nb)
    # Write `value` into the element called `element_name` at line `line_nb`.
    cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -0,0 +1,88 @@
#cython: language_level=3
from .capi.obiview cimport obi_column_get_obiseq_with_elt_name_in_view, \
obi_column_get_obiseq_with_elt_idx_in_view, \
obi_column_set_obiseq_with_elt_name_in_view, \
obi_column_set_obiseq_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBISeq_NA, const_char_p
from obitools3.utils cimport str2bytes, bytes2str
from libc.stdlib cimport free
from libc.string cimport strcmp
cdef class OBIDMS_column_seq(OBIDMS_column):
    """
    Sequence column accessed through its view; both accessors work on
    element index 0 of the requested line.
    """

    cpdef object get_line(self, index_t line_nb):
        """
        Return the sequence stored at line `line_nb`.

        Returns None when the C value compares equal to OBISeq_NA,
        otherwise the value converted with bytes2str.

        Raises IndexError(line_nb) when obi_errno is positive after the read.
        """
        cdef char* value
        cdef object result
        value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
        if obi_errno > 0 :
            raise IndexError(line_nb)
        if strcmp(value, OBISeq_NA) == 0 :
            # The NA value is deliberately not freed, as before.
            # NOTE(review): presumably the getter hands back a non-heap NA constant - confirm.
            result = None
        else :
            try :
                result = bytes2str(value)
            finally :
                # Release the C buffer even when the conversion raises,
                # so a failed decode no longer leaks it.
                free(value)
        return result

    cpdef set_line(self, index_t line_nb, object value):
        """
        Store `value` at line `line_nb`; None is written as OBISeq_NA.

        Raises Exception when the C setter returns a negative value.
        """
        cdef bytes value_b
        if value is None :
            value_b = OBISeq_NA
        else :
            value_b = str2bytes(value)
        if obi_column_set_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, value_b) < 0:
            raise Exception("Problem setting a value in a column")
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
    """
    Sequence column with several named elements per line, accessed through
    its view.
    """

    cpdef object get_item(self, index_t line_nb, str element_name):
        """
        Return the element `element_name` of line `line_nb`.

        Returns None when the C value compares equal to OBISeq_NA,
        otherwise the value converted with bytes2str.

        Raises IndexError(line_nb, element_name) when obi_errno is positive
        after the read.
        """
        cdef char* value
        cdef object result
        value = obi_column_get_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
        if obi_errno > 0 :
            raise IndexError(line_nb, element_name)
        if strcmp(value, OBISeq_NA) == 0 :
            # The NA value is deliberately not freed, as before.
            # NOTE(review): presumably the getter hands back a non-heap NA constant - confirm.
            result = None
        else :
            try :
                result = bytes2str(value)
            finally :
                # Release the C buffer even when the conversion raises.
                free(value)
        return result

    cpdef object get_line(self, index_t line_nb) :
        """
        Return all elements of line `line_nb` as a dict keyed by element name.

        Elements equal to OBISeq_NA are mapped to None. When every element
        of the line is NA, None is returned instead of a dict.

        Raises IndexError(line_nb) when obi_errno is positive after a read.
        """
        cdef char* value
        cdef object value_in_result
        cdef dict result
        cdef index_t i
        cdef bint all_NA
        result = {}
        all_NA = True
        for i in range(self.nb_elements_per_line) :
            value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
            if obi_errno > 0 :
                raise IndexError(line_nb)
            if strcmp(value, OBISeq_NA) == 0 :
                value_in_result = None
            else :
                try :
                    value_in_result = bytes2str(value)
                finally :
                    # Release the C buffer even when the conversion raises,
                    # so an exception mid-loop does not leak it.
                    free(value)
            result[self.elements_names[i]] = value_in_result
            if all_NA and (value_in_result is not None) :
                all_NA = False
        if all_NA :
            result = None
        return result

    cpdef set_item(self, index_t line_nb, str element_name, object value):
        """
        Store `value` in element `element_name` of line `line_nb`; None is
        written as OBISeq_NA.

        Raises Exception when the C setter returns a negative value.
        """
        cdef bytes value_b
        if value is None :
            value_b = OBISeq_NA
        else :
            value_b = str2bytes(value)
        if obi_column_set_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), value_b) < 0:
            raise Exception("Problem setting a value in a column")

View File

@ -1,18 +1,41 @@
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiarray.h
../../../src/obiarray.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -1,25 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_str(OBIDMS_column):
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_str_writable(OBIDMS_column_str):
cpdef set_line(self, index_t line_nb, object value)
cpdef close(self)
cdef class OBIDMS_column_str_multi_elts(OBIDMS_column_str):
cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, str value)
cpdef set_line(self, index_t line_nb, object values)
cdef class OBIDMS_column_str_multi_elts_writable(OBIDMS_column_str_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, str value)
cpdef set_line(self, index_t line_nb, object values)
cpdef close(self)
cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -1,103 +1,87 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obistr_with_elt_name, \
obi_column_get_obistr_with_elt_idx, \
obi_column_set_obistr_with_elt_name, \
obi_column_set_obistr_with_elt_idx
from .capi.obiview cimport obi_column_get_obistr_with_elt_name_in_view, \
obi_column_get_obistr_with_elt_idx_in_view, \
obi_column_set_obistr_with_elt_name_in_view, \
obi_column_set_obistr_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIIdx_NA, const_char_p
from .capi.obitypes cimport OBIStr_NA, const_char_p
from obitools3.utils cimport str2bytes, bytes2str
from libc.string cimport strcmp
cdef class OBIDMS_column_str(OBIDMS_column):
cpdef object get_line(self, index_t line_nb):
cdef bytes value
cdef const_char_p value
cdef object result
value = <bytes> obi_column_get_obistr_with_elt_idx(self.pointer, line_nb, 0)
value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIIdx_NA :
if strcmp(value, OBIStr_NA) == 0 :
result = None
else :
result = bytes2str(value)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
return result
cpdef set_line(self, index_t line_nb, object value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_str_writable(OBIDMS_column_str):
cpdef set_line(self, index_t line_nb, object value):
if obi_column_set_obistr_with_elt_idx(self.pointer, line_nb, 0, str2bytes(value)) < 0:
cdef bytes value_b
if value is None :
value_b = OBIStr_NA
else :
value_b = str2bytes(value)
if obi_column_set_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, value_b) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_str_multi_elts(OBIDMS_column_str):
cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef bytes value
cdef const_char_p value
cdef object result
value = <bytes> obi_column_get_obistr_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIIdx_NA :
if strcmp(value, OBIStr_NA) == 0 :
result = None
else :
result = bytes2str(value)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
return result
cpdef object get_line(self, index_t line_nb) :
cdef bytes value
cdef object result
cdef const_char_p value
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = <bytes> obi_column_get_obistr_with_elt_idx(self.pointer, line_nb, i)
value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
result[self.elements_names[i]] = bytes2str(value)
if all_NA and (value != OBIIdx_NA) :
if strcmp(value, OBIStr_NA) == 0 :
value_in_result = None
else :
value_in_result = bytes2str(value)
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False
if all_NA :
result = None
return result
cpdef set_item(self, index_t line_nb, str element_name, str value):
raise Exception("Column is read-only")
cpdef set_line(self, index_t line_nb, object values):
raise Exception("Column is read-only")
cdef class OBIDMS_column_str_multi_elts_writable(OBIDMS_column_str_multi_elts):
cpdef set_item(self, index_t line_nb, str element_name, str value):
if obi_column_set_obistr_with_elt_name(self.pointer, line_nb, str2bytes(element_name), str2bytes(value)) < 0:
cpdef set_item(self, index_t line_nb, str element_name, object value):
cdef bytes value_b
if value is None :
value_b = OBIStr_NA
else :
value_b = str2bytes(value)
if obi_column_set_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), value_b) < 0:
raise Exception("Problem setting a value in a column")
cpdef set_line(self, index_t line_nb, object values):
cdef str value
for element_name in values :
value = values[element_name]
self.set_item(line_nb, element_name, value)
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -0,0 +1,39 @@
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -0,0 +1,30 @@
#cython: language_level=3
from ._obidms cimport OBIView_line
# Dict-derived sequence record. The three C-level attributes below cache
# the identifier, description and sequence strings next to the dict content.
cdef class OBI_Seq(dict) :
    cdef str id
    cdef str description
    cdef str sequence
    # Accessors for the cached attributes. No set_sequence is declared at
    # this level; the OBI_Nuc_Seq subclass declares one.
    cpdef set_id(self, str id)
    cpdef get_id(self)
    cpdef set_description(self, str description)
    cpdef get_description(self)
    cpdef get_sequence(self)
# OBI_Seq subclass that adds the sequence setter.
cdef class OBI_Nuc_Seq(OBI_Seq) :
    # Reverse-complement is not declared yet (kept commented out).
    #cpdef str reverse_complement(self)
    cpdef set_sequence(self, str sequence)
# Sequence record built on OBIView_line: exposes the same id / description /
# sequence accessors as the in-memory classes, but declares no attributes of
# its own.
cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
    cpdef set_id(self, str id)
    cpdef get_id(self)
    cpdef set_description(self, str description)
    cpdef get_description(self)
    cpdef set_sequence(self, str sequence)
    cpdef get_sequence(self)
    # Reverse-complement is not declared yet (kept commented out).
    # cpdef str reverse_complement(self)

View File

@ -0,0 +1,75 @@
#cython: language_level=3
from obitools3.utils cimport bytes2str, str2bytes
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DESCRIPTION_COLUMN
cdef class OBI_Seq(dict) :
    """
    Dict-based sequence record that mirrors its identifier and description
    both as attributes and as dict entries keyed by the column-name constants.
    """

    def __init__(self, str id, str seq, str description=None) :
        """
        Fill the record through the setters; the description is only set
        when one is given.
        """
        self.set_id(id)
        # set_sequence is not defined on this class; it is resolved on the
        # actual instance (OBI_Nuc_Seq defines it).
        self.set_sequence(seq)
        if description is None :
            return
        self.set_description(description)

    cpdef set_id(self, str id) :
        """Store the identifier as attribute and under the ID_COLUMN key."""
        self.id = id
        id_key = bytes2str(ID_COLUMN)
        self[id_key] = id

    cpdef get_id(self) :
        """Return the cached identifier."""
        return self.id

    cpdef set_description(self, str description) :
        """Store the description as attribute and under the DESCRIPTION_COLUMN key."""
        self.description = description
        description_key = bytes2str(DESCRIPTION_COLUMN)
        self[description_key] = description

    cpdef get_description(self) :
        """Return the cached description attribute."""
        return self.description                     # TODO no

    cpdef get_sequence(self) :
        """Return the cached sequence attribute."""
        return self.sequence

    def __str__(self) :
        """Return the cached sequence attribute as the string form."""
        return self.sequence     # or not
cdef class OBI_Nuc_Seq(OBI_Seq) :
    """OBI_Seq variant that provides the sequence setter."""

    cpdef set_sequence(self, str sequence) :
        """Store the sequence as attribute and under the NUC_SEQUENCE_COLUMN key."""
        self.sequence = sequence
        sequence_key = bytes2str(NUC_SEQUENCE_COLUMN)
        self[sequence_key] = sequence

    # cpdef str reverse_complement(self) : TODO in C ?
    #     pass
cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
    """
    Sequence record backed by an OBIView_line: every accessor reads or writes
    the item keyed by the matching column-name constant.
    """

    cpdef set_id(self, str id) :
        """Write the identifier under the ID_COLUMN key."""
        id_key = bytes2str(ID_COLUMN)
        self[id_key] = id

    cpdef get_id(self) :
        """Read the item stored under the ID_COLUMN key."""
        id_key = bytes2str(ID_COLUMN)
        return self[id_key]

    cpdef set_description(self, str description) :
        """Write the description under the DESCRIPTION_COLUMN key."""
        description_key = bytes2str(DESCRIPTION_COLUMN)
        self[description_key] = description

    cpdef get_description(self) :
        """Read the item stored under the DESCRIPTION_COLUMN key."""
        description_key = bytes2str(DESCRIPTION_COLUMN)
        return self[description_key]

    cpdef set_sequence(self, str sequence) :
        """Write the sequence under the NUC_SEQUENCE_COLUMN key."""
        sequence_key = bytes2str(NUC_SEQUENCE_COLUMN)
        self[sequence_key] = sequence

    cpdef get_sequence(self) :
        """Read the item stored under the NUC_SEQUENCE_COLUMN key."""
        sequence_key = bytes2str(NUC_SEQUENCE_COLUMN)
        return self[sequence_key]

    # def __str__(self) :
    #     return self[bytes2str(NUC_SEQUENCE_COLUMN)]     # or not

    # cpdef str reverse_complement(self) : TODO in C ?
    #     pass

    # TODO static method to import?

View File

@ -0,0 +1,39 @@
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -0,0 +1,31 @@
#cython: language_level=3
from .capi.obitaxonomy cimport ecotx_t, OBIDMS_taxonomy_p
from libc.stdint cimport int32_t
# Wrapper around a C taxonomy handle.
cdef class OBI_Taxonomy :
    # Name the taxonomy was requested with.
    cdef str name
    # Handle returned by the C layer when the taxonomy is read.
    cdef OBIDMS_taxonomy_p pointer
    # Close the C taxonomy handle.
    cpdef close(self)
# Wrapper around one C taxon record (ecotx_t). The scalar fields are copied
# out of the record when the object is built.
# NOTE(review): the attributes taxid / rank / farest / parent share their
# names with the cpdef methods declared below; Cython normally rejects such
# redeclarations within one class - confirm this compiles, or rename one side.
cdef class OBI_Taxon :
    # Pointer to the wrapped C record.
    cdef ecotx_t* pointer
    cdef int32_t taxid
    cdef int32_t rank
    cdef int32_t farest
    # Pointer to the parent C record.
    cdef ecotx_t* parent
    cdef str name
    # Accessors returning the cached fields; parent() returns a new OBI_Taxon.
    cpdef int32_t taxid(self)
    cpdef int32_t rank(self)
    cpdef int32_t farest(self)
    cpdef OBI_Taxon parent(self)

View File

@ -0,0 +1,65 @@
#cython: language_level=3
from obitools3.utils cimport bytes2str, str2bytes
from .capi.obitaxonomy cimport obi_read_taxonomy, \
obi_close_taxonomy, \
obi_taxo_get_taxon_with_taxid
from ._obidms cimport OBIDMS
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
cdef class OBI_Taxonomy :
    """Python-side handle on a taxonomy read from an OBIDMS."""

    def __init__(self, OBIDMS dms, str name) :
        """Read the taxonomy called `name` from `dms` and keep the C handle."""
        self.name = name
        self.pointer = obi_read_taxonomy(dms.pointer, str2bytes(name), True)  # TODO discuss

    def __getitem__(self, object ref):
        """
        Look a taxon up by integer taxid and return it as an OBI_Taxon.

        Any `ref` whose type is not exactly int falls through and yields None.
        """
        cdef ecotx_t* taxon_p
        if type(ref) == int :
            taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer, ref)
            # The raw pointer travels to the OBI_Taxon constructor inside an
            # unnamed capsule without destructor.
            return OBI_Taxon(PyCapsule_New(taxon_p, NULL, NULL))

    cpdef close(self) :
        """
        Close the C taxonomy handle.

        Raises Exception when the C call returns a negative value.
        """
        cdef int status
        status = obi_close_taxonomy(self.pointer)
        if status < 0 :
            raise Exception("Error closing the taxonomy")
cdef class OBI_Taxon :    # dict subclass?
    """
    Python-side view of one C taxon record, built from a capsule that
    carries the record pointer.
    """

    def __init__(self, object taxon_capsule) :
        """Unwrap the capsule and copy the record fields into attributes."""
        cdef ecotx_t* record = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
        self.pointer = record
        self.taxid   = record.taxid
        self.rank    = record.rank
        self.farest  = record.farest
        self.parent  = record.parent
        self.name    = bytes2str(record.name)

    cpdef int32_t taxid(self):
        """Return the cached taxid."""
        return self.taxid

    cpdef int32_t rank(self):
        """Return the cached rank value."""
        return self.rank

    cpdef int32_t farest(self):
        """Return the cached `farest` value."""
        return self.farest

    cpdef OBI_Taxon parent(self):
        """Wrap the cached parent pointer in a capsule and return it as an OBI_Taxon."""
        return OBI_Taxon(PyCapsule_New(self.parent, NULL, NULL))

View File

@ -15,24 +15,27 @@ from ..capi.obitypes cimport const_char_p, \
cdef extern from "obidmscolumn.h" nogil:
struct OBIDMS_column_header_t:
bint little_endian
int header_size
size_t header_size
size_t data_size
index_t line_count
index_t lines_used
index_t nb_elements_per_line
const_char_p elements_names
OBIType_t data_type
OBIType_t returned_data_type
OBIType_t stored_data_type
time_t creation_date
obiversion_t version
obiversion_t cloned_from
const_char_p name
const_char_p array_name
const_char_p indexer_name
const_char_p comments
ctypedef OBIDMS_column_header_t* OBIDMS_column_header_p
struct OBIDMS_column_t:
OBIDMS_p dms
OBIDMS_column_header_p header
bint writable
ctypedef OBIDMS_column_t* OBIDMS_column_p
@ -42,7 +45,8 @@ cdef extern from "obidmscolumn.h" nogil:
index_t nb_lines,
index_t nb_elements_per_line,
const_char_p elements_names,
const_char_p array_name)
const_char_p indexer_name,
const_char_p comments)
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
const_char_p column_name,
@ -51,24 +55,29 @@ cdef extern from "obidmscolumn.h" nogil:
int obi_close_column(OBIDMS_column_p column)
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
OBIDMS_column_p line_selection,
const_char_p column_name,
obiversion_t version_number,
bint clone_data)
int obi_truncate_and_close_column(OBIDMS_column_p column)
int obi_close_column(OBIDMS_column_p column)
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms,
const_char_p column_name)
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms,
const_char_p column_name)
const_char_p column_name,
obiversion_t version_number)
int obi_unmap_header(OBIDMS_column_header_p header)
int obi_close_header(OBIDMS_column_header_p header)
char* obi_column_format_date(time_t date)
int obi_select(OBIDMS_column_p line_selection_column, index_t line_to_grep)
cdef extern from "obidmscolumn_int.h" nogil:
int obi_column_set_obiint_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
@ -87,7 +96,6 @@ cdef extern from "obidmscolumn_int.h" nogil:
index_t line_nb,
index_t element_idx)
cdef extern from "obidmscolumn_bool.h" nogil:
int obi_column_set_obibool_with_elt_name(OBIDMS_column_p column,
@ -153,12 +161,12 @@ cdef extern from "obidmscolumn_str.h" nogil:
int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
char* value)
const_char_p value)
int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
char* value)
const_char_p value)
const_char_p obi_column_get_obistr_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
@ -168,3 +176,23 @@ cdef extern from "obidmscolumn_str.h" nogil:
index_t line_nb,
index_t element_idx)
cdef extern from "obidmscolumn_seq.h" nogil:
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
const_char_p value)
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
const_char_p value)
char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)

View File

@ -0,0 +1,42 @@
#cython: language_level=3
from .obitypes cimport const_char_p
from .obidms cimport OBIDMS_p
from libc.stdint cimport int32_t
cdef extern from "obidms_taxonomy.h" nogil:
struct OBIDMS_taxonomy_t
ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p
struct ecotxnode :
int32_t taxid
int32_t rank
int32_t farest
ecotxnode* parent
char* name
ctypedef ecotxnode ecotx_t
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names)
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
bint obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)

View File

@ -8,6 +8,9 @@ from posix.types cimport time_t
cdef extern from *:
ctypedef char* const_char_p "const char*"
cdef extern from "encode.h" nogil:
bint only_ATGC(const_char_p seq)
cdef extern from "obidmscolumn.h" nogil:
@ -16,14 +19,17 @@ cdef extern from "obidmscolumn.h" nogil:
cdef extern from "obitypes.h" nogil:
enum OBIType: # TODO je sais pas si ça sert de declarer le contenu de l'enum
OBI_VOID = 0,
enum OBIType:
OBI_VOID,
OBI_INT,
OBI_FLOAT,
OBI_BOOL,
OBI_CHAR,
OBI_STR,
OBI_SEQ,
OBI_IDX
ctypedef OBIType OBIType_t
enum OBIBool:
@ -40,5 +46,7 @@ cdef extern from "obitypes.h" nogil:
extern obifloat_t OBIFloat_NA
extern obichar_t OBIChar_NA
extern obibool_t OBIBool_NA
extern const_char_p OBISeq_NA
extern const_char_p OBIStr_NA
char* name_data_type(int data_type)
const_char_p name_data_type(int data_type)

View File

@ -0,0 +1,250 @@
#cython: language_level=3
from .obitypes cimport const_char_p, \
OBIType_t, \
obiversion_t, \
obiint_t, \
obibool_t, \
obichar_t, \
obifloat_t, \
index_t, \
time_t
from ..capi.obidms cimport OBIDMS_p
from ..capi.obidmscolumn cimport OBIDMS_column_p
cdef extern from "obiview.h" nogil:
extern const_char_p VIEW_TYPE_NUC_SEQS
extern const_char_p NUC_SEQUENCE_COLUMN
extern const_char_p ID_COLUMN
extern const_char_p DESCRIPTION_COLUMN
struct Column_reference_t :
const_char_p column_name
obiversion_t version
ctypedef Column_reference_t* Column_reference_p
struct Obiview_t :
OBIDMS_p dms
const_char_p name
OBIDMS_column_p line_selection
OBIDMS_column_p new_line_selection
OBIDMS_column_p columns
bint read_only
Column_reference_t line_selection_reference
index_t line_count
int column_count
const_char_p comments
ctypedef Obiview_t* Obiview_p
struct Obiview_infos_t :
int view_number
int column_count
index_t line_count
const_char_p name
const_char_p created_from
time_t creation_date
bint all_lines
Column_reference_t line_selection
Column_reference_p column_references
const_char_p view_type
const_char_p comments
ctypedef Obiview_infos_t* Obiview_infos_p
struct Obiviews_header_t :
size_t header_size
size_t views_size
int view_count
ctypedef Obiviews_header_t* Obiviews_header_p
struct Obiviews_infos_all_t :
Obiviews_header_p header
Obiview_infos_p view_infos
ctypedef Obiviews_infos_all_t* Obiviews_infos_all_p
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
Obiview_p obi_new_view(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
Obiview_p obi_open_view(OBIDMS_p dms, const_char_p view_name)
Obiviews_infos_all_p obi_read_views(OBIDMS_p dms)
int obi_unmap_read_views(Obiviews_infos_all_p views)
int obi_view_add_column(Obiview_p view,
const_char_p column_name,
obiversion_t version_number,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
const_char_p elements_names,
const_char_p indexer_name,
const_char_p comments,
bint create)
int obi_view_delete_column(Obiview_p view, const_char_p column_name)
int obi_select_line(Obiview_p view, index_t line_nb)
int obi_select_lines(Obiview_p view, index_t* line_nbs)
OBIDMS_column_p obi_view_clone_column(Obiview_p view, const_char_p column_name)
OBIDMS_column_p obi_view_get_column(Obiview_p view, const_char_p column_name)
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name)
int obi_save_view(Obiview_p view)
int obi_close_view(Obiview_p view)
int obi_save_and_close_view(Obiview_p view)
int obi_column_set_obiint_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
obiint_t value)
int obi_column_set_obiint_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
obiint_t value)
obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
obiint_t obi_column_get_obiint_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
int obi_column_set_obibool_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
obibool_t value)
int obi_column_set_obibool_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
obibool_t value)
obibool_t obi_column_get_obibool_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
obibool_t obi_column_get_obibool_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
int obi_column_set_obichar_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
obichar_t value)
int obi_column_set_obichar_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
obichar_t value)
obichar_t obi_column_get_obichar_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
obichar_t obi_column_get_obichar_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
int obi_column_set_obifloat_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
obifloat_t value)
int obi_column_set_obifloat_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
obifloat_t value)
obifloat_t obi_column_get_obifloat_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
obifloat_t obi_column_get_obifloat_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
int obi_column_set_obistr_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
const_char_p value)
int obi_column_set_obistr_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
const_char_p value)
const_char_p obi_column_get_obistr_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
const_char_p obi_column_get_obistr_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
int obi_column_set_obiseq_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
const_char_p value)
int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
const_char_p value)
char* obi_column_get_obiseq_with_elt_name_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
char* obi_column_get_obiseq_with_elt_idx_in_view(Obiview_p view,
OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)

View File

@ -0,0 +1,199 @@
import sys
import argparse
import time
from obitools3.obidms._obidms import OBIDMS
def bufferedRead(fileobj,size=209715200):
    """Yield the lines of ``fileobj`` one by one, reading them in batches.

    Lines are fetched with ``fileobj.readlines(size)``, where ``size`` is the
    hint passed to ``readlines`` (default 209715200, i.e. 200 MB), until a
    call returns an empty list.
    """
    while True:
        chunk = fileobj.readlines(size)
        if not chunk:
            return
        yield from chunk
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Convert a fasta file in an OBIDMS.')
parser.add_argument('-i', '--input', dest='input_file', type=str,
help='Name of the file containing the sequences')
args = parser.parse_args()
d = OBIDMS('tdms')
view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW")
# for i in range(35000000) :
# if (not (i%500000)) :
# print(str(time.time())+'\t'+str(i))
# id = "@HWI-D00405:142:C71BAANXX:4:1101:1234:2234_CONS_SUB_SUB_"+str(i)
# view[i].set_id(id)
# if id != view[i]["ID"] :
# print("nope", id, view[i]["ID"])
input_file = open(args.input_file, 'r')
input_file_buffered = bufferedRead(input_file)
#
# if args.input_file[-1:] == "a" :
#
# i = 0
# next = False
# first = True
#
# for line in input_file :
#
# if line[0] == ">" :
#
# if not first :
# # save seq
# #print(i, id, seq)
# view[i].set_sequence(seq)
# i+=1
#
# first = False
#
# #id = line.split(" ", 1)[0][1:]
# #rest = (line[:-1].split(" ", 1)[1]).split(";")
# #view[i].set_id(id)
#
# # description = ""
# # for j in range(len(rest)) :
# # if "=" in rest[j] :
# # rest[j] = rest[j].strip()
# # rest[j] = rest[j].split("=", 1)
# # column_name = rest[j][0]
# # v = rest[j][1]
# # if ((not v.isalpha()) and (v.isalnum())) :
# # conv_v = int(v)
# # elif (v == "True") or (v == "False") :
# # conv_v = bool(v)
# # else :
# # f = True
# # for letter in v :
# # if ((not letter.isalnum()) or (letter != ".")) :
# # f = False
# # if f :
# # conv_v = float(v)
# # else :
# # conv_v = v
# # view[i][column_name] = conv_v
# # else :
# # description+=rest[j]
# #
# # if description != "" :
# # description = description.strip()
# # view[i].set_description(description)
#
# #print(id)
# #print(rest)
# #print(description)
#
# next = True
#
# elif next == True :
#
# # if not (i % 1E5) :
# # print(i)
#
# seq = line[:-1]
# next = False
#
# elif not next :
#
# seq += line[:-1]
#
#
# elif args.input_file[-1:] == "q" :
#
# i = 0
# l = 0
# next = False
#
l=0
i=0
# while (True):
# l+=1
# line = input_file.readline()
# if line=="":
# break
for line in input_file_buffered :
#
#if i > 1E7 :
# # print('hmm?')
#
# if i == 6000000 :
# break
#
if l%4 == 0 :
#
if (not (i%500000)) :
print(str(time.time())+'\t'+str(i))
# #
# # #print("header", line)
# #
id = line.split(" ", 1)[0][1:]
# print(id)
# # #rest = (line[:-1].split(" ", 1)[1]).split(";")
view[i].set_id(id)
# print(view[i]["ID"])
#
# i+=1
# l+=1
#
# # description = ""
# # for j in range(len(rest)) :
# # if "=" in rest[j] :
# # rest[j] = rest[j].strip()
# # rest[j] = rest[j].split("=", 1)
# # column_name = rest[j][0]
# # #print("COLUMN", column_name)
# # v = rest[j][1]
# # if (v == "") and (column_name in view) and (view[column_name].get_data_type() == "OBI_SEQ") :
# # #print(">>>>>>YUP")
# # conv_v = "aa"
# # else :
# # if ((not v.isalpha()) and (v.isalnum())) :
# # conv_v = int(v)
# # elif (v == "True") or (v == "False") :
# # conv_v = bool(v)
# # else :
# # f = True
# # for letter in v :
# # if ((not letter.isalnum()) or (letter != ".")) :
# # f = False
# # if f :
# # conv_v = float(v)
# # else :
# # conv_v = v
# # view[i][column_name] = conv_v
# # else :
# # description+=rest[j]
# #
# # if description != "" :
# # description = description.strip()
# # view[i].set_description(description)
#
elif l%4 == 1 :
# #
seq = line[:-1]
# #print("seq", seq)
view[i].set_sequence(seq)
i+=1
#
l+=1
#
#
input_file.close()
#print(view)
print(view.__repr__())
view.save_and_close()
d.close()
print("Done.")

View File

@ -10,9 +10,8 @@ from obitools3.obidms._obidms import OBIDMS
LINE_COUNT_FOR_TEST_COLUMN = 10000 # TODO randomize?
SMALLER_LINE_COUNT_FOR_TEST_COLUMN = 1000 # TODO randomize?
NB_ELEMENTS_PER_LINE = 20 # TODO randomize?
NB_ELEMENTS_PER_LINE = 10 # TODO randomize?
DMS_NAME = "unit_test_dms"
DATA_TYPES = ['OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_IDX']
def create_test_obidms():
@ -22,26 +21,24 @@ def create_test_obidms():
return (dms, dms_name, dms_dir_name)
def create_test_column(dms, data_type_code, multiple_elements_per_line=False):
data_types = DATA_TYPES
data_type_code = data_type_code
data_type_str = data_types[data_type_code-1]
col_name = "unit_test_"+data_type_str
def create_test_column(dms, data_type, multiple_elements_per_line=False):
col_name = "unit_test_"+data_type
if multiple_elements_per_line :
elts_names = elements_names()
col = dms.open_column(col_name,
create=True,
data_type=data_type_code,
type=data_type,
nb_elements_per_line=NB_ELEMENTS_PER_LINE,
elements_names=elts_names)
return (col, col_name, elts_names, data_type_str)
return (col, col_name, elts_names)
else :
col = dms.open_column(col_name,
create=True,
data_type=data_type_code)
return (col, col_name, data_type_str)
type=data_type)
return (col, col_name)
def elements_names():
@ -58,12 +55,15 @@ def random_obivalue(data_type):
elif data_type == "OBI_BOOL" :
return randint(0,1)
elif data_type == "OBI_CHAR" :
nucs = 'atgc'
return bytes(nucs[randint(0,3)], 'utf-8')
elif data_type == "OBI_IDX" :
length = randint(1,500)
return choice(string.ascii_lowercase)
elif data_type == "OBI_STR" :
length = randint(1,200)
randoms = ''.join(choice(string.ascii_lowercase) for i in range(length))
return randoms
elif data_type == "OBI_SEQ" :
length = randint(1,200)
randoms = ''.join(choice("atgcryswkmdbhvn") for i in range(length))
return randoms
class OBIDMS_Column_TestCase(unittest.TestCase):
def tearDown(self):
@ -71,10 +71,10 @@ class OBIDMS_Column_TestCase(unittest.TestCase):
self.dms.close()
shutil.rmtree(self.dms_dir_name, ignore_errors=True)
def test_OBIDMS_column_type(self):
assert self.col.get_data_type() == self.data_type_str, 'Wrong data type associated with column'
assert self.col.get_data_type() == self.data_type, 'Wrong data type associated with column'
def test_OBIDMS_column_cloning(self):
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
self.col[i]= random_obivalue(self.data_type_str)
self.col[i]= random_obivalue(self.data_type)
self.col.close()
clone = self.dms.open_column(self.col_name, clone=True)
self.col = self.dms.open_column(self.col_name)
@ -86,10 +86,21 @@ class OBIDMS_Column_TestCase(unittest.TestCase):
clone.close()
def test_OBIDMS_column_set_and_get(self):
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
v = random_obivalue(self.data_type_str)
v = random_obivalue(self.data_type)
self.col[i] = v
assert self.col[i] == v, "Different value than the set value"
assert self.col[i] is not None, "None value"
def test_OBIDMS_referring_column(self):
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
self.col[i] = random_obivalue(self.data_type)
ref_col = self.dms.open_column(self.col_name, referring=True)
j = 0
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
if i%2 : # TODO randomize
ref_col.grep_line(i)
assert ref_col[j] == self.col[i], "Different value in original column and returned by referring column"
assert ref_col[j] is not None, "None value"
j+=1
class OBIDMS_Column_multiple_elements_TestCase(OBIDMS_Column_TestCase):
@ -98,7 +109,7 @@ class OBIDMS_Column_multiple_elements_TestCase(OBIDMS_Column_TestCase):
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
v = {}
for e in self.elts_names :
v[e] = random_obivalue(self.data_type_str)
v[e] = random_obivalue(self.data_type)
self.col[i] = v
self.col.close()
clone = self.dms.open_column(self.col_name, clone=True)
@ -112,7 +123,7 @@ class OBIDMS_Column_multiple_elements_TestCase(OBIDMS_Column_TestCase):
def test_OBIDMS_column_set_and_get_with_elements_names(self):
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
for e in range(NB_ELEMENTS_PER_LINE) :
v = random_obivalue(self.data_type_str)
v = random_obivalue(self.data_type)
self.col.set_item(i, self.elts_names[e], v)
assert self.col.get_item(i, self.elts_names[e]) == v, "Different value than the set value"
assert self.col.get_item(i, self.elts_names[e]) is not None, "None value"
@ -120,141 +131,168 @@ class OBIDMS_Column_multiple_elements_TestCase(OBIDMS_Column_TestCase):
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
v = {}
for e in self.elts_names :
v[e] = random_obivalue(self.data_type_str)
v[e] = random_obivalue(self.data_type)
self.col[i] = v
assert self.col[i] == v, "Different value than the set value"
assert self.col[i] is not None, "None value"
def test_OBIDMS_referring_column(self):
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
v = {}
for e in self.elts_names :
v[e] = random_obivalue(self.data_type)
self.col[i] = v
ref_col = self.dms.open_column(self.col_name, referring=True)
j = 0
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
if i%2 : # TODO randomize
ref_col.grep_line(i)
assert ref_col[j] == self.col[i], "Different value in original column and returned by referring column"
assert ref_col[j] is not None, "None value"
j+=1
ref_col.close()
class OBIDMS_Column_OBI_INT_TestCase(OBIDMS_Column_TestCase):
def setUp(self):
self.data_type_code = 1
self.data_type = 'OBI_INT'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code)
self.col_name = create_test_column(self.dms,
self.data_type)
class OBIDMS_Column_OBI_INT_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
def setUp(self):
self.data_type_code = 1
self.data_type = 'OBI_INT'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.elts_names, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code,
self.elts_names = create_test_column(self.dms,
self.data_type,
multiple_elements_per_line=True)
class OBIDMS_Column_OBI_FLOAT_TestCase(OBIDMS_Column_TestCase):
def setUp(self):
self.data_type_code = 2
self.data_type = 'OBI_FLOAT'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code)
self.col_name = create_test_column(self.dms,
self.data_type)
class OBIDMS_Column_OBI_FLOAT_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
def setUp(self):
self.data_type_code = 2
self.data_type = 'OBI_FLOAT'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.elts_names, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code,
self.elts_names = create_test_column(self.dms,
self.data_type,
multiple_elements_per_line=True)
class OBIDMS_Column_OBI_BOOL_TestCase(OBIDMS_Column_TestCase):
def setUp(self):
self.data_type_code = 3
self.data_type = 'OBI_BOOL'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code)
self.col_name = create_test_column(self.dms,
self.data_type)
class OBIDMS_Column_OBI_BOOL_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
def setUp(self):
self.data_type_code = 3
self.data_type = 'OBI_BOOL'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.elts_names, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code,
self.elts_names = create_test_column(self.dms,
self.data_type,
multiple_elements_per_line=True)
class OBIDMS_Column_OBI_CHAR_TestCase(OBIDMS_Column_TestCase):
def setUp(self):
self.data_type_code = 4
self.data_type = 'OBI_CHAR'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code)
self.col_name = create_test_column(self.dms,
self.data_type)
class OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
def setUp(self):
self.data_type_code = 4
self.data_type = 'OBI_CHAR'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.elts_names, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code,
self.elts_names = create_test_column(self.dms,
self.data_type,
multiple_elements_per_line=True)
class OBIDMS_Column_OBI_STR_TestCase(OBIDMS_Column_TestCase):
def setUp(self):
self.data_type_code = 5
self.data_type = 'OBI_STR'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code)
self.col_name = create_test_column(self.dms,
self.data_type)
class OBIDMS_Column_OBI_STR_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
def setUp(self):
self.data_type_code = 5
self.data_type = 'OBI_STR'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.elts_names, \
self.data_type_str = create_test_column(self.dms,
self.data_type_code,
self.elts_names = create_test_column(self.dms,
self.data_type,
multiple_elements_per_line=True)
class OBIDMS_Column_OBI_SEQ_TestCase(OBIDMS_Column_TestCase):
def setUp(self):
self.data_type = 'OBI_SEQ'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name = create_test_column(self.dms,
self.data_type)
class OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
def setUp(self):
self.data_type = 'OBI_SEQ'
self.dms, \
self.dms_name, \
self.dms_dir_name = create_test_obidms()
self.col, \
self.col_name, \
self.elts_names = create_test_column(self.dms,
self.data_type,
multiple_elements_per_line=True)
if __name__ == '__main__':
unittest.main(verbosity=2, defaultTest=["OBIDMS_Column_OBI_INT_TestCase",
@ -266,6 +304,8 @@ if __name__ == '__main__':
"OBIDMS_Column_OBI_CHAR_TestCase",
"OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase",
"OBIDMS_Column_OBI_STR_TestCase",
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase"])
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase",
"OBIDMS_Column_OBI_SEQ_TestCase",
"OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase"])

View File

@ -1,5 +1,5 @@
major = 1
minor = 1
serial= '16'
major = 0
minor = 0
serial= '0'
version ="%d.%02d.%s" % (major,minor,serial)

Binary file not shown.

View File

@ -1,5 +1,5 @@
--extra-index-url https://pypi.python.org/simple/
Cython>=0.21
Cython==0.23.5
Sphinx>=1.2.0
ipython>=3.0.0
breathe>=4.0.0

64
src/MurmurHash2.c Executable file
View File

@ -0,0 +1,64 @@
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
// Note - This code makes a few assumptions about how your machine behaves -
// 1. We can read a 4-byte value from any address without crashing
// 2. sizeof(int) == 4
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.
/**
 * Computes the 32-bit MurmurHash2 of a byte buffer.
 *
 * The input is consumed four bytes at a time in the machine's native byte
 * order, so the result differs between little-endian and big-endian hosts.
 * Each word is assembled through an unsigned char view rather than by casting
 * the data pointer to unsigned int *, so the buffer may have any alignment
 * and no object is accessed through an incompatible pointer type.
 *
 * @param key   Bytes to hash.
 * @param len   Number of bytes to hash.
 * @param seed  Seed; XORed with len to form the initial hash state.
 *
 * @return The hash value.
 */
unsigned int murmurhash2(const void * key, int len, const unsigned int seed)
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value
    unsigned int h = seed ^ len;

    const unsigned char * data = (const unsigned char *)key;

    // Mix 4 bytes at a time into the hash
    while (len >= 4)
    {
        // Copy the next 4 input bytes into k byte by byte: same value as a
        // native word load, but valid for unaligned input.
        unsigned int k = 0;
        unsigned char * k_bytes = (unsigned char *)&k;
        int b;

        for (b = 0; b < 4; b++)
        {
            k_bytes[b] = data[b];
        }

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Handle the last few bytes of the input array
    switch (len)
    {
        case 3: h ^= data[2] << 16;
                /* fallthrough */
        case 2: h ^= data[1] << 8;
                /* fallthrough */
        case 1: h ^= data[0];
                h *= m;
                break;
        default:
                break;
    }

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

248
src/bloom.c Executable file
View File

@ -0,0 +1,248 @@
/*
* Copyright (c) 2012-2015, Jyri J. Virkki
* All rights reserved.
*
* This file is under BSD license. See LICENSE file.
*/
/*
* Refer to bloom.h for documentation on the public interfaces.
*/
#include <assert.h>
#include <fcntl.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "bloom.h"
#include "murmurhash2.h"
// Two-level stringification: MAKESTRING(n) first macro-expands n and then
// STRING turns the expanded text into a string literal.
#define MAKESTRING(n) STRING(n)
#define STRING(n) #n
// On Linux only: declaration of the helper used by setup_buckets() to pick a
// bucket size when the caller passes 0; its definition is not in this file.
#ifdef __linux__
unsigned detect_bucket_size(unsigned fallback_size);
#endif
/**
 * Tests bit number x of a bit array and optionally sets it.
 *
 * The buffer is addressed as an array of 32-bit words: bit x lives in word
 * x / 32 at position x % 32.
 *
 * @param buf      Start of the bit array (accessed as uint32_t words).
 * @param x        Index of the bit to test.
 * @param set_bit  If non-zero and the bit is clear, the bit is set.
 *
 * @return 1 if the bit was already set, 0 if it was clear (whether or not it
 *         has just been set).
 */
static int test_bit_set_bit(unsigned char * buf, unsigned int x, int set_bit)
{
    uint32_t * word_buf = (uint32_t *)buf;
    unsigned int offset = x >> 5;
    uint32_t word = word_buf[offset];
    // The mask is built from an unsigned 32-bit one: shifting a signed int 1
    // left by 31 would overflow into the sign bit (undefined behaviour).
    uint32_t mask = (uint32_t)1 << (x % 32);

    if (word & mask) {
        return 1;
    }

    if (set_bit) {
        word_buf[offset] = word | mask;
    }
    return 0;
}
/**
 * Shared implementation of bloom_check() and bloom_add().
 *
 * Two murmurhash2 values are derived from the buffer (the second one seeded
 * with the first). The first hash selects a bucket; bloom->hashes bit
 * positions inside that bucket are then derived as (h1 + k*h2) masked with
 * bucket_bits_fast_mod_operand, and each one is tested (and set when add is
 * non-zero).
 *
 * @param bloom   Filter to query/update.
 * @param buffer  Bytes of the element.
 * @param len     Number of bytes in buffer.
 * @param add     Non-zero to set the bits that are not set yet.
 *
 * @return -1 if the filter is not ready, 1 if every tested bit was already
 *         set (element present, or collision), 0 otherwise.
 */
static int bloom_check_add(struct bloom * bloom,
                           const void * buffer, int len, int add)
{
    unsigned int h1;
    unsigned int h2;
    unsigned char * bucket;
    int already_set = 0;
    int k;

    if (bloom->ready == 0) {
        (void)printf("bloom at %p not initialized!\n", (void *)bloom);
        return -1;
    }

    h1 = murmurhash2(buffer, len, 0x9747b28c);
    h2 = murmurhash2(buffer, len, h1);

    // Start of the bucket selected by the first hash
    bucket = bloom->bf + ((h1 % bloom->buckets) << bloom->bucket_bytes_exponent);

    for (k = 0; k < bloom->hashes; k++) {
        const unsigned int bit = (h1 + k*h2) & bloom->bucket_bits_fast_mod_operand;
        already_set += (test_bit_set_bit(bucket, bit, add) != 0);
    }

    // 1 == element already in (or collision)
    return (already_set == bloom->hashes) ? 1 : 0;
}
/**
 * Derives the bucket layout of a filter from its byte size.
 *
 * Reads bloom->bytes and bloom->entries; writes buckets, bucket_bytes,
 * bucket_bytes_exponent and bucket_bits_fast_mod_operand, and, when the byte
 * size is not a multiple of the bucket size, pads bytes up to the next
 * multiple and recomputes bits and bpe accordingly.
 *
 * @param bloom       Filter being initialized.
 * @param cache_size  Bucket size in bytes; 0 selects detect_bucket_size() on
 *                    Linux and BLOOM_BUCKET_SIZE_FALLBACK elsewhere.
 */
static void setup_buckets(struct bloom * bloom, unsigned int cache_size)
{
    int leftover;

    // A non-zero cache_size from the caller is used as given
    if (cache_size == 0) {
#ifdef __linux__
        cache_size = detect_bucket_size(BLOOM_BUCKET_SIZE_FALLBACK);
#else
        cache_size = BLOOM_BUCKET_SIZE_FALLBACK;
#endif
    }

    bloom->bucket_bytes = cache_size;
    bloom->buckets = (bloom->bytes / cache_size);

    // Pad the buffer so that it holds a whole number of buckets
    leftover = (bloom->bytes % bloom->bucket_bytes);
    if (leftover != 0) {
        bloom->bytes += (bloom->bucket_bytes - leftover);
        assert((bloom->bytes % bloom->bucket_bytes) == 0); // Should get even

        // Keep the derived quantities in line with the padded size
        bloom->bits = bloom->bytes * 8;
        bloom->bpe = bloom->bits*1. / bloom->entries;
        bloom->buckets++;
    }

    // Shift amount and bit mask later used in place of multiply / modulo
    bloom->bucket_bits_fast_mod_operand = (cache_size * 8 - 1);
    bloom->bucket_bytes_exponent = __builtin_ctz(cache_size);
}
/**
 * Computes the size in bytes of the bit array needed by a bloom filter.
 *
 * The number of bits is entries * (-ln(error) / ln(2)^2), rounded up to whole
 * bytes and then up to a multiple of BLOOM_BUCKET_SIZE_FALLBACK.
 *
 * NOTE(review): the rounding always uses BLOOM_BUCKET_SIZE_FALLBACK, whereas
 * setup_buckets() may use a different bucket size (caller-supplied, or
 * detected on Linux) -- confirm that callers size their buffers consistently.
 *
 * @param entries  Expected number of elements; must be >= 1.
 * @param error    Target false-positive rate; must be in ]0, 1[.
 *
 * @return The size in bytes, or 0 if entries or error is out of range
 *         (log() of a non-positive rate is not finite, and a rate above 1 or
 *         a negative entry count would yield a negative size).
 */
int bloom_filter_size(int entries, double error)
{
    const double ln2_squared = 0.480453013918201; // ln(2)^2
    const unsigned bucket_bytes = BLOOM_BUCKET_SIZE_FALLBACK;
    double bpe;
    int bits;
    int bytes;
    int not_even_by;

    // Written so that a NaN error is rejected as well
    if (entries < 1 || !(error > 0.0 && error < 1.0)) {
        return 0;
    }

    bpe = -(log(error) / ln2_squared);
    bits = (int)(((double)entries) * bpe);

    // Round up to whole bytes
    bytes = bits / 8;
    if (bits % 8) {
        bytes++;
    }

    // Round up to a whole number of buckets
    not_even_by = bytes % bucket_bytes;
    if (not_even_by) {
        bytes += (bucket_bytes - not_even_by);
    }

    return bytes;
}
int bloom_init_size(struct bloom * bloom, int entries, double error,
unsigned int cache_size)
{
bloom->ready = 0;
if (entries < 1 || error == 0) {
return 1;
}
bloom->entries = entries;
bloom->error = error;
double num = log(bloom->error);
double denom = 0.480453013918201; // ln(2)^2
bloom->bpe = -(num / denom);
double dentries = (double)entries;
bloom->bits = (int)(dentries * bloom->bpe);
if (bloom->bits % 8) {
bloom->bytes = (bloom->bits / 8) + 1;
} else {
bloom->bytes = bloom->bits / 8;
}
bloom->hashes = (int)ceil(0.693147180559945 * bloom->bpe); // ln(2)
setup_buckets(bloom, cache_size);
// TODO comment
memset(bloom->bf, 0, bloom->bytes);
//bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
//if (bloom->bf == NULL) {
// return 1;
//}
bloom->ready = 1;
return 0;
}
/*
 * Initializes a bloom filter for `entries` elements with the default error
 * rate BLOOM_FILTER_ERROR_RATE and a bucket size chosen by the library
 * (cache_size 0). Returns the result of bloom_init_size().
 */
int bloom_init(struct bloom * bloom, int entries) //, double error)
{
return bloom_init_size(bloom, entries, BLOOM_FILTER_ERROR_RATE, 0);
}
/*
 * Tests whether the `len` bytes at `buffer` are in the filter, without
 * modifying it (bloom_check_add() with add == 0).
 * Returns 1 if all tested bits are set (element present, or collision),
 * 0 if not, -1 if the filter is not initialized.
 */
int bloom_check(struct bloom * bloom, const void * buffer, int len)
{
return bloom_check_add(bloom, buffer, len, 0);
}
/*
 * Adds the `len` bytes at `buffer` to the filter (bloom_check_add() with
 * add == 1).
 * Returns 1 if all of the element's bits were already set (element already
 * in, or collision), 0 if at least one bit was newly set, -1 if the filter
 * is not initialized.
 */
int bloom_add(struct bloom * bloom, const void * buffer, int len)
{
return bloom_check_add(bloom, buffer, len, 1);
}
/*
 * Prints the address of the filter and the value of each of its scalar
 * fields to standard output, one per line. The bit array itself is not
 * printed.
 */
void bloom_print(struct bloom * bloom)
{
(void)printf("bloom at %p\n", (void *)bloom);
(void)printf(" ->entries = %d\n", bloom->entries);
(void)printf(" ->error = %f\n", bloom->error);
(void)printf(" ->bits = %d\n", bloom->bits);
(void)printf(" ->bits per elem = %f\n", bloom->bpe);
(void)printf(" ->bytes = %d\n", bloom->bytes);
(void)printf(" ->buckets = %u\n", bloom->buckets);
(void)printf(" ->bucket_bytes = %u\n", bloom->bucket_bytes);
(void)printf(" ->bucket_bytes_exponent = %u\n",
bloom->bucket_bytes_exponent);
// The mask is printed in octal, with a leading 0
(void)printf(" ->bucket_bits_fast_mod_operand = 0%o\n",
bloom->bucket_bits_fast_mod_operand);
(void)printf(" ->hash functions = %d\n", bloom->hashes);
}
/**
 * Marks a bloom filter as no longer usable.
 *
 * The bit array `bf` is a flexible array member of struct bloom, i.e. it is
 * part of the storage that the caller provided for the whole structure and
 * not a separate heap allocation. It must therefore not be passed to free():
 * releasing (or unmapping) the structure remains the caller's job.
 *
 * @param bloom  Filter to invalidate; bloom_check()/bloom_add() return -1 on
 *               it until it is initialized again.
 */
void bloom_free(struct bloom * bloom)
{
    bloom->ready = 0;
}
/*
 * Returns the expansion of the BLOOM_VERSION macro as a string literal.
 * NOTE(review): BLOOM_VERSION is not defined in this file -- presumably it
 * is supplied by the build; confirm.
 */
const char * bloom_version()
{
return MAKESTRING(BLOOM_VERSION);
}

199
src/bloom.h Executable file
View File

@ -0,0 +1,199 @@
/*
* Copyright (c) 2012-2015, Jyri J. Virkki
* All rights reserved.
*
* This file is under BSD license. See LICENSE file.
*/
#ifndef _BLOOM_H
#define _BLOOM_H
// TODO
#define BLOOM_FILTER_ERROR_RATE (0.001)
/** ***************************************************************************
* On Linux, the code attempts to compute a bucket size based on CPU cache
* size info, if available. If that fails for any reason, this fallback size
* is used instead.
*
* On non-Linux systems, this is the bucket size always used unless the
* caller overrides it (see bloom_init_size()).
*
*/
#define BLOOM_BUCKET_SIZE_FALLBACK (32 * 1024)
/** ***************************************************************************
* It was found that using multiplier x0.5 for CPU L1 cache size is
* more effective in terms of CPU usage and, surprisingly, collisions
* number.
*
* Feel free to tune this constant the way it will work for you.
*
*/
#define BLOOM_L1_CACHE_SIZE_DIV 1
/** ***************************************************************************
* Structure to keep track of one bloom filter. Caller needs to
* allocate this and pass it to the functions below. First call for
* every struct must be to bloom_init().
*
*/
struct bloom
{
// These fields are part of the public interface of this structure.
// Client code may read these values if desired. Client code MUST NOT
// modify any of these.
// Expected number of entries, as given to bloom_init_size().
int entries;
// Target false-positive probability, as given to bloom_init_size().
double error;
// Size of the filter in bits: entries * bpe, truncated to an int.
int bits;
// Size of the filter in bytes: bits rounded up to a whole byte.
int bytes;
// Number of hash functions: ceil(ln(2) * bpe).
int hashes;
// Fields below are private to the implementation. These may go away or
// change incompatibly at any moment. Client code MUST NOT access or rely
// on these.
unsigned buckets;
unsigned bucket_bytes;
// x86 CPU divide by/multiply by operation optimization helpers
unsigned bucket_bytes_exponent;
unsigned bucket_bits_fast_mod_operand;
// Bits per element: -ln(error) / ln(2)^2.
double bpe;
// Set to 1 at the end of a successful bloom_init_size(), reset to 0 at its
// start and by bloom_free().
int ready;
// The bit array, stored inline as a flexible array member: it must remain
// the last field, and the struct has to be given `bytes` extra bytes of
// storage. bloom_init_size() zeroes those bytes.
unsigned char bf[];
};
typedef struct bloom bloom_t;
// TODO
int bloom_filter_size(int entries, double error);
/** ***************************************************************************
* Initialize the bloom filter for use.
*
* The filter is initialized with a bit field and number of hash functions
* according to the computations from the wikipedia entry:
* http://en.wikipedia.org/wiki/Bloom_filter
*
* Optimal number of bits is:
* bits = -(entries * ln(error)) / ln(2)^2
*
* Optimal number of hash functions is:
* hashes = bpe * ln(2)
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* entries - The expected number of entries which will be inserted.
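* error - Probability of collision (as long as entries are not
* exceeded). bloom_init() does not take this value: it always
* uses BLOOM_FILTER_ERROR_RATE; call bloom_init_size() to
* choose another one.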
*
* Return:
* -------
* 0 - on success
* 1 - on failure
*
*/
int bloom_init(struct bloom * bloom, int entries); //, double error);
/** ***************************************************************************
* Initialize the bloom filter for use.
*
* See comments above for general information.
*
* This is the same as bloom_init() but allows the caller to pass in a
* cache_size to override the internal value (which is either computed
* or the default of BLOOM_BUCKET_SIZE_FALLBACK). Mostly useful for
* experimenting.
*
* See misc/bucketsize for a script which can help identify a good value
* for cache_size.
*
*/
int bloom_init_size(struct bloom * bloom, int entries, double error,
unsigned int cache_size);
/** ***************************************************************************
* Check if the given element is in the bloom filter. Remember this may
* return a false positive if a collision occurred.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to check.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element is not present
* 1 - element is present (or false positive due to collision)
* -1 - bloom not initialized
*
*/
int bloom_check(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Add the given element to the bloom filter.
* The return code indicates if the element (or a collision) was already in,
* so for the common check+add use case, no need to call check separately.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to add.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element was not present and was added
* 1 - element (or a collision) had already been added previously
* -1 - bloom not initialized
*
*/
int bloom_add(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Print (to stdout) info about this bloom filter. Debugging aid.
*
*/
void bloom_print(struct bloom * bloom);
/** ***************************************************************************
* Deallocate internal storage.
*
* Upon return, the bloom struct is no longer usable. You may call bloom_init
* again on the same struct to reinitialize it again.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
*
* Return: none
*
*/
void bloom_free(struct bloom * bloom);
/** ***************************************************************************
* Returns version string compiled into library.
*
* Return: version string
*
*/
const char * bloom_version();
#endif

80
src/char_str_indexer.c Normal file
View File

@ -0,0 +1,80 @@
/****************************************************************************
* Character string indexing functions *
****************************************************************************/
/**
* @file char_str_indexer.c
* @author Celine Mercier
* @date April 12th 2016
* @brief Functions handling the indexing and retrieval of character strings.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "obiblob.h"
#include "obiblob_indexer.h"
#include "obidebug.h"
#include "obitypes.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**
 * Wraps a character string in a blob built by obi_blob().
 *
 * The terminating '\0' is counted in the stored length, so the same byte
 * count is passed to obi_blob() for both of its length arguments.
 *
 * Returns the blob, or NULL (after a debug message) if obi_blob() failed.
 */
Obi_blob_p obi_str_to_blob(const char* value)
{
    // +1 to store \0 at the end (makes retrieving faster)
    int32_t    nb_bytes = strlen(value) + 1;
    Obi_blob_p blob     = obi_blob((byte_t*)value, ELEMENT_SIZE_STR, nb_bytes, nb_bytes);

    if (blob != NULL)
        return blob;

    obidebug(1, "\nError encoding a character string in a blob");
    return NULL;
}
/**
 * Gives access to the character string held by a blob.
 *
 * No copy is made: the returned pointer is the blob's own `value` field.
 */
char* obi_blob_to_str(Obi_blob_p value_b)
{
    char* str = value_b->value;

    return str;
}
/**
 * Adds a character string to an indexer.
 *
 * The string is converted to a temporary blob, handed to obi_indexer_add(),
 * and the temporary blob is freed before returning.
 *
 * Returns the value returned by obi_indexer_add(), or -1 if the string could
 * not be converted to a blob.
 */
index_t obi_index_char_str(Obi_indexer_p indexer, const char* value)
{
    Obi_blob_p blob = obi_str_to_blob(value);
    index_t    position;

    if (blob == NULL)
        return -1;

    position = obi_indexer_add(indexer, blob);

    // The temporary blob is not needed once obi_indexer_add() has returned
    free(blob);

    return position;
}
/**
 * Retrieves the character string stored at a given index of an indexer.
 *
 * The returned pointer is the `value` field of the blob returned by
 * obi_indexer_get(); it is not a copy.
 * NOTE(review): its lifetime is therefore tied to that blob -- confirm how
 * long obi_indexer_get() keeps it valid.
 *
 * Returns the character string, or NULL if no blob could be retrieved.
 */
char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx)
{
    Obi_blob_p value_b;

    // Get encoded value
    value_b = obi_indexer_get(indexer, idx);

    // Do not dereference a missing blob: blob-returning functions in this
    // API signal failure with NULL (see obi_str_to_blob()).
    if (value_b == NULL)
        return NULL;

    // Return decoded character string
    return obi_blob_to_str(value_b);
}

61
src/char_str_indexer.h Normal file
View File

@ -0,0 +1,61 @@
/****************************************************************************
* Character string indexer header file *
****************************************************************************/
/**
* @file char_str_indexer.h
* @author Celine Mercier
* @date April 12th 2016
* @brief Header file for the functions handling the indexing of character strings.
*/
#ifndef CHAR_STR_INDEXER_H_
#define CHAR_STR_INDEXER_H_
#include <stdlib.h>
#include <stdio.h>
#include "obitypes.h"
#include "obiblob.h"
#include "obiblob_indexer.h"
/**
 * @brief Converts a character string to a blob.
 *
 * The string is only read, hence the const qualifier, which also keeps this
 * declaration compatible with the definition in char_str_indexer.c.
 *
 * @warning The blob must be freed by the caller.
 *
 * @param value The character string to convert.
 *
 * @returns A pointer to the blob created.
 * @retval NULL if an error occurred.
 *
 * @since October 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
Obi_blob_p obi_str_to_blob(const char* value);
/**
 * @brief Converts a blob to a character string.
 *
 * The return type matches the definition in char_str_indexer.c, which
 * returns a plain char*.
 *
 * @param value_b The blob to convert.
 *
 * @returns A pointer to the character string contained in the blob.
 *
 * @since October 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_blob_to_str(Obi_blob_p value_b);
// TODO doc
index_t obi_index_char_str(Obi_indexer_p indexer, const char* value);
char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx);
#endif /* CHAR_STR_INDEXER_H_ */

198
src/crc64.c Normal file
View File

@ -0,0 +1,198 @@
/* Redis uses the CRC64 variant with "Jones" coefficients and init value of 0.
*
* Specification of this CRC64 variant follows:
* Name: crc-64-jones
* Width: 64 bits
* Poly: 0xad93d23594c935a9
* Reflected In: True
* Xor_In: 0xffffffffffffffff
* Reflected_Out: True
* Xor_Out: 0x0
* Check("123456789"): 0xe9c6d914c4b8d9ca
*
* Copyright (c) 2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE. */
#include <stdint.h>
/* Precomputed lookup table for the byte-at-a-time CRC64 computation below:
 * one 64-bit entry per possible byte value. crc64() indexes it with the low
 * byte of the running CRC XORed with the next input byte.
 * The values are constants of the algorithm and must not be edited. */
static const uint64_t crc64_tab[256] = {
UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979),
UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b),
UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6),
UINT64_C(0x358804e3f82aa47d), UINT64_C(0x4f50742bc81f2d04),
UINT64_C(0xab28ecb46814fe75), UINT64_C(0xd1f09c7c5821770c),
UINT64_C(0x5e980d24087fec87), UINT64_C(0x24407dec384a65fe),
UINT64_C(0x6b1009c7f05548fa), UINT64_C(0x11c8790fc060c183),
UINT64_C(0x9ea0e857903e5a08), UINT64_C(0xe478989fa00bd371),
UINT64_C(0x7d08ff3b88be6f81), UINT64_C(0x07d08ff3b88be6f8),
UINT64_C(0x88b81eabe8d57d73), UINT64_C(0xf2606e63d8e0f40a),
UINT64_C(0xbd301a4810ffd90e), UINT64_C(0xc7e86a8020ca5077),
UINT64_C(0x4880fbd87094cbfc), UINT64_C(0x32588b1040a14285),
UINT64_C(0xd620138fe0aa91f4), UINT64_C(0xacf86347d09f188d),
UINT64_C(0x2390f21f80c18306), UINT64_C(0x594882d7b0f40a7f),
UINT64_C(0x1618f6fc78eb277b), UINT64_C(0x6cc0863448deae02),
UINT64_C(0xe3a8176c18803589), UINT64_C(0x997067a428b5bcf0),
UINT64_C(0xfa11fe77117cdf02), UINT64_C(0x80c98ebf2149567b),
UINT64_C(0x0fa11fe77117cdf0), UINT64_C(0x75796f2f41224489),
UINT64_C(0x3a291b04893d698d), UINT64_C(0x40f16bccb908e0f4),
UINT64_C(0xcf99fa94e9567b7f), UINT64_C(0xb5418a5cd963f206),
UINT64_C(0x513912c379682177), UINT64_C(0x2be1620b495da80e),
UINT64_C(0xa489f35319033385), UINT64_C(0xde51839b2936bafc),
UINT64_C(0x9101f7b0e12997f8), UINT64_C(0xebd98778d11c1e81),
UINT64_C(0x64b116208142850a), UINT64_C(0x1e6966e8b1770c73),
UINT64_C(0x8719014c99c2b083), UINT64_C(0xfdc17184a9f739fa),
UINT64_C(0x72a9e0dcf9a9a271), UINT64_C(0x08719014c99c2b08),
UINT64_C(0x4721e43f0183060c), UINT64_C(0x3df994f731b68f75),
UINT64_C(0xb29105af61e814fe), UINT64_C(0xc849756751dd9d87),
UINT64_C(0x2c31edf8f1d64ef6), UINT64_C(0x56e99d30c1e3c78f),
UINT64_C(0xd9810c6891bd5c04), UINT64_C(0xa3597ca0a188d57d),
UINT64_C(0xec09088b6997f879), UINT64_C(0x96d1784359a27100),
UINT64_C(0x19b9e91b09fcea8b), UINT64_C(0x636199d339c963f2),
UINT64_C(0xdf7adabd7a6e2d6f), UINT64_C(0xa5a2aa754a5ba416),
UINT64_C(0x2aca3b2d1a053f9d), UINT64_C(0x50124be52a30b6e4),
UINT64_C(0x1f423fcee22f9be0), UINT64_C(0x659a4f06d21a1299),
UINT64_C(0xeaf2de5e82448912), UINT64_C(0x902aae96b271006b),
UINT64_C(0x74523609127ad31a), UINT64_C(0x0e8a46c1224f5a63),
UINT64_C(0x81e2d7997211c1e8), UINT64_C(0xfb3aa75142244891),
UINT64_C(0xb46ad37a8a3b6595), UINT64_C(0xceb2a3b2ba0eecec),
UINT64_C(0x41da32eaea507767), UINT64_C(0x3b024222da65fe1e),
UINT64_C(0xa2722586f2d042ee), UINT64_C(0xd8aa554ec2e5cb97),
UINT64_C(0x57c2c41692bb501c), UINT64_C(0x2d1ab4dea28ed965),
UINT64_C(0x624ac0f56a91f461), UINT64_C(0x1892b03d5aa47d18),
UINT64_C(0x97fa21650afae693), UINT64_C(0xed2251ad3acf6fea),
UINT64_C(0x095ac9329ac4bc9b), UINT64_C(0x7382b9faaaf135e2),
UINT64_C(0xfcea28a2faafae69), UINT64_C(0x8632586aca9a2710),
UINT64_C(0xc9622c4102850a14), UINT64_C(0xb3ba5c8932b0836d),
UINT64_C(0x3cd2cdd162ee18e6), UINT64_C(0x460abd1952db919f),
UINT64_C(0x256b24ca6b12f26d), UINT64_C(0x5fb354025b277b14),
UINT64_C(0xd0dbc55a0b79e09f), UINT64_C(0xaa03b5923b4c69e6),
UINT64_C(0xe553c1b9f35344e2), UINT64_C(0x9f8bb171c366cd9b),
UINT64_C(0x10e3202993385610), UINT64_C(0x6a3b50e1a30ddf69),
UINT64_C(0x8e43c87e03060c18), UINT64_C(0xf49bb8b633338561),
UINT64_C(0x7bf329ee636d1eea), UINT64_C(0x012b592653589793),
UINT64_C(0x4e7b2d0d9b47ba97), UINT64_C(0x34a35dc5ab7233ee),
UINT64_C(0xbbcbcc9dfb2ca865), UINT64_C(0xc113bc55cb19211c),
UINT64_C(0x5863dbf1e3ac9dec), UINT64_C(0x22bbab39d3991495),
UINT64_C(0xadd33a6183c78f1e), UINT64_C(0xd70b4aa9b3f20667),
UINT64_C(0x985b3e827bed2b63), UINT64_C(0xe2834e4a4bd8a21a),
UINT64_C(0x6debdf121b863991), UINT64_C(0x1733afda2bb3b0e8),
UINT64_C(0xf34b37458bb86399), UINT64_C(0x8993478dbb8deae0),
UINT64_C(0x06fbd6d5ebd3716b), UINT64_C(0x7c23a61ddbe6f812),
UINT64_C(0x3373d23613f9d516), UINT64_C(0x49aba2fe23cc5c6f),
UINT64_C(0xc6c333a67392c7e4), UINT64_C(0xbc1b436e43a74e9d),
UINT64_C(0x95ac9329ac4bc9b5), UINT64_C(0xef74e3e19c7e40cc),
UINT64_C(0x601c72b9cc20db47), UINT64_C(0x1ac40271fc15523e),
UINT64_C(0x5594765a340a7f3a), UINT64_C(0x2f4c0692043ff643),
UINT64_C(0xa02497ca54616dc8), UINT64_C(0xdafce7026454e4b1),
UINT64_C(0x3e847f9dc45f37c0), UINT64_C(0x445c0f55f46abeb9),
UINT64_C(0xcb349e0da4342532), UINT64_C(0xb1eceec59401ac4b),
UINT64_C(0xfebc9aee5c1e814f), UINT64_C(0x8464ea266c2b0836),
UINT64_C(0x0b0c7b7e3c7593bd), UINT64_C(0x71d40bb60c401ac4),
UINT64_C(0xe8a46c1224f5a634), UINT64_C(0x927c1cda14c02f4d),
UINT64_C(0x1d148d82449eb4c6), UINT64_C(0x67ccfd4a74ab3dbf),
UINT64_C(0x289c8961bcb410bb), UINT64_C(0x5244f9a98c8199c2),
UINT64_C(0xdd2c68f1dcdf0249), UINT64_C(0xa7f41839ecea8b30),
UINT64_C(0x438c80a64ce15841), UINT64_C(0x3954f06e7cd4d138),
UINT64_C(0xb63c61362c8a4ab3), UINT64_C(0xcce411fe1cbfc3ca),
UINT64_C(0x83b465d5d4a0eece), UINT64_C(0xf96c151de49567b7),
UINT64_C(0x76048445b4cbfc3c), UINT64_C(0x0cdcf48d84fe7545),
UINT64_C(0x6fbd6d5ebd3716b7), UINT64_C(0x15651d968d029fce),
UINT64_C(0x9a0d8ccedd5c0445), UINT64_C(0xe0d5fc06ed698d3c),
UINT64_C(0xaf85882d2576a038), UINT64_C(0xd55df8e515432941),
UINT64_C(0x5a3569bd451db2ca), UINT64_C(0x20ed197575283bb3),
UINT64_C(0xc49581ead523e8c2), UINT64_C(0xbe4df122e51661bb),
UINT64_C(0x3125607ab548fa30), UINT64_C(0x4bfd10b2857d7349),
UINT64_C(0x04ad64994d625e4d), UINT64_C(0x7e7514517d57d734),
UINT64_C(0xf11d85092d094cbf), UINT64_C(0x8bc5f5c11d3cc5c6),
UINT64_C(0x12b5926535897936), UINT64_C(0x686de2ad05bcf04f),
UINT64_C(0xe70573f555e26bc4), UINT64_C(0x9ddd033d65d7e2bd),
UINT64_C(0xd28d7716adc8cfb9), UINT64_C(0xa85507de9dfd46c0),
UINT64_C(0x273d9686cda3dd4b), UINT64_C(0x5de5e64efd965432),
UINT64_C(0xb99d7ed15d9d8743), UINT64_C(0xc3450e196da80e3a),
UINT64_C(0x4c2d9f413df695b1), UINT64_C(0x36f5ef890dc31cc8),
UINT64_C(0x79a59ba2c5dc31cc), UINT64_C(0x037deb6af5e9b8b5),
UINT64_C(0x8c157a32a5b7233e), UINT64_C(0xf6cd0afa9582aa47),
UINT64_C(0x4ad64994d625e4da), UINT64_C(0x300e395ce6106da3),
UINT64_C(0xbf66a804b64ef628), UINT64_C(0xc5bed8cc867b7f51),
UINT64_C(0x8aeeace74e645255), UINT64_C(0xf036dc2f7e51db2c),
UINT64_C(0x7f5e4d772e0f40a7), UINT64_C(0x05863dbf1e3ac9de),
UINT64_C(0xe1fea520be311aaf), UINT64_C(0x9b26d5e88e0493d6),
UINT64_C(0x144e44b0de5a085d), UINT64_C(0x6e963478ee6f8124),
UINT64_C(0x21c640532670ac20), UINT64_C(0x5b1e309b16452559),
UINT64_C(0xd476a1c3461bbed2), UINT64_C(0xaeaed10b762e37ab),
UINT64_C(0x37deb6af5e9b8b5b), UINT64_C(0x4d06c6676eae0222),
UINT64_C(0xc26e573f3ef099a9), UINT64_C(0xb8b627f70ec510d0),
UINT64_C(0xf7e653dcc6da3dd4), UINT64_C(0x8d3e2314f6efb4ad),
UINT64_C(0x0256b24ca6b12f26), UINT64_C(0x788ec2849684a65f),
UINT64_C(0x9cf65a1b368f752e), UINT64_C(0xe62e2ad306bafc57),
UINT64_C(0x6946bb8b56e467dc), UINT64_C(0x139ecb4366d1eea5),
UINT64_C(0x5ccebf68aecec3a1), UINT64_C(0x2616cfa09efb4ad8),
UINT64_C(0xa97e5ef8cea5d153), UINT64_C(0xd3a62e30fe90582a),
UINT64_C(0xb0c7b7e3c7593bd8), UINT64_C(0xca1fc72bf76cb2a1),
UINT64_C(0x45775673a732292a), UINT64_C(0x3faf26bb9707a053),
UINT64_C(0x70ff52905f188d57), UINT64_C(0x0a2722586f2d042e),
UINT64_C(0x854fb3003f739fa5), UINT64_C(0xff97c3c80f4616dc),
UINT64_C(0x1bef5b57af4dc5ad), UINT64_C(0x61372b9f9f784cd4),
UINT64_C(0xee5fbac7cf26d75f), UINT64_C(0x9487ca0fff135e26),
UINT64_C(0xdbd7be24370c7322), UINT64_C(0xa10fceec0739fa5b),
UINT64_C(0x2e675fb4576761d0), UINT64_C(0x54bf2f7c6752e8a9),
UINT64_C(0xcdcf48d84fe75459), UINT64_C(0xb71738107fd2dd20),
UINT64_C(0x387fa9482f8c46ab), UINT64_C(0x42a7d9801fb9cfd2),
UINT64_C(0x0df7adabd7a6e2d6), UINT64_C(0x772fdd63e7936baf),
UINT64_C(0xf8474c3bb7cdf024), UINT64_C(0x829f3cf387f8795d),
UINT64_C(0x66e7a46c27f3aa2c), UINT64_C(0x1c3fd4a417c62355),
UINT64_C(0x935745fc4798b8de), UINT64_C(0xe98f353477ad31a7),
UINT64_C(0xa6df411fbfb21ca3), UINT64_C(0xdc0731d78f8795da),
UINT64_C(0x536fa08fdfd90e51), UINT64_C(0x29b7d047efec8728),
};
/**
 * Computes the CRC64 of the `l` bytes starting at `s`.
 *
 * Table-driven, one input byte per iteration, starting from a CRC of 0.
 * An empty input (l == 0) therefore yields 0.
 */
uint64_t crc64(const char* s, uint64_t l)
{
    uint64_t crc = 0;
    uint64_t pos = 0;

    while (pos < l)
    {
        // Table index: low byte of the running CRC XOR the next input byte
        uint8_t idx = (uint8_t) crc ^ (uint8_t) s[pos];

        crc = (crc >> 8) ^ crc64_tab[idx];
        pos++;
    }

    return crc;
}
/* Test main */
//#ifdef TEST_MAIN
//#include <stdio.h>
//int main(void) {
// printf("e9c6d914c4b8d9ca == %016llx\n",
// (unsigned long long) crc64(0,(unsigned char*)"123456789",9));
// return 0;
//}
//#endif

9
src/crc64.h Normal file
View File

@ -0,0 +1,9 @@
/**
* @file crc64.h
* @date March 24th 2016
* @brief Header file for CRC64 function.
*/
#include <stdint.h>
uint64_t crc64(const char* s, uint64_t l);

102
src/dna_seq_indexer.c Normal file
View File

@ -0,0 +1,102 @@
/****************************************************************************
* DNA sequence indexing functions *
****************************************************************************/
/**
* @file dna_seq_indexer.c
* @author Celine Mercier
* @date April 12th 2016
* @brief Functions handling the indexing and retrieval of DNA sequences.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "obiblob.h"
#include "obiblob_indexer.h"
#include "obidebug.h"
#include "obitypes.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**
 * Encodes a DNA sequence and wraps the result in a blob.
 *
 * Sequences made only of a/c/g/t (either case) are packed on 2 bits per
 * nucleotide, any other sequence on 4 bits per nucleotide. The temporary
 * encoded buffer is freed once obi_blob() has been called.
 *
 * Returns the blob, or NULL if the encoding failed. The value returned by
 * obi_blob() is passed through unchanged.
 */
Obi_blob_p obi_seq_to_blob(const char* seq)
{
    Obi_blob_p blob;
    byte_t*    packed;
    int32_t    packed_size;    // size of the encoded sequence in bytes
    int32_t    nuc_count = strlen(seq);

    if (!only_ATGC(seq))
    {
        // At least one non-ACGT symbol: 4 bits per nucleotide, 2 per byte
        packed = encode_seq_on_4_bits(seq, nuc_count);
        if (packed == NULL)
            return NULL;

        packed_size = ceil((double) nuc_count / (double) 2.0);
        blob = obi_blob(packed, ELEMENT_SIZE_SEQ_4, packed_size, nuc_count);
    }
    else
    {
        // Plain ACGT: 2 bits per nucleotide, 4 per byte
        packed = encode_seq_on_2_bits(seq, nuc_count);
        if (packed == NULL)
            return NULL;

        packed_size = ceil((double) nuc_count / (double) 4.0);
        blob = obi_blob(packed, ELEMENT_SIZE_SEQ_2, packed_size, nuc_count);
    }

    free(packed);

    return blob;
}
/**
 * Decodes the DNA sequence held by a blob.
 *
 * A blob whose element_size is 2 is decoded as a 2-bit sequence, any other
 * blob as a 4-bit sequence. The decoders return a newly allocated string.
 */
char* obi_blob_to_seq(Obi_blob_p value_b)
{
    if (value_b->element_size != 2)
        return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);

    return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);
}
/**
 * Adds a DNA sequence to an indexer.
 *
 * The sequence is encoded into a temporary blob, handed to
 * obi_indexer_add(), and the temporary blob is freed before returning.
 *
 * Returns the value returned by obi_indexer_add(), or -1 if the sequence
 * could not be encoded.
 */
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value)
{
    Obi_blob_p blob = obi_seq_to_blob(value);
    index_t    position;

    if (blob == NULL)
        return -1;

    position = obi_indexer_add(indexer, blob);

    // The temporary blob is not needed once obi_indexer_add() has returned
    free(blob);

    return position;
}
/**
 * Retrieves and decodes the DNA sequence stored at a given index of an
 * indexer.
 *
 * The sequence is decoded by obi_blob_to_seq(), whose decoders allocate the
 * returned string: the caller is responsible for freeing it.
 *
 * Returns the decoded sequence, or NULL if no blob could be retrieved or if
 * the decoding failed.
 */
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
{
    Obi_blob_p value_b;

    // Get encoded value
    value_b = obi_indexer_get(indexer, idx);

    // Do not dereference a missing blob: blob-returning functions in this
    // API signal failure with NULL (see obi_seq_to_blob()).
    if (value_b == NULL)
        return NULL;

    // Return decoded sequence
    return obi_blob_to_seq(value_b);
}

63
src/dna_seq_indexer.h Normal file
View File

@ -0,0 +1,63 @@
/****************************************************************************
* DNA sequence indexer header file *
****************************************************************************/
/**
* @file dna_seq_indexer.h
* @author Celine Mercier
* @date April 12th 2016
* @brief Header file for the functions handling the indexing of DNA sequences.
*/
#ifndef DNA_SEQ_INDEXER_H_
#define DNA_SEQ_INDEXER_H_
#include <stdlib.h>
#include <stdio.h>
#include "obidms.h"
#include "obitypes.h"
#include "obiblob.h"
#include "obiblob_indexer.h"
/**
* @brief Converts a DNA sequence to a blob.
*
* @warning The blob must be freed by the caller.
*
* @param seq The DNA sequence to convert.
*
* @returns A pointer to the blob created.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p obi_seq_to_blob(const char* seq);
/**
* @brief Converts a blob to a DNA sequence.
*
* @param value_b The blob to convert.
*
* @returns A pointer to a newly allocated, '\0'-terminated string containing the DNA sequence decoded from the blob. It must be freed by the caller.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_blob_to_seq(Obi_blob_p value_b);
// TODO doc
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value);
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx);
#endif /* DNA_SEQ_INDEXER_H_ */

373
src/encode.c Normal file
View File

@ -0,0 +1,373 @@
/****************************************************************************
* Encoding functions *
****************************************************************************/
/**
* @file encode.c
* @author Celine Mercier
* @date November 18th 2015
* @brief Functions encoding DNA sequences.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <math.h>
#include "encode.h"
#include "obierrno.h"
#include "obitypes.h" // For byte_t type
#include "obidebug.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// TODO: endianness problem?
/**
 * Tells whether a '\0'-terminated string contains nothing but the
 * characters a, c, g, t (in either case).
 *
 * An empty string contains no other character, so it yields true.
 */
bool only_ATGC(const char* seq)
{
    const char* p;

    for (p = seq; *p != '\0'; p++)
    {
        switch (*p)
        {
            case 'A': case 'a':
            case 'C': case 'c':
            case 'G': case 'g':
            case 'T': case 't':
                break;
            default:
                // First character outside the 4-letter alphabet
                return false;
        }
    }

    return true;
}
/**
 * Packs a DNA sequence made of a/c/g/t (either case) on 2 bits per
 * nucleotide, 4 nucleotides per byte, first nucleotide in the most
 * significant bits. The unused low bits of the last byte are left at 0.
 *
 * Returns a newly allocated buffer of ceil(length / 4) bytes that the caller
 * must free, or NULL (with the OBI errno set) if the allocation failed or if
 * the sequence contains another character. Nothing is leaked on failure.
 */
byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
{
    byte_t* seq_b;
    uint8_t modulo;
    uint8_t code;
    int32_t length_b;
    int32_t i;

    length_b = ceil((double) length / (double) 4.0);

    // calloc: all the bits start at 0
    seq_b = (byte_t*) calloc(length_b, sizeof(byte_t));
    if (seq_b == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR); // TODO
        obidebug(1, "\nError allocating memory for an encoded DNA sequence");
        return NULL;
    }

    for (i=0; i<length; i++)
    {
        switch (seq[i])
        {
            case 'a': case 'A': code = NUC_A_2b; break;
            case 'c': case 'C': code = NUC_C_2b; break;
            case 'g': case 'G': code = NUC_G_2b; break;
            case 't': case 'T': code = NUC_T_2b; break;
            default:
                obi_set_errno(OBI_ENCODE_ERROR); // TODO
                obidebug(1, "\nInvalid nucleotide base when encoding (not [atgcATGC])");
                // The partially filled buffer is not returned: release it
                free(seq_b);
                return NULL;
        }

        // Shift of 2 to make place for new nucleotide, then add it
        seq_b[i/4] <<= 2;
        seq_b[i/4] |= code;
    }

    // Final shift for the last byte if needed, so that its nucleotides end up
    // in the most significant bits like in the full bytes
    modulo = (length % 4);
    if (modulo)
        seq_b[(i-1)/4] <<= (2*(4 - modulo));

    return seq_b;
}
/**
 * Unpacks a DNA sequence stored on 2 bits per nucleotide into a lower-case,
 * '\0'-terminated string.
 *
 * Nucleotide i is read from byte i/4, the first nucleotide of a byte being
 * in its two most significant bits.
 *
 * Returns a newly allocated string of length_seq characters that the caller
 * must free, or NULL (with the OBI errno set) on error.
 */
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq)
{
    char*   decoded;
    int32_t pos;
    uint8_t offset;
    uint8_t code;

    decoded = (char*) malloc((length_seq+1) * sizeof(char));
    if (decoded == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR); // TODO
        obidebug(1, "\nError allocating memory for a decoded DNA sequence");
        return NULL;
    }

    pos = 0;
    while (pos < length_seq)
    {
        // Bring the 2 bits of this nucleotide down to the low bits
        offset = 6 - 2*(pos % 4);
        code = ((uint8_t) seq_b[pos/4] >> offset) & NUC_MASK_2B;

        switch (code)
        {
            case NUC_A_2b: decoded[pos] = 'a'; break;
            case NUC_C_2b: decoded[pos] = 'c'; break;
            case NUC_G_2b: decoded[pos] = 'g'; break;
            case NUC_T_2b: decoded[pos] = 't'; break;
            default:
                obi_set_errno(OBI_DECODE_ERROR); // TODO
                obidebug(1, "\nInvalid nucleotide base when decoding");
                return NULL;
        }
        pos++;
    }

    decoded[length_seq] = '\0';

    return decoded;
}
/**
 * Packs a DNA sequence made of IUPAC nucleotide symbols (either case) on
 * 4 bits per nucleotide, 2 nucleotides per byte, first nucleotide in the
 * most significant bits. The unused low nibble of the last byte is left at 0.
 *
 * Returns a newly allocated buffer of ceil(length / 2) bytes that the caller
 * must free, or NULL (with the OBI errno set) if the allocation failed or if
 * the sequence contains a non-IUPAC character. Nothing is leaked on failure.
 */
byte_t* encode_seq_on_4_bits(const char* seq, int32_t length)
{
    byte_t* seq_b;
    uint8_t modulo;
    uint8_t code;
    int32_t length_b;
    int32_t i;

    length_b = ceil((double) length / (double) 2.0);

    // calloc: all the bits start at 0
    seq_b = (byte_t*) calloc(length_b, sizeof(byte_t));
    if (seq_b == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR); // TODO
        obidebug(1, "\nError allocating memory for an encoded DNA sequence");
        return NULL;
    }

    for (i=0; i<length; i++)
    {
        switch (seq[i])
        {
            case 'a': case 'A': code = NUC_A_4b; break;
            case 'c': case 'C': code = NUC_C_4b; break;
            case 'g': case 'G': code = NUC_G_4b; break;
            case 't': case 'T': code = NUC_T_4b; break;
            case 'r': case 'R': code = NUC_R_4b; break;
            case 'y': case 'Y': code = NUC_Y_4b; break;
            case 's': case 'S': code = NUC_S_4b; break;
            case 'w': case 'W': code = NUC_W_4b; break;
            case 'k': case 'K': code = NUC_K_4b; break;
            case 'm': case 'M': code = NUC_M_4b; break;
            case 'b': case 'B': code = NUC_B_4b; break;
            case 'd': case 'D': code = NUC_D_4b; break;
            case 'h': case 'H': code = NUC_H_4b; break;
            case 'v': case 'V': code = NUC_V_4b; break;
            case 'n': case 'N': code = NUC_N_4b; break;
            default:
                obi_set_errno(OBI_ENCODE_ERROR); // TODO
                obidebug(1, "\nInvalid nucleotide base when encoding (not IUPAC)");
                // The partially filled buffer is not returned: release it
                free(seq_b);
                return NULL;
        }

        // Shift of 4 to make place for new nucleotide, then add it
        seq_b[i/2] <<= 4;
        seq_b[i/2] |= code;
    }

    // Final shift for the last byte if needed, so that its single nucleotide
    // ends up in the most significant nibble
    modulo = (length % 2);
    if (modulo)
        seq_b[(i-1)/2] <<= (4*modulo);

    return seq_b;
}
/**
 * Unpacks a DNA sequence stored on 4 bits per nucleotide into a lower-case,
 * '\0'-terminated string.
 *
 * Nucleotide i is read from byte i/2, the first nucleotide of a byte being
 * in its most significant nibble.
 *
 * Returns a newly allocated string of length_seq characters that the caller
 * must free, or NULL (with the OBI errno set) if the allocation failed or if
 * a nibble is not a valid nucleotide code. Nothing is leaked on failure.
 */
char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
{
    char*   seq;
    int32_t i;
    uint8_t shift;
    uint8_t mask;
    uint8_t nuc;

    seq = (char*) malloc((length_seq+1) * sizeof(char));
    if (seq == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR); // TODO
        obidebug(1, "\nError allocating memory for a decoded DNA sequence");
        return NULL;
    }

    for (i=0; i<length_seq; i++)
    {
        // Isolate the nibble of this nucleotide
        shift = 4 - 4*(i % 2);
        mask = NUC_MASK_4B << shift;
        nuc = (seq_b[i/2] & mask) >> shift;

        switch (nuc)
        {
            case NUC_A_4b: seq[i] = 'a'; break;
            case NUC_C_4b: seq[i] = 'c'; break;
            case NUC_G_4b: seq[i] = 'g'; break;
            case NUC_T_4b: seq[i] = 't'; break;
            case NUC_R_4b: seq[i] = 'r'; break;
            case NUC_Y_4b: seq[i] = 'y'; break;
            case NUC_S_4b: seq[i] = 's'; break;
            case NUC_W_4b: seq[i] = 'w'; break;
            case NUC_K_4b: seq[i] = 'k'; break;
            case NUC_M_4b: seq[i] = 'm'; break;
            case NUC_B_4b: seq[i] = 'b'; break;
            case NUC_D_4b: seq[i] = 'd'; break;
            case NUC_H_4b: seq[i] = 'h'; break;
            case NUC_V_4b: seq[i] = 'v'; break;
            case NUC_N_4b: seq[i] = 'n'; break;
            default:
                // Reached for a nibble of 0, which is not a nucleotide code
                obi_set_errno(OBI_DECODE_ERROR); // TODO
                obidebug(1, "\nInvalid nucleotide base when decoding");
                // The partially decoded string is not returned: release it
                free(seq);
                return NULL;
        }
    }

    seq[length_seq] = '\0';

    return seq;
}
///////////////////// FOR DEBUGGING ///////////////////////////
//NOTE: The first byte is printed the first (at the left-most).
/**
 * Debugging aid: writes the `size` bytes starting at `ptr` to stderr as
 * binary digits, most significant bit first, each byte followed by a space.
 * The output is preceded and followed by a newline.
 */
void print_bits(void* ptr, int32_t size)
{
    const uint8_t* bytes = (const uint8_t*) ptr;
    int32_t        pos;
    int32_t        bit;

    fprintf(stderr, "\n");

    for (pos = 0; pos < size; pos++)
    {
        for (bit = 7; bit >= 0; bit--)
            fprintf(stderr, "%u", (unsigned int) ((bytes[pos] >> bit) & 1));

        fprintf(stderr, " ");
    }

    fprintf(stderr, "\n");
}

190
src/encode.h Normal file
View File

@ -0,0 +1,190 @@
/****************************************************************************
* Encoding header file *
****************************************************************************/
/**
* @file encode.h
* @author Celine Mercier
* @date November 18th 2015
* @brief Header file for encoding DNA sequences.
*/
#ifndef ENCODE_H_
#define ENCODE_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include "obitypes.h"
#define NUC_MASK_2B 0x3 /**< Binary: 11 to use when decoding 2 bits sequences
*/
#define NUC_MASK_4B 0xF /**< Binary: 1111 to use when decoding 4 bits sequences
*/
/**
* @brief enum for the 2-bits codes for each of the 4 nucleotides.
*
* The four codes use all the values that fit on 2 bits (the range selected
* by NUC_MASK_2B). These values are what encode_seq_on_2_bits() writes into
* encoded sequences, so changing them changes the encoding.
*/
enum
{
NUC_A_2b = 0x0, /* binary: 00 */
NUC_C_2b = 0x1, /* binary: 01 */
NUC_G_2b = 0x2, /* binary: 10 */
NUC_T_2b = 0x3, /* binary: 11 */
};
/**
* @brief enum for the 4-bits codes for each of the 15 IUPAC nucleotides.
*
* The codes go from 0x1 to 0xF: no nucleotide is coded by 0x0. These values
* are what encode_seq_on_4_bits() writes into encoded sequences, so changing
* them changes the encoding.
*/
enum
{
NUC_A_4b = 0x1, /* binary: 0001 */
NUC_C_4b = 0x2, /* binary: 0010 */
NUC_G_4b = 0x3, /* binary: 0011 */
NUC_T_4b = 0x4, /* binary: 0100 */
NUC_R_4b = 0x5, /* binary: 0101 */
NUC_Y_4b = 0x6, /* binary: 0110 */
NUC_S_4b = 0x7, /* binary: 0111 */
NUC_W_4b = 0x8, /* binary: 1000 */
NUC_K_4b = 0x9, /* binary: 1001 */
NUC_M_4b = 0xA, /* binary: 1010 */
NUC_B_4b = 0xB, /* binary: 1011 */
NUC_D_4b = 0xC, /* binary: 1100 */
NUC_H_4b = 0xD, /* binary: 1101 */
NUC_V_4b = 0xE, /* binary: 1110 */
NUC_N_4b = 0xF, /* binary: 1111 */
};
/**
* @brief Checks if there are only 'atgcATGC' characters in a
* character string.
*
* @param seq The sequence to check.
*
* @returns A boolean value indicating if there are only
* 'atgcATGC' characters in a character string.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
bool only_ATGC(const char* seq);
/**
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
*
* A or a : 00
* C or c : 01
* G or g : 10
* T or t : 11
*
* @warning The DNA sequence must contain only 'atgcATGC' characters.
*
* @param seq The sequence to encode.
* @param length The length of the sequence to encode.
*
* @returns The encoded sequence.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
byte_t* encode_seq_on_2_bits(const char* seq, int32_t length);
/**
* @brief Decodes a DNA sequence that is coded with each nucleotide on 2 bits.
*
* 00 -> a
* 01 -> c
* 10 -> g
* 11 -> t
*
* @param seq_b The encoded sequence to decode.
* @param length_seq The initial length of the sequence before it was encoded.
*
* @returns The decoded sequence ended with '\0'.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
/**
* @brief Encodes a DNA sequence with each nucleotide coded on 4 bits.
*
* A or a : 0001
* C or c : 0010
* G or g : 0011
* T or t : 0100
* R or r : 0101
* Y or y : 0110
* S or s : 0111
* W or w : 1000
* K or k : 1001
* M or m : 1010
* B or b : 1011
* D or d : 1100
* H or h : 1101
* V or v : 1110
* N or n : 1111
*
* @warning The DNA sequence must contain only IUPAC characters.
*
* @param seq The sequence to encode.
* @param length The length of the sequence to encode.
*
* @returns The encoded sequence.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
byte_t* encode_seq_on_4_bits(const char* seq, int32_t length);
/**
* @brief Decodes a DNA sequence that is coded with each nucleotide on 4 bits.
*
* A or a : 0001
* C or c : 0010
* G or g : 0011
* T or t : 0100
* R or r : 0101
* Y or y : 0110
* S or s : 0111
* W or w : 1000
* K or k : 1001
* M or m : 1010
* B or b : 1011
* D or d : 1100
* H or h : 1101
* V or v : 1110
* N or n : 1111
*
* @param seq_b The encoded sequence to decode.
* @param length_seq The initial length of the sequence before it was encoded.
*
* @returns The decoded sequence ended with '\0'.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq);
////////// FOR DEBUGGING ///////////
// little endian
void print_bits(void* ptr, int32_t length);
#endif /* ENCODE_H_ */

7
src/murmurhash2.h Executable file
View File

@ -0,0 +1,7 @@
#ifndef _BLOOM_MURMURHASH2
#define _BLOOM_MURMURHASH2
unsigned int murmurhash2(const void * key, int len, const unsigned int seed);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,285 +0,0 @@
/****************************************************************************
* OBIDMS array header file *
****************************************************************************/
/**
* @file obiarray.h
* @author Celine Mercier
* @date October 19th 2015
* @brief Header file for handling arrays for storing and retrieving byte arrays (i.e. coding for character strings).
*/
#ifndef OBIARRAY_H_
#define OBIARRAY_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <sys/types.h>
#include <dirent.h>
#include "obidms.h"
#include "obitypes.h"
#define ARRAY_MAX_NAME (2048) /**< The maximum length of an array name.
*/
#define ARRAY_GROWTH_FACTOR (2) /**< The growth factor when an array is enlarged.
*/
#define BYTE_ARRAY_HEADER_SIZE (5) /**< The size of the header of a byte array.
*/
typedef char byte_t; /**< Defining byte type since data is stored in bits
* and char (stored on one byte) is the smallest addressable unit.
*/
/**
* @brief OBIDMS array data header structure.
*/
typedef struct OBIDMS_array_data_header {
int header_size; /**< Size of the header in bytes.
*/
index_t data_size_used; /**< Size of the data used in bytes.
*/
index_t data_size_max; /**< Max size of the data in bytes.
*/
index_t nb_items; /**< Number of items.
*/
char array_name[ARRAY_MAX_NAME+1]; /**< The array name as a NULL terminated string.
*/
time_t creation_date; /**< Date of creation of the file.
*/
} OBIDMS_array_data_header_t, *OBIDMS_array_data_header_p;
/**
* @brief OBIDMS array data structure.
*/
typedef struct OBIDMS_array_data {
OBIDMS_array_data_header_p header; /**< A pointer to the header of the array data.
*/
byte_t* data; /**< A pointer to the beginning of the data.
*/
} OBIDMS_array_data_t, *OBIDMS_array_data_p;
/**
* @brief OBIDMS array header structure.
*/
typedef struct OBIDMS_array_header {
int header_size; /**< Size of the header in bytes.
*/
size_t array_size; /**< Size of the array in bytes.
*/
index_t nb_items; /**< Number of items in the array.
*/
index_t nb_items_max; /**< Maximum number of items in the array before it has to be enlarged.
*/
char array_name[ARRAY_MAX_NAME+1]; /**< The array name as a NULL terminated string.
*/
time_t creation_date; /**< Date of creation of the file.
*/
} OBIDMS_array_header_t, *OBIDMS_array_header_p;
/**
* @brief OBIDMS array structure.
*/
typedef struct OBIDMS_array {
OBIDMS_array_header_p header; /**< A pointer to the header of the array.
*/
index_t* first; /**< A pointer to the beginning of the array itself.
*/
OBIDMS_array_data_p data; /**< A pointer to the structure containing the data
* that the array references.
*/
DIR* directory; /**< A directory entry usable to
* refer and scan the array directory.
*/
} OBIDMS_array_t, *OBIDMS_array_p;
/**
* @brief Checks if an obiarray already exists or not.
*
* @param dms The OBIDMS to which the obiarray belongs.
* @param array_name The name of the obiarray.
*
* @returns A value indicating whether the obiarray exists or not.
* @retval 1 if the obiarray exists.
* @retval 0 if the obiarray does not exist.
* @retval -1 if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_array_exists(OBIDMS_p dms, const char* array_name);
/**
* @brief Opens an obiarray and creates it if it does not already exist.
*
* Note: An obiarray is made of two files (referred to by two structures).
* One file contains the indices referring to the data, and the other
* file contains the data itself. The obiarray as a whole is referred
* to via the OBIDMS_array structure.
*
* @param dms The OBIDMS to which the obiarray belongs.
* @param array_name The name of the obiarray.
*
* @returns A pointer to the obiarray structure.
* @retval NULL if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_array_p obi_array(OBIDMS_p dms, const char* array_name);
/**
* @brief Creates an obiarray. Fails if it already exists.
*
* Note: An obiarray is made of two files (referred to by two structures).
* One file contains the indices referring to the data, and the other
* file contains the data itself. The obiarray as a whole is referred
* to via the OBIDMS_array structure.
*
* @param dms The OBIDMS to which the obiarray belongs.
* @param array_name The name of the obiarray.
*
* @returns A pointer to the newly created obiarray structure.
* @retval NULL if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_array_p obi_create_array(OBIDMS_p dms, const char* array_name);
/**
* @brief Opens an obiarray. Fails if it does not already exist.
*
* Note: An obiarray is made of two files (referred to by two structures).
* One file contains the indices referring to the data, and the other
* file contains the data itself. The obiarray as a whole is referred
* to via the OBIDMS_array structure.
*
* @param dms The OBIDMS to which the obiarray belongs.
* @param array_name The name of the obiarray.
*
* @returns A pointer to the obiarray structure.
* @retval NULL if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_array_p obi_open_array(OBIDMS_p dms, const char* array_name);
/**
* @brief Closes an obiarray.
*
* Note: An obiarray is made of two files (referred to by two structures).
* One file contains the indices referring to the data, and the other
* file contains the data itself. The obiarray as a whole is referred
* to via the OBIDMS_array structure.
*
* @param array A pointer to the obiarray structure to close and free.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_array(OBIDMS_array_p array);
/**
* @brief Adds a value (byte array) in an obiarray, checking first if it is already in it.
*
* @warning The byte array to add must already be encoded and contain its header.
*
* @param array A pointer to the obiarray.
* @param value The byte array to add in the obiarray.
*
* @returns The index of the value, whether it was added or already in the obiarray.
* @retval -1 if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
index_t obi_array_add(OBIDMS_array_p array, byte_t* value);
/**
* @brief Recovers a value (byte array) in an obiarray.
*
* @warning The byte array recovered is encoded and contains its header.
*
* @param array A pointer to the obiarray.
* @param index The index of the value in the data array.
*
* @returns A pointer to the byte array recovered.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
byte_t* obi_array_get(OBIDMS_array_p array, index_t index);
/**
* @brief Searches a value (byte array) in an obiarray performing a binary search.
*
* @warning The byte array to search must already be encoded and contain its header.
*
* @param array A pointer to the obiarray.
* @param value The byte array to add in the obiarray.
*
* @returns If the value is found, its data index is returned.
* If the value is not found, the array index indicating where the value's data index
* should be in the array is returned in the form (- (index + 1)), as data indices in an
* obiarray are sorted according to the ascending order of the values (byte arrays) themselves.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
index_t obi_array_search(OBIDMS_array_p array, byte_t* value);
/**
* @brief Converts a character string to a byte array with a header.
*
* @warning The byte array must be freed by the caller.
*
* @param value The character string to convert.
*
* @returns A pointer to the byte array created.
* @retval NULL if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
byte_t* obi_str_to_obibytes(char* value);
/**
* @brief Converts a byte array to a character string.
*
* @param value_b The byte array to convert.
*
* @returns A pointer to the character string contained in the byte array.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const char* obi_obibytes_to_str(byte_t* value_b);
#endif /* OBIARRAY_H_ */

2284
src/obiavl.c Normal file

File diff suppressed because it is too large Load Diff

404
src/obiavl.h Normal file
View File

@ -0,0 +1,404 @@
/****************************************************************************
* OBIDMS AVL tree header file *
****************************************************************************/
/**
* @file obiavl.h
* @author Celine Mercier
* @date December 3rd 2015
* @brief Header file for handling AVL trees for storing and retrieving blobs (i.e. coding for character strings).
*/
#ifndef OBIAVL_H_
#define OBIAVL_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdbool.h>
#include "obidms.h"
#include "obiblob.h"
#include "obitypes.h"
#include "bloom.h"
#include "utils.h"
#include "encode.h"
#define MAX_NB_OF_AVLS_IN_GROUP (100) /**< The maximum number of AVL trees in a group. // TODO discuss
*/
#define MAX_NODE_COUNT_PER_AVL (10000000) /**< The maximum number of nodes in an AVL tree.
* Only used to decide when to create a new AVL in a group, and to initialize the bloom filter // TODO discuss.
*/
#define MAX_DATA_SIZE_PER_AVL (1073741824) /**< The maximum size of the data referred to by an AVL tree in a group.
* Only used to decide when to create a new AVL in a group.
* Should not be greater than int32_t max (2,147,483,647), as indexes will have to be stored on 32 bits.
* Here 1073741824 B = 1 GB
*/
#define AVL_MAX_DEPTH (1024) /**< The maximum depth of an AVL tree. Used to save paths through the tree.
*/
#define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name.
*/
#define AVL_GROWTH_FACTOR (2) /**< The growth factor when an AVL tree is enlarged.
*/
#define LEFT_CHILD(node) ((avl->tree)+((node)->left_child)) /**< Pointer to the left child of a node in an AVL tree.
* The argument and the whole expansion are parenthesized so that the macro can be
* used with any pointer expression and inside larger expressions.
* @warning Relies on a variable named avl being in scope where the macro is used.
*/
#define RIGHT_CHILD(node) ((avl->tree)+((node)->right_child)) /**< Pointer to the right child of a node in an AVL tree.
* The argument and the whole expansion are parenthesized so that the macro can be
* used with any pointer expression and inside larger expressions.
* @warning Relies on a variable named avl being in scope where the macro is used.
*/
/**
* @brief AVL tree node structure.
*/
typedef struct AVL_node {
index_t left_child; /**< Index of left less child node.
*/
index_t right_child; /**< Index of right greater child node.
*/
int8_t balance_factor; /**< Balance factor of the node.
*/
index_t value; /**< Index of the value associated with the node in the data array.
*/
uint64_t crc64; /**< Cyclic Redundancy Check code on 64 bits associated with the value.
*/
} AVL_node_t, *AVL_node_p;
/**
* @brief OBIDMS AVL tree data header structure.
*/
typedef struct OBIDMS_avl_data_header {
int header_size; /**< Size of the header in bytes.
*/
index_t data_size_used; /**< Size of the data used in bytes.
*/
index_t data_size_max; /**< Max size of the data in bytes.
*/
index_t nb_items; /**< Number of items.
*/
char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a NULL terminated string.
*/
time_t creation_date; /**< Date of creation of the file.
*/
} OBIDMS_avl_data_header_t, *OBIDMS_avl_data_header_p;
/**
* @brief OBIDMS AVL tree data structure.
*/
typedef struct OBIDMS_avl_data {
OBIDMS_avl_data_header_p header; /**< A pointer to the header of the AVL tree data.
*/
byte_t* data; /**< A pointer to the beginning of the data.
*/
int data_fd; /**< File descriptor of the file containing the data.
*/
} OBIDMS_avl_data_t, *OBIDMS_avl_data_p;
/**
* @brief OBIDMS AVL tree header structure.
*/
typedef struct OBIDMS_avl_header {
int header_size; /**< Size of the header in bytes.
*/
size_t avl_size; /**< Size of the AVL tree in bytes.
*/
index_t nb_items; /**< Number of items in the AVL tree.
*/
index_t nb_items_max; /**< Maximum number of items in the AVL tree before it has to be enlarged.
*/
index_t root_idx; /**< Index of the root of the AVL tree.
*/
char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a NULL terminated string.
*/
time_t creation_date; /**< Date of creation of the file.
*/
bloom_t bloom_filter; /**< Bloom filter associated with the AVL tree, enabling to know if a value
* might already be stored in the data associated with the tree.
*/
} OBIDMS_avl_header_t, *OBIDMS_avl_header_p;
/**
* @brief OBIDMS AVL tree structure.
*
* Gathers everything attached to one AVL tree: the OBIDMS it belongs to, its header,
* its nodes, the buffers recording the path taken from the root to a node, the
* structure referring to its data, and the directory and file descriptors.
*/
typedef struct OBIDMS_avl {
OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the AVL tree belongs.
*/
OBIDMS_avl_header_p header; /**< A pointer to the header of the AVL tree.
*/
struct AVL_node* tree; /**< A pointer to the nodes of the AVL tree. The LEFT_CHILD and RIGHT_CHILD
* macros address a child as (tree + child index).
*/
index_t path_idx[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of node indices.
*/
int8_t path_dir[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of directions
* (0 for left, -1 for right).
*/
OBIDMS_avl_data_p data; /**< A pointer to the structure containing the data
* that the AVL tree references.
*/
DIR* directory; /**< A directory entry usable to
* refer and scan the AVL tree directory.
*/
int dir_fd; /**< The file descriptor of the directory entry
* usable to refer and scan the AVL tree directory.
*/
int avl_fd; /**< The file descriptor of the file containing the AVL tree.
*/
} OBIDMS_avl_t, *OBIDMS_avl_p;
/**
* @brief OBIDMS AVL tree group structure.
*/
typedef struct OBIDMS_avl_group {
OBIDMS_avl_p sub_avls[MAX_NB_OF_AVLS_IN_GROUP]; /**< Array containing the pointers to the AVL trees of the group.
*/
int current_avl_idx; /**< Index in the sub_avls array of the AVL tree currently being filled.
*/
char name[AVL_MAX_NAME+1]; /**< Base name of the AVL group. The AVL trees in it have names of the form basename_idx.
*/
OBIDMS_p dms; /**< Pointer to the OBIDMS structure to which the AVL group belongs.
*/
bool writable; /**< Indicates whether the AVL group is read-only or not.
*/
size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL group is used.
*/
} OBIDMS_avl_group_t, *OBIDMS_avl_group_p;
/**
* @brief Checks if an AVL tree or AVL tree group already exists or not.
*
* @param dms The OBIDMS to which the AVL tree or AVL tree group belongs.
* @param avl_name The name of the AVL tree or the base name of the AVL tree group.
*
* @returns A value indicating whether the AVL tree or AVL tree group exists or not.
* @retval 1 if the AVL tree or AVL tree group exists.
* @retval 0 if the AVL tree or AVL tree group does not exist.
* @retval -1 if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_avl_exists(OBIDMS_p dms, const char* avl_name);
/**
* @brief Creates an AVL tree. Fails if it already exists.
*
* Note: An AVL tree is made of two files (referred to by two structures).
* One file contains the indices referring to the data, and the other
* file contains the data itself. The AVL tree as a whole is referred
* to via the OBIDMS_avl structure. An AVL tree is stored in a directory
* with the same name, or with the base name of the AVL group if it is
* part of an AVL group.
*
* @param dms The OBIDMS to which the AVL tree belongs.
* @param avl_name The name of the AVL tree.
* @param avl_idx The index of the AVL tree if it is part of an AVL group.
*
* @returns A pointer to the newly created AVL tree structure.
* @retval NULL if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
/**
* @brief Opens an AVL tree. Fails if it does not already exist.
*
* Note: An AVL tree is made of two files (referred to by two structures).
* One file contains the indices referring to the data, and the other
* file contains the data itself. The AVL tree as a whole is referred
* to via the OBIDMS_avl structure.
*
* @param dms The OBIDMS to which the AVL tree belongs.
* @param avl_name The name of the AVL tree.
* @param avl_idx The index of the AVL tree if it is part of an AVL group.
*
* @returns A pointer to the AVL tree structure.
* @retval NULL if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
/**
* @brief Opens an AVL tree group and creates it if it does not already exist.
*
* Note: An AVL tree group is composed of multiple AVL trees that all have the
* same base name, and an index differentiating them.
*
* @param dms The OBIDMS to which the AVL tree belongs.
* @param avl_name The base name of the AVL tree group.
*
* @returns A pointer to the AVL tree group structure.
* @retval NULL if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name);
/**
* @brief Creates an AVL tree group.
*
* Note: An AVL tree group is composed of multiple AVL trees that all have the
* same base name, and an index differentiating them.
*
* @param dms The OBIDMS to which the AVL tree belongs.
* @param avl_name The base name of the AVL tree group.
*
* @returns A pointer to the AVL tree group structure.
* @retval NULL if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name);
/**
* @brief Opens an AVL tree group.
*
* Note: An AVL tree group is composed of multiple AVL trees that all have the
* same base name, and an index differentiating them.
*
* @param dms The OBIDMS to which the AVL tree belongs.
* @param avl_name The base name of the AVL tree group.
*
* @returns A pointer to the AVL tree group structure.
* @retval NULL if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_group_p obi_open_avl_group(OBIDMS_p dms, const char* avl_name);
/**
* @brief Closes an AVL tree.
*
* @param avl A pointer to the AVL tree structure to close and free.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_avl(OBIDMS_avl_p avl);
/**
* @brief Closes an AVL tree group.
*
* @param avl_group A pointer to the AVL tree group structure to close and free.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_avl_group(OBIDMS_avl_group_p avl_group);
/**
* @brief Recovers a value (blob) in an AVL tree.
*
* @warning The blob recovered must be decoded to get the original value.
*
* @param avl A pointer to the AVL tree.
* @param index The index of the value in the data array.
*
* @returns A pointer to the blob recovered.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
/**
* @brief Adds a value (blob) in an AVL tree NOT checking first if it is already in it. // TODO to discuss
*
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
*
* @param avl A pointer to the AVL tree.
* @param value The blob to add in the AVL tree.
*
* @returns The index of the value newly added in the AVL tree.
* @retval -1 if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value);
/**
* @brief Finds a value (blob) in an AVL tree.
*
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
*
* @param avl A pointer to the AVL tree.
* @param value The blob to find in the AVL tree.
*
* @returns The data index of the value.
* @retval -1 if the value is not in the tree.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value);
/**
* @brief Recovers a value (blob) in an AVL tree group.
*
* @warning The blob recovered must be decoded to get the original value.
*
* @param avl_group A pointer to the AVL tree group.
* @param idx The index of the value in the AVL tree group.
*
* @returns A pointer to the blob recovered.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
/**
* @brief Adds a value (blob) in an AVL tree group, checking if it is already in it.
*
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
*
* @param avl_group A pointer to the AVL tree group.
* @param value The blob to add in the AVL tree group.
*
* @returns The index of the value newly added in the AVL tree group.
* @retval -1 if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value);
#endif /* OBIAVL_H_ */

57
src/obiblob.c Normal file
View File

@ -0,0 +1,57 @@
/****************************************************************************
* Obiblob functions *
****************************************************************************/
/**
* @file obiblob.c
* @author Celine Mercier
* @date April 11th 2016
* @brief Functions handling Obiblob structures.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
#include "obiblob.h"
#include "obierrno.h"
#include "obitypes.h" // For byte_t type
#include "obidebug.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// TODO: endianness problem?
/**
 * Builds a blob: one malloc'ed memory block made of the Obi_blob_t header
 * immediately followed by a copy of the encoded value.
 *
 * @param encoded_value The encoded value to copy into the blob (may be NULL only if its length is 0).
 * @param element_size The number of bits on which each element is encoded.
 * @param length_encoded_value The length in bytes of the encoded value (must not be negative).
 * @param length_decoded_value The length in bytes of the decoded value.
 *
 * @returns A pointer to the newly allocated blob.
 * @retval NULL if the arguments are invalid or if the memory could not be allocated.
 */
Obi_blob_p obi_blob(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value)
{
    Obi_blob_p blob;
    size_t     value_size;

    // A negative length would wrap around when converted to size_t, leading to an
    // under-sized allocation and an out-of-bounds copy; a NULL source can not be copied from
    if ((length_encoded_value < 0) || ((encoded_value == NULL) && (length_encoded_value > 0)))
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a blob");
        return NULL;
    }
    value_size = (size_t) length_encoded_value;

    // Allocate the memory for the blob structure and the value stored right after it
    blob = (Obi_blob_p) malloc(sizeof(Obi_blob_t) + value_size);
    if (blob == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a blob");
        return NULL;
    }

    // Store the number of bits on which each element is encoded
    blob->element_size = element_size;

    // Store the length (in bytes) of the encoded value
    blob->length_encoded_value = length_encoded_value;

    // Store the initial length (in bytes) of the decoded value
    blob->length_decoded_value = length_decoded_value;

    // Store the encoded value (nothing to copy for an empty value)
    if (value_size > 0)
        memcpy(blob->value, encoded_value, value_size);

    return blob;
}

54
src/obiblob.h Normal file
View File

@ -0,0 +1,54 @@
/****************************************************************************
* Obiblob header file *
****************************************************************************/
/**
* @file obiblob.h
* @author Celine Mercier
* @date November 18th 2015
* @brief Header file for handling Obi_blob structures.
*/
#ifndef OBIBLOB_H_
#define OBIBLOB_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include "obitypes.h"
#define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string.
*/
#define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits.
*/
#define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits.
*/
/**
* @brief Blob structure.
*
* Header describing an encoded value, directly followed in memory by the encoded
* value itself (flexible array member). obi_blob() allocates both in one block of
* sizeof(Obi_blob_t) + length_encoded_value bytes.
*/
typedef struct Obi_blob {
uint8_t element_size; /**< Size in bits of one element from the value.
*/
int32_t length_encoded_value; /**< Length in bytes of the encoded value.
*/
int32_t length_decoded_value; /**< Length in bytes of the decoded value.
*/
byte_t value[]; /**< Encoded value.
*/
} Obi_blob_t, *Obi_blob_p;
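/**
* @brief Builds a blob structure from an encoded value.
*
* The blob is allocated with malloc() as one memory block holding the Obi_blob_t
* header immediately followed by a copy of the encoded value.
*
* @param encoded_value The encoded value to copy into the blob.
* @param element_size The size in bits of one element of the value (e.g. ELEMENT_SIZE_STR).
* @param length_encoded_value The length in bytes of the encoded value.
* @param length_decoded_value The length in bytes of the decoded value.
*
* @returns A pointer to the newly allocated blob.
* @retval NULL if the memory for the blob could not be allocated.
*/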
Obi_blob_p obi_blob(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value);
#endif /* OBIBLOB_H_ */

37
src/obiblob_indexer.c Normal file
View File

@ -0,0 +1,37 @@
/****************************************************************************
* Obiblob indexer functions *
****************************************************************************/
/**
* @file obiblob_indexer.c
* @author Celine Mercier
* @date April 12th 2016
* @brief Functions handling the indexing and retrieval of blob structures.
*/
#include <stdlib.h>
#include <stdio.h>
#include "obiblob_indexer.h"
#include "obidms.h"
#include "obiavl.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// The indexer functions are defined inline in obiblob_indexer.h. In C99, a translation
// unit where every declaration of a function is 'inline' without 'extern' only provides
// an inline definition: no external symbol is emitted, and any call the compiler chooses
// not to inline fails at link time. The 'extern inline' declarations below make this
// file the single provider of the external definitions.
extern inline int obi_indexer_exists(OBIDMS_p dms, const char* name);
extern inline Obi_indexer_p obi_indexer(OBIDMS_p dms, const char* name);
extern inline Obi_indexer_p obi_create_indexer(OBIDMS_p dms, const char* name);
extern inline Obi_indexer_p obi_open_indexer(OBIDMS_p dms, const char* name);
extern inline int obi_close_indexer(Obi_indexer_p indexer);
extern inline index_t obi_indexer_add(Obi_indexer_p indexer, Obi_blob_p value);
extern inline Obi_blob_p obi_indexer_get(Obi_indexer_p indexer, index_t idx);

80
src/obiblob_indexer.h Normal file
View File

@ -0,0 +1,80 @@
/****************************************************************************
* Blob indexer header file *
****************************************************************************/
/**
* @file obiblob_indexer.h
* @author Celine Mercier
* @date April 12th 2016
* @brief Header file for the functions handling the indexing of values.
*/
#ifndef OBIBLOB_INDEXER_H_
#define OBIBLOB_INDEXER_H_
#include <stdlib.h>
#include <stdio.h>
#include "obidms.h"
#include "obiavl.h"
#include "obitypes.h"
#include "obiblob.h"
#define INDEXER_MAX_NAME AVL_MAX_NAME /**< Macro to refer to the maximum size of the name of an indexer structure.
*/
typedef struct OBIDMS_avl_group Obi_indexer; /**< Typedef to refer to the used indexer structure.
*/
typedef OBIDMS_avl_group_p Obi_indexer_p; /**< Typedef to refer to the pointer of the used indexer structure.
*/
// TODO doc
/**
 * @brief Checks if an indexer exists, by forwarding to obi_avl_exists().
 *
 * @param dms The OBIDMS to which the indexer belongs.
 * @param name The name of the indexer.
 *
 * @returns The value returned by obi_avl_exists().
 */
inline int obi_indexer_exists(OBIDMS_p dms, const char* name)
{
    int exists;

    exists = obi_avl_exists(dms, name);

    return exists;
}
/**
 * @brief Gets an indexer, by forwarding to obi_avl_group().
 *
 * @param dms The OBIDMS to which the indexer belongs.
 * @param name The name of the indexer.
 *
 * @returns The value returned by obi_avl_group().
 */
inline Obi_indexer_p obi_indexer(OBIDMS_p dms, const char* name)
{
    Obi_indexer_p indexer;

    indexer = obi_avl_group(dms, name);

    return indexer;
}
/**
 * @brief Creates an indexer, by forwarding to obi_create_avl_group().
 *
 * @param dms The OBIDMS to which the indexer belongs.
 * @param name The name of the indexer.
 *
 * @returns The value returned by obi_create_avl_group().
 */
inline Obi_indexer_p obi_create_indexer(OBIDMS_p dms, const char* name)
{
    Obi_indexer_p new_indexer;

    new_indexer = obi_create_avl_group(dms, name);

    return new_indexer;
}
/**
 * @brief Opens an indexer, by forwarding to obi_open_avl_group().
 *
 * @param dms The OBIDMS to which the indexer belongs.
 * @param name The name of the indexer.
 *
 * @returns The value returned by obi_open_avl_group().
 */
inline Obi_indexer_p obi_open_indexer(OBIDMS_p dms, const char* name)
{
    Obi_indexer_p opened_indexer;

    opened_indexer = obi_open_avl_group(dms, name);

    return opened_indexer;
}
/**
 * @brief Closes an indexer, by forwarding to obi_close_avl_group().
 *
 * @param indexer A pointer to the indexer to close.
 *
 * @returns The value returned by obi_close_avl_group().
 */
inline int obi_close_indexer(Obi_indexer_p indexer)
{
    int status;

    status = obi_close_avl_group(indexer);

    return status;
}
/**
 * @brief Adds a value (blob) in an indexer, by forwarding to obi_avl_group_add().
 *
 * @param indexer A pointer to the indexer.
 * @param value The blob to add in the indexer.
 *
 * @returns The value returned by obi_avl_group_add().
 */
inline index_t obi_indexer_add(Obi_indexer_p indexer, Obi_blob_p value)
{
    index_t value_idx;

    value_idx = obi_avl_group_add(indexer, value);

    return value_idx;
}
/**
 * @brief Recovers a value (blob) from an indexer, by forwarding to obi_avl_group_get().
 *
 * @param indexer A pointer to the indexer.
 * @param idx The index of the value in the indexer.
 *
 * @returns The value returned by obi_avl_group_get().
 */
inline Obi_blob_p obi_indexer_get(Obi_indexer_p indexer, index_t idx)
{
    Obi_blob_p blob;

    blob = obi_avl_group_get(indexer, idx);

    return blob;
}
#endif /* OBIBLOB_INDEXER_H_ */

View File

@ -13,6 +13,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/types.h>
@ -22,7 +23,9 @@
#include "obierrno.h"
#include "obidebug.h"
#include "obidmscolumn.h"
#include "private_at_functions.h"
#include "obiblob_indexer.h"
#include "utils.h"
#include "obilittlebigman.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
@ -54,6 +57,44 @@
static char* build_directory_name(const char* dms_name);
/**
* Internal function building the informations file name from an OBIDMS name.
*
* The function builds the file name for the informations file of an OBIDMS.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param dms_name The name of the OBIDMS.
*
* @returns A pointer to the file name.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static char* build_infos_file_name(const char* dms_name);
/**
* Internal function creating the file containing basic informations on the OBIDMS.
*
* This file contains:
* - The endianness of the platform
*
* @warning The file is created with O_EXCL, so the function fails if the informations file already exists.
*
* @param dms_file_descriptor The file descriptor for the OBIDMS directory.
* @param dms_name The name of the OBIDMS.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int create_dms_infos_file(int dms_file_descriptor, const char* dms_name);
/************************************************************************
*
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
@ -65,7 +106,8 @@ static char* build_directory_name(const char* dms_name)
char* directory_name;
// Build the database directory name
if (asprintf(&directory_name, "%s.obidms", dms_name) < 0)
directory_name = (char*) malloc((strlen(dms_name) + 8)*sizeof(char));
if (sprintf(directory_name, "%s.obidms", dms_name) < 0)
{
obi_set_errno(OBIDMS_MEMORY_ERROR);
obidebug(1, "\nProblem building an OBIDMS directory name");
@ -85,6 +127,75 @@ static char* build_directory_name(const char* dms_name)
}
/*
 * Builds the name of the informations file of an OBIDMS: "<dms_name>_infos".
 *
 * The returned string is allocated with malloc() and has to be freed by the caller.
 * Returns NULL if the memory could not be allocated or if the name could not be built.
 */
static char* build_infos_file_name(const char* dms_name)
{
    static const char suffix[] = "_infos";
    char*  file_name;
    size_t file_name_size;
    int    written;

    // sizeof(suffix) counts the terminating '\0'
    file_name_size = strlen(dms_name) + sizeof(suffix);

    // Build file name
    file_name = (char*) malloc(file_name_size * sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBIDMS_MEMORY_ERROR);
        obidebug(1, "\nProblem building an informations file name");
        return NULL;
    }

    written = snprintf(file_name, file_name_size, "%s%s", dms_name, suffix);
    if ((written < 0) || (((size_t) written) >= file_name_size))
    {
        obi_set_errno(OBIDMS_MEMORY_ERROR);
        obidebug(1, "\nProblem building an informations file name");
        free(file_name);    // Do not leak the buffer when the name can not be built
        return NULL;
    }

    return file_name;
}
/*
 * Creates the informations file of an OBIDMS inside the OBIDMS directory and
 * writes in it the endianness of the platform as one bool.
 *
 * dms_file_descriptor: file descriptor of the OBIDMS directory.
 * dms_name: name of the OBIDMS, used to build the file name.
 *
 * Returns 0 on success, -1 if the file could not be created (it is opened with
 * O_EXCL, so it must not already exist), sized, written or closed.
 */
int create_dms_infos_file(int dms_file_descriptor, const char* dms_name)
{
    char*   file_name;
    int     infos_file_descriptor;
    off_t   file_size;
    bool    little_endian;
    ssize_t written;

    file_size = sizeof(bool);

    // Create file name
    file_name = build_infos_file_name(dms_name);
    if (file_name == NULL)
        return -1;

    // Create file; the name is not needed anymore once openat() has returned
    infos_file_descriptor = openat(dms_file_descriptor, file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
    free(file_name);
    if (infos_file_descriptor < 0)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError creating an informations file");
        return -1;
    }

    // Truncate the infos file to the right size
    if (ftruncate(infos_file_descriptor, file_size) < 0)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError truncating an informations file");
        close(infos_file_descriptor);
        return -1;
    }

    // Write endianness
    little_endian = obi_is_little_endian();
    written = write(infos_file_descriptor, &little_endian, sizeof(bool));
    if (written < ((ssize_t) sizeof(bool)))
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError writing the endianness in an informations file");
        close(infos_file_descriptor);
        return -1;
    }

    // Close file: a failure here can be the only report of a write that did not
    // reach the storage, so it must not be reported as a success
    if (close(infos_file_descriptor) < 0)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError closing an informations file");
        return -1;
    }

    return 0;
}
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
@ -94,7 +205,7 @@ static char* build_directory_name(const char* dms_name)
int obi_dms_exists(const char* dms_name)
{
struct stat buffer;
char *directory_name;
char* directory_name;
int check_dir;
// Build and check the directory name
@ -128,7 +239,10 @@ OBIDMS_p obi_create_dms(const char* dms_name)
if (mkdir(directory_name, 00777) < 0)
{
if (errno == EEXIST)
{
obi_set_errno(OBIDMS_EXIST_ERROR);
obidebug(1, "\nAn OBIDMS directory with the same name already exists in this directory.");
}
else
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nProblem creating an OBIDMS directory");
@ -136,7 +250,7 @@ OBIDMS_p obi_create_dms(const char* dms_name)
return NULL;
}
// Get file descriptor of DMS directory to create the arrays directory
// Get file descriptor of DMS directory to create the indexer directory
dms_dir = opendir(directory_name);
if (dms_dir == NULL)
{
@ -145,25 +259,28 @@ OBIDMS_p obi_create_dms(const char* dms_name)
free(directory_name);
return NULL;
}
free(directory_name);
dms_file_descriptor = dirfd(dms_dir);
if (dms_file_descriptor < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nProblem getting the file descriptor of a newly created OBIDMS directory");
free(directory_name);
return NULL;
}
// Create the arrays directory
if (mkdirat(dms_file_descriptor, ARRAY_DIR_NAME, 00777) < 0)
// Create the indexer directory
if (mkdirat(dms_file_descriptor, INDEXER_DIR_NAME, 00777) < 0)
{
obi_set_errno(OBI_ARRAY_ERROR);
obidebug(1, "\nProblem creating an arrays directory");
free(directory_name);
obi_set_errno(OBI_INDEXER_ERROR);
obidebug(1, "\nProblem creating an indexer directory");
return NULL;
}
free(directory_name);
// Create the informations file
if (create_dms_infos_file(dms_file_descriptor, dms_name) < 0)
return NULL;
return obi_open_dms(dms_name);
}
@ -173,19 +290,36 @@ OBIDMS_p obi_open_dms(const char* dms_name)
{
OBIDMS_p dms;
char* directory_name;
DIR* directory;
int dms_file_descriptor;
char* infos_file_name;
int infos_file_descriptor;
bool little_endian_dms;
bool little_endian_platform;
dms = NULL;
// Allocate the data structure
dms = (OBIDMS_p) malloc(sizeof(OBIDMS_t));
if (dms == NULL)
{
obi_set_errno(OBIDMS_MEMORY_ERROR);
obidebug(1, "\nError allocating the memory for the OBIDMS structure");
return NULL;
}
// Build and check the directory name
directory_name = build_directory_name(dms_name);
if (directory_name == NULL)
{
free(dms);
return NULL;
}
strncpy(dms->directory_name, directory_name, OBIDMS_MAX_NAME);
free(directory_name);
// Try to open the directory
directory = opendir(directory_name);
if (directory == NULL)
dms->directory = opendir(dms->directory_name);
if (dms->directory == NULL)
{
switch (errno)
{
@ -205,45 +339,90 @@ OBIDMS_p obi_open_dms(const char* dms_name)
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
}
obidebug(1, "\nCan't open OBIDMS directory");
free(directory_name);
free(dms);
return NULL;
}
// Allocate the data structure
dms = (OBIDMS_p) malloc(sizeof(OBIDMS_t));
if (dms == NULL)
{
obi_set_errno(OBIDMS_MEMORY_ERROR);
obidebug(1, "\nError allocating the memory for the OBIDMS structure");
free(directory_name);
return NULL;
}
// Initialize the data structure
strcpy(dms->directory_name, directory_name);
dms->directory = directory;
// Get file descriptor of DMS directory to open the arrays directory
dms_file_descriptor = dirfd(directory);
if (dms_file_descriptor < 0)
// Get and store file descriptor of DMS directory to open the informations file
dms->dir_fd = dirfd(dms->directory);
if (dms->dir_fd < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError getting the file descriptor for a newly created OBIDMS directory");
free(directory_name);
closedir(dms->directory);
free(dms);
return NULL;
}
// Open the arrays directory
dms->array_directory = private_opendirat(dms_file_descriptor, ARRAY_DIR_NAME);
if (dms->array_directory == NULL)
// Open informations file to check endianness
infos_file_name = build_infos_file_name(dms_name);
infos_file_descriptor = openat(dms->dir_fd, infos_file_name, O_RDONLY, 0777);
if (infos_file_descriptor < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError opening the arrays directory");
free(directory_name);
obidebug(1, "\nError opening an informations file");
closedir(dms->directory);
free(dms);
return NULL;
}
free(directory_name);
free(infos_file_name);
// Check endianness of the platform and DMS
little_endian_platform = obi_is_little_endian();
if (read(infos_file_descriptor, &little_endian_dms, sizeof(bool)) < ((ssize_t) sizeof(bool)))
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError reading the endianness in an informations file");
close(infos_file_descriptor);
closedir(dms->directory);
free(dms);
return NULL;
}
if (little_endian_platform != little_endian_dms)
{
obi_set_errno(OBIDMS_BAD_ENDIAN_ERROR);
obidebug(1, "\nError: The DMS and the platform have different endianness");
close(infos_file_descriptor);
closedir(dms->directory);
free(dms);
return NULL;
}
close(infos_file_descriptor);
dms->little_endian = little_endian_dms;
// Open the indexer directory
dms->indexer_directory = opendir_in_dms(dms, INDEXER_DIR_NAME);
if (dms->indexer_directory == NULL)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError opening the indexer directory");
closedir(dms->directory);
free(dms);
return NULL;
}
// Store the indexer directory's file descriptor
dms->indexer_dir_fd = dirfd(dms->indexer_directory);
if (dms->indexer_dir_fd < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError getting the file descriptor of the indexer directory");
closedir(dms->indexer_directory);
closedir(dms->directory);
free(dms);
return NULL;
}
// Initialize the list of opened columns
dms->opened_columns = (Opened_columns_list_p) malloc(sizeof(Opened_columns_list_t));
(dms->opened_columns)->nb_opened_columns = 0;
// Initialize the list of opened indexers // TODO should be handled somewhere else?
dms->opened_indexers = (Opened_indexers_list_p) malloc(sizeof(Opened_indexers_list_t));
(dms->opened_indexers)->nb_opened_indexers = 0;
return dms;
}
@ -272,6 +451,11 @@ int obi_close_dms(OBIDMS_p dms)
{
if (dms != NULL)
{
// Close all columns
while ((dms->opened_columns)->nb_opened_columns > 0)
obi_close_column(*((dms->opened_columns)->columns));
// Close dms and indexer directories
if (closedir(dms->directory) < 0)
{
obi_set_errno(OBIDMS_MEMORY_ERROR);
@ -279,10 +463,10 @@ int obi_close_dms(OBIDMS_p dms)
free(dms);
return -1;
}
if (closedir(dms->array_directory) < 0)
if (closedir(dms->indexer_directory) < 0) // TODO should be handled somewhere else?
{
obi_set_errno(OBI_ARRAY_ERROR);
obidebug(1, "\nError closing an array directory");
obi_set_errno(OBI_INDEXER_ERROR);
obidebug(1, "\nError closing an indexer directory");
free(dms);
return -1;
}
@ -292,3 +476,175 @@ int obi_close_dms(OBIDMS_p dms)
return 0;
}
/**
 * @brief Tells whether a column with a given name is in the list of opened columns of a DMS.
 *
 * Only the name is compared: the version of the column is not taken into account.
 *
 * @param dms A pointer on the OBIDMS whose list of opened columns is searched.
 * @param column_name The name of the column looked for.
 *
 * @retval 0 if a column with that name is in the list.
 * @retval 1 if no column with that name is in the list.
 */
int obi_dms_is_column_name_in_list(OBIDMS_p dms, const char* column_name)
{
    Opened_columns_list_p opened = dms->opened_columns;
    OBIDMS_column_p       candidate;
    int                   idx = 0;

    while (idx < opened->nb_opened_columns)
    {
        candidate = opened->columns[idx];
        if (strcmp(candidate->header->name, column_name) == 0)
            return 0;    // Found it
        idx++;
    }

    return 1;
}
/**
 * @brief Returns an already opened column from the list of opened columns of a DMS.
 *
 * A column matches when both its name and its version are equal to the ones asked for.
 *
 * @param dms A pointer on the OBIDMS whose list of opened columns is searched.
 * @param column_name The name of the column looked for.
 * @param version The version of the column looked for.
 *
 * @returns A pointer on the matching opened column.
 * @retval NULL if no matching column is in the list.
 */
OBIDMS_column_p obi_dms_get_column_from_list(OBIDMS_p dms, const char* column_name, obiversion_t version)
{
    Opened_columns_list_p opened = dms->opened_columns;
    OBIDMS_column_p       candidate;
    int                   idx;

    for (idx = 0; idx < opened->nb_opened_columns; idx++)
    {
        candidate = opened->columns[idx];

        if (strcmp(candidate->header->name, column_name) != 0)
            continue;

        if (candidate->header->version == version)
            return candidate;    // Column already opened
    }

    // No column with that name and version in the list
    return NULL;
}
/**
 * @brief Appends a column to the list of opened columns of a DMS.
 *
 * The list is a fixed-size array of MAX_NB_OPENED_COLUMNS entries. When it is
 * already full, the column is not listed, the error is reported through
 * obi_set_errno() and obidebug(), and the list is left untouched instead of
 * being written out of bounds.
 *
 * @param dms A pointer on the OBIDMS in which the column is opened.
 * @param column A pointer on the column to add to the list.
 */
void obi_dms_list_column(OBIDMS_p dms, OBIDMS_column_p column)
{
    Opened_columns_list_p columns_list;

    columns_list = dms->opened_columns;

    if (columns_list->nb_opened_columns >= MAX_NB_OPENED_COLUMNS)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError listing an opened column: the maximum number of opened columns is reached");
        return;
    }

    (columns_list->columns)[columns_list->nb_opened_columns] = column;
    (columns_list->nb_opened_columns)++;
}
/**
 * @brief Removes a column from the list of opened columns of a DMS.
 *
 * The column is identified by its name and version. The freed slot is filled
 * with the last column of the list, so the order of the list is not kept.
 *
 * @param dms A pointer on the OBIDMS in which the column is listed.
 * @param column A pointer on the column to remove from the list.
 *
 * @retval 0 if the column was found and removed.
 * @retval -1 if the column was not in the list.
 */
int obi_dms_unlist_column(OBIDMS_p dms, OBIDMS_column_p column)
{
    Opened_columns_list_p opened = dms->opened_columns;
    OBIDMS_column_p       listed;
    int                   last;
    int                   idx;

    for (idx = 0; idx < opened->nb_opened_columns; idx++)
    {
        listed = opened->columns[idx];

        if ((strcmp(listed->header->name, column->header->name) == 0) &&
            (listed->header->version == column->header->version))
        {
            // Move the last listed column into the freed slot and shrink the list
            last = opened->nb_opened_columns - 1;
            opened->columns[idx] = opened->columns[last];
            opened->nb_opened_columns = last;
            return 0;
        }
    }

    obidebug(1, "\nCould not find the column to delete from list of open columns");
    return -1;
}
/**
 * @brief Returns an already opened indexer from the list of opened indexers of a DMS.
 *
 * @param dms A pointer on the OBIDMS whose list of opened indexers is searched.
 * @param indexer_name The name of the indexer looked for.
 *
 * @returns A pointer on the opened indexer with that name.
 * @retval NULL if no indexer with that name is in the list.
 */
Obi_indexer_p obi_dms_get_indexer_from_list(OBIDMS_p dms, const char* indexer_name)
{
    Opened_indexers_list_p opened = dms->opened_indexers;
    Obi_indexer_p          candidate;
    int                    idx = 0;

    while (idx < opened->nb_opened_indexers)
    {
        candidate = opened->indexers[idx];
        // TODO the name field accessed here belongs to the AVL group structure
        if (strcmp(candidate->name, indexer_name) == 0)
            return candidate;    // Indexer already opened
        idx++;
    }

    // No indexer with that name in the list
    return NULL;
}
/**
 * @brief Appends an indexer to the list of opened indexers of a DMS.
 *
 * The list is a fixed-size array of MAX_NB_OPENED_INDEXERS entries. When it is
 * already full, the indexer is not listed, the error is reported through
 * obi_set_errno() and obidebug(), and the list is left untouched instead of
 * being written out of bounds.
 *
 * @param dms A pointer on the OBIDMS in which the indexer is opened.
 * @param indexer A pointer on the indexer to add to the list.
 */
void obi_dms_list_indexer(OBIDMS_p dms, Obi_indexer_p indexer)
{
    Opened_indexers_list_p indexers_list;

    indexers_list = dms->opened_indexers;

    if (indexers_list->nb_opened_indexers >= MAX_NB_OPENED_INDEXERS)
    {
        obi_set_errno(OBI_INDEXER_ERROR);
        obidebug(1, "\nError listing an opened indexer: the maximum number of opened indexers is reached");
        return;
    }

    (indexers_list->indexers)[indexers_list->nb_opened_indexers] = indexer;
    (indexers_list->nb_opened_indexers)++;
}
/**
 * @brief Removes an indexer from the list of opened indexers of a DMS.
 *
 * The indexer is identified by its name. The freed slot is filled with the
 * last indexer of the list, so the order of the list is not kept.
 *
 * @param dms A pointer on the OBIDMS in which the indexer is listed.
 * @param indexer A pointer on the indexer to remove from the list.
 *
 * @retval 0 if the indexer was found and removed.
 * @retval -1 if the indexer was not in the list.
 */
int obi_dms_unlist_indexer(OBIDMS_p dms, Obi_indexer_p indexer)
{
    Opened_indexers_list_p opened = dms->opened_indexers;
    int                    last;
    int                    idx;

    for (idx = 0; idx < opened->nb_opened_indexers; idx++)
    {
        // TODO the name field accessed here belongs to the AVL group structure
        if (strcmp(opened->indexers[idx]->name, indexer->name) == 0)
        {
            // Move the last listed indexer into the freed slot and shrink the list
            last = opened->nb_opened_indexers - 1;
            opened->indexers[idx] = opened->indexers[last];
            opened->nb_opened_indexers = last;
            return 0;
        }
    }

    obidebug(1, "\nCould not find the indexer to delete from list of open indexers");
    return -1;
}
/**
 * @brief Builds the full path of the directory of a DMS.
 *
 * The path is the current working directory followed by '/' and the directory
 * name stored in the DMS structure.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer on the OBIDMS.
 *
 * @returns A pointer on a newly allocated buffer of MAX_PATH_LEN characters containing the path.
 * @retval NULL if an error occurred (allocation failure, getcwd() failure,
 *         or path not fitting in MAX_PATH_LEN characters).
 */
char* obi_dms_get_dms_path(OBIDMS_p dms)
{
    char*  full_path;
    size_t cwd_length;
    size_t remaining;
    int    written;

    full_path = (char*) malloc((MAX_PATH_LEN)*sizeof(char));
    if (full_path == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for the char* path to a file or directory");
        return NULL;
    }

    // TODO not sure at all about this because the DMS must be in the working directory.
    // Maybe better to store when opening, but opening function seems to assume that too.
    if (getcwd(full_path, MAX_PATH_LEN) == NULL)
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "\nError getting the path to a file or directory");
        free(full_path);
        return NULL;
    }

    // Append "/<directory name>" without ever writing past the end of the buffer
    cwd_length = strlen(full_path);
    remaining  = MAX_PATH_LEN - cwd_length;
    written    = snprintf(full_path + cwd_length, remaining, "/%s", dms->directory_name);
    if ((written < 0) || (((size_t) written) >= remaining))
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "\nError getting the path to a file or directory: the path is too long");
        free(full_path);
        return NULL;
    }

    return full_path;
}
/**
 * @brief Builds the full path of a file or directory located in a DMS.
 *
 * The result is the path returned by obi_dms_get_dms_path() followed by '/'
 * and path_name. It is stored in a buffer allocated to the exact size needed,
 * so no assumption is made on the size of the buffer holding the DMS path.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer on the OBIDMS.
 * @param path_name The path of the file or directory, relative to the DMS directory.
 *
 * @returns A pointer on the newly allocated full path.
 * @retval NULL if an error occurred.
 */
char* obi_dms_get_full_path(OBIDMS_p dms, const char* path_name)
{
    char*  dms_path;
    char*  full_path;
    size_t full_path_size;

    dms_path = obi_dms_get_dms_path(dms);
    if (dms_path == NULL)
        return NULL;

    // DMS path + '/' + relative path + terminating '\0'
    full_path_size = strlen(dms_path) + strlen(path_name) + 2;

    full_path = (char*) malloc(full_path_size * sizeof(char));
    if (full_path == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for the char* path to a file or directory");
        free(dms_path);
        return NULL;
    }

    snprintf(full_path, full_path_size, "%s/%s", dms_path, path_name);

    free(dms_path);

    return full_path;
}
/**
 * @brief Opens a directory located in a DMS.
 *
 * The full path of the directory is built with obi_dms_get_full_path() and
 * then opened with opendir().
 *
 * @param dms A pointer on the OBIDMS.
 * @param path_name The path of the directory, relative to the DMS directory.
 *
 * @returns The directory stream of the opened directory.
 * @retval NULL if the path could not be built or the directory could not be opened.
 */
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name)
{
    DIR*  opened_directory = NULL;
    char* absolute_path    = obi_dms_get_full_path(dms, path_name);

    if (absolute_path != NULL)
    {
        opened_directory = opendir(absolute_path);
        if (opened_directory == NULL)
        {
            obi_set_errno(OBI_UTILS_ERROR);
            obidebug(1, "\nError opening a directory");
        }
        free(absolute_path);
    }

    return opened_directory;
}

View File

@ -20,14 +20,49 @@
#include <dirent.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
#include "obierrno.h"
//#include "obidmscolumn.h"
//#include "obiblob_indexer.h"
#define OBIDMS_MAX_NAME (2048) /**< The maximum length of an OBIDMS name.
*/
#define ARRAY_DIR_NAME "arrays" /**< The name of the arrays directory.
#define INDEXER_DIR_NAME "OBIBLOB_INDEXERS" /**< The name of the Obiblob indexer directory.
*/
#define TAXONOMY_DIR_NAME "TAXONOMY" /**< The name of the taxonomy directory.
*/
#define MAX_NB_OPENED_COLUMNS (100) /**< The maximum number of columns open at the same time.
*/
#define MAX_NB_OPENED_INDEXERS (1000) /**< The maximum number of indexers open at the same time.
*/
#define MAX_PATH_LEN 4096 /**< Maximum length for the character string defining a
* file or directory path.
*/
typedef int32_t obiversion_t; /**< TODO double
*/
struct OBIDMS_column; // TODO
                      // Forward declaration: only pointers on columns are needed in this header.
typedef struct OBIDMS_column* OBIDMS_column_p;
/**
 * @brief List of the columns currently opened in a DMS.
 *
 * Fixed-size array of column pointers, of which the first nb_opened_columns are in use.
 */
typedef struct Opened_columns_list { // TODO Handle the problem linked to columns with the same name + means only one version
int nb_opened_columns;                              /**< Number of columns currently stored in the list. */
OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS];     /**< Pointers on the opened columns. */
} Opened_columns_list_t, *Opened_columns_list_p;
struct OBIDMS_avl_group; // TODO
                         // Forward declaration: only pointers on AVL groups are needed in this header.
typedef struct OBIDMS_avl_group* OBIDMS_avl_group_p;
typedef OBIDMS_avl_group_p Obi_indexer_p;   /**< An indexer is currently a pointer on an AVL group. */
/**
 * @brief List of the indexers currently opened in a DMS.
 *
 * Fixed-size array of indexer pointers, of which the first nb_opened_indexers are in use.
 */
typedef struct Opened_indexers_list {
int nb_opened_indexers;                             /**< Number of indexers currently stored in the list. */
Obi_indexer_p indexers[MAX_NB_OPENED_INDEXERS];     /**< Pointers on the opened indexers. */
} Opened_indexers_list_t, *Opened_indexers_list_p;
/**
@ -43,8 +78,20 @@ typedef struct OBIDMS {
DIR* directory; /**< A directory entry usable to
* refer and scan the database directory.
*/
DIR* array_directory; /**< A directory entry usable to
* refer and scan the array directory.
int dir_fd; /**< The file descriptor of the directory entry
* usable to refer and scan the database directory.
*/
DIR* indexer_directory; /**< A directory entry usable to
* refer and scan the indexer directory.
*/
int indexer_dir_fd; /**< The file descriptor of the directory entry
* usable to refer and scan the indexer directory.
*/
bool little_endian; /**< Endianness of the database.
*/
Opened_columns_list_p opened_columns; /**< List of opened columns.
*/
Opened_indexers_list_p opened_indexers; /**< List of opened indexers.
*/
} OBIDMS_t, *OBIDMS_p;
@ -72,7 +119,7 @@ int obi_dms_exists(const char* dms_name);
* if a directory with this name does not already exist
* before creating the new database.
*
* A directory to store obiarrays is also created.
* A directory to store Obiblob indexers is also created.
*
* @param dms_name A pointer to a C string containing the name of the database.
* The actual directory name used to store the DMS will be
@ -139,4 +186,63 @@ OBIDMS_p obi_dms(const char* dms_name);
int obi_close_dms(OBIDMS_p dms);
/**
 * @brief Tells whether a column with a given name is in the list of opened columns of a DMS.
 *
 * @param dms A pointer on the OBIDMS.
 * @param column_name The name of the column looked for (the version is not compared).
 *
 * @retval 0 if a column with that name is in the list.
 * @retval 1 otherwise.
 */
int obi_dms_is_column_name_in_list(OBIDMS_p dms, const char* column_name);

/**
 * @brief Returns the opened column with a given name and version from the list of opened columns of a DMS.
 *
 * @param dms A pointer on the OBIDMS.
 * @param column_name The name of the column looked for.
 * @param version The version of the column looked for.
 *
 * @returns A pointer on the opened column.
 * @retval NULL if no matching column is in the list.
 */
OBIDMS_column_p obi_dms_get_column_from_list(OBIDMS_p dms, const char* column_name, obiversion_t version);

/**
 * @brief Adds a column to the list of opened columns of a DMS.
 *
 * The list can hold at most MAX_NB_OPENED_COLUMNS columns.
 *
 * @param dms A pointer on the OBIDMS.
 * @param column A pointer on the column to list.
 */
void obi_dms_list_column(OBIDMS_p dms, OBIDMS_column_p column);

/**
 * @brief Removes a column, identified by its name and version, from the list of opened columns of a DMS.
 *
 * @param dms A pointer on the OBIDMS.
 * @param column A pointer on the column to unlist.
 *
 * @retval 0 if the column was found and removed.
 * @retval -1 if the column was not in the list.
 */
int obi_dms_unlist_column(OBIDMS_p dms, OBIDMS_column_p column);

/**
 * @brief Returns the opened indexer with a given name from the list of opened indexers of a DMS.
 *
 * @param dms A pointer on the OBIDMS.
 * @param indexer_name The name of the indexer looked for.
 *
 * @returns A pointer on the opened indexer.
 * @retval NULL if no indexer with that name is in the list.
 */
Obi_indexer_p obi_dms_get_indexer_from_list(OBIDMS_p dms, const char* indexer_name);

/**
 * @brief Adds an indexer to the list of opened indexers of a DMS.
 *
 * The list can hold at most MAX_NB_OPENED_INDEXERS indexers.
 *
 * @param dms A pointer on the OBIDMS.
 * @param indexer A pointer on the indexer to list.
 */
void obi_dms_list_indexer(OBIDMS_p dms, Obi_indexer_p indexer);

/**
 * @brief Removes an indexer, identified by its name, from the list of opened indexers of a DMS.
 *
 * @param dms A pointer on the OBIDMS.
 * @param indexer A pointer on the indexer to unlist.
 *
 * @retval 0 if the indexer was found and removed.
 * @retval -1 if the indexer was not in the list.
 */
int obi_dms_unlist_indexer(OBIDMS_p dms, Obi_indexer_p indexer);

// NOTE(review): no definition of obi_dms_get_path() is visible in obidms.c, where the
// function is named obi_dms_get_dms_path(). The declaration is kept for compatibility;
// confirm whether it can be removed.
char* obi_dms_get_path(OBIDMS_p dms);

/**
 * @brief Builds the full path of the directory of a DMS
 *        (current working directory + '/' + DMS directory name).
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer on the OBIDMS.
 *
 * @returns A pointer on the full path.
 * @retval NULL if an error occurred.
 */
char* obi_dms_get_dms_path(OBIDMS_p dms);
/**
 * @brief Function getting the full path of a file or a directory from its
 *        path relative to the directory of a DMS.
 *
 * The full path is made of the path of the DMS directory, a '/' and path_name.
 *
 * TODO the path of the DMS directory is built from the current working directory,
 *      so the DMS is assumed to be located in the working directory.
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms A pointer on the OBIDMS to which path_name is relative.
 * @param path_name The path name for the file or directory, relative to the DMS directory.
 *
 * @returns A pointer to the full path.
 * @retval NULL if an error occurred.
 *
 * @since June 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_dms_get_full_path(OBIDMS_p dms, const char* path_name);
/**
 * @brief Replacement function for opendirat() : open a directory relative to the directory of a DMS.
 *
 * @param dms A pointer on the OBIDMS in which the directory should be opened.
 * @param path_name The path name for the directory to be opened, relative to the DMS directory.
 *
 * @returns The directory stream (DIR*) of the opened directory.
 * @retval NULL if an error occurred.
 *
 * @since June 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name);
#endif /* OBIDMS_H_ */

682
src/obidms_taxonomy.c Normal file
View File

@ -0,0 +1,682 @@
/********************************************************************
* OBIDMS taxonomy functions *
********************************************************************/
/**
* @file obidms_taxonomy.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date March 2nd 2016
* @brief Functions for reading binary taxonomy files.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <fcntl.h>
#include "obidms_taxonomy.h"
#include "obidms.h"
#include "obidebug.h"
#include "obierrno.h"
#include "utils.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// TODO : the malloc aren't checked but won't exist for long because mapping instead
/**
 * @brief bsearch() comparison function for rank labels.
 *
 * @param label1 The searched label, as a C string.
 * @param label2 A pointer on an element of the array of labels, i.e. a pointer on a C string pointer.
 *
 * @returns The result of strcmp() between the searched label and the array element.
 */
int compareRankLabel(const void *label1, const void *label2)
{
    const char*        searched_label = (const char*) label1;
    const char* const* array_element  = (const char* const*) label2;

    return strcmp(searched_label, *array_element);
}
/**
 * @brief Returns the index of a rank label in a rank index.
 *
 * The label is looked for with bsearch(), which requires the labels of the
 * index to be sorted in strcmp() order.
 *
 * @param label The rank label looked for.
 * @param ranks A pointer on the rank index.
 *
 * @returns The position of the label in the array of labels.
 * @retval -1 if the label is not in the index.
 */
int32_t rank_index(const char* label, ecorankidx_t* ranks)
{
    char** found;

    found = bsearch(label, ranks->label, ranks->count, sizeof(char*), compareRankLabel);
    if (found == NULL)
        return -1;

    // Distance between the found element and the first one = index of the label
    return found - ranks->label;
}
/**
 * @brief Reads the next record of a binary taxonomy file.
 *
 * A record is made of its size in bytes, stored as an int32_t, followed by
 * that number of bytes of data.
 *
 * @warning The returned buffer is a static buffer shared by all calls: it is
 *          overwritten by the next call and must not be freed by the caller.
 *
 * @param f The file to read from.
 * @param record_size A pointer on the integer in which the size of the record is stored.
 *
 * @returns A pointer on the static buffer containing the data of the record.
 *          For a record of size 0, the buffer is returned unchanged and is NULL
 *          if no record has been read before.
 * @retval NULL if the end of the file was reached or if an error occurred.
 */
void* read_ecorecord(FILE* f, int32_t* record_size)
{
    static void*   buffer      = NULL;
    static int32_t buffer_size = 0;     // Has to be static too, to remember the size of the static buffer
    void*          grown_buffer;
    size_t         read;

    if (!record_size)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: record_size can not be NULL");
        return NULL;
    }

    read = fread(record_size,
                 1,
                 sizeof(int32_t),
                 f);

    if (feof(f))
        return NULL;

    if (read != sizeof(int32_t))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: error reading record size");
        return NULL;
    }

//  if (is_big_endian())    // TODO
//      *recordSize=swap_int32_t(*recordSize);

    if (*record_size < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: negative record size");
        return NULL;
    }

    if (buffer_size < *record_size)
    {
        // The result of realloc() goes to a temporary pointer so that the
        // current buffer is not lost if the reallocation fails
        grown_buffer = realloc(buffer, (size_t) *record_size);
        if (grown_buffer == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError reading a taxonomy file: error allocating memory");
            return NULL;
        }
        buffer      = grown_buffer;
        buffer_size = *record_size;
    }

    if (*record_size > 0)
    {
        read = fread(buffer,
                     1,
                     (size_t) *record_size,
                     f);

        if (read != (size_t) *record_size)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError reading a taxonomy file: error reading a record %d, %d", (int) read, *record_size);
            return NULL;
        }
    }

    return buffer;
}
/**
 * @brief Reads the next taxon record of a binary taxa file into a taxon structure.
 *
 * The parent field of the taxon is filled with the raw parent value of the
 * record cast to a pointer: it is up to the caller to convert it into a real
 * pointer. The farest field is initialized to -1.
 *
 * @warning The name of the taxon is allocated and has to be freed by the caller.
 *
 * @param f The taxa file to read from.
 * @param taxon A pointer on the taxon structure to fill.
 *
 * @returns The taxon pointer given as parameter.
 * @retval NULL if the end of the file was reached or if an error occurred.
 */
ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon)
{
    ecotxformat_t* raw;
    int32_t        record_length;
    size_t         name_offset;

    raw = read_ecorecord(f, &record_length);
    if (!raw)
        return NULL;

//  if (is_big_endian())    // TODO
//  {
//      raw->namelength = swap_int32_t(raw->namelength);
//      raw->parent     = swap_int32_t(raw->parent);
//      raw->rank       = swap_int32_t(raw->rank);
//      raw->taxid      = swap_int32_t(raw->taxid);
//  }

    // The name has to fit in the record that was actually read
    name_offset = (size_t) (((char*) raw->name) - ((char*) raw));
    if ((raw->name_length < 0) ||
        (record_length < 0) ||
        ((name_offset + (size_t) raw->name_length) > (size_t) record_length))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: invalid taxon name length");
        return NULL;
    }

    taxon->parent = (ecotx_t*) ((size_t) raw->parent);
    taxon->taxid  = raw->taxid;
    taxon->rank   = raw->rank;
    taxon->farest = -1;

    taxon->name = malloc((raw->name_length+1) * sizeof(char));
    if (taxon->name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxon name");
        return NULL;
    }

    strncpy(taxon->name, raw->name, raw->name_length);
    // strncpy() does not add the terminating character when the source is not shorter than the length
    taxon->name[raw->name_length] = 0;

    return taxon;
}
/**
 * @brief Opens a binary taxonomy file and reads the number of records it contains.
 *
 * The number of records is the first int32_t of the file. After a successful
 * call, the file is positioned on the first record.
 *
 * @param file_name The path of the file to open.
 * @param count A pointer on the integer in which the number of records is stored.
 *              It is set to 0 whenever NULL is returned.
 * @param abort_on_open_error If not 0, failing to open the file is reported as an error
 *                            with obi_set_errno() and obidebug(); if 0, it is silent.
 *
 * @returns The opened file, to be closed by the caller with fclose().
 * @retval NULL if the file could not be opened or if the number of records could not be read.
 */
FILE* open_ecorecorddb(const char* file_name,
                       int32_t*    count,
                       int32_t     abort_on_open_error)
{
    FILE*  f;
    size_t read;

    fprintf(stderr, "\n%s\n", file_name);

    f = fopen(file_name, "rb");

    if (!f)
    {
        if (abort_on_open_error)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nCouldn't open a taxonomy file");
        }
        *count = 0;
        return NULL;
    }

    read = fread(count,
                 1,
                 sizeof(int32_t),
                 f);

    if (read != sizeof(int32_t))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading taxonomy record size");
        // Do not leak the file, and do not let the caller use a count that was not fully read
        fclose(f);
        *count = 0;
        return NULL;
    }

//  if (!obi_is_little_endian())    // TODO
//      *count = swap_int32_t(*count);

    if (*count < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: negative number of records");
        fclose(f);
        *count = 0;
        return NULL;
    }

    return f;
}
ecorankidx_t* read_rankidx(const char* ranks_file_name)
{
int32_t count;
FILE* ranks_file;
ecorankidx_t* ranks_index;
int32_t i;
int32_t rank_length;
char* buffer;
ranks_file = open_ecorecorddb(ranks_file_name, &count, 0);
if (ranks_file==NULL)
return NULL;
ranks_index = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * (count-1));
ranks_index->count = count;
for (i=0; i < count; i++)
{
buffer = read_ecorecord(ranks_file, &rank_length);
ranks_index->label[i] = (char*) malloc(rank_length+1);
strncpy(ranks_index->label[i], buffer, rank_length);
}
return ranks_index;
}
ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name)
{
int32_t count_taxa;
int32_t count_local_taxa;
FILE* f_taxa;
FILE* f_local_taxa;
ecotxidx_t* taxa_index;
struct ecotxnode* t;
int32_t i;
int32_t j;
f_taxa = open_ecorecorddb(taxa_file_name, &count_taxa,0);
if (f_taxa == NULL)
{
obidebug(1, "\nError reading taxonomy taxa file");
return NULL;
}
f_local_taxa = open_ecorecorddb(local_taxa_file_name, &count_local_taxa, 0);
taxa_index = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count_taxa + count_local_taxa - 1));
taxa_index->count = count_taxa + count_local_taxa;
taxa_index->buffer_size = taxa_index->count;
taxa_index->max_taxid = 0;
printf("Reading %d taxa...\n", count_taxa);
for (i=0; i<count_taxa; i++)
{
readnext_ecotaxon(f_taxa, &(taxa_index->taxon[i]));
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
taxa_index->taxon[i].parent->farest = 0;
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
}
if (count_local_taxa > 0)
printf("Reading %d local taxa...\n", count_local_taxa);
else
printf("No local taxa\n");
count_taxa = taxa_index->count;
for (; i < count_taxa; i++){
readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i]));
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
taxa_index->taxon[i].parent->farest=0;
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
}
printf("Computing longest branches...\n");
for (i=0; i < count_taxa; i++)
{
t = taxa_index->taxon+i;
if (t->farest == -1)
{
t->farest=0;
while (t->parent != t)
{
j = t->farest + 1;
if (j > t->parent->farest)
{
t->parent->farest = j;
t=t->parent;
}
else
t = taxa_index->taxon;
}
}
}
return taxa_index;
}
/**
 * @brief Reads the next name record of a binary names file into a name structure.
 *
 * The record contains the name immediately followed by the class name, and
 * the index, in the taxa index of the taxonomy, of the taxon the name refers to.
 *
 * @warning The name and class name are allocated and have to be freed by the caller.
 *
 * @param f The names file to read from.
 * @param name A pointer on the name structure to fill.
 * @param taxonomy A pointer on the taxonomy, whose taxa index must already be read.
 *
 * @returns The name pointer given as parameter.
 * @retval NULL if the end of the file was reached or if an error occurred.
 *         Nothing is left allocated in that case.
 */
econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
{
    econameformat_t* raw;
    int32_t          record_length;
    size_t           names_offset;

    raw = read_ecorecord(f, &record_length);
    if (!raw)
        return NULL;

//  if (is_big_endian())    // TODO
//  {
//      raw->is_scientificname = swap_int32_t(raw->is_scientificname);
//      raw->namelength  = swap_int32_t(raw->namelength);
//      raw->classlength = swap_int32_t(raw->classlength);
//      raw->taxid       = swap_int32_t(raw->taxid);
//  }

    // The name and class name have to fit in the record that was actually read
    names_offset = (size_t) (((char*) raw->names) - ((char*) raw));
    if ((raw->name_length < 0) ||
        (raw->class_length < 0) ||
        (record_length < 0) ||
        ((names_offset + (size_t) raw->name_length + (size_t) raw->class_length) > (size_t) record_length))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: invalid name lengths");
        return NULL;
    }

    // The taxid field of a name record is used as an index in the taxa index
    if ((raw->taxid < 0) || (raw->taxid >= taxonomy->taxa->count))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: invalid taxon index");
        return NULL;
    }

    name->is_scientific_name = raw->is_scientific_name;

    name->name = malloc((raw->name_length + 1) * sizeof(char));
    if (name->name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxon alternative name");
        return NULL;
    }
    strncpy(name->name, raw->names, raw->name_length);
    name->name[raw->name_length] = 0;

    name->class_name = malloc((raw->class_length+1) * sizeof(char));
    if (name->class_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxon name class");
        free(name->name);
        name->name = NULL;
        return NULL;
    }
    strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length);
    name->class_name[raw->class_length] = 0;

    name->taxon = taxonomy->taxa->taxon + raw->taxid;

    return name;
}
/**
 * @brief Reads a binary names file into a name index.
 *
 * @warning The returned index and the name and class name of each of its
 *          entries are allocated and have to be freed by the caller.
 *
 * @param file_name The path of the names file.
 * @param taxonomy A pointer on the taxonomy, whose taxa index must already be read.
 *
 * @returns A pointer on the name index.
 * @retval NULL if the file could not be opened or read, or if memory could not be allocated.
 */
econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
{
    int32_t       count;
    FILE*         f;
    econameidx_t* index_names;
    int32_t       i;

    f = open_ecorecorddb(file_name, &count, 0);
    if (f == NULL)
        return NULL;

    if (count < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: negative number of names");
        fclose(f);
        return NULL;
    }

    // The structure already contains room for one name
    index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * ((count > 0) ? (size_t) (count-1) : 0));
    if (index_names == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for a taxonomy name index");
        fclose(f);
        return NULL;
    }

    index_names->count = count;

    for (i=0; i < count; i++)
    {
        if (readnext_econame(f, (index_names->names)+i, taxonomy) == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError reading a name in a taxonomy names file");
            break;
        }
    }

    fclose(f);

    if (i < count)
    {
        // An error occurred: free the names read so far and the index
        while (i > 0)
        {
            i--;
            free(index_names->names[i].name);
            free(index_names->names[i].class_name);
        }
        free(index_names);
        return NULL;
    }

    return index_names;
}
/**
 * @brief bsearch() comparison function between a taxid and a taxon.
 *
 * @param ptaxid The searched taxid, stored directly in the pointer value.
 * @param ptaxon A pointer on the taxon to compare the taxid with.
 *
 * @returns The difference between the searched taxid and the taxid of the taxon.
 */
static int bcomptaxon (const void* ptaxid, const void* ptaxon)
{
    const ecotx_t* compared_taxon = (const ecotx_t*) ptaxon;
    size_t         key_value      = (size_t) ptaxid;   // The key is a taxid, not an address
    int32_t        searched_taxid = (int32_t) key_value;

    return searched_taxid - compared_taxon->taxid;
}
/////// PUBLIC /////////
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names)
{
OBIDMS_taxonomy_p tax;
char* main_taxonomy_dir_path;
char* taxonomy_path;
char* ranks_file_name;
char* taxa_file_name;
char* local_taxa_file_name;
char* alter_names_file_name;
int buffer_size;
tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
tax->ranks = NULL;
tax->taxa = NULL;
tax->names = NULL;
buffer_size = 2048; // TODO
main_taxonomy_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME);
taxonomy_path = (char*) malloc((strlen(main_taxonomy_dir_path) + strlen(taxonomy_name) + strlen(taxonomy_name) + 3)*sizeof(char));
if (sprintf(taxonomy_path, "%s/%s/%s", main_taxonomy_dir_path, taxonomy_name, taxonomy_name) < 0)
{
free(main_taxonomy_dir_path);
obi_close_taxonomy(tax);
return NULL;
}
free(main_taxonomy_dir_path);
// Read ranks
ranks_file_name = (char*) malloc(buffer_size*sizeof(char));
if (ranks_file_name == NULL)
{
free(taxonomy_path);
obi_close_taxonomy(tax);
return NULL;
}
if (snprintf(ranks_file_name, buffer_size, "%s.rdx", taxonomy_path) < 0)
{
free(taxonomy_path);
free(ranks_file_name);
obi_close_taxonomy(tax);
return NULL;
}
tax->ranks = read_rankidx(ranks_file_name);
if (tax->ranks == NULL)
{
free(ranks_file_name);
obi_close_taxonomy(tax);
return NULL;
}
free(ranks_file_name);
// Read taxa
taxa_file_name = (char*) malloc(buffer_size*sizeof(char));
if (taxa_file_name == NULL)
{
free(taxonomy_path);
obi_close_taxonomy(tax);
return NULL;
}
if (snprintf(taxa_file_name, buffer_size,"%s.tdx", taxonomy_path) < 0)
{
free(taxonomy_path);
free(taxa_file_name);
obi_close_taxonomy(tax);
return NULL;
}
local_taxa_file_name = (char*) malloc(buffer_size*sizeof(char));
if (local_taxa_file_name == NULL)
{
free(taxonomy_path);
free(taxa_file_name);
obi_close_taxonomy(tax);
return NULL;
}
if (snprintf(local_taxa_file_name, buffer_size,"%s.ldx", taxonomy_path) < 0)
{
free(taxonomy_path);
free(taxa_file_name);
free(local_taxa_file_name);
obi_close_taxonomy(tax);
return NULL;
}
tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name);
if (tax->taxa == NULL)
{
free(taxonomy_path);
free(taxa_file_name);
free(local_taxa_file_name);
obi_close_taxonomy(tax);
return NULL;
}
free(taxa_file_name);
free(local_taxa_file_name);
// Read alternative names
if (read_alternative_names)
{
alter_names_file_name = (char*) malloc(buffer_size*sizeof(char));
if (alter_names_file_name == NULL)
{
free(taxonomy_path);
obi_close_taxonomy(tax);
return NULL;
}
if (snprintf(alter_names_file_name, buffer_size,"%s.ndx", taxonomy_path) < 0)
{
free(taxonomy_path);
free(alter_names_file_name);
obi_close_taxonomy(tax);
return NULL;
}
tax->names = read_nameidx(alter_names_file_name, tax);
if (tax->names == NULL)
{
free(alter_names_file_name);
obi_close_taxonomy(tax);
return NULL;
}
free(alter_names_file_name);
}
free(taxonomy_path);
return tax;
}
/**
 * @brief Frees a taxonomy structure and its rank, name and taxa indexes.
 *
 * @param taxonomy A pointer on the taxonomy to free. Can be NULL.
 *
 * @retval 0 if the taxonomy was freed.
 * @retval 1 if the taxonomy pointer was NULL.
 */
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
{
    // TODO no closing files?
    if (taxonomy == NULL)
        return 1;

    // TODO those don't free everything (labels and names are left allocated)
    //      but mapping will replace anyway
    free(taxonomy->ranks);    // free() accepts NULL pointers
    free(taxonomy->names);
    free(taxonomy->taxa);

    free(taxonomy);

    return 0;
}
//////////////////////////////////////////////////////////////////////////
/**
 * @brief Returns the first taxon at a given rank found when going from a taxon up to the root.
 *
 * The search starts with the taxon itself and stops at the root, which is the
 * taxon that is its own parent.
 *
 * @param taxon A pointer on the taxon from which the search starts.
 * @param rankidx The index of the rank looked for.
 *
 * @returns A pointer on the taxon itself or on its closest ancestor having that rank.
 * @retval NULL if no taxon with that rank was found up to the root.
 */
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
{
    ecotx_t* node = taxon;

    for (;;)
    {
        if (node->rank == rankidx)
            return node;

        if (node->parent == node)   // root node
            return NULL;

        node = node->parent;
    }
}
/**
 * @brief Returns the taxon of a taxonomy having a given taxid.
 *
 * The taxon is looked for with bsearch(), which requires the taxa of the
 * taxonomy to be sorted by increasing taxid.
 *
 * @param taxonomy A pointer on the taxonomy.
 * @param taxid The taxid looked for.
 *
 * @returns A pointer on the taxon with that taxid.
 * @retval NULL if no taxon has that taxid.
 */
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
{
    // The taxid is passed to the comparison function in the key pointer itself
    const void* taxid_key = (const void *) ((size_t) taxid);
    int32_t     nb_taxa   = taxonomy->taxa->count;

    return (ecotx_t*) bsearch(taxid_key,
                              (const void *) taxonomy->taxa->taxon,
                              nb_taxa,
                              sizeof(ecotx_t),
                              bcomptaxon);
}
/**
 * @brief Tells whether a taxon is under the taxon with a given taxid.
 *
 * The ancestors of the taxon are checked from its parent up to the root. The
 * taxon itself is not checked. The root is recognized either by its name
 * ("root") or by the fact that it is its own parent, so that the search always
 * ends, whatever the name of the root is.
 *
 * @param taxon A pointer on the taxon.
 * @param other_taxid The taxid of the potential ancestor.
 *
 * @retval true if one of the ancestors of the taxon has the taxid other_taxid.
 * @retval false otherwise.
 */
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)
{
    ecotx_t* next_parent;

    next_parent = taxon->parent;

    while ((other_taxid != next_parent->taxid) &&
           (strcmp(next_parent->name, "root")) &&
           (next_parent->parent != next_parent))    // self-parented root not named "root"
        next_parent = next_parent->parent;

    return (other_taxid == next_parent->taxid);
}
/**
 * @brief Returns the species to which a taxon belongs.
 *
 * The index of the "species" rank is cached between calls, together with the
 * taxonomy it was computed for. It is computed again when a different,
 * non-NULL taxonomy is given. When taxonomy is NULL, the cached one is used.
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy, or NULL to use the last one given.
 *
 * @returns A pointer on the taxon itself or on its closest ancestor at the species rank.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the
 *         taxonomy, or if no taxon at that rank was found.
 */
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy  = NULL;
    static int32_t           cached_rankindex = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rankindex = rank_index("species", taxonomy->ranks);
        cached_taxonomy  = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rankindex >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rankindex);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the species associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * @brief Returns the genus to which a taxon belongs.
 *
 * The index of the "genus" rank is cached between calls, together with the
 * taxonomy it was computed for. It is computed again when a different,
 * non-NULL taxonomy is given. When taxonomy is NULL, the cached one is used.
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy, or NULL to use the last one given.
 *
 * @returns A pointer on the taxon itself or on its closest ancestor at the genus rank.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the
 *         taxonomy, or if no taxon at that rank was found.
 */
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy  = NULL;
    static int32_t           cached_rankindex = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rankindex = rank_index("genus", taxonomy->ranks);
        cached_taxonomy  = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rankindex >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rankindex);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the genus associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * @brief Returns the family to which a taxon belongs.
 *
 * The index of the "family" rank is cached between calls, together with the
 * taxonomy it was computed for. It is computed again when a different,
 * non-NULL taxonomy is given. When taxonomy is NULL, the cached one is used.
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy, or NULL to use the last one given.
 *
 * @returns A pointer on the taxon itself or on its closest ancestor at the family rank.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the
 *         taxonomy, or if no taxon at that rank was found.
 */
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy  = NULL;
    static int32_t           cached_rankindex = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rankindex = rank_index("family", taxonomy->ranks);
        cached_taxonomy  = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rankindex >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rankindex);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the family associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * @brief Returns the kingdom to which a taxon belongs.
 *
 * The index of the "kingdom" rank is cached between calls, together with the
 * taxonomy it was computed for. It is computed again when a different,
 * non-NULL taxonomy is given. When taxonomy is NULL, the cached one is used.
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy, or NULL to use the last one given.
 *
 * @returns A pointer on the taxon itself or on its closest ancestor at the kingdom rank.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the
 *         taxonomy, or if no taxon at that rank was found.
 */
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy  = NULL;
    static int32_t           cached_rankindex = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rankindex = rank_index("kingdom", taxonomy->ranks);
        cached_taxonomy  = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rankindex >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rankindex);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the kingdom associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * @brief Returns the superkingdom to which a taxon belongs.
 *
 * The index of the "superkingdom" rank is cached between calls, together with
 * the taxonomy it was computed for. It is computed again when a different,
 * non-NULL taxonomy is given. When taxonomy is NULL, the cached one is used.
 *
 * @param taxon A pointer on the taxon.
 * @param taxonomy A pointer on the taxonomy, or NULL to use the last one given.
 *
 * @returns A pointer on the taxon itself or on its closest ancestor at the superkingdom rank.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the
 *         taxonomy, or if no taxon at that rank was found.
 */
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy  = NULL;
    static int32_t           cached_rankindex = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rankindex = rank_index("superkingdom", taxonomy->ranks);
        cached_taxonomy  = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rankindex >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rankindex);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the superkingdom associated with a taxon: No taxonomy defined");
    return NULL;
}

101
src/obidms_taxonomy.h Normal file
View File

@ -0,0 +1,101 @@
/********************************************************************
* OBIDMS taxonomy header file *
********************************************************************/
/**
* @file obidms_taxonomy.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date March 2nd 2016
* @brief Header file for the functions handling the reading of binary taxonomy files.
*/
#ifndef OBIDMS_TAXONOMY_H_
#define OBIDMS_TAXONOMY_H_

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#include "obidms.h"


/**
 * @brief Layout of a taxon record as read from a binary taxa file.
 */
typedef struct {
    int32_t taxid;          /**< Taxid of the taxon. */
    int32_t rank;           /**< Rank of the taxon. */
    int32_t parent;         /**< Index of the parent taxon in the taxa index. */
    int32_t name_length;    /**< Length of the name, without terminating character. */
    char    name[1];        /**< First character of the name (name_length characters, not NULL terminated). */
} ecotxformat_t;


/**
 * @brief A taxon in memory.
 */
typedef struct ecotxnode {
    int32_t           taxid;    /**< Taxid of the taxon. */
    int32_t           rank;     /**< Rank of the taxon. */
    int32_t           farest;   /**< Length of the longest branch under the taxon, as computed when reading the taxa. */
    struct ecotxnode* parent;   /**< Pointer on the parent taxon. The root is its own parent. */
    char*             name;     /**< Name of the taxon. */
} ecotx_t;


/**
 * @brief Index of the taxa of a taxonomy.
 */
typedef struct {
    int32_t count;          /**< Number of taxa. */
    int32_t max_taxid;      /**< Greatest taxid among the taxa. */
    int32_t buffer_size;    /**< Number of taxa for which memory is allocated. */
    ecotx_t taxon[1];       /**< First element of the array of taxa (count elements). */
} ecotxidx_t;


/**
 * @brief Index of the ranks of a taxonomy.
 */
typedef struct {
    int32_t count;          /**< Number of rank labels. */
    char*   label[1];       /**< First element of the array of rank labels (count elements). */
} ecorankidx_t;


/**
 * @brief Layout of a name record as read from a binary names file.
 */
typedef struct {
    int32_t is_scientific_name;     /**< Whether the name is a scientific name. */
    int32_t name_length;            /**< Length of the name, without terminating character. */
    int32_t class_length;           /**< Length of the class name, without terminating character. */
    int32_t taxid;                  /**< Index, in the taxa index, of the taxon the name refers to. */
    char    names[1];               /**< First character of the name, immediately followed by the class name. */
} econameformat_t;


/**
 * @brief An alternative name of a taxon in memory.
 */
typedef struct {
    char*             name;                 /**< The name. */
    char*             class_name;           /**< The class of the name. */
    int32_t           is_scientific_name;   /**< Whether the name is a scientific name. */
    struct ecotxnode* taxon;                /**< Pointer on the taxon the name refers to. */
} econame_t;


/**
 * @brief Index of the alternative names of a taxonomy.
 */
typedef struct {
    int32_t   count;        /**< Number of names. */
    econame_t names[1];     /**< First element of the array of names (count elements). */
} econameidx_t;


/**
 * @brief A taxonomy.
 */
typedef struct OBIDMS_taxonomy_t {
    ecorankidx_t* ranks;    /**< Index of the ranks. */
    econameidx_t* names;    /**< Index of the alternative names, or NULL if they were not read. */
    ecotxidx_t*   taxa;     /**< Index of the taxa. */
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;


/**
 * @brief Reads a taxonomy stored as binary files in the TAXONOMY_DIR_NAME directory of a DMS.
 *
 * @param dms A pointer on the OBIDMS containing the taxonomy.
 * @param taxonomy_name The name of the taxonomy.
 * @param read_alternative_names Whether the alternative names should be read.
 *
 * @returns A pointer on the taxonomy structure.
 * @retval NULL if an error occurred.
 */
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);


/**
 * @brief Frees a taxonomy structure.
 *
 * @param taxonomy A pointer on the taxonomy.
 *
 * @retval 0 if the taxonomy was freed.
 * @retval 1 if the taxonomy pointer was NULL.
 */
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);


/**
 * @brief Returns the taxon itself or its closest ancestor having a given rank.
 *
 * @param taxon A pointer on the taxon.
 * @param rankidx The index of the rank.
 *
 * @retval NULL if no taxon with that rank was found up to the root.
 */
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);


/**
 * @brief Returns the taxon of a taxonomy having a given taxid.
 *
 * @retval NULL if no taxon has that taxid.
 */
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);


/**
 * @brief Tells whether one of the ancestors of a taxon has a given taxid.
 */
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);


/**
 * @brief Functions returning the taxon itself or its closest ancestor at the
 *        species, genus, family, kingdom or superkingdom rank.
 *
 * The taxonomy given is remembered between calls: taxonomy can be NULL to use
 * the last one given to the same function.
 *
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the
 *         taxonomy, or if no taxon at that rank was found.
 */
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);

#endif /* OBIDMS_TAXONOMY_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -25,20 +25,19 @@
#include "obierrno.h"
#include "obilittlebigman.h"
#include "obidmscolumndir.h"
#include "obiarray.h"
#include "obiblob_indexer.h"
#define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< If x is equal to 0, x takes the value 1.
*/
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names.
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
*/
#define MAXIMUM_LINE_COUNT (1000000) /**< The maximum line count for the data of a column. //TODO
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO
*/
#define FORMATTED_TIME_LENGTH (1024) /**< The length allocated for the character string containing a formatted date
#define FORMATTED_TIME_LENGTH (1024) /**< The length allocated for the character string containing a formatted date.
*/
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
*/
typedef int32_t obiversion_t; /**< Used to store the column version number
*/
@ -48,23 +47,24 @@ typedef int32_t obiversion_t; /**< Used to store the column version number
* @brief OBIDMS column header structure.
*/
typedef struct OBIDMS_column_header {
bool little_endian; /**< Endianness of the column:
* - `true` on little endian platforms
* - `false` on big endian platforms
* @see obi_is_little_endian()
*/
size_t header_size; /**< Size of the header in bytes.
*/
size_t data_size; /**< Size of the data in bytes.
*/
index_t line_count; /**< Number of lines of data allocated.
*/
index_t lines_used; /**< Number of lines of data used.
*/
index_t nb_elements_per_line; /**< Number of elements per line (default: 1).
index_t nb_elements_per_line; /**< Number of elements per line.
*/
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with ';' as separator
* (should be the column name if one element per line).
*/
OBIType_t data_type; /**< Type of the data.
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
* element from the column.
*/
OBIType_t stored_data_type; /**< Type of the data that is actually stored in the data
* part of the column.
*/
time_t creation_date; /**< Date of creation of the file.
*/
@ -76,10 +76,9 @@ typedef struct OBIDMS_column_header {
*/
char name[OBIDMS_COLUMN_MAX_NAME+1]; /**< The column name as a NULL terminated string.
*/
char array_name[ARRAY_MAX_NAME+1]; /**< If there is one, the obi_array name as a NULL terminated string.
char indexer_name[INDEXER_MAX_NAME+1]; /**< If there is one, the indexer name as a NULL terminated string.
*/
char comments[1]; /**< Comments stored as a classical zero end C string.
* The size of the comment is only limited by the header size.
char comments[COMMENTS_MAX_LENGTH+1]; /**< Comments stored as a classical zero end C string.
*/
} OBIDMS_column_header_t, *OBIDMS_column_header_p;
@ -97,20 +96,22 @@ typedef struct OBIDMS_column {
*/
OBIDMS_column_header_p header; /**< A pointer to the header of the column.
*/
OBIDMS_array_p array; /**< A pointer to the array associated with the column if there is one.
Obi_indexer_p indexer; /**< A pointer to the blob indexer associated with the column if there is one.
*/
void* data; /**< A `void` pointer to the beginning of the data.
*
* @warning Never use this member directly outside of the code of the
* low level functions of the OBIDMS.
*/
bool writable; /**< Indicates if the column is writable or not. TODO delete?
bool writable; /**< Indicates if the column is writable or not.
* - `true` the column is writable
* - `false` the column is read-only
*
* A column is writable only by its creator
* until it closes it.
*/
size_t counter; /**< Indicates by how many threads/programs (TODO) the column is used.
*/
} OBIDMS_column_t, *OBIDMS_column_p;
@ -161,7 +162,7 @@ size_t obi_get_platform_header_size();
* @brief Creates a column.
*
* The minimum data size allocated is one memory page, and the data is initialized to the NA value of the OBIType.
* If there is an array associated with the column, it is opened or created if it does not already exist.
* If there is an indexer associated with the column, it is opened or created if it does not already exist.
*
* @warning If there is one element per line, elements_names should be equal to column_name. // TODO change this condition?
*
@ -169,9 +170,10 @@ size_t obi_get_platform_header_size();
* @param column_name The name of the new column.
* @param data_type The OBIType code of the data.
* @param nb_lines The number of lines to be stored.
* @param nb_elements_per_line The number of elements per line.
* @param nb_elements_per_line The number of elements per line. // TODO talk about default values
* @param elements_names The names of the elements with ';' as separator.
* @param array_name The name of the array if there is one associated with the column.
* @param indexer_name The name of the indexer if there is one associated with the column.
* @param comments Optional comments associated with the column.
*
* @returns A pointer on the newly created column structure.
* @retval NULL if an error occurred.
@ -185,7 +187,9 @@ OBIDMS_column_p obi_create_column(OBIDMS_p dms,
index_t nb_lines,
index_t nb_elements_per_line,
const char* elements_names,
const char* array_name);
const char* indexer_name,
const char* comments
);
/**
@ -218,11 +222,11 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, const char* column_name, obiversio
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_p obi_clone_column(OBIDMS_p dms, const char* column_name, obiversion_t version_number, bool clone_data);
OBIDMS_column_p obi_clone_column(OBIDMS_p dms, OBIDMS_column_p line_selection, const char* column_name, obiversion_t version_number, bool clone_data);
/**
* @brief Closes a column.
* @brief Truncates a column to the number of lines used if it is not read-only and closes it.
*
* @param column A pointer on an OBIDMS column.
*
@ -247,7 +251,7 @@ int obi_close_column(OBIDMS_column_p column);
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_truncate_column_to_lines_used(OBIDMS_column_p column);
int obi_truncate_column(OBIDMS_column_p column);
/**
@ -264,21 +268,6 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column);
int obi_enlarge_column(OBIDMS_column_p column);
/**
* @brief Truncates a column file to the number of lines used rounded to the nearest
* greater multiple of the page size and closes it.
*
* @param column A pointer on an OBIDMS column.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_truncate_and_close_column(OBIDMS_column_p column);
/*
* @brief Sets the data in a column to the NA value of the data OBIType.
*
@ -289,7 +278,7 @@ int obi_truncate_and_close_column(OBIDMS_column_p column);
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void obi_ini_to_NA_values(OBIDMS_column_p column, index_t first_line_nb, index_t nb_lines); // TO make private?
void obi_ini_to_NA_values(OBIDMS_column_p column, index_t first_line_nb, index_t nb_lines); // TODO make private?
/**
@ -299,6 +288,8 @@ void obi_ini_to_NA_values(OBIDMS_column_p column, index_t first_line_nb, index_t
*
* @param dms A pointer on an OBIDMS.
* @param column_name The name of an OBIDMS column.
* @param version_number The version of the column from which the header should be
* retrieved (-1: latest version).
*
* @returns A pointer on the mmapped header of the column.
* @retval NULL if an error occurred.
@ -306,7 +297,7 @@ void obi_ini_to_NA_values(OBIDMS_column_p column, index_t first_line_nb, index_t
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name);
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number);
/**
@ -320,7 +311,7 @@ OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_unmap_header(OBIDMS_column_header_p header);
int obi_close_header(OBIDMS_column_header_p header);
/**
@ -330,7 +321,7 @@ int obi_unmap_header(OBIDMS_column_header_p header);
* @param element_name The name of the element.
*
* @returns The index of the element in a line of the column.
* @retval SIZE_MAX if an error occurred.
* @retval OBIIdx_NA if an error occurred. // TODO not sure if this is "clean".
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -338,9 +329,19 @@ int obi_unmap_header(OBIDMS_column_header_p header);
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
/**
// TODO doc
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb);
// TODO doc
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb);
/** TODO put in utils.c
* @brief Formats a date in a way that is easy to read.
*
* @warning The pointer returned must be freed by the caller.
*
* @param date A date.
*
* @returns The date formatted in a way that is easy to read.

View File

@ -28,27 +28,11 @@
*
**********************************************************************/
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obibool_t value)
{
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
// Set the value
*(((obibool_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
@ -59,34 +43,29 @@ int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
obibool_t obi_column_get_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
if ((line_nb+1) > (column->header)->lines_used)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBIBool_NA;
}
return *(((obibool_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
}
int obi_column_set_obibool_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obibool_t value)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
obi_column_set_obibool_with_elt_idx(column, line_nb, element_idx, value);
return 0;
return obi_column_set_obibool_with_elt_idx(column, line_nb, element_idx, value);
}
obibool_t obi_column_get_obibool_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIBool_NA;
return obi_column_get_obibool_with_elt_idx(column, line_nb, element_idx);
}

View File

@ -38,7 +38,7 @@
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, size_t element_idx, obibool_t value);
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obibool_t value);
/**
@ -54,7 +54,7 @@ int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obibool_t obi_column_get_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, size_t element_idx);
obibool_t obi_column_get_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**

View File

@ -28,27 +28,11 @@
*
**********************************************************************/
int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obichar_t value)
{
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
// Set the value
*(((obichar_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
@ -59,34 +43,28 @@ int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
obichar_t obi_column_get_obichar_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
if ((line_nb+1) > (column->header)->lines_used)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBIChar_NA;
}
return *(((obichar_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
}
int obi_column_set_obichar_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obichar_t value)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
obi_column_set_obichar_with_elt_idx(column, line_nb, element_idx, value);
return 0;
return obi_column_set_obichar_with_elt_idx(column, line_nb, element_idx, value);
}
obichar_t obi_column_get_obichar_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIChar_NA;
return obi_column_get_obichar_with_elt_idx(column, line_nb, element_idx);
}

View File

@ -49,7 +49,7 @@ int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIBool_NA the NA value of the type if an error occurred and obi_errno is set.
* @retval OBIChar_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -87,7 +87,7 @@ int obi_column_set_obichar_with_elt_name(OBIDMS_column_p column, index_t line_nb
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIBool_NA the NA value of the type if an error occurred and obi_errno is set.
* @retval OBIChar_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)

View File

@ -30,25 +30,8 @@
int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obifloat_t value)
{
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
// Set the value
*(((obifloat_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
@ -59,34 +42,29 @@ int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column, index_t line_nb
obifloat_t obi_column_get_obifloat_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
if ((line_nb+1) > (column->header)->lines_used)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBIFloat_NA;
}
return *(((obifloat_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
}
int obi_column_set_obifloat_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obifloat_t value)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
obi_column_set_obifloat_with_elt_idx(column, line_nb, element_idx, value);
return 0;
return obi_column_set_obifloat_with_elt_idx(column, line_nb, element_idx, value);
}
obifloat_t obi_column_get_obifloat_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIFloat_NA;
return obi_column_get_obifloat_with_elt_idx(column, line_nb, element_idx);
}

View File

@ -49,7 +49,7 @@ int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column, index_t line_nb
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIBool_NA the NA value of the type if an error occurred and obi_errno is set.
* @retval OBIFloat_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -87,7 +87,7 @@ int obi_column_set_obifloat_with_elt_name(OBIDMS_column_p column, index_t line_n
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIBool_NA the NA value of the type if an error occurred and obi_errno is set.
* @retval OBIFloat_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)

51
src/obidmscolumn_idx.c Normal file
View File

@ -0,0 +1,51 @@
/****************************************************************************
* OBIDMS_column_idx functions *
****************************************************************************/
/**
* @file obidmscolumn_idx.c
* @author Celine Mercier
* @date February 14th 2016
* @brief Functions handling OBIColumns containing data with the index_t type.
*/
#include <stdlib.h>
#include <stdio.h>
#include "obidmscolumn.h"
#include "obitypes.h"
#include "obierrno.h"
#include "obidebug.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
*
**********************************************************************/
/*
 * Writes an index_t value in a column holding one index per line.
 *
 * Returns 0 on success, or -1 when obi_column_prepare_to_set_value() reports a failure
 * (in which case nothing is written).
 */
int obi_column_set_index(OBIDMS_column_p column, index_t line_nb, index_t value)
{
	index_t* cells;

	if (obi_column_prepare_to_set_value(column, line_nb) < 0)
		return -1;

	// column->data is read only once the prepare step is done
	// (presumably it may enlarge/remap the column -- to be confirmed)
	cells = (index_t*) (column->data);
	cells[line_nb] = value;

	return 0;
}
/*
 * Reads the index_t value stored at a given line of a column holding one index per line.
 *
 * Returns OBIIdx_NA when obi_column_prepare_to_get_value() reports a failure.
 */
index_t obi_column_get_index(OBIDMS_column_p column, index_t line_nb)
{
	const index_t* cells;

	if (obi_column_prepare_to_get_value(column, line_nb) < 0)
		return OBIIdx_NA;

	cells = (index_t*) (column->data);

	return cells[line_nb];
}

31
src/obidmscolumn_idx.h Normal file
View File

@ -0,0 +1,31 @@
/****************************************************************************
* OBIDMS_column_idx header file *
****************************************************************************/
/**
* @file obidmscolumn_idx.h
* @author Celine Mercier
* @date February 14th 2016
* @brief Header file for the functions handling OBIColumns containing data with the OBIType OBI_IDX.
*/
#ifndef OBIDMSCOLUMN_IDX_H_
#define OBIDMSCOLUMN_IDX_H_
#include <stdlib.h>
#include <stdio.h>
#include "obidmscolumn.h"
#include "obitypes.h"
/**
 * @brief Sets a value in an OBIDMS column containing data with the index_t type.
 *
 * The value is written at offset line_nb of the column data: no element index is used,
 * i.e. the implementation handles one index per line.
 *
 * @param column A pointer on an OBIDMS column.
 * @param line_nb The number of the line where the value should be set.
 * @param value The index that should be set.
 *
 * @returns An integer value indicating the success of the operation.
 * @retval 0 on success.
 * @retval -1 if obi_column_prepare_to_set_value() failed (nothing is written in that case).
 */
int obi_column_set_index(OBIDMS_column_p column, index_t line_nb, index_t value);
/**
 * @brief Recovers a value in an OBIDMS column containing data with the index_t type.
 *
 * The value is read at offset line_nb of the column data: no element index is used.
 *
 * @param column A pointer on an OBIDMS column.
 * @param line_nb The number of the line where the value should be recovered.
 *
 * @returns The recovered index.
 * @retval OBIIdx_NA if obi_column_prepare_to_get_value() failed.
 */
index_t obi_column_get_index(OBIDMS_column_p column, index_t line_nb);
#endif /* OBIDMSCOLUMN_IDX_H_ */

View File

@ -30,25 +30,8 @@
int obi_column_set_obiint_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obiint_t value)
{
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
// Set the value
*(((obiint_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
@ -59,34 +42,29 @@ int obi_column_set_obiint_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
obiint_t obi_column_get_obiint_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
if ((line_nb+1) > (column->header)->lines_used)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBIInt_NA;
}
return *(((obiint_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
}
int obi_column_set_obiint_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obiint_t value)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
obi_column_set_obiint_with_elt_idx(column, line_nb, element_idx, value);
return 0;
return obi_column_set_obiint_with_elt_idx(column, line_nb, element_idx, value);
}
obiint_t obi_column_get_obiint_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIInt_NA;
return obi_column_get_obiint_with_elt_idx(column, line_nb, element_idx);
}

86
src/obidmscolumn_seq.c Normal file
View File

@ -0,0 +1,86 @@
/****************************************************************************
* OBIDMS_column_seq functions *
****************************************************************************/
/**
* @file obidmscolumn_seq.c
* @author Celine Mercier
* @date November 18th 2015
* @brief Functions handling OBIColumns containing data in the form of indices referring to DNA sequences.
*/
#include <stdlib.h>
#include <stdio.h>
#include "obidmscolumn.h"
#include "obitypes.h"
#include "obierrno.h"
#include "obidebug.h"
#include "dna_seq_indexer.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
*
**********************************************************************/
/*
 * Stores a DNA sequence in a column: the sequence is handed to the column's indexer,
 * and the index returned by the indexer is what gets written in the column data,
 * at element element_idx of line line_nb.
 *
 * Returns 0 on success, -1 if the prepare step or the indexing fails.
 */
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value)
{
	index_t  seq_idx;
	index_t* line_start;

	if (obi_column_prepare_to_set_value(column, line_nb) < 0)
		return -1;

	// Register the sequence in the indexer; -1 signals a failure
	seq_idx = obi_index_dna_seq(column->indexer, value);
	if (seq_idx == -1)
		return -1;

	// Write the index of the sequence in the column
	line_start = ((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line));
	line_start[element_idx] = seq_idx;

	return 0;
}
/*
 * Recovers a DNA sequence from a column: the index stored at element element_idx of
 * line line_nb is read, then resolved through the column's indexer.
 *
 * Returns OBISeq_NA if the prepare step fails or if the stored index is OBIIdx_NA.
 */
char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
	index_t* line_start;
	index_t  seq_idx;

	if (obi_column_prepare_to_get_value(column, line_nb) < 0)
		return OBISeq_NA;

	line_start = ((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line));
	seq_idx    = line_start[element_idx];

	// Only a non-NA index can be resolved by the indexer
	if (seq_idx != OBIIdx_NA)
		return obi_retrieve_dna_seq(column->indexer, seq_idx);

	return OBISeq_NA;
}
/*
 * Same as obi_column_set_obiseq_with_elt_idx(), the element being designated by its name.
 *
 * Returns -1 if the name can not be resolved to an element index, otherwise the
 * return value of obi_column_set_obiseq_with_elt_idx().
 */
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value)
{
	index_t elt_pos;

	elt_pos = obi_column_get_element_index_from_name(column, element_name);

	if (elt_pos != OBIIdx_NA)
		return obi_column_set_obiseq_with_elt_idx(column, line_nb, elt_pos, value);

	return -1;
}
/*
 * Same as obi_column_get_obiseq_with_elt_idx(), the element being designated by its name.
 *
 * Returns OBISeq_NA if the name can not be resolved to an element index, otherwise the
 * return value of obi_column_get_obiseq_with_elt_idx().
 */
char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
	index_t elt_pos;

	elt_pos = obi_column_get_element_index_from_name(column, element_name);

	if (elt_pos != OBIIdx_NA)
		return obi_column_get_obiseq_with_elt_idx(column, line_nb, elt_pos);

	return OBISeq_NA;
}

102
src/obidmscolumn_seq.h Normal file
View File

@ -0,0 +1,102 @@
/****************************************************************************
* OBIDMS_column_seq header file *
****************************************************************************/
/**
* @file obidmscolumn_seq.h
* @author Celine Mercier
* @date November 18th 2015
* @brief Header file for the functions handling OBIColumns containing data in the form of indices referring to DNA sequences.
*/
#ifndef OBIDMSCOLUMN_SEQ_H_
#define OBIDMSCOLUMN_SEQ_H_
#include <stdlib.h>
#include <stdio.h>
#include "obidmscolumn.h"
#include "obitypes.h"
#include "obiview.h"
/**
 * @brief Sets a value in an OBIDMS column containing data in the form of indices referring
 * to DNA sequences handled by an indexer, and using the index of the element in the column's line.
 *
 * The sequence is first added to the indexer associated with the column (obi_index_dna_seq()),
 * and the index returned by the indexer is what is stored in the column data.
 *
 * @warning Pointers returned by obi_open_column() don't allow writing.
 *
 * @param column A pointer as returned by obi_create_column() or obi_clone_column().
 * @param line_nb The number of the line where the value should be set.
 * @param element_idx The index of the element that should be set in the line.
 * @param value The value that should be set.
 *
 * @returns An integer value indicating the success of the operation.
 * @retval 0 on success.
 * @retval -1 if an error occurred (obi_column_prepare_to_set_value() failed, or the
 *         indexer returned -1 for the sequence).
 *
 * @since November 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value);
/**
 * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
 * to DNA sequences handled by an indexer, and using the index of the element in the column's line.
 *
 * The index stored in the column is read, then resolved by the indexer associated with
 * the column (obi_retrieve_dna_seq()).
 *
 * @param column A pointer as returned by obi_create_column().
 * @param line_nb The number of the line where the value should be recovered.
 * @param element_idx The index of the element that should be recovered in the line.
 *
 * @returns The recovered value.
 *          NOTE(review): ownership of the returned pointer is decided by obi_retrieve_dna_seq();
 *          confirm whether the caller has to free it.
 * @retval OBISeq_NA the NA value of the type if obi_column_prepare_to_get_value() failed
 *         (obi_errno is presumably set by it -- to be confirmed), or if the index stored
 *         in the column is OBIIdx_NA.
 *
 * @since November 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
 * @brief Sets a value in an OBIDMS column containing data in the form of indices referring
 * to DNA sequences handled by an indexer, using the name of the element in the line.
 *
 * The element name is resolved with obi_column_get_element_index_from_name(), then the
 * value is set with obi_column_set_obiseq_with_elt_idx().
 *
 * @warning Pointers returned by obi_open_column() don't allow writing.
 *
 * @param column A pointer as returned by obi_create_column() or obi_clone_column().
 * @param line_nb The number of the line where the value should be set.
 * @param element_name The name of the element that should be set in the line.
 * @param value The value that should be set.
 *
 * @returns An integer value indicating the success of the operation.
 * @retval 0 on success.
 * @retval -1 if an error occurred (the element name could not be resolved, or
 *         obi_column_set_obiseq_with_elt_idx() failed).
 *
 * @since November 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value);
/**
 * @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
 * to DNA sequences handled by an indexer, using the name of the element in the line.
 *
 * The element name is resolved with obi_column_get_element_index_from_name(), then the
 * value is recovered with obi_column_get_obiseq_with_elt_idx().
 *
 * @param column A pointer as returned by obi_create_column() or obi_clone_column().
 * @param line_nb The number of the line where the value should be recovered.
 * @param element_name The name of the element that should be recovered in the line.
 *
 * @returns The recovered value.
 * @retval OBISeq_NA the NA value of the type if the element name could not be resolved,
 *         or if obi_column_get_obiseq_with_elt_idx() returned it.
 *
 * @since November 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
#endif /* OBIDMSCOLUMN_SEQ_H_ */

View File

@ -17,7 +17,7 @@
#include "obitypes.h"
#include "obierrno.h"
#include "obidebug.h"
#include "obiarray.h"
#include "char_str_indexer.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
@ -29,46 +29,21 @@
*
**********************************************************************/
int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value)
int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value)
{
byte_t* value_b;
index_t idx;
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
// Encode the value on a byte array with a header
value_b = obi_str_to_obibytes(value);
if (value_b == NULL)
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
// Add in the obiarray
idx = obi_array_add(column->array, value_b);
// Add the value in the indexer
idx = obi_index_char_str(column->indexer, value);
if (idx == -1)
return -1;
// Add the value's index in the column
*(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = idx;
free(value_b);
return 0;
}
@ -76,45 +51,36 @@ int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb,
/*
 * Recovers a character string from a column: the index stored at element element_idx of
 * line line_nb is read, then resolved through the column's indexer.
 *
 * Returns OBIStr_NA (the NA value of the character string type) if
 * obi_column_prepare_to_get_value() fails or if the stored index is OBIIdx_NA.
 */
const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
	index_t idx;

	// The NA value returned here must be the one of the character string type
	// (OBIStr_NA, not OBISeq_NA), like on every other NA path of the obistr getters
	if (obi_column_prepare_to_get_value(column, line_nb) < 0)
		return OBIStr_NA;

	idx = *(((index_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);

	// Check NA
	if (idx == OBIIdx_NA)
		return OBIStr_NA;

	return obi_retrieve_char_str(column->indexer, idx);
}
int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, char* value)
int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
if (obi_column_set_obistr_with_elt_idx(column, line_nb, element_idx, value) < 0)
return -1;
return 0;
return obi_column_set_obistr_with_elt_idx(column, line_nb, element_idx, value);
}
const char* obi_column_get_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
index_t element_idx;
element_idx = obi_column_get_element_index_from_name(column, element_name);
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return "\0";
return OBIStr_NA;
return obi_column_get_obistr_with_elt_idx(column, line_nb, element_idx);
}

View File

@ -19,11 +19,12 @@
#include "obidmscolumn.h"
#include "obitypes.h"
#include "obiview.h"
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to character strings in an obiarray, using the index of the element in the line.
* to character strings handled by an indexer, using the index of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
@ -39,12 +40,12 @@
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, char* value);
int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to character strings in an obiarray, using the index of the element in the line.
* to character strings handled by an indexer, using the index of the element in the line.
*
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
@ -61,7 +62,7 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to character strings in an obiarray, using the name of the element in the line.
* to character strings handled by an indexer, using the name of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
@ -77,12 +78,12 @@ const char* obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column, index_t l
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, char* value);
int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to character strings in an obiarray, using the name of the element in the line.
* to character strings handled by an indexer, using the name of the element in the line.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
@ -97,5 +98,5 @@ int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb,
const char* obi_column_get_obistr_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
#endif /* OBIDMSCOLUMN_IDX_H_ */
#endif /* OBIDMSCOLUMN_STR_H_ */

View File

@ -20,7 +20,7 @@
#include "obidmscolumndir.h"
#include "obidms.h"
#include "private_at_functions.h"
#include "utils.h"
#include "obierrno.h"
#include "obidebug.h"
@ -65,7 +65,8 @@ static char* build_column_directory_name(const char* column_name)
char* column_directory_name;
// Build the database directory name
if (asprintf(&column_directory_name, "%s.obicol", column_name) < 0)
// Allocate room for the column name, the ".obicol" suffix (7 characters) and the terminating '\0'
column_directory_name = (char*) malloc((strlen(column_name) + 8)*sizeof(char));
if (column_directory_name == NULL)
{
	// Without this check, sprintf() below would write through a NULL pointer on allocation failure
	obi_set_errno(OBICOLDIR_MEMORY_ERROR);
	obidebug(1, "\nError allocating the memory for a column directory name");
	return NULL;
}
if (sprintf(column_directory_name, "%s.obicol", column_name) < 0)
{
obi_set_errno(OBICOLDIR_MEMORY_ERROR);
obidebug(1, "\nError building a column directory name");
@ -97,25 +98,14 @@ int obi_column_directory_exists(OBIDMS_p dms, const char* column_name)
char* column_directory_name;
char* full_path;
int check_dir;
int dms_file_descriptor;
// Build and check the directory name
column_directory_name = build_column_directory_name(column_name);
if (column_directory_name == NULL)
return -1;
// Get the file descriptor for the dms
dms_file_descriptor = dirfd(dms->directory);
if (dms_file_descriptor < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError getting a file descriptor for an OBIDMS directory");
free(column_directory_name);
return -1;
}
// Get the full path for the column directory
full_path = get_full_path(dms_file_descriptor, column_directory_name);
full_path = obi_dms_get_full_path(dms, column_directory_name);
if (full_path == NULL)
{
obi_set_errno(OBICOLDIR_UNKNOWN_ERROR);
@ -139,7 +129,6 @@ int obi_column_directory_exists(OBIDMS_p dms, const char* column_name)
OBIDMS_column_directory_p obi_create_column_directory(OBIDMS_p dms, const char* column_name)
{
char* column_directory_name;
int dms_file_descriptor;
// Build and check the directory name
column_directory_name = build_column_directory_name(column_name);
@ -149,18 +138,8 @@ OBIDMS_column_directory_p obi_create_column_directory(OBIDMS_p dms, const char*
return NULL;
}
// Get the file descriptor for the dms
dms_file_descriptor = dirfd(dms->directory);
if (dms_file_descriptor < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError getting a file descriptor for an OBIDMS directory");
free(column_directory_name);
return NULL;
}
// Try to create the directory
if (mkdirat(dms_file_descriptor, column_directory_name, 00777) < 0)
if (mkdirat(dms->dir_fd, column_directory_name, 00777) < 0)
{
if (errno == EEXIST)
obi_set_errno(OBICOLDIR_EXIST_ERROR);
@ -182,7 +161,6 @@ OBIDMS_column_directory_p obi_open_column_directory(OBIDMS_p dms, const char* co
OBIDMS_column_directory_p column_directory;
char* column_directory_name;
DIR* directory;
int dms_file_descriptor;
column_directory = NULL;
@ -191,18 +169,8 @@ OBIDMS_column_directory_p obi_open_column_directory(OBIDMS_p dms, const char* co
if (column_directory_name == NULL)
return NULL;
// Get the file descriptor for the dms
dms_file_descriptor = dirfd(dms->directory);
if (dms_file_descriptor < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError getting a file descriptor for an OBIDMS directory");
free(column_directory_name);
return NULL;
}
// Try to open the column directory
directory = private_opendirat(dms_file_descriptor, column_directory_name);
directory = opendir_in_dms(dms, column_directory_name);
if (directory == NULL) {
switch (errno)
{
@ -241,6 +209,15 @@ OBIDMS_column_directory_p obi_open_column_directory(OBIDMS_p dms, const char* co
strcpy(column_directory->column_name, column_name);
column_directory->directory = directory;
// Store the file descriptor associated with the opened column directory
column_directory->dir_fd = dirfd(directory);
if (column_directory->dir_fd < 0)
{
	// NOTE(review): the error code is kept as it was; OBICOLDIR_UNKNOWN_ERROR may describe this failure better
	obi_set_errno(OBICOLDIR_MEMORY_ERROR);
	obidebug(1, "\nError getting the file descriptor of an OBIDMS column directory");
	// Release everything acquired so far, including the directory stream
	closedir(directory);
	free(column_directory_name);
	free(column_directory);
	// Return here: falling through would free column_directory_name a second time
	// and hand the already freed structure back to the caller
	return NULL;
}
free(column_directory_name);
return column_directory;

View File

@ -21,7 +21,7 @@
#include "obidms.h"
#define OBIDMS_COLUMN_MAX_NAME (2048) /**< The maximum length of an OBIDMS column name.
#define OBIDMS_COLUMN_MAX_NAME (1024) /**< The maximum length of an OBIDMS column name.
*/
@ -41,7 +41,10 @@ typedef struct OBIDMS_column_directory {
* containing the column.
*/
DIR* directory; /**< A directory entry usable to
* refer and scan the database directory.
* refer and scan the column directory.
*/
int dir_fd; /**< The file descriptor of the directory entry
* usable to refer and scan the column directory.
*/
} OBIDMS_column_directory_t, *OBIDMS_column_directory_p;

View File

@ -65,25 +65,25 @@ extern int obi_errno;
*/
#define OBIDMS_ACCESS_ERROR (6) /**< Permission error trying to access the database
*/
#define OBICOLDIR_EXIST_ERROR (7) /**< Trying to create an OBIDMS column directory with a name
* that corresponds to an existing one
*/
#define OBICOLDIR_NOT_EXIST_ERROR (8) /**< Trying to open a non-existing OBIDMS column directory
*/
#define OBICOLDIR_LONG_NAME_ERROR (9) /**< The specified OBIDMS column directory name is too long
*/
#define OBICOLDIR_MEMORY_ERROR (10) /**< A memory error occurred during allocation while handling
* an OBIDMS column directory
*/
#define OBICOLDIR_UNKNOWN_ERROR (11) /**< Undetermined error while handling an OBIDMS column directory
*/
#define OBICOLDIR_ACCESS_ERROR (12) /**< Permission error trying to access an OBIDSM column directory
*/
#define OBICOL_BAD_ENDIAN_ERROR (13) /**< The opened data structure does not corresponds
#define OBIDMS_BAD_ENDIAN_ERROR (7) /**< The opened database does not correspond
* to the endianness of the platform.
*/
#define OBICOLDIR_EXIST_ERROR (8) /**< Trying to create an OBIDMS column directory with a name
* that corresponds to an existing one
*/
#define OBICOLDIR_NOT_EXIST_ERROR (9) /**< Trying to open a non-existing OBIDMS column directory
*/
#define OBICOLDIR_LONG_NAME_ERROR (10) /**< The specified OBIDMS column directory name is too long
*/
#define OBICOLDIR_MEMORY_ERROR (11) /**< A memory error occurred during allocation while handling
* an OBIDMS column directory
*/
#define OBICOLDIR_UNKNOWN_ERROR (12) /**< Undetermined error while handling an OBIDMS column directory
*/
#define OBICOLDIR_ACCESS_ERROR (13) /**< Permission error trying to access an OBIDMS column directory
*/
#define OBICOL_EXIST_ERROR (14) /**< Trying to create an OBIDMS column with a name
* that corresponds to an existing one
*/
@ -98,7 +98,21 @@ extern int obi_errno;
*/
#define OBICOL_ACCESS_ERROR (19) /**< Permission error trying to access an OBIDMS column
*/
#define OBI_ARRAY_ERROR (20) /** Error while handling an array
#define OBI_AVL_ERROR (20) /**< Error while handling an AVL tree // TODO delete and just keep OBI_INDEXER_ERROR?
*/
#define OBIVIEW_ERROR (21) /**< Error while handling an OBIView
*/
#define OBI_TAXONOMY_ERROR (22) /**< Error while handling binary taxonomy files
*/
#define OBI_MALLOC_ERROR (23) /**< Error while allocating memory
*/
#define OBI_ENCODE_ERROR (24) /**< Error while encoding a value
*/
#define OBI_DECODE_ERROR (25) /**< Error while decoding a value
*/
#define OBI_UTILS_ERROR (26) /**< Error in a utils function
*/
#define OBI_INDEXER_ERROR (27) /**< Error handling a blob indexer
*/
/**@}*/

View File

@ -40,6 +40,8 @@ size_t obi_sizeof(OBIType_t type)
case OBI_CHAR: size = sizeof(obichar_t);
break;
case OBI_STR: // fallthrough
case OBI_SEQ: // fallthrough
case OBI_IDX: size = sizeof(index_t);
break;
@ -90,6 +92,12 @@ char* name_data_type(int data_type)
case OBI_CHAR: name = strdup("OBI_CHAR");
break;
case OBI_STR: name = strdup("OBI_STR");
break;
case OBI_SEQ: name = strdup("OBI_SEQ");
break;
case OBI_IDX: name = strdup("OBI_IDX");
break;
}

View File

@ -23,6 +23,8 @@
#define OBIFloat_NA (float_NA()) /**< NA value for the type OBI_FLOAT */
#define OBIChar_NA (0) /**< NA value for the type OBI_CHAR */
// TODO not sure about this one as it can be impossible to distinguish from uninitialized values
#define OBISeq_NA ("\0") // TODO
#define OBIStr_NA ("\0") // TODO
/**
@ -32,7 +34,7 @@ typedef enum OBIBool {
FALSE = 0,
TRUE = 1,
OBIBool_NA = 2
} obibool_t, *obibool_p; /**< a boolean true/false value */
} obibool_t, *obibool_p; /**< a boolean true/false value */ // TODO check name convention?
/**
@ -44,7 +46,9 @@ typedef enum OBIType {
OBI_FLOAT, /**< a floating value (C type : double) */
OBI_BOOL, /**< a boolean true/false value, see obibool_t enum */
OBI_CHAR, /**< a character (C type : char) */
OBI_IDX /**< an index in a data structure (C type : int64_t) */
OBI_STR, /**< an index in a data structure (C type : int64_t) referring to a character string */
OBI_SEQ, /**< an index in a data structure (C type : int64_t) referring to a DNA sequence */
OBI_IDX /**< an index referring to a line in another column (C type : int64_t) */ // TODO delete?
} OBIType_t, *OBIType_p;
@ -52,6 +56,11 @@ typedef int64_t index_t;
typedef int32_t obiint_t;
typedef double obifloat_t;
typedef char obichar_t;
// TODO same for obistr_t and obiseq_t ?
typedef char byte_t; /**< Defining byte type.
*/
/**

1444
src/obiview.c Normal file

File diff suppressed because it is too large Load Diff

640
src/obiview.h Normal file
View File

@ -0,0 +1,640 @@
/********************************************************************
* Obiview header file *
********************************************************************/
/**
* @file obiview.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date 16 December 2015
* @brief Header file for the OBIDMS view functions and structures.
*/
#ifndef OBIVIEW_H_
#define OBIVIEW_H_
#include <stdlib.h>
#include <errno.h>
#include <stdio.h>
#include <stdbool.h>
#include <time.h>
#include <math.h>
#include "obidms.h"
#include "obidmscolumn.h"
#include "obierrno.h"
#define OBIVIEW_NAME_MAX_LENGTH (1000) /**< The maximum length of an OBIDMS view name.
*/
#define OBIVIEW_COMMENTS_MAX_LENGTH (10000) /**< The maximum length of the comments of a view
                                             *   (sizes the 'comments' buffers of the view structures).
                                             */
#define OBIVIEW_FILE_NAME "obiviews" /**< Presumably the name of the file where views are stored (TODO confirm).
                                      */
#define VIEW_TYPE_MAX_NAME (1024) /**< The maximum length of a view type name
                                   *   (sizes the 'view_type' buffers of the view structures).
                                   */
#define VIEW_TYPE_NUC_SEQS "NUC_SEQS_VIEW" /**< View type name; presumably identifies views of nucleotide sequences (TODO confirm).
                                            */
#define NUC_SEQUENCE_COLUMN "NUC_SEQ" /**< Presumably the name of the column holding the nucleotide sequences (TODO confirm).
                                       */
#define NUC_SEQUENCE_INDEXER "NUC_SEQ_INDEXER" /**< Presumably the name of the indexer associated with NUC_SEQUENCE_COLUMN (TODO confirm).
                                                */
#define ID_COLUMN "ID" /**< Presumably the name of the column holding the sequence identifiers (TODO confirm).
                        */
#define ID_INDEXER "ID_INDEXER" /**< Presumably the name of the indexer associated with ID_COLUMN (TODO confirm).
                                 */
#define DESCRIPTION_COLUMN "DESCRIPTION" /**< Presumably the name of the column holding the sequence descriptions (TODO confirm).
                                          */
#define DESCRIPTION_INDEXER "DESCRIPTION_INDEXER" /**< Presumably the name of the indexer associated with DESCRIPTION_COLUMN (TODO confirm).
                                                   */
#define LINES_COLUMN_NAME "LINES" /**< Presumably the name of the column storing line selections (TODO confirm).
                                   */
/**
* @brief Structure referring to a column by its name and its version.
*/
typedef struct Column_reference {
char column_name[OBIDMS_COLUMN_MAX_NAME+1]; /**< Name of the column.
*/
obiversion_t version; /**< Version of the column.
*/
} Column_reference_t, *Column_reference_p;
/**
* @brief Structure describing a view, holding pointers on the OBIDMS and on the columns it refers to.
*
* NOTE(review): the field descriptions marked "presumably" are inferred from the field names and types
* only and should be confirmed against obiview.c.
*/
typedef struct Obiview {
OBIDMS_p dms; /**< Pointer on an OBIDMS, presumably the one the view belongs to.
               */
bool read_only; /**< Presumably whether the view can only be read.
                 */
OBIDMS_column_p line_selection; /**< Pointer on a column, presumably the one containing the line selection of the view.
                                 */
OBIDMS_column_p new_line_selection; /**< Pointer on a column, presumably the line selection being built (TODO confirm).
                                     */
index_t line_count; /**< Presumably the number of lines in the view.
                     */
int column_count; /**< Presumably the number of columns in the view.
                   */
OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS]; /**< Array of MAX_NB_OPENED_COLUMNS column pointers, presumably the columns of the view.
                                                 */
char name[OBIVIEW_NAME_MAX_LENGTH+1]; /**< Name of the view (at most OBIVIEW_NAME_MAX_LENGTH characters).
                                       */
char created_from[OBIVIEW_NAME_MAX_LENGTH+1]; /**< Presumably the name of the view this one was cloned from.
                                               */
int view_number; /**< Presumably the number identifying the view (TODO confirm).
                  */
char view_type[VIEW_TYPE_MAX_NAME+1]; /**< Type name of the view (at most VIEW_TYPE_MAX_NAME characters).
                                       */
char comments[OBIVIEW_COMMENTS_MAX_LENGTH+1]; /**< Comments (at most OBIVIEW_COMMENTS_MAX_LENGTH characters).
                                               */
} Obiview_t, *Obiview_p;
/**
* @brief Structure holding the informations about a view, with its columns referred to by name and version
* (Column_reference_t) instead of by pointer.
*
* NOTE(review): presumably the form under which a view is stored; the field descriptions marked
* "presumably" are inferred from the field names and types only and should be confirmed against obiview.c.
*/
typedef struct Obiview_infos {
int view_number; /**< Presumably the number identifying the view (TODO confirm).
                  */
int column_count; /**< Presumably the number of columns in the view.
                   */
index_t line_count; /**< Presumably the number of lines in the view.
                     */
char name[OBIVIEW_NAME_MAX_LENGTH+1]; /**< Name of the view (at most OBIVIEW_NAME_MAX_LENGTH characters).
                                       */
char created_from[OBIVIEW_NAME_MAX_LENGTH+1]; /**< Presumably the name of the view this one was cloned from.
                                               */
time_t creation_date; /**< Presumably the date at which the view was created.
                       */
bool all_lines; /**< Presumably whether the view includes all the lines, i.e. has no line selection (TODO confirm).
                 */
Column_reference_t line_selection; /**< Reference (name and version) to a column, presumably the line selection of the view.
                                    */
Column_reference_t column_references[MAX_NB_OPENED_COLUMNS]; /**< Array of MAX_NB_OPENED_COLUMNS references (name and version), presumably to the columns of the view.
                                                              */
char view_type[VIEW_TYPE_MAX_NAME+1]; /**< Type name of the view (at most VIEW_TYPE_MAX_NAME characters).
                                       */
char comments[OBIVIEW_COMMENTS_MAX_LENGTH+1]; /**< Comments (at most OBIVIEW_COMMENTS_MAX_LENGTH characters).
                                               */
} Obiview_infos_t, *Obiview_infos_p;
/**
* @brief Structure holding two sizes and a view count, presumably the header of the file
* storing the views (TODO confirm against obiview.c).
*/
typedef struct Obiviews_header {
size_t header_size; /**< Presumably the size of the header (unit to be confirmed).
                     */
size_t views_size; /**< Presumably the size of the stored views (unit to be confirmed).
                    */
int view_count; /**< Presumably the number of views stored.
                 */
} Obiviews_header_t, *Obiviews_header_p;
/**
* @brief Structure grouping a pointer on an Obiviews_header_t and a pointer on Obiview_infos_t data.
*
* A pointer on this structure is returned by obi_read_views() and accepted by obi_unmap_read_views().
*/
typedef struct Obiviews_infos_all {
Obiviews_header_p header; /**< Pointer on the header.
                           */
Obiview_infos_p view_infos; /**< Pointer on the view informations, presumably an array of view_count elements (TODO confirm).
                             */
} Obiviews_infos_all_t, *Obiviews_infos_all_p;
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments);
Obiview_p obi_new_view(OBIDMS_p dms, const char* view_name, Obiview_p view_to_clone, index_t* line_selection, const char* comments);
Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments);
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const char* view_name, const char* view_to_clone_name, index_t* line_selection, const char* comments);
Obiview_p obi_open_view(OBIDMS_p dms, const char* view_name);
Obiviews_infos_all_p obi_read_views(OBIDMS_p dms);
int obi_unmap_read_views(Obiviews_infos_all_p views);
int obi_view_add_column(Obiview_p view,
const char* column_name,
obiversion_t version_number,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
const char* elements_names,
const char* indexer_name,
const char* comments,
bool create);
int obi_view_delete_column(Obiview_p view, const char* column_name);
int obi_select_line(Obiview_p view, index_t line_nb);
int obi_select_lines(Obiview_p view, index_t* line_nbs);
int obi_view_update_lines(Obiview_p view, index_t line_count);
OBIDMS_column_p obi_view_clone_column(Obiview_p view, const char* column_name);
OBIDMS_column_p obi_view_get_column(Obiview_p view, const char* column_name);
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const char* column_name);
int obi_save_view(Obiview_p view);
int obi_close_view(Obiview_p view);
int obi_save_and_close_view(Obiview_p view);
int obi_column_set_obibool_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, obibool_t value);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_BOOL, using the index of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obibool_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, obibool_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_BOOL.
*
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIBool_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obibool_t obi_column_get_obibool_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_BOOL,
* using the name of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obibool_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, obibool_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_BOOL,
* using the name of the element in the line.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIBool_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obibool_t obi_column_get_obibool_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_CHAR, using the index of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obichar_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, obichar_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_CHAR.
*
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIChar_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obichar_t obi_column_get_obichar_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_CHAR,
* using the name of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obichar_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, obichar_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_CHAR,
* using the name of the element in the line.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIChar_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obichar_t obi_column_get_obichar_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_FLOAT, using the index of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obifloat_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, obifloat_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_FLOAT.
*
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIFloat_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obifloat_t obi_column_get_obifloat_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_FLOAT,
* using the name of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obifloat_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, obifloat_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_FLOAT,
* using the name of the element in the line.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIFloat_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obifloat_t obi_column_get_obifloat_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_INT, using the index of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiint_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, obiint_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_INT.
*
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIInt_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obiint_t obi_column_get_obiint_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_INT,
* using the name of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, obiint_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_INT,
* using the name of the element in the line.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval OBIInt_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to DNA sequences handled by an indexer, using the index of the element in the line.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to DNA sequences handled by an indexer, using the index of the element in the line.
*
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval '\0' the NA value of the type if an error occurred and obi_errno is set.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_column_get_obiseq_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to DNA sequences handled by an indexer, using the name of the element in the line,
* in the context of a view.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param view The view in whose context the column is written
* (NOTE(review): confirm how the view affects the resolution of line_nb).
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiseq_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to DNA sequences handled by an indexer, using the name of the element in the line,
* in the context of a view.
*
* @param view The view in whose context the column is read
* (NOTE(review): confirm how the view affects the resolution of line_nb).
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value.
* @retval '\0' the NA value of the type if an error occurred and obi_errno is set.
*
* @note NOTE(review): the return type is a non-const pointer; confirm the exact NA value
* returned on error and whether the caller has to free the returned sequence.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_column_get_obiseq_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to character strings handled by an indexer, using the index of the element in the line,
* in the context of a view.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param view The view in whose context the column is written
* (NOTE(review): confirm how the view affects the resolution of line_nb).
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obistr_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx, const char* value);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to character strings handled by an indexer, using the index of the element in the line,
* in the context of a view.
*
* @param view The view in whose context the column is read
* (NOTE(review): confirm how the view affects the resolution of line_nb).
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @returns The recovered value, as a pointer to a constant character string.
* @retval '\0' the NA value of the type if an error occurred and obi_errno is set.
*
* @note NOTE(review): the return type is a pointer; confirm the exact NA value returned on error.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const char* obi_column_get_obistr_with_elt_idx_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data in the form of indices referring
* to character strings handled by an indexer, using the name of the element in the line,
* in the context of a view.
*
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param view The view in whose context the column is written
* (NOTE(review): confirm how the view affects the resolution of line_nb).
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set.
*
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obistr_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name, const char* value);
/**
* @brief Recovers a value in an OBIDMS column containing data in the form of indices referring
* to character strings handled by an indexer, using the name of the element in the line,
* in the context of a view.
*
* @param view The view in whose context the column is read
* (NOTE(review): confirm how the view affects the resolution of line_nb).
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @returns The recovered value, as a pointer to a constant character string.
* @retval '\0' the NA value of the type if an error occurred and obi_errno is set.
*
* @note NOTE(review): the return type is a pointer; confirm the exact NA value returned on error.
*
* @since February 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const char* obi_column_get_obistr_with_elt_name_in_view(Obiview_p view, OBIDMS_column_p column, index_t line_nb, const char* element_name);
#endif /* OBIVIEW_H_ */

View File

@ -1,72 +0,0 @@
/****************************************************************************
* Private *at functions *
****************************************************************************/
/**
* @file private_at_functions.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date 15 June 2015
* @brief Private replacement functions for *at functions.
*/
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include "private_at_functions.h"
#include "obidebug.h"
#include "obierrno.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**
 * Builds the full path of a file or directory from its path relative to an
 * open directory file descriptor.
 *
 * The result is the path of the directory, followed by "/" and path_name. It is
 * stored in a buffer of MAX_PATH_LEN bytes allocated with malloc().
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param directory_file_descriptor The file descriptor of the directory to which
 *                                  path_name is relative.
 * @param path_name The path of the file or directory, relative to directory_file_descriptor.
 *
 * @returns A pointer to the newly allocated full path.
 * @retval NULL if the buffer could not be allocated, if the path of the directory
 *              could not be retrieved, or if the full path does not fit in
 *              MAX_PATH_LEN bytes (terminating null character included).
 */
char* get_full_path(int directory_file_descriptor, const char* path_name)
{
    char* full_path;

    full_path = (char*) malloc((MAX_PATH_LEN)*sizeof(char));
    if (full_path == NULL)
    {
        obidebug(1, "\nError allocating memory for the char* path to a file or directory");
        return NULL;
    }

    if (fcntl(directory_file_descriptor, F_GETPATH, full_path) < 0)
    {
        obidebug(1, "\nError getting the path to a file or directory");
        free(full_path);    // the buffer is not handed to the caller on failure
        return NULL;
    }

    // strlcat() returns the length of the string it tried to build: a value
    // greater than or equal to the buffer size means the result was truncated.
    if ((strlcat(full_path, "/", MAX_PATH_LEN) >= MAX_PATH_LEN) ||
        (strlcat(full_path, path_name, MAX_PATH_LEN) >= MAX_PATH_LEN))
    {
        obidebug(1, "\nError building the path to a file or directory: path too long");
        free(full_path);
        return NULL;
    }

    return full_path;
}
/**
 * Opens the directory designated by path_name, interpreted relative to the
 * directory referred to by directory_file_descriptor.
 *
 * The full path is built with get_full_path() and released before returning.
 *
 * @param directory_file_descriptor The file descriptor of the reference directory.
 * @param path_name The path of the directory to open, relative to directory_file_descriptor.
 *
 * @returns The directory stream returned by opendir().
 * @retval NULL if the full path could not be built or if the directory could not be opened.
 */
DIR* private_opendirat(int directory_file_descriptor, const char* path_name)
{
    DIR*  dir_stream = NULL;
    char* absolute_path = get_full_path(directory_file_descriptor, path_name);

    if (absolute_path != NULL)
    {
        dir_stream = opendir(absolute_path);
        if (!dir_stream)
        {
            obidebug(1, "\nError opening a directory");
        }

        // The path is only needed for the opendir() call
        free(absolute_path);
    }

    return dir_stream;
}

View File

@ -1,58 +0,0 @@
/****************************************************************************
* Header file for private *at functions *
****************************************************************************/
/**
* @file private_at_functions.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date 15 June 2015
* @brief Header file for the private replacement functions for *at functions.
*/
#ifndef PRIVATE_OPENAT_H_
#define PRIVATE_OPENAT_H_
#include <sys/stat.h>
#define MAX_PATH_LEN 4096 /**< Maximum length for the character string defining a
file or directory path. get_full_path() uses it as the size in bytes of the
buffer it allocates, terminating null character included. */
/**
* @brief Internal function getting the full path of a file or a directory from its
* path relative to a directory file descriptor.
*
* The full path is the path of the directory, followed by '/' and path_name, stored
* in a newly allocated buffer of MAX_PATH_LEN bytes.
*
* @warning The returned pointer has to be freed by the caller.
*
* @note NOTE(review): the implementation relies on fcntl(F_GETPATH) and strlcat(),
* which are presumably not available on every platform; confirm portability.
*
* @param directory_file_descriptor The file descriptor for the directory to which
* path_name is relative.
* @param path_name The path name for the file or directory, relative to directory_file_descriptor.
*
* @returns A pointer to the full path.
* @retval NULL if an error occurred.
*
* @since June 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* get_full_path(int directory_file_descriptor, const char* path_name);
/**
* @brief Replacement function for opendirat() : open a directory relative to a directory file descriptor.
*
* @note NOTE(review): DIR is declared in <dirent.h>, which this header does not include itself;
* files including this header have to include <dirent.h> first.
*
* @param directory_file_descriptor The file descriptor for the directory in which the directory should be opened.
* @param path_name The path name for the directory to be opened, relative to directory_file_descriptor.
*
* @returns A pointer to the directory stream (DIR*) of the opened directory, as returned by opendir().
* @retval NULL if an error occurred.
*
* @since June 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
DIR* private_opendirat(int directory_file_descriptor, const char* path_name);
#endif /* PRIVATE_OPENAT_H_ */

58
src/utils.c Normal file
View File

@ -0,0 +1,58 @@
/****************************************************************************
* Utility functions *
****************************************************************************/
/**
* @file utils.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date 29 March 2016
* @brief Code for utility functions.
*/
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <unistd.h>
#include "utils.h"
#include "obidebug.h"
#include "obierrno.h"
#include "obidms.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
*
**********************************************************************/
/**
 * Counts the entries of a directory, ignoring every entry whose name begins
 * with '.' (this covers ".", ".." and hidden files).
 *
 * @param dir The path of the directory whose entries are counted.
 *
 * @returns The number of counted entries.
 * @retval -1 if the directory could not be opened or closed; obi_errno is then
 *            set to OBI_UTILS_ERROR.
 */
int count_dir(char *dir)
{
    struct dirent *dp;
    DIR *fd;
    int count;

    count = 0;

    if ((fd = opendir(dir)) == NULL)
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "Error opening a directory: %s\n", dir);
        return -1;
    }

    while ((dp = readdir(fd)) != NULL)
    {
        // Skip ".", ".." and any other entry whose name begins with '.'
        if ((dp->d_name)[0] == '.')
            continue;
        count++;
    }

    // Release the directory stream: without this, every call leaks a file descriptor
    if (closedir(fd) < 0)
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "Error closing a directory: %s\n", dir);
        return -1;
    }

    return count;
}

32
src/utils.h Normal file
View File

@ -0,0 +1,32 @@
/****************************************************************************
* Header file for utility functions *
****************************************************************************/
/**
* @file utils.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date 29 March 2016
* @brief Header file for utility functions.
*/
#ifndef UTILS_H_
#define UTILS_H_
#include <stdio.h>
#include <sys/stat.h>
#include "obidms.h"
#define ONE_IF_ZERO(x) (((x)==0)?1:(x)) /**< Evaluates to 1 if x is equal to 0, and to x otherwise.
x itself is not modified. x may be evaluated twice, so arguments
with side effects should be avoided.
*/
/**
* @brief Counts the entries of a directory, ignoring the entries whose name begins with '.'.
*
* @param dir The path of the directory whose entries are counted.
*
* @returns The number of entries whose name does not begin with '.'.
* @retval -1 if an error occurred (e.g. the directory could not be opened); obi_errno is then set.
*/
int count_dir(char *dir);
#endif /* UTILS_H_ */