Compare commits

...

88 Commits

Author SHA1 Message Date
02d67c257f The default name of an AVL is now the column name + '_indexer', and when
an AVL is opened (as opposed to created), it is read-only
2016-04-15 12:55:26 +02:00
e04ea85d1e Fixed problematic __str__ method and useless declarations in the
OBI_Nuc_Seq_Stored class
2016-04-15 11:22:05 +02:00
527d3555f0 Moved the functions getting full paths for files and directories to
obidms.c/.h files
2016-04-15 11:11:13 +02:00
71492ad229 Made the handling of listing and unlisting opened columns and indexers
functions in the obidms files.
2016-04-15 10:49:12 +02:00
73d64e5aff Renamed 'unmap_header' function to 'close_header' 2016-04-14 15:19:27 +02:00
4cb52e1632 Made the truncating of columns automatic when closing them (note:
already the case for AVLs)
2016-04-14 15:13:30 +02:00
9d042f7bd0 Refactored and relocated the set and get functions of all column types,
both within and out of the context of a view
2016-04-13 15:10:24 +02:00
5ec2d8842e Character string indexer API 2016-04-12 17:21:01 +02:00
04c9470f7d Fixed and cleaned DNA_seq_indexer API 2016-04-12 17:20:24 +02:00
be05c889e2 DNA_seq_indexer API 2016-04-12 16:38:47 +02:00
04e3a7b5a9 Added more references in cython .cfiles files because it seems necessary
for linux distributions
2016-04-12 15:10:54 +02:00
d8107533d8 Obiblob_indexer API 2016-04-12 14:53:33 +02:00
cd4e65e190 Fixed typo and includes in obiblob files 2016-04-12 14:52:27 +02:00
375bfcce8a Renamed "Obi_byte_arrays" to "Obiblobs" and moved Obiblob functions to
separate obiblob.c and obiblob.h files
2016-04-12 11:21:14 +02:00
c225cfd8b6 Fixed bug with retrieval of values from AVLs (bad cast in byte array
structure)
2016-04-11 17:07:22 +02:00
966b1325ed Deleted declaration of obsolete public function 2016-04-11 11:14:20 +02:00
019dfc01b4 Branch to refactor and debug (AVLs bugged) 2016-04-08 15:38:57 +02:00
edc4fd7b3e Fixed minor warning 2016-03-25 16:11:52 +01:00
ff6c27acf2 Implemented the retrieval of values with groups of AVLs 2016-03-25 15:35:16 +01:00
69856f18dd untested (and no possible retrieval) of CRC used to represent data in
AVL trees
2016-03-24 16:38:11 +01:00
58ac860cc7 Added macro for the bloom filter parameters and deleted old unused
macros for crc
2016-03-23 13:33:40 +01:00
d44117d625 obiimport function for testing purposes 2016-03-23 13:00:02 +01:00
6bd42132c4 Minor fixes to silence warnings and replaced two asprintf uses 2016-03-23 12:58:53 +01:00
4085904362 Merge branch 'multiple_avls_bloom' 2016-03-22 14:14:10 +01:00
b04b4b5902 made POSIX compliant 2016-03-21 11:33:06 +01:00
383e738ab7 Merge branch 'master' of git@git.metabarcoding.org:obitools/obitools3.git 2016-03-18 15:49:53 +01:00
3681cecb4d Multiple AVLs with bloom filters (very raw test version) 2016-03-18 11:06:02 +01:00
545ed8111a Code for tests storing data in multiple AVLs.
(note: unretrievable data as implemented)
2016-03-11 15:34:55 +01:00
86071d30c9 Minor improvement in AVL initial size calculation 2016-03-11 14:07:40 +01:00
21d1b2ed3e First implementation of taxonomy reading 2016-03-11 13:56:38 +01:00
6157633137 prototype for the obi unix command and the count sub command 2016-03-08 16:06:00 +01:00
a08def47e6 It is now impossible to create a view with a name identical to one of an
existing written view
2016-03-01 13:36:54 +01:00
fc5a12bad7 Closes #34 2016-02-29 17:56:55 +01:00
e323d8e702 Cython classes for nucleotide sequences (outside or in the context of a
view)
2016-02-29 16:33:30 +01:00
b350ea0393 Fixed minor error 2016-02-29 16:28:34 +01:00
8e9e21a02e Increased the maximum depth of AVL trees 2016-02-29 16:27:23 +01:00
4df313c54a Added Obiviews specialized for the handling of nucleotide sequences 2016-02-25 09:43:27 +01:00
ffc68d448f Deleted a forgotten print statement 2016-02-18 15:15:42 +01:00
a8f03248a8 Major update : views 2016-02-18 10:38:51 +01:00
cfaf069095 Fixed more typos and formatting imperfections. 2015-12-11 17:37:25 +01:00
a6144eabe2 Fixed typos 2015-12-11 17:26:20 +01:00
c139367555 DNA sequences and character strings are now handled using AVL trees. 2015-12-11 17:24:44 +01:00
1586956d57 Added the lists of opened columns and arrays in the OBIDMS structure,
and a counter in the OBIDMS column structure; fixed some bugs and
created tests for referring columns that are bound to disappear anyway.
2015-12-02 17:32:07 +01:00
b45b496b0e Major update: new type of columns containing indices referring to lines
in other columns
2015-11-29 11:57:07 +01:00
2cf10cb6f0 Column type is now passed as a character string when creating the column
(either 'OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_STR' or
'OBI_SEQ')
2015-11-23 15:48:27 +01:00
5a5516303d deleting useless .pyc files 2015-11-23 14:43:34 +01:00
d6a99bafea Fixed a major bug with the versioning of columns that was introduced in
f6ec8ba9
2015-11-23 13:34:51 +01:00
08f2657e18 Increased maximum line count of columns to 1^9 2015-11-23 13:23:18 +01:00
6aa2f92930 DNA sequences are now encoded on 4 bits when they are in IUPAC 2015-11-20 15:32:09 +01:00
87044b41d8 modified the encoding function on 2 bits a little 2015-11-20 11:32:47 +01:00
6ab1c83302 New column type for DNA sequences. Only for those coded on 2 bits (only
'ATGCatgc') for now.
2015-11-19 18:12:48 +01:00
e371248567 changed version to 0.0.0 2015-11-19 18:11:21 +01:00
dbf9463238 The endianness of a DMS is now stored in the OBIDMS structure 2015-11-18 15:35:09 +01:00
eb12af4da4 Fixed minor error in the documentation of a function. 2015-11-16 15:38:01 +01:00
e8417b4f6f The endianness of an OBIDMS is now stored in an informations file that
is read when opening the OBIDMS.
2015-11-16 14:37:51 +01:00
6579566c6e Minor changes in code to improve readability and fix C compilation
warnings
2015-11-10 14:37:58 +01:00
410e2e02a0 When retrieving the header of a column, the version number of the column
wanted can now be provided.
2015-11-10 13:30:10 +01:00
8ce4f264aa When enlarging a column, the function doesn't try anymore to keep the
mapped region at the same pointer (never works), and unmap/remap
instead.
2015-11-10 13:18:36 +01:00
d885eb48ff The header size when creating a column is now calculated according to
the size of the header structure and the page size of the platform.
2015-11-10 13:09:30 +01:00
661fe3606a In OBI_CHAR columns, characters are now given and retrieved as decoded
(unicode) characters.
2015-11-10 11:24:08 +01:00
c4b7e579cf Comments in column headers are now working. 2015-11-10 10:56:45 +01:00
f6ec8ba963 The header size is now directly read in the file when a column or an
array is opened.
2015-11-09 17:50:32 +01:00
0e3d6ed2d7 Methods __len__ (number of lines used) and __sizeof__ (total size in
bytes) implemented for columns.
2015-11-09 15:56:20 +01:00
01bfc14503 The data size in bytes is now stored in the header of a column. 2015-11-09 15:55:00 +01:00
65c1b1e8b2 Minor changes to make the creation of files and directories cleaner 2015-11-09 15:22:01 +01:00
b37bd8f21c File descriptors for dms, column and array directories are now stored in
structures.
2015-11-09 15:06:02 +01:00
05e3956a0c Minor changes in code to improve readability (freeing some character
strings earlier)
2015-11-09 11:22:51 +01:00
9b066f4327 Major update: obiarrays with columns containing indices referring to
character strings.
2015-11-06 17:55:15 +01:00
456551ffeb obi arrays that don't work because of cython bug passing wrong pointers 2015-11-03 14:22:00 +01:00
ecb9d97adb Reorganized the code to have less functions, and the functions to get
and format the creation date of a column are now working.
2015-10-15 15:12:45 +02:00
0eaa5aa784 Major changes : new cython subclasses to handle columns with multiple
elements per line in a more efficient way + now elements_names are
passed as a list + new function to recover only the header of a column
2015-10-14 18:05:34 +02:00
21923e213d The unit tests now test for None values 2015-10-12 18:02:40 +02:00
6877fc4892 Fixed a critical bug where values were initialized to NA at the wrong
location when there was multiple elements per line
2015-10-12 17:54:36 +02:00
dbed3d9d1d New module for unit testing with PyUnit 2015-10-09 15:42:57 +02:00
fc8bf16769 Fixed a critical bug in the computation of the new number of lines of a
column when truncating
2015-10-09 13:49:48 +02:00
e114a3c9cb fixed a critical bug where data size was not calculated correctly and
column directory is now closed when column is closed
2015-10-09 10:25:40 +02:00
ebc9f6f512 fixed a bug where Cython was casting doubles in floats 2015-10-08 15:28:30 +02:00
2b3f03ec28 Removed deprecated script 2015-10-08 10:46:46 +02:00
8fd9c06be2 Fixed missing file for documentation compilation 2015-10-08 10:45:54 +02:00
b553eef781 Method to close a DMS is uncommented but not complete yet (columns have
to be closed separately)
2015-10-08 10:44:13 +02:00
ee4c513fd4 Fixed a bug where cloning a column would fail if the data was empty 2015-10-08 10:36:02 +02:00
c013e6ad33 fixed typo in doxygen doc 2015-10-08 10:33:19 +02:00
c98d567e2f Updated the documentation and restructured a bit because it wasn't
compiling (note: Breathe not working)
2015-10-06 11:09:01 +02:00
392f110c8d new functions in the OBIDMS_column class to raise NotImplementedError
exceptions and to get the creation date of a column
2015-10-02 13:51:26 +02:00
6ced3c4896 new functions to get the creation date of a column 2015-10-02 13:47:53 +02:00
4b8bf41a71 closes #13, obi_errno is initialized to 0 2015-10-02 13:46:34 +02:00
c59a244e9d Fixed little typo 2015-09-30 12:07:13 +02:00
4b7f2d268b Doxygen documentation corrected and completed. 2015-09-30 12:03:46 +02:00
119 changed files with 14512 additions and 4420 deletions

2
doc/.gitignore vendored
View File

@ -1,3 +1,5 @@
/build/
/doxygen/
/build_dir.txt
/.DS_Store
/.gitignore

View File

@ -57,7 +57,7 @@ html:
@echo "Generating Doxygen documentation..."
doxygen Doxyfile
@echo "Doxygen documentation generated. \n"
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
$(SPHINXBUILD) -b html -c ./ $(ALLSPHINXOPTS) $(BUILDDIR)/html
@echo
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

View File

@ -36,7 +36,7 @@ extensions = [
'sphinx.ext.pngmath',
'sphinx.ext.ifconfig',
'sphinx.ext.viewcode',
'breathe',
# 'breathe',
]
# Add any paths that contain templates here, relative to this directory.
@ -51,7 +51,7 @@ source_suffix = '.rst'
#source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
master_doc = 'source/index'
# General information about the project.
project = u'OBITools3'
@ -292,7 +292,7 @@ texinfo_documents = [
#texinfo_no_detailmenu = False
#Breathe configuration
sys.path.append( "../breathe/" )
breathe_projects = { "OBITools3": "../doxygen/xml/" }
sys.path.append( "breathe/" )
breathe_projects = { "OBITools3": "doxygen/xml/" }
breathe_default_project = "OBITools3"

View File

@ -1,4 +0,0 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore

View File

@ -13,7 +13,7 @@ Up to now, each of these categories of data were stored in separate
files, and nothing made it mandatory to keep them together.
The `Data Management System` (DMS) of OBITools3 can be regarded as a basic
The `Data Management System` (DMS) of OBITools3 can be viewed as a basic
database system.
@ -27,9 +27,7 @@ OBIDMS UML
An OBIDMS directory contains :
* one `OBIDMS history file <#obidms-history-files>`_
* Two different kinds of directories :
* OBIDMS column directories
* OBIDMS column group directories containing OBIDMS column directories
* OBIDMS column directories
OBIDMS column directories
@ -39,16 +37,9 @@ OBIDMS column directories contain :
* all the different versions of one OBIDMS column, under the form of different files (`OBIDMS column files <#obidms-column-files>`_)
* one `OBIDMS version file <#obidms-version-files>`_
The directory name is the column attribute, or sub-attribute if the column directory is in a column group directory.
The directory name is the column attribute with the extension ``.obicol``.
OBIDMS column group directories
===============================
OBIDMS column group directories contain OBIDMS column directories. They are used to store dictionary-like data, where
each key corresponds to an OBIDMS column.
The directory name is the dictionary attribute. Each key is considered a sub-attribute and is associated to its column.
Example: ``count.obicol``
OBIDMS column files
@ -57,7 +48,7 @@ OBIDMS column files
Each OBIDMS column file contains :
* a header of a size equal to a multiple of PAGESIZE (PAGESIZE being equal to 4096 bytes
on most systems) containing metadata
* one column of data with the same `OBIType <types.html#obitypes>`_
* Lines of data with the same `OBIType <types.html#obitypes>`_
Header
@ -79,7 +70,14 @@ The header of an OBIDMS column contains :
Data
----
A column of data with the same `OBIType <types.html#obitypes>`_.
A line of data corresponds to a vector of elements. Each element is associated with an element name.
Elements names are stored in the header. The correspondence between an element and its name is established
using their order in the lists of elements and elements names. This structure allows the storage of
dictionary-like data.
Example: In the header, the attribute ``elements_names`` will be associated with the value ``"sample_1;
sample_2;sample_3"``, and a line of data with the type ``OBInt_t`` will be stored as an ``OBInt_t`` vector
of size three e.g. ``5|8|4``.
Mandatory columns
@ -158,3 +156,5 @@ operations ever done in the OBIDMS directory and the views in between them :
.. image:: ./images/history.png
:width: 150 px
:align: center

Binary file not shown.

After

Width:  |  Height:  |  Size: 67 KiB

View File

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

File diff suppressed because it is too large Load Diff

View File

@ -2,8 +2,8 @@
Data in OBITools3
#################
The OBITools3 inaugure a new way to manage DNA metabarcoding data.
They rely on a `Data management System` (DMS) that can be considered as
The OBITools3 introduce a new way to manage DNA metabarcoding data.
They rely on a `Data management System` (DMS) that can be viewed as
a simplified database system.

View File

@ -70,7 +70,7 @@ Tickets should always be labeled with the branches for which they are relevant.
Documentation
*************
C functions are documented in the header files.
Public C functions are documented in the header files; private functions are documented in the source files.
**************
@ -92,7 +92,7 @@ C99 :
* Object layer
* OBITools3 library
`Python 3 <https://www.python.org/>`_ :
`Python 3.5 <https://www.python.org/>`_ :
* Top layer code (scripts)
For the documentation, `Sphinx <http://sphinx-doc.org/>`_ should be used for both the original
@ -111,6 +111,8 @@ Enum members, macros, constants: ``ALL_CAPS``
Functions, local variables: ``lower_case``
Public functions: ``obi_lower_case``
Functions that shouldn't be called directly: ``_lower_case`` (``_`` prefix)
Global variables: ``g_lower_case`` (``g_`` prefix)
@ -120,9 +122,6 @@ Pointers: ``pointer_ptr`` (``_ptr`` suffi
.. note::
Underscores are used to delimit 'words'.
.. todo::
``obi_function`` for public functions names?
*****************
Programming rules

View File

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

View File

Before

Width:  |  Height:  |  Size: 48 KiB

After

Width:  |  Height:  |  Size: 48 KiB

View File

@ -11,7 +11,6 @@ OBITools3 documentation
Programming guidelines <guidelines>
Data structures <data>
Pistes de reflexion <pistes>
Indices and tables

View File

@ -7,13 +7,16 @@ NA values
=========
All OBITypes have an associated NA (Not Available) value.
NA values are implemented by specifying an explicit NA value for each type, corresponding to the R standards:
NA values are implemented by specifying an explicit NA value for each type,
corresponding to the R standards as much as possible:
* For the types ``OBIInt_t``, ``OBIBool_t``, ``OBIIdx_t`` and ``OBITaxid_t``, the NA value is ``INT_MIN``.
* For the type ``OBIInt_t``, the NA value is ``INT_MIN``.
* For the type ``OBIChar_t``: the NA value is ``\0`` (?).
* For the type ``OBIBool_t``, the NA value is ``2``.
* For the type ``OBIStr_t`` : the NA value is ``\0`` (?).
* For the types ``OBIIdx_t`` and ``OBITaxid_t``, the NA value is ``SIZE_MAX``.
* For the type ``OBIChar_t``: the NA value is ``\0``.
* For the type ``OBIFloat_t``::
@ -29,7 +32,7 @@ NA values are implemented by specifying an explicit NA value for each type, corr
x.word[hw] = 0x7ff00000;
x.word[lw] = 1954;
return x.value;
}
}
Minimum and maximum values for ``OBIInt_t``

View File

@ -7,11 +7,14 @@ OBITypes
:download:`html version of the OBITypes UML file <UML/OBITypes_UML.class.violet.html>`
.. image:: ./UML/Obicolumn_classes_UML.png
:download:`html version of the OBIDMS classes UML file <UML/Obicolumn_classes_UML.class.violet.html>`
.. toctree::
:maxdepth: 2
The elementary types <elementary>
The containers <containers>
Special values <specialvalues>

1
doc/sphinx/build_dir.txt Normal file
View File

@ -0,0 +1 @@
build/lib.macosx-10.6-intel-3.5

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

File diff suppressed because it is too large Load Diff

View File

@ -1,23 +0,0 @@
###################
Pistes de reflexion
###################
******************************
Ce que l'on veut pouvoir faire
******************************
* Gerer les valeurs manquantes
* Modifier une colonne en cours d'ecriture (mmap)
* Ajouter des valeurs a la fin du fichier d'une colonne en cours d'ecriture (mmap)
******
Divers
******
* Si l'ordre d'une colonne est change, elle est reecrite (pas d'index).
* Utilisation de semaphores pour la lecture
* Utilisation de tas pour l'indexation des chaines de caracteres. Chaque colonne dont
le type est OBIStr_t est stockee dans 3 fichiers : un fichier contenant les chaines, un
fichier contenant les index, et un fichier contenant le tas.

228
python/obi.py Normal file
View File

@ -0,0 +1,228 @@
#!/usr/local/bin/python3.4
'''
obi -- shortdesc
obi is a description
It defines classes_and_methods
@author: user_name
@copyright: 2014 organization_name. All rights reserved.
@license: license
@contact: user_email
@deffield updated: Updated
'''
import sys
import pkgutil
import argparse
import logging
import json
default_config = {
'obi' : { 'log' : True,
'loglevel' : 'INFO',
'version' : False,
'progress' : True
}
}
from obitools3 import command
from obitools3.version import version
__all__ = []
__version__ = version
__date__ = '2014-09-28'
__updated__ = '2014-09-28'
DEBUG = 1
TESTRUN = 0
PROFILE = 0
def loadCommand(name,loader):
    '''
    Load a command module from its name and the finder that located it.

    This function is for internal use.

    @param name: name of the module
    @type name: str

    @param loader: the module finder, as yielded by `pkgutil.iter_modules`
    @type loader: a finder exposing `find_spec` (or the legacy `find_module`)

    @return the loaded module
    @rtype: module

    @raise ImportError: if the finder cannot locate a module called `name`
    '''
    import importlib.util

    find_spec = getattr(loader, 'find_spec', None)
    if find_spec is None:
        # Legacy finder that only implements the old PEP 302 protocol.
        return loader.find_module(name).load_module(name)

    # `find_module()`/`load_module()` are deprecated and no longer provided by
    # the standard finders in recent Python versions; use module specs instead.
    spec = find_spec(name)
    if spec is None or spec.loader is None:
        raise ImportError("Cannot find the command module '%s'" % name)

    module = importlib.util.module_from_spec(spec)

    # Register the module before executing it, as `load_module()` used to do,
    # and unregister it again if its execution fails.
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        sys.modules.pop(name, None)
        raise

    return module
def getCommandsList():
    '''
    Collect the sub-commands available to the main `obi` command.

    Every plain module (packages are ignored) found in the
    `obitools3.command` package is loaded and registered under its name.

    @return: a dict mapping each command name to its loaded module
    @rtype: dict
    '''
    cmds = {}
    for finder, modname, ispkg in pkgutil.iter_modules(command.__path__):
        if ispkg:
            continue
        cmds[modname] = loadCommand(modname, finder)
    return cmds
def getLogger(config):
    '''
    Configure and return the root logger as defined by the command line
    options or by the config file.

    A handler writing to stderr is always installed. A file handler writing
    to `<outputfilename>.log` is added only when logging to a file is enabled
    and an output file name is actually known.

    :param config: the configuration dict; its 'obi' section is read for the
                   keys 'outputfilename', 'loglevel' and 'log', and receives
                   the configured logger under the key 'logger'
    :return: the root logger
    '''
    obi_config = config['obi']

    # These keys may be absent (e.g. no sub-command defines an output file),
    # so do not fail on them.
    output = obi_config.get('outputfilename')
    level = obi_config.get('loglevel', 'INFO')
    logfile = obi_config.get('log', False)

    rootlogger = logging.getLogger()
    logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")

    stderrHandler = logging.StreamHandler(sys.stderr)
    stderrHandler.setFormatter(logFormatter)
    rootlogger.addHandler(stderrHandler)

    # Without an output name the log file would be called "None.log".
    if logfile and output is not None:
        fileHandler = logging.FileHandler("%s.log" % output)
        fileHandler.setFormatter(logFormatter)
        rootlogger.addHandler(fileHandler)

    # Level names are upper-case integer constants of the logging module;
    # anything else (unknown name, or a name resolving to a function such as
    # 'info') falls back to INFO.
    loglevel = getattr(logging, str(level).upper(), None)
    if not isinstance(loglevel, int):
        loglevel = logging.INFO

    rootlogger.setLevel(loglevel)

    obi_config['logger'] = rootlogger

    return rootlogger
class ObiParser(argparse.ArgumentParser):
    '''
    Argument parser that prints the full help, not only the usage line,
    when the command line is invalid.
    '''

    def error(self, message):
        '''
        Report a command line error and terminate the program.

        Both the error message and the help text are written to stderr so
        that stdout stays clean, then the process exits with status 2.

        @param message: description of the command line error
        @type message: str
        '''
        sys.stderr.write('error: %s\n' % message)
        self.print_help(sys.stderr)
        sys.exit(2)
def buildArgumentParser():
    '''
    Build the command line parser of the `obi` command.

    The parser holds the global options plus one sub-parser for every
    command module exposing a `run` attribute. A command module may provide
    a `__title__` (used as help of the sub-command) and an `addOptions`
    function (called with the sub-parser to declare specific options).
    The module itself is stored in the parsed options as 'obi:module'.

    @return: the fully configured parser
    @rtype: ObiParser
    '''
    parser = ObiParser()

    parser.add_argument('--version',
                        action='store_true',
                        dest='obi:version',
                        default=False,
                        help='Print the version of the OBITools')

    parser.add_argument('--no-log',
                        action='store_false',
                        dest='obi:log',
                        default=None,
                        help='Do not create a logfile for the data analyze')

    parser.add_argument('--no-progress',
                        action='store_false',
                        dest='obi:progress',
                        default=None,
                        help='Do not print the progress bar during analyzes')

    subparsers = parser.add_subparsers(title='subcommands',
                                       description='valid subcommands',
                                       help='additional help')

    for cmd_name, module in getCommandsList().items():
        # Modules that cannot be run are not exposed as sub-commands
        if not hasattr(module, "run"):
            continue

        extra = {}
        if hasattr(module, "__title__"):
            extra['help'] = module.__title__
        sub = subparsers.add_parser(cmd_name, **extra)

        if hasattr(module, "addOptions"):
            module.addOptions(sub)

        sub.set_defaults(**{'obi:module' : module})

    return parser
def buildDefaultConfiguration():
    '''
    Complete the global default configuration with one section per
    runnable sub-command.

    Only modules exposing a `run` attribute get a section, which mirrors the
    selection done when building the argument parser. Each section is a copy
    of the module's `default_config` dict when it has one (so that later
    changes to the configuration do not alter the module's own defaults),
    and an empty dict otherwise.

    @return: the global default configuration
    @rtype: dict
    '''
    global default_config

    for c, module in getCommandsList().items():
        # A module that cannot be run is not a sub-command: skip it instead
        # of failing on an assertion.
        if not hasattr(module, "run"):
            continue
        default_config[c] = dict(getattr(module, 'default_config', {}))

    return default_config
def getConfiguration():
    '''
    Build the configuration of the current `obi` run, once.

    The default configuration is completed with the values given on the
    command line (option destinations are named 'section:key'). The function
    then handles `--version`, checks that a sub-command was selected and sets
    up logging. Subsequent calls return the already built configuration.

    Exits with status 0 after printing the version, and with status 2 when
    no sub-command is specified.

    @return: the configuration
    @rtype: dict
    '''
    global default_config

    if '__done__' in default_config:
        return default_config

    parser = buildArgumentParser()
    options = vars(parser.parse_args())

    config = buildDefaultConfiguration()

    for k, value in options.items():
        # None means "not given on the command line": keep the default
        if value is None:
            continue
        section, key = k.split(':', 1)
        config.setdefault(section, {})[key] = value

    obi_config = config['obi']

    if obi_config['version']:
        print("The OBITools - Version %s" % __version__)
        sys.exit(0)

    if 'module' not in obi_config:
        print('\nError: No obi command specified',file=sys.stderr)
        parser.print_help()
        sys.exit(2)

    # Neither key is guaranteed to exist: they are not part of the defaults
    # and are only present if a sub-command declares them.
    if obi_config.get('outputfilename') is None:
        obi_config['outputfilename'] = obi_config.get('indexfilename')

    getLogger(config)

    config['__done__'] = True

    return config
# Script entry point: build the configuration from the command line, then
# call the `run` function of the sub-command module stored under 'obi:module'.
if __name__ =="__main__":
config = getConfiguration()
config['obi']['module'].run(config)

Binary file not shown.

View File

View File

@ -0,0 +1,36 @@
'''
Created on 8 mars 2016
@author: coissac
'''
__title__="Counts sequences in a sequence set"
default_config = { 'countmode' : None
}
def addOptions(parser):
    '''
    Declare the command line options of the count sub-command.

    Option destinations follow the 'section:key' naming used by the `obi`
    launcher to fill the configuration.

    @param parser: the sub-parser dedicated to this command
    @type parser: argparse.ArgumentParser
    '''
    parser.add_argument(dest='obi:input', metavar='obi:input',
                        nargs='?',
                        default=None,
                        help='input data set' )

    # The parser is an argparse parser: option groups are created with
    # add_argument_group()/add_argument(), not with the optparse methods
    # add_option_group()/add_option().
    group=parser.add_argument_group('Obicount specific options')

    group.add_argument('-s','--sequence',
                       action="store_true", dest="count:sequence",
                       default=False,
                       help="Prints only the number of sequence records."
                       )

    group.add_argument('-a','--all',
                       action="store_true", dest="count:all",
                       default=False,
                       help="Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False)."
                       )
# Entry point of the sub-command, receiving the configuration dict.
# Currently a stub: the command does nothing yet.
def run(config):
# The code of my command
pass

View File

@ -1,31 +0,0 @@
import sys
import argparse
from obitools3.obidms.obidmscolumn.capidmscolumn import OBIDMS_column
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Computes the sum of a column.')
parser.add_argument('-d', '--dms', dest='dms_name', type=str,
help='Name of the OBIDMS containing the column')
parser.add_argument('-c', '--column', dest='column_name', type=str, default='count',
help="Name of the OBIDMS column (default: 'count')")
parser.add_argument('-v', '--version', dest='version_number', type=int, default=-1,
help='Version number of the column (default: latest version)')
args = parser.parse_args()
c = OBIDMS_column.open(args.dms_name, args.column_name, version_number=args.version_number)
# check that 1 element / line and summable type?
total = 0
for count in c :
total+=count
print("Total count = ", total)

View File

@ -1,5 +1,28 @@
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
@ -10,5 +33,19 @@
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h

View File

@ -2,37 +2,93 @@
from .capi.obidms cimport OBIDMS_p
from .capi.obidmscolumn cimport OBIDMS_column_p
from .capi.obitypes cimport obiversion_t, OBIType_t
from .capi.obiview cimport Obiview_p
from .capi.obitypes cimport obiversion_t, OBIType_t, index_t
from ._obitaxo cimport OBI_Taxonomy
cdef class OBIDMS_column:
cdef OBIDMS_column_p* pointer
cdef OBIDMS dms
cdef Obiview_p view
cdef str data_type
cdef str dms_name
cdef str column_name
cdef index_t nb_elements_per_line
cdef list elements_names
cpdef update_pointer(self)
cpdef list get_elements_names(self)
cpdef str get_data_type(self)
cpdef index_t get_nb_lines_used(self)
cpdef str get_creation_date(self)
cpdef str get_comments(self)
cpdef close(self)
@staticmethod
cdef object get_subclass_type(OBIDMS_column_p column_p)
cdef class OBIDMS_column_multi_elts(OBIDMS_column):
cpdef set_line(self, index_t line_nb, dict values)
cdef class OBIDMS_column_line:
cdef OBIDMS_column column
cdef index_t index
cdef class OBIView:
cdef Obiview_p pointer
cdef str name
cdef str comments
cdef dict columns
cdef dict columns_pp # TODO this dict might be unnecessary
cdef OBIDMS dms
cpdef delete_column(self, str column_name)
cpdef add_column(self,
str column_name,
obiversion_t version_number=*,
str type=*,
index_t nb_lines=*,
index_t nb_elements_per_line=*,
list elements_names=*,
str indexer_name=*,
str comments=*,
bint create=*
)
cpdef select_line(self, index_t line_nb)
cpdef select_lines(self, list line_selection)
cpdef save_and_close(self)
cdef class OBIView_NUC_SEQS(OBIView):
cdef OBIDMS_column ids
cdef OBIDMS_column sequences
cdef OBIDMS_column descriptions
cpdef delete_column(self, str column_name)
cdef class OBIView_line :
cdef index_t index
cdef OBIView view
cdef class OBIDMS_column
cdef class OBIDMS:
cdef OBIDMS_p pointer
cdef str dms_name
cpdef dict list(self)
cpdef OBIDMS_column open_column(self,
str column_name,
bint create=*,
bint clone=*, bint clone_data=*,
obiversion_t version_number=*,
OBIType_t data_type=*,
size_t nb_lines=*,
size_t nb_elements_per_line=*,
str elements_names=*)
cpdef close(self)
cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name)
cpdef OBIView open_view(self, str view_name)
cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*)
cpdef dict read_view_infos(self, str view_name)
cpdef dict read_views(self)
cdef class OBIDMS_column:
cdef OBIDMS_column_p pointer
cdef OBIDMS dms
cdef str data_type # TODO keep as OBIType_t? both?
cdef str dms_name
cdef str column_name
cpdef object get_item(self, size_t line_nb, str element_name)
cpdef list get_elements_names(self)
cpdef str get_data_type(self)
cpdef size_t get_nb_lines_used(self)

View File

@ -1,40 +1,635 @@
#cython: language_level=3
from pathlib import Path
from obitools3.utils cimport bytes2str, str2bytes
from .capi.obidms cimport obi_dms
from .capi.obidmscolumn cimport obi_column_get_data_type_from_name, \
obi_column_get_latest_version_from_name, \
obi_column_get_line_count_from_name, \
obi_column_get_nb_lines_used, \
obi_column_get_elements_names, \
obi_create_column, \
obi_clone_column, \
obi_open_column, \
obi_close_column
from .capi.obitypes cimport const_char_p, name_data_type
from .capi.obidms cimport obi_dms, \
obi_close_dms
from .capi.obidmscolumn cimport obi_close_column, \
obi_column_format_date, \
OBIDMS_column_p, \
OBIDMS_column_header_p
from .capi.obitypes cimport const_char_p, \
OBIType_t, \
OBI_INT, \
OBI_FLOAT, \
OBI_BOOL, \
OBI_CHAR, \
OBI_STR, \
OBI_SEQ, \
name_data_type, \
only_ATGC # discuss
from ._obidms cimport OBIDMS, \
OBIDMS_column, \
OBIView, \
OBIView_line
from ._obitaxo cimport OBI_Taxonomy
from ._obidms cimport OBIDMS
from ._obidms cimport OBIDMS_column
from ._obiseq cimport OBI_Nuc_Seq, OBI_Nuc_Seq_Stored
from ._obidmscolumn_int cimport OBIDMS_column_int, \
OBIDMS_column_int_writable
OBIDMS_column_multi_elts_int
from ._obidmscolumn_float cimport OBIDMS_column_float, \
OBIDMS_column_float_writable
OBIDMS_column_multi_elts_float
from ._obidmscolumn_bool cimport OBIDMS_column_bool, \
OBIDMS_column_bool_writable
OBIDMS_column_multi_elts_bool
from ._obidmscolumn_char cimport OBIDMS_column_char, \
OBIDMS_column_char_writable
OBIDMS_column_multi_elts_char
from ._obidmscolumn_idx cimport OBIDMS_column_idx, \
OBIDMS_column_idx_writable
from ._obidmscolumn_str cimport OBIDMS_column_str, \
OBIDMS_column_multi_elts_str
from ._obidmscolumn_seq cimport OBIDMS_column_seq, \
OBIDMS_column_multi_elts_seq
from .capi.obiview cimport Obiview_p, \
Obiviews_infos_all_p, \
Obiview_infos_p, \
Column_reference_p, \
obi_new_view_nuc_seqs, \
obi_new_view, \
obi_new_view_cloned_from_name, \
obi_new_view_nuc_seqs_cloned_from_name, \
obi_open_view, \
obi_read_views, \
obi_unmap_read_views, \
obi_view_delete_column, \
obi_view_add_column, \
obi_view_get_column, \
obi_view_get_pointer_on_column_in_view, \
obi_select_line, \
obi_select_lines, \
obi_save_and_close_view, \
VIEW_TYPE_NUC_SEQS, \
NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DESCRIPTION_COLUMN
from libc.stdlib cimport malloc
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
# Base class wrapping an OBIDMS column opened in the context of a view.
# The column is accessed through a pointer on the column pointer
# (OBIDMS_column_p*), dereferenced with [0] on each access.
cdef class OBIDMS_column :
# Should only be initialized through a subclass
# Reads the column pointer from the capsule stored in view.columns_pp under
# column_name, then caches the header information in Python attributes.
def __init__(self, OBIView view, str column_name):
cdef OBIDMS_column_p column_p
cdef OBIDMS_column_p* column_pp
column_pp = <OBIDMS_column_p*> PyCapsule_GetPointer(((view.columns_pp)[column_name]), NULL) # or use C function
column_p = column_pp[0] # TODO ugly cython dereferencing but can't find better
# Fill structure
self.pointer = column_pp
self.dms = view.dms
self.view = view.pointer # TODO pointer or instance?
self.data_type = bytes2str(name_data_type((column_p.header).returned_data_type))
self.column_name = bytes2str((column_p.header).name)
self.nb_elements_per_line = (column_p.header).nb_elements_per_line
# Elements names are stored in the header as one ';'-separated string
self.elements_names = (bytes2str((column_p.header).elements_names)).split(';')
# column[line_nb] = value delegates to set_line(), which is not defined in
# this class (presumably provided by the subclasses -- TODO confirm).
def __setitem__(self, index_t line_nb, object value):
self.set_line(line_nb, value)
# column[line_nb] delegates to get_line(), which is not defined in this
# class (presumably provided by the subclasses -- TODO confirm).
def __getitem__(self, index_t line_nb):
return self.get_line(line_nb)
# Number of lines used, as stored in the column header.
def __len__(self):
return (self.pointer)[0].header.lines_used
# Sum of the header size and the data size stored in the column header.
def __sizeof__(self):
return ((self.pointer)[0].header.header_size + (self.pointer)[0].header.data_size)
# Iterates over the used lines, in order, yielding get_line(line_nb).
def __iter__(self):
# Declarations
cdef index_t lines_used
cdef index_t line_nb
# Yield each line
lines_used = (self.pointer)[0].header.lines_used
for line_nb in range(lines_used):
yield self.get_line(line_nb)
# Fetches again, from the view, the pointer on the column named column_name.
cpdef update_pointer(self):
self.pointer = <OBIDMS_column_p*> obi_view_get_pointer_on_column_in_view(self.view, str2bytes(self.column_name))
# Returns the list of elements names cached at initialization.
cpdef list get_elements_names(self):
return self.elements_names
# Returns the name of the data type cached at initialization.
cpdef str get_data_type(self):
return self.data_type
# Returns the number of lines used, read from the column header.
cpdef index_t get_nb_lines_used(self):
return (self.pointer)[0].header.lines_used
# Returns the creation date of the header formatted by obi_column_format_date().
cpdef str get_creation_date(self):
return bytes2str(obi_column_format_date((self.pointer)[0].header.creation_date))
# Returns the comments stored in the column header.
cpdef str get_comments(self):
return bytes2str((self.pointer)[0].header.comments)
# Builds a string with str() of every line of the column, one per text line.
def __repr__(self) :
cdef str to_print
to_print = ''
for line in self :
to_print = to_print + str(line) + "\n"
return to_print
# Closes the column; raises Exception if obi_close_column() returns a negative value.
cpdef close(self):
if obi_close_column((self.pointer)[0]) < 0 :
raise Exception("Problem closing a column")
# Returns the Python class to instantiate for a column, chosen from the
# returned data type of the header and from whether the column has exactly
# one element per line. Raises Exception for any other data type.
@staticmethod
cdef object get_subclass_type(OBIDMS_column_p column_p) :
cdef object subclass
cdef OBIDMS_column_header_p header
cdef OBIType_t col_type
cdef bint col_writable
cdef bint col_one_element_per_line
header = column_p.header
col_type = header.returned_data_type
# NOTE(review): col_writable is read here but not used in the selection below
col_writable = column_p.writable
col_one_element_per_line = ((header.nb_elements_per_line) == 1)
if col_type == OBI_INT :
if col_one_element_per_line :
subclass = OBIDMS_column_int
else :
subclass = OBIDMS_column_multi_elts_int
elif col_type == OBI_FLOAT :
if col_one_element_per_line :
subclass = OBIDMS_column_float
else :
subclass = OBIDMS_column_multi_elts_float
elif col_type == OBI_BOOL :
if col_one_element_per_line :
subclass = OBIDMS_column_bool
else :
subclass = OBIDMS_column_multi_elts_bool
elif col_type == OBI_CHAR :
if col_one_element_per_line :
subclass = OBIDMS_column_char
else :
subclass = OBIDMS_column_multi_elts_char
elif col_type == OBI_STR :
if col_one_element_per_line :
subclass = OBIDMS_column_str
else :
subclass = OBIDMS_column_multi_elts_str
elif col_type == OBI_SEQ :
if col_one_element_per_line :
subclass = OBIDMS_column_seq
else :
subclass = OBIDMS_column_multi_elts_seq
else :
raise Exception("Problem with the data type")
return subclass
######################################################################################################
cdef class OBIDMS_column_multi_elts(OBIDMS_column) :
    """
    Column whose lines hold several named elements.

    Indexing by line number returns an OBIDMS_column_line proxy rather than
    the values themselves; setting a line takes a dict keyed by element name.
    """

    def __getitem__(self, index_t line_nb):
        return OBIDMS_column_line(self, line_nb)

    cpdef set_line(self, index_t line_nb, dict values):
        # Write each provided element through set_item, one at a time
        for name in values :
            element_value = values[name]
            self.set_item(line_nb, name, element_value)
######################################################################################################
cdef class OBIDMS_column_line :
    """
    Proxy on one line of a column: stores the column and the line index, and
    forwards element reads and writes to the column's get_item/set_item.
    """

    def __init__(self, OBIDMS_column column, index_t line_nb) :
        self.column = column
        self.index = line_nb

    def __getitem__(self, str element_name) :
        line_index = self.index
        return self.column.get_item(line_index, element_name)

    def __setitem__(self, str element_name, object value):
        line_index = self.index
        self.column.set_item(line_index, element_name, value)

    def __contains__(self, str element_name):
        # Membership is tested against the element names cached on the column
        known_names = self.column.elements_names
        return (element_name in known_names)

    def __repr__(self) :
        whole_line = self.column.get_line(self.index)
        return str(whole_line)
##########################################
cdef class OBIView :
    """
    Python-side handle on a view of an OBIDMS.

    Holds the C view pointer, the view name and comments, and two dictionaries
    keyed by column name: ``columns`` (Python column objects) and
    ``columns_pp`` (capsules wrapping the address of each column pointer
    inside the view's column array).
    """

    def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""):
        """
        Create (new=True) or open (new=False) a view.

        When creating, view_to_clone may be a view name (str) or an OBIView
        instance, and line_selection an optional list of line indices.
        Raises MemoryError if the line selection buffer can't be allocated and
        Exception if the C call returns NULL.
        """
        cdef Obiview_p view = NULL
        cdef int i
        cdef str col_name
        cdef OBIDMS_column_p column_p
        cdef OBIDMS_column_p* column_pp
        cdef OBIDMS_column_header_p header
        cdef index_t* line_selection_p
        cdef object col_capsule

        self.dms = dms

        # Copy the line selection into a C array terminated by -1
        if line_selection is not None :
            line_selection_p = <index_t*> malloc((len(line_selection) + 1) * sizeof(index_t))
            if line_selection_p == NULL :
                raise MemoryError("Could not allocate the line selection of a view")
            for i in range(len(line_selection)) :
                line_selection_p[i] = line_selection[i] # TODO type problem?
            line_selection_p[len(line_selection)] = -1
            # NOTE(review): this buffer is never freed; confirm that the C functions
            # below do not keep the pointer before adding a free() after the call.
        else :
            line_selection_p = NULL

        if new :
            if view_to_clone is not None :
                if type(view_to_clone) == str :
                    view = obi_new_view_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments))
                else :
                    view = obi_new_view(dms.pointer, str2bytes(view_name), (<OBIView> view_to_clone).pointer, line_selection_p, str2bytes(comments))
            else :
                view = obi_new_view(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments))
        else :
            if view_name is not None :
                view = obi_open_view(dms.pointer, str2bytes(view_name))
            else :
                view = obi_open_view(dms.pointer, NULL)

        if view == NULL :
            raise Exception("Error creating/opening view")

        self.pointer = view
        self.name = bytes2str(view.name)
        # Needed by __repr__; read from the view like the name
        self.comments = bytes2str(view.comments)

        # go through columns to build list and open python object (TODO make separate function?)
        self.columns = {}
        self.columns_pp = {}
        i = 0
        while i < view.column_count :
            column_pp = <OBIDMS_column_p*> ((view.columns)+i)
            column_p = <OBIDMS_column_p> (view.columns)[i]
            header = (column_p).header
            col_name = bytes2str(header.name)
            col_capsule = PyCapsule_New(column_pp, NULL, NULL) # TODO discuss
            (self.columns_pp)[col_name] = col_capsule
            subclass = OBIDMS_column.get_subclass_type(column_p)
            self.columns[col_name] = subclass(self, col_name)
            i+=1

    def __repr__(self) :
        cdef str s
        cdef OBIDMS_column column
        cdef OBIDMS_column_p column_p
        s = self.name
        s = s + ", " + self.comments + ", " + str(self.pointer.line_count) + " lines"
        for column_name in self.columns : # TODO make function in OBIDMS_column class
            column = self.columns[column_name]
            column_p = (column.pointer)[0]
            s = s + "\n" + column_name + ", version " + str(column_p.header.version) + ", data type: " + column.data_type
        return s

    cpdef delete_column(self, str column_name) :
        """
        Delete a column from the view, then rebuild the capsules of the
        remaining columns and refresh the pointer held by each column object.
        Raises Exception if the C call fails.
        """
        cdef int i
        cdef Obiview_p view
        cdef OBIDMS_column_p column_p
        cdef OBIDMS_column_p* column_pp
        cdef OBIDMS_column_header_p header
        cdef str column_n

        view = self.pointer

        if obi_view_delete_column(view, str2bytes(column_name)) < 0 :
            raise Exception("Problem deleting a column from a view")

        # Update the dictionaries of column pointers and column objects, and update pointers in column objects (make function?):
        (self.columns).pop(column_name)
        (self.columns_pp).pop(column_name)

        i = 0
        while i < view.column_count :
            column_pp = <OBIDMS_column_p*> ((view.columns)+i)
            column_p = <OBIDMS_column_p> (view.columns)[i]
            header = (column_p).header
            col_name = bytes2str(header.name)
            col_capsule = PyCapsule_New(column_pp, NULL, NULL)
            (self.columns_pp)[col_name] = col_capsule
            i+=1

        for column_n in self.columns :
            (self.columns[column_n]).update_pointer()

    cpdef add_column(self,
                     str column_name,
                     obiversion_t version_number=-1,
                     str type='',
                     index_t nb_lines=0,
                     index_t nb_elements_per_line=1, # TODO 1?
                     list elements_names=None,
                     str indexer_name="",
                     str comments="",
                     bint create=True # TODO
                    ) :
        """
        Add a column to the view and register its Python object.

        type is one of 'OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR',
        'OBI_STR', 'OBI_SEQ'; it is mandatory when create is True.
        elements_names is mandatory when nb_elements_per_line > 1.
        Raises Exception on invalid arguments or if a C call fails.
        """
        cdef bytes column_name_b
        cdef bytes elements_names_b
        cdef object subclass
        cdef OBIDMS_column_p* column_pp
        cdef OBIDMS_column_p column_p
        cdef OBIType_t data_type

        column_name_b = str2bytes(column_name)

        if nb_elements_per_line > 1 :
            if elements_names is None :
                raise Exception("Elements names must be provided when there are several elements per line")
            elements_names_b = str2bytes(';'.join(elements_names))
        elif nb_elements_per_line == 1 :
            # A single element is named after the column
            elements_names_b = column_name_b
        else :
            raise Exception("The number of elements per line must be at least 1")

        if type :
            if type == 'OBI_INT' :
                data_type = OBI_INT
            elif type == 'OBI_FLOAT' :
                data_type = OBI_FLOAT
            elif type == 'OBI_BOOL' :
                data_type = OBI_BOOL
            elif type == 'OBI_CHAR' :
                data_type = OBI_CHAR
            elif type == 'OBI_STR' :
                data_type = OBI_STR
            elif type == 'OBI_SEQ' :
                data_type = OBI_SEQ
            else :
                raise Exception("Invalid provided data type")
        elif create :
            raise Exception("A data type must be specified to create a column")
        else :
            # No type given for an existing column: pass 0 rather than an unassigned value.
            # NOTE(review): presumably the C side then takes the type from the column itself - confirm.
            data_type = <OBIType_t> 0

        if (obi_view_add_column(self.pointer, column_name_b, version_number, # should return pointer on column?
                                data_type, nb_lines, nb_elements_per_line,
                                elements_names_b, str2bytes(indexer_name),
                                str2bytes(comments), create) < 0) :
            raise Exception("Problem adding a column in a view")

        # Store the column pointer
        column_pp = obi_view_get_pointer_on_column_in_view(self.pointer, column_name_b)
        if column_pp == NULL :
            raise Exception("Problem getting a column in a view")
        col_capsule = PyCapsule_New(column_pp, NULL, NULL) # TODO
        (self.columns_pp)[column_name] = col_capsule

        # Open and store the subclass
        column_p = column_pp[0] # TODO ugly cython dereferencing
        subclass = OBIDMS_column.get_subclass_type(column_p)
        (self.columns)[column_name] = subclass(self, column_name)

    cpdef save_and_close(self) :
        if (obi_save_and_close_view(self.pointer) < 0) :
            raise Exception("Problem closing a view")

    def __iter__(self):
        # iter on each line of all columns
        # Declarations
        cdef index_t lines_used
        cdef index_t line_nb
        # Yield each line TODO line class
        # The line is not forced to OBIView_line: __getitem__ decides which
        # line class is returned, and subclasses override it.
        lines_used = (self.pointer).line_count
        for line_nb in range(lines_used) :
            yield self[line_nb]

    def __getitem__(self, object item) :
        # A str selects a column, an int selects a line; any other key type falls through and returns None
        if type(item) == str :
            return (self.columns)[item]
        elif type(item) == int : # TODO int?
            return OBIView_line(self, item)

    cpdef select_line(self, index_t line_nb) :
        if obi_select_line(self.pointer, line_nb) < 0 :
            raise Exception("Problem selecting a line")

    cpdef select_lines(self, list line_selection) :
        """
        Select a list of lines in the view.
        Raises MemoryError if the selection buffer can't be allocated and
        Exception if the C call fails.
        """
        cdef index_t* line_selection_p

        # Copy the selection into a C array terminated by -1
        line_selection_p = <index_t*> malloc((len(line_selection) + 1) * sizeof(index_t))
        if line_selection_p == NULL :
            raise MemoryError("Could not allocate the line selection of a view")
        for i in range(len(line_selection)) :
            line_selection_p[i] = line_selection[i] # TODO type problem?
        line_selection_p[len(line_selection)] = -1
        # NOTE(review): this buffer is never freed; confirm that obi_select_lines
        # does not keep the pointer before adding a free() after the call.
        if obi_select_lines(self.pointer, line_selection_p) < 0 :
            raise Exception("Problem selecting a list of lines")

    def __contains__(self, str column_name):
        return (column_name in self.columns)

    def __str__(self) :
        # One line of text per view line, joined once at the end
        cdef list printed_lines
        printed_lines = []
        for line in self.__iter__() :
            printed_lines.append(str(line) + "\n")
        return "".join(printed_lines)
#############################################
cdef class OBIView_NUC_SEQS(OBIView):
    """
    View holding nucleotide sequences.

    On top of OBIView, it keeps direct references to the ID, sequence and
    description columns, refuses to delete them, and returns
    OBI_Nuc_Seq_Stored objects when indexed by line number.
    """

    def __init__(self, OBIDMS dms, str view_name, bint new=False, object view_to_clone=None, list line_selection=None, str comments=""):
        """
        Create (new=True) or open (new=False) a NUC_SEQS view.

        When creating, view_to_clone may be a view name (str) or an OBIView
        instance, and line_selection an optional list of line indices.
        Raises MemoryError if the line selection buffer can't be allocated and
        Exception if the C call returns NULL.
        """
        cdef Obiview_p view = NULL
        cdef int i
        cdef str col_name
        cdef OBIDMS_column_p column_p
        cdef OBIDMS_column_p* column_pp
        cdef OBIDMS_column_header_p header
        cdef index_t* line_selection_p
        cdef object col_capsule

        self.dms = dms

        # Copy the line selection into a C array terminated by -1
        if line_selection is not None :
            line_selection_p = <index_t*> malloc((len(line_selection) + 1) * sizeof(index_t))
            if line_selection_p == NULL :
                raise MemoryError("Could not allocate the line selection of a view")
            for i in range(len(line_selection)) :
                line_selection_p[i] = line_selection[i] # TODO type problem?
            line_selection_p[len(line_selection)] = -1
            # NOTE(review): this buffer is never freed; confirm that the C functions
            # below do not keep the pointer before adding a free() after the call.
        else :
            line_selection_p = NULL

        if new :
            if view_to_clone is not None :
                if type(view_to_clone) == str :
                    view = obi_new_view_nuc_seqs_cloned_from_name(dms.pointer, str2bytes(view_name), str2bytes(view_to_clone), line_selection_p, str2bytes(comments))
                else :
                    view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), (<OBIView> view_to_clone).pointer, line_selection_p, str2bytes(comments))
            else :
                view = obi_new_view_nuc_seqs(dms.pointer, str2bytes(view_name), NULL, line_selection_p, str2bytes(comments))
        else :
            if view_name is not None :
                view = obi_open_view(dms.pointer, str2bytes(view_name))
            else :
                view = obi_open_view(dms.pointer, NULL)

        if view == NULL :
            raise Exception("Error creating/opening view")

        self.pointer = view
        self.name = bytes2str(view.name)
        self.comments = bytes2str(view.comments)

        # go through columns to build list and open python object (TODO make separate function?)
        self.columns = {}
        self.columns_pp = {}
        i = 0
        while i < view.column_count :
            column_pp = <OBIDMS_column_p*> ((view.columns)+i)
            column_p = <OBIDMS_column_p> (view.columns)[i]
            header = (column_p).header
            col_name = bytes2str(header.name)
            col_capsule = PyCapsule_New(column_pp, NULL, NULL) # TODO discuss
            (self.columns_pp)[col_name] = col_capsule
            subclass = OBIDMS_column.get_subclass_type(column_p)
            self.columns[col_name] = subclass(self, col_name)
            i+=1

        # Shortcuts on the three obligatory columns
        self.ids = self.columns[bytes2str(ID_COLUMN)]
        self.sequences = self.columns[bytes2str(NUC_SEQUENCE_COLUMN)]
        self.descriptions = self.columns[bytes2str(DESCRIPTION_COLUMN)]

    cpdef delete_column(self, str column_name) :
        """
        Delete a non-obligatory column from the view, then rebuild the
        capsules of the remaining columns and refresh the pointer held by each
        column object. Raises Exception for an obligatory column or if the C
        call fails.
        """
        cdef int i
        cdef Obiview_p view
        cdef OBIDMS_column_p column_p
        cdef OBIDMS_column_p* column_pp
        cdef OBIDMS_column_header_p header
        cdef str column_n

        if ((column_name == bytes2str(ID_COLUMN)) or (column_name == bytes2str(NUC_SEQUENCE_COLUMN)) or (column_name == bytes2str(DESCRIPTION_COLUMN))) :
            raise Exception("Can't delete an obligatory column from a NUC_SEQS view")

        view = self.pointer

        if obi_view_delete_column(view, str2bytes(column_name)) < 0 :
            raise Exception("Problem deleting a column from a view")

        # Update the dictionaries of column pointers and column objects, and update pointers in column objects (make function?):
        (self.columns).pop(column_name)
        (self.columns_pp).pop(column_name)

        i = 0
        while i < view.column_count :
            column_pp = <OBIDMS_column_p*> ((view.columns)+i)
            column_p = <OBIDMS_column_p> (view.columns)[i]
            header = (column_p).header
            col_name = bytes2str(header.name)
            col_capsule = PyCapsule_New(column_pp, NULL, NULL)
            (self.columns_pp)[col_name] = col_capsule
            i+=1

        for column_n in self.columns :
            (self.columns[column_n]).update_pointer()

    def __getitem__(self, object item) :
        # A str selects a column, an int selects a sequence; any other key type falls through and returns None
        if type(item) == str :
            return (self.columns)[item]
        elif type(item) == int : # TODO int?
            return OBI_Nuc_Seq_Stored(self, item)

    def __setitem__(self, index_t line_idx, OBI_Nuc_Seq sequence_obj) :
        # Copy every key of the sequence object into the line of the view
        for key in sequence_obj :
            self[line_idx][key] = sequence_obj[key]
#############################################
cdef class OBIView_line :
    """
    Proxy on one line of a view: stores the view and the line index, and reads
    or writes the value of that line in a column given its name.
    """

    def __init__(self, OBIView view, index_t line_nb) :
        self.index = line_nb
        self.view = view

    def __getitem__(self, str column_name) :
        return ((self.view).columns)[column_name][self.index]

    def __setitem__(self, str column_name, object value):
        """
        Set the value of this line in a column, creating the column first if
        the view doesn't have it. The type of a created column is guessed from
        the Python type of the value; raises Exception when it can't be guessed.
        """
        # TODO detect multiple elements (dict type)? put somewhere else? but more risky (in get)
        cdef type value_type
        cdef str value_obitype

        if column_name not in self.view :
            if value is None :
                raise Exception("Trying to create a column from a None value (can't guess type)")
            # Exact type comparison on purpose: a bool is not treated as an int
            value_type = type(value)
            if value_type == int :
                value_obitype = 'OBI_INT'
            elif value_type == float :
                value_obitype = 'OBI_FLOAT'
            elif value_type == bool :
                value_obitype = 'OBI_BOOL'
            elif value_type == str :
                if only_ATGC(str2bytes(value)) : # TODO
                    value_obitype = 'OBI_SEQ'
                elif len(value) == 1 :
                    value_obitype = 'OBI_CHAR'
                else :
                    # Any other string, the empty string included, so that the type is always assigned
                    value_obitype = 'OBI_STR'
            else :
                raise Exception("Could not guess the type of a value to create a new column")
            self.view.add_column(column_name, type=value_obitype)

        (((self.view).columns)[column_name]).set_line(self.index, value)

    def __contains__(self, str column_name):
        return (column_name in self.view)

    def __repr__(self):
        # Printed as a dictionary mapping each column name to the value of this line
        cdef dict line
        cdef str column_name
        line = {}
        for column_name in self.view.columns :
            line[column_name] = self[column_name]
        return str(line)
##########################################
cdef class OBIDMS :
@ -49,235 +644,97 @@ cdef class OBIDMS :
# Fill structure and create or open the DMS
self.dms_name = dms_name
self.pointer = obi_dms(<const_char_p> dms_name_b)
if self.pointer == NULL :
raise Exception("Failed opening or creating an OBIDMS")
# def __del__(self) : # TODO problem with closing dir breaking everything
# obi_close_dms(self.pointer)
cpdef close(self) :
    # Close the DMS; a negative return value from the C call is reported as an exception
    closing_status = obi_close_dms(self.pointer)
    if closing_status < 0 :
        raise Exception("Problem closing an OBIDMS")
cpdef dict list(self):
# Declarations
cdef object p
cdef dict dms = {}
cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name) :
    # Build the taxonomy object for this DMS from the taxonomy name
    taxonomy = OBI_Taxonomy(self, taxo_name)
    return taxonomy
cpdef OBIView open_view(self, str view_name) :
    # Open an existing view with the Python class matching the view type
    # recorded in the view informations.
    cdef object view_class
    cdef dict view_infos

    view_infos = self.read_view_infos(view_name)

    # Plain view unless the recorded type is the NUC_SEQS one
    view_class = OBIView
    if view_infos["view_type"] == bytes2str(VIEW_TYPE_NUC_SEQS) :
        view_class = OBIView_NUC_SEQS

    return view_class(self, view_name)
cpdef OBIView new_view(self, str view_name, object view_to_clone=None, list line_selection=None, str view_type=None, str comments="") :
    # Create a new view, optionally cloned from another view and/or restricted
    # to a selection of lines.
    # view_type selects the Python class: the NUC_SEQS type gives an
    # OBIView_NUC_SEQS; no type or any other type gives a plain OBIView, so
    # that the class is assigned on every path.
    cdef object view_class

    if (view_type is not None) and (view_type == bytes2str(VIEW_TYPE_NUC_SEQS)) :
        view_class = OBIView_NUC_SEQS
    else :
        view_class = OBIView

    return view_class(self, view_name, new=True, view_to_clone=view_to_clone, line_selection=line_selection, comments=comments)
cpdef dict read_view_infos(self, str view_name) :
    # Read the informations of all the views and keep the entry of the requested one
    return (self.read_views())[view_name]
cpdef dict read_views(self) : # TODO function that prints the dic nicely and function that prints 1 view. Add column type in col ref
cdef Obiviews_infos_all_p all_views_p
cdef Obiview_infos_p view_p
cdef Column_reference_p column_refs
cdef int nb_views
cdef int i, j
cdef str view_name
cdef str column_name
cdef bytes column_name_b
cdef str data_type
cdef obiversion_t latest_version
cdef size_t line_count
cdef dict views
cdef bytes name_b
p = Path(self.dms_name+'.obidms')
print("{:<25} {:<25} {:<25} {:<25}".format('-Column name-','-Data type-','-Latest version number-', '-Line count of latest version-'))
for entry in p.iterdir():
if entry.suffix == ".obicol":
column_name = entry.stem
column_name_b = str2bytes(column_name)
dms[column_name] = {}
data_type = bytes2str(name_data_type(obi_column_get_data_type_from_name(self.pointer, column_name_b)))
latest_version = obi_column_get_latest_version_from_name(self.pointer, column_name_b)
line_count = obi_column_get_line_count_from_name(self.pointer, column_name_b)
dms[column_name]['data_type'] = data_type
dms[column_name]['latest_version'] = latest_version
dms[column_name]['line_count'] = line_count
print("{:<25} {:<25} {:<25} {:<25}".format(column_name, data_type, latest_version, line_count))
return dms
cpdef OBIDMS_column open_column(self,
str column_name,
bint create=False,
bint clone=False, bint clone_data=True,
obiversion_t version_number=-1,
OBIType_t data_type= <OBIType_t> 0,
size_t nb_lines=0,
size_t nb_elements_per_line=1,
str elements_names=None):
# Declarations
cdef OBIDMS_column column
cdef bytes column_name_b
# Format the character string to send to C function
column_name_b = str2bytes(column_name)
# Get the data type if not provided
if not data_type :
if create :
raise Exception("A data type must be specified")
views = {}
all_views_p = obi_read_views(self.pointer)
if all_views_p == NULL :
raise Exception("No views to read")
nb_views = <int> (all_views_p.header).view_count
for i in range(nb_views) :
view_p = (<Obiview_infos_p> (all_views_p.view_infos)) + i
view_name = bytes2str(view_p.name)
views[view_name] = {}
views[view_name]["comments"] = bytes2str(view_p.comments)
views[view_name]["view_type"] = bytes2str(view_p.view_type)
views[view_name]["column_count"] = <int> view_p.column_count
views[view_name]["line_count"] = <int> view_p.line_count
views[view_name]["view_number"] = <int> view_p.view_number
views[view_name]["created_from"] = bytes2str(view_p.created_from)
views[view_name]["creation_date"] = bytes2str(obi_column_format_date(view_p.creation_date)) # TODO move this function in utils or somethings
if (view_p.all_lines) :
views[view_name]["line_selection"] = None
else :
data_type = obi_column_get_data_type_from_name(self.pointer, column_name_b)
# Open the column with the right subclass depending on the data type and the mode (read-only or writable)
if data_type == 1 :
if (create or clone) :
column = OBIDMS_column_int_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
else :
column = OBIDMS_column_int(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
elif data_type == 2 :
if (create or clone) :
column = OBIDMS_column_float_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
else :
column = OBIDMS_column_float(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
elif data_type == 3 :
if (create or clone) :
column = OBIDMS_column_bool_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
else :
column = OBIDMS_column_bool(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
elif data_type == 4 :
if (create or clone) :
column = OBIDMS_column_char_writable(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
else :
column = OBIDMS_column_char(self, column_name,
create, clone, clone_data,
version_number, data_type,
nb_lines, nb_elements_per_line,
elements_names)
# elif data_type == 5 :
# if (create or clone) :
# column = OBIDMS_column_idx_writable(self, column_name,
# create, clone, clone_data,
# version_number, data_type,
# nb_lines, nb_elements_per_line,
# elements_names)
# else :
# column = OBIDMS_column_idx(self, column_name,
# create, clone, clone_data,
# version_number, data_type,
# nb_lines, nb_elements_per_line,
# elements_names)
else :
raise Exception("Problem with the data type")
return column
cdef class OBIDMS_column :
# Should only be initialized through a subclass
def __init__(self,
OBIDMS dms,
str column_name,
bint create,
bint clone, bint clone_data,
obiversion_t version_number,
OBIType_t type,
size_t nb_lines,
size_t nb_elements_per_line,
str elements_names):
views[view_name]["line_selection"] = {}
views[view_name]["line_selection"]["column_name"] = bytes2str((view_p.line_selection).column_name)
views[view_name]["line_selection"]["version"] = <int> (view_p.line_selection).version
views[view_name]["column_references"] = {}
column_refs = view_p.column_references
for j in range(views[view_name]["column_count"]) :
column_name = bytes2str((column_refs[j]).column_name)
views[view_name]["column_references"][column_name] = {}
views[view_name]["column_references"][column_name]["version"] = column_refs[j].version
# Declarations
cdef bytes column_name_b
cdef bytes dms_name_b
cdef bytes elements_names_b
obi_unmap_read_views(all_views_p);
# Fill structure
self.dms = dms
self.data_type = bytes2str(name_data_type(type))
self.column_name = column_name
# Format the character strings to send them to C functions
column_name_b = str2bytes(column_name)
dms_name_b = str2bytes(self.dms.dms_name)
# Create, clone or open column
if create :
if elements_names == None :
elements_names_b = column_name_b
else :
elements_names_b = str2bytes(elements_names)
self.pointer = obi_create_column(self.dms.pointer, column_name_b, type, nb_lines, nb_elements_per_line, elements_names_b)
else :
if clone :
self.pointer = obi_clone_column(self.dms.pointer, column_name_b, version_number, clone_data)
else :
self.pointer = obi_open_column(self.dms.pointer, column_name_b, version_number)
return views
def __iter__(self):
# Declarations
cdef list elements_names
cdef str element_name
cdef bint multiple_elements
cdef object line # TODO
cdef size_t lines_used
cdef size_t line_nb
# Check if there are multiple elements per line anf if yes, get their names
elements_names = self.get_elements_names()
if len(elements_names) > 1 :
multiple_elements = True
else :
element_name = elements_names[0]
# Yield each line
lines_used = obi_column_get_nb_lines_used(self.pointer)
for line_nb in xrange(lines_used):
if multiple_elements :
line = []
for element_name in elements_names :
line.append(self.get_item(line_nb, element_name))
else :
line = self.get_item(line_nb, element_name)
yield line
def __setitem__(self, size_t line_nb, object value):
self.set_item(line_nb, "", value)
def __getitem__(self, size_t line_nb):
return self.get_item(line_nb, "")
cpdef object get_item(self, size_t line_nb, str element_name):
raise NotImplementedError
cpdef list get_elements_names(self):
cdef bytes elements_names
elements_names = obi_column_get_elements_names(self.pointer)
return (bytes2str(elements_names)).split(';')
cpdef str get_data_type(self):
return self.data_type
cpdef size_t get_nb_lines_used(self):
return obi_column_get_nb_lines_used(self.pointer)

View File

@ -1,16 +1,41 @@
../../../src/obidmscolumn_bool.c
../../../src/obidmscolumn_bool.h
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -1,17 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport obibool_t
from .capi.obidmscolumn cimport OBIDMS_column_p
from ._obidms cimport OBIDMS_column
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_bool(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name)
cpdef set_item(self, size_t line_nb, str element_name, obibool_t value)
cpdef close(self)
cdef class OBIDMS_column_bool_writable(OBIDMS_column_bool):
cpdef set_item(self, size_t line_nb, str element_name, obibool_t value)
cpdef close(self)
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cdef class OBIDMS_column_multi_elts_bool(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -1,11 +1,11 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obibool_with_elt_name, \
obi_column_set_obibool_with_elt_name
from .capi.obiview cimport obi_column_get_obibool_with_elt_name_in_view, \
obi_column_get_obibool_with_elt_idx_in_view, \
obi_column_set_obibool_with_elt_name_in_view, \
obi_column_set_obibool_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIBool_NA
from .capi.obitypes cimport OBIBool_NA, obibool_t
from obitools3.utils cimport str2bytes
@ -13,11 +13,32 @@ from cpython.bool cimport PyBool_FromLong
cdef class OBIDMS_column_bool(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name):
cpdef object get_line(self, index_t line_nb):
cdef obibool_t value
cdef object result
value = obi_column_get_obibool_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obibool_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIBool_NA :
result = None
else :
result = PyBool_FromLong(value)
return result
cpdef set_line(self, index_t line_nb, object value):
if value is None :
value = OBIBool_NA
if obi_column_set_obibool_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, <obibool_t> value) < 0:
raise Exception("Problem setting a value in a column")
cdef class OBIDMS_column_multi_elts_bool(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef obibool_t value
cdef object result
value = obi_column_get_obibool_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIBool_NA :
@ -25,22 +46,32 @@ cdef class OBIDMS_column_bool(OBIDMS_column):
else :
result = PyBool_FromLong(value)
return result
cpdef set_item(self, size_t line_nb, str element_name, obibool_t value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cpdef object get_line(self, index_t line_nb) :
cdef obibool_t value
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = obi_column_get_obibool_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIBool_NA :
value_in_result = None
else :
value_in_result = PyBool_FromLong(value)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False
if all_NA :
result = None
return result
cdef class OBIDMS_column_bool_writable(OBIDMS_column_bool):
cpdef set_item(self, size_t line_nb, str element_name, obibool_t value):
if obi_column_set_obibool_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0 :
cpdef set_item(self, index_t line_nb, str element_name, object value):
if value is None :
value = OBIBool_NA
if obi_column_set_obibool_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), <obibool_t> value) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -1,16 +1,41 @@
../../../src/obidmscolumn_char.c
../../../src/obidmscolumn_char.h
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -1,17 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport obichar_t
from .capi.obidmscolumn cimport OBIDMS_column_p
from ._obidms cimport OBIDMS_column
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_char(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name)
cpdef set_item(self, size_t line_nb, str element_name, bytes value)
cpdef close(self)
cdef class OBIDMS_column_char_writable(OBIDMS_column_char):
cpdef set_item(self, size_t line_nb, str element_name, bytes value)
cpdef close(self)
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cdef class OBIDMS_column_multi_elts_char(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -1,44 +1,76 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obichar_with_elt_name, \
obi_column_set_obichar_with_elt_name
from .capi.obiview cimport obi_column_get_obichar_with_elt_name_in_view, \
obi_column_get_obichar_with_elt_idx_in_view, \
obi_column_set_obichar_with_elt_name_in_view, \
obi_column_set_obichar_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIChar_NA
from .capi.obitypes cimport OBIChar_NA, obichar_t
from obitools3.utils cimport str2bytes
from obitools3.utils cimport str2bytes, bytes2str
cdef class OBIDMS_column_char(OBIDMS_column) :
cpdef object get_item(self, size_t line_nb, str element_name):
cdef char value
cdef class OBIDMS_column_char(OBIDMS_column):
cpdef object get_line(self, index_t line_nb):
cdef obichar_t value
cdef object result
value = obi_column_get_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obichar_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIChar_NA :
result = None
else :
result = bytes2str(value)
return result
cpdef set_line(self, index_t line_nb, object value):
    # Write `value` into element 0 of line `line_nb`, going through the view
    # this column is attached to.
    #
    # value : None, or a str whose first encoded byte is stored.
    # Raises Exception when the C setter returns a negative status.
    #
    # The NA case is resolved into a C-level obichar_t before the call. The
    # previous code rebound `value` to OBIChar_NA and then still passed it
    # through str2bytes(...)[0], so None could never be written as NA.
    cdef obichar_t value_c
    if value is None :
        value_c = OBIChar_NA
    else :
        value_c = str2bytes(value)[0]
    if obi_column_set_obichar_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, value_c) < 0:
        raise Exception("Problem setting a value in a column")
cdef class OBIDMS_column_multi_elts_char(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef obichar_t value
cdef object result
value = obi_column_get_obichar_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIChar_NA :
result = None
else :
result = <bytes> value
result = bytes2str(value)
return result
cpdef set_item(self, size_t line_nb, str element_name, bytes value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_char_writable(OBIDMS_column_char) :
cpdef set_item(self, size_t line_nb, str element_name, bytes value):
if obi_column_set_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value[0]) < 0:
cpdef object get_line(self, index_t line_nb) :
cdef obichar_t value
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = obi_column_get_obichar_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIChar_NA :
value_in_result = None
else :
value_in_result = bytes2str(value)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False
if all_NA :
result = None
return result
cpdef set_item(self, index_t line_nb, str element_name, object value):
    # Write `value` into the element called `element_name` of line `line_nb`,
    # going through the view this column is attached to.
    #
    # value : None, or a str whose first encoded byte is stored.
    # Raises Exception when the C setter returns a negative status.
    #
    # The NA case is resolved into a C-level obichar_t before the call. The
    # previous code rebound `value` to OBIChar_NA and then still passed it
    # through str2bytes(...)[0], so None could never be written as NA.
    cdef obichar_t value_c
    if value is None :
        value_c = OBIChar_NA
    else :
        value_c = str2bytes(value)[0]
    if obi_column_set_obichar_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), value_c) < 0:
        raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -1,16 +1,41 @@
../../../src/obidmscolumn_float.c
../../../src/obidmscolumn_float.h
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/bloom.h
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -1,16 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport obifloat_t
from .capi.obidmscolumn cimport OBIDMS_column_p
from ._obidms cimport OBIDMS_column
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_float(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name)
cpdef set_item(self, size_t line_nb, str element_name, obifloat_t value)
cpdef close(self)
cdef class OBIDMS_column_float_writable(OBIDMS_column_float):
cpdef set_item(self, size_t line_nb, str element_name, obifloat_t value)
cpdef close(self)
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cdef class OBIDMS_column_multi_elts_float(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -1,44 +1,76 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obifloat_with_elt_name, \
obi_column_set_obifloat_with_elt_name
from .capi.obiview cimport obi_column_get_obifloat_with_elt_name_in_view, \
obi_column_get_obifloat_with_elt_idx_in_view, \
obi_column_set_obifloat_with_elt_name_in_view, \
obi_column_set_obifloat_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIFloat_NA
from .capi.obitypes cimport OBIFloat_NA, obifloat_t
from obitools3.utils cimport str2bytes
cdef class OBIDMS_column_float(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name):
cpdef object get_line(self, index_t line_nb):
cdef obifloat_t value
cdef object result
value = obi_column_get_obifloat_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obifloat_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIFloat_NA :
result = None
else :
result = <double> value
return result
cpdef set_line(self, index_t line_nb, object value):
if value is None :
value = OBIFloat_NA
if obi_column_set_obifloat_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, <obifloat_t> value) < 0:
raise Exception("Problem setting a value in a column")
cdef class OBIDMS_column_multi_elts_float(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef obifloat_t value
cdef object result
value = obi_column_get_obifloat_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIFloat_NA :
result = None
else :
result = <float> value
result = <double> value
return result
cpdef set_item(self, size_t line_nb, str element_name, obifloat_t value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_float_writable(OBIDMS_column_float):
cpdef set_item(self, size_t line_nb, str element_name, obifloat_t value):
if obi_column_set_obifloat_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
cpdef object get_line(self, index_t line_nb) :
cdef obifloat_t value
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = obi_column_get_obifloat_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIFloat_NA :
value_in_result = None
else :
value_in_result = <double> value
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False
if all_NA :
result = None
return result
cpdef set_item(self, index_t line_nb, str element_name, object value):
if value is None :
value = OBIFloat_NA
if obi_column_set_obifloat_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), <obifloat_t> value) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -1,16 +0,0 @@
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c

View File

@ -1,16 +0,0 @@
#cython: language_level=3
from .capi.obitypes cimport obiidx_t
from .capi.obidmscolumn cimport OBIDMS_column_p
from ._obidms cimport OBIDMS_column
cdef class OBIDMS_column_idx(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name)
cpdef set_item(self, size_t line_nb, str element_name, obiidx_t value)
cpdef close(self)
cdef class OBIDMS_column_idx_writable(OBIDMS_column_idx):
cpdef set_item(self, size_t line_nb, str element_name, obiidx_t value)
cpdef close(self)

View File

@ -1,46 +0,0 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obiidx_with_elt_name, \
obi_column_set_obiidx_with_elt_name
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIIdx_NA
from obitools3.utils cimport str2bytes
from cpython.int cimport PyInt_FromSsize_t
cdef class OBIDMS_column_idx(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name):
cdef obiidx_t value
cdef object result
value = obi_column_get_obiidx_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIIdx_NA :
result = None
else :
result = PyInt_FromSsize_t(value)
return result
cpdef set_item(self, size_t line_nb, str element_name, obiidx_t value):
raise Exception("Column is read-only")
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_idx_writable(OBIDMS_column_idx):
cpdef set_item(self, size_t line_nb, str element_name, obiidx_t value):
if obi_column_set_obiidx_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -1,16 +1,41 @@
../../../src/obidmscolumn_int.c
../../../src/obidmscolumn_int.h
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/bloom.h
../../../src/bloom.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/private_at_functions.h
../../../src/private_at_functions.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -1,16 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport obiint_t
from .capi.obidmscolumn cimport OBIDMS_column_p
from ._obidms cimport OBIDMS_column
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_int(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name)
cpdef set_item(self, size_t line_nb, str element_name, obiint_t value)
cpdef close(self)
cdef class OBIDMS_column_int_writable(OBIDMS_column_int):
cpdef set_item(self, size_t line_nb, str element_name, obiint_t value)
cpdef close(self)
cpdef object get_line(self, index_t line_nb)
cpdef set_line(self, index_t line_nb, object value)
cdef class OBIDMS_column_multi_elts_int(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name)
cpdef object get_line(self, index_t line_nb)
cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -1,25 +1,44 @@
#cython: language_level=3
from .capi.obidmscolumn cimport obi_close_column,\
obi_truncate_and_close_column, \
obi_column_get_obiint_with_elt_name, \
obi_column_set_obiint_with_elt_name
from .capi.obiview cimport obi_column_get_obiint_with_elt_name_in_view, \
obi_column_get_obiint_with_elt_idx_in_view, \
obi_column_set_obiint_with_elt_name_in_view, \
obi_column_set_obiint_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIInt_NA
from .capi.obitypes cimport OBIInt_NA, obiint_t
from obitools3.utils cimport str2bytes
from cpython.int cimport PyInt_FromLong
from ._obidms cimport OBIDMS_column
cdef class OBIDMS_column_int(OBIDMS_column):
cpdef object get_item(self, size_t line_nb, str element_name):
cpdef object get_line(self, index_t line_nb):
cdef obiint_t value
cdef object result
value = obi_column_get_obiint_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
value = obi_column_get_obiint_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIInt_NA :
result = None
else :
result = PyInt_FromLong(value)
return result
cpdef set_line(self, index_t line_nb, object value):
if value is None :
value = OBIInt_NA
if obi_column_set_obiint_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, <obiint_t> value) < 0:
raise Exception("Problem setting a value in a column")
cdef class OBIDMS_column_multi_elts_int(OBIDMS_column_multi_elts):
cpdef object get_item(self, index_t line_nb, str element_name):
cdef obiint_t value
cdef object result
value = obi_column_get_obiint_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
if obi_errno > 0 :
raise IndexError(line_nb, element_name)
if value == OBIInt_NA :
@ -28,21 +47,32 @@ cdef class OBIDMS_column_int(OBIDMS_column):
result = PyInt_FromLong(value)
return result
cpdef set_item(self, size_t line_nb, str element_name, obiint_t value):
raise Exception("Column is read-only")
cpdef object get_line(self, index_t line_nb) :
cdef obiint_t value
cdef object value_in_result
cdef dict result
cdef index_t i
cdef bint all_NA
result = {}
all_NA = True
for i in range(self.nb_elements_per_line) :
value = obi_column_get_obiint_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
if obi_errno > 0 :
raise IndexError(line_nb)
if value == OBIInt_NA :
value_in_result = None
else :
value_in_result = PyInt_FromLong(value)
result[self.elements_names[i]] = value_in_result
if all_NA and (value_in_result is not None) :
all_NA = False
if all_NA :
result = None # TODO discuss
return result
cpdef close(self):
if obi_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")
cdef class OBIDMS_column_int_writable(OBIDMS_column_int):
cpdef set_item(self, size_t line_nb, str element_name, obiint_t value):
if obi_column_set_obiint_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
cpdef set_item(self, index_t line_nb, str element_name, object value):
if value is None :
value = OBIInt_NA
if obi_column_set_obiint_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), <obiint_t> value) < 0:
raise Exception("Problem setting a value in a column")
cpdef close(self):
if obi_truncate_and_close_column(self.pointer) < 0 :
raise Exception("Problem closing a column")

View File

@ -0,0 +1,41 @@
../../../src/obidmscolumn_seq.c
../../../src/obidmscolumn_seq.h
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -0,0 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_seq(OBIDMS_column):
    # Declaration of the sequence column extension type (subclass of OBIDMS_column).
    # Both accessors work on a whole line identified by its index.

    # Read the value stored at line `line_nb`.
    cpdef object get_line(self, index_t line_nb)

    # Store `value` at line `line_nb`.
    cpdef set_line(self, index_t line_nb, object value)
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
    # Declaration of the sequence column extension type for columns derived
    # from OBIDMS_column_multi_elts; elements are addressed by name.

    # Read the element `element_name` of line `line_nb`.
    cpdef object get_item(self, index_t line_nb, str element_name)

    # Read the whole line `line_nb`.
    cpdef object get_line(self, index_t line_nb)

    # Store `value` in the element `element_name` of line `line_nb`.
    cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -0,0 +1,88 @@
#cython: language_level=3
from .capi.obiview cimport obi_column_get_obiseq_with_elt_name_in_view, \
obi_column_get_obiseq_with_elt_idx_in_view, \
obi_column_set_obiseq_with_elt_name_in_view, \
obi_column_set_obiseq_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBISeq_NA, const_char_p
from obitools3.utils cimport str2bytes, bytes2str
from libc.stdlib cimport free
from libc.string cimport strcmp
cdef class OBIDMS_column_seq(OBIDMS_column):
    # Sequence column accessed through a view, one value per line (element index 0).

    cpdef object get_line(self, index_t line_nb):
        # Return the sequence stored at line `line_nb` as a str, or None when
        # the stored value compares equal to OBISeq_NA.
        # Raises IndexError(line_nb) when obi_errno is set after the read.
        cdef char* value
        cdef object result
        value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
        if obi_errno > 0 :
            raise IndexError(line_nb)
        if strcmp(value, OBISeq_NA) == 0 :
            result = None
        else :
            # The C buffer must be released even if the conversion to str
            # raises, otherwise it leaks.
            # NOTE(review): the flattened source does not show whether free()
            # was inside this branch; it is kept here on the assumption that
            # the NA value is not heap-allocated - confirm against the C API.
            try :
                result = bytes2str(value)
            finally :
                free(value)
        return result

    cpdef set_line(self, index_t line_nb, object value):
        # Store `value` at line `line_nb`; None is written as OBISeq_NA.
        # Raises Exception when the C setter returns a negative status.
        cdef bytes value_b
        if value is None :
            value_b = OBISeq_NA
        else :
            value_b = str2bytes(value)
        if obi_column_set_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, value_b) < 0:
            raise Exception("Problem setting a value in a column")
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
    # Sequence column accessed through a view, with several named elements per line.

    cpdef object get_item(self, index_t line_nb, str element_name):
        # Return the sequence stored in element `element_name` of line
        # `line_nb` as a str, or None when it compares equal to OBISeq_NA.
        # Raises IndexError(line_nb, element_name) when obi_errno is set.
        cdef char* value
        cdef object result
        value = obi_column_get_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
        if obi_errno > 0 :
            raise IndexError(line_nb, element_name)
        if strcmp(value, OBISeq_NA) == 0 :
            result = None
        else :
            # Release the C buffer even if the conversion to str raises.
            # NOTE(review): free() is assumed to belong to this branch only
            # (NA value presumably not heap-allocated) - confirm against the C API.
            try :
                result = bytes2str(value)
            finally :
                free(value)
        return result

    cpdef object get_line(self, index_t line_nb) :
        # Return a dict mapping each name in self.elements_names to the value
        # of that element at line `line_nb` (None for NA elements).
        # When every element is NA, None is returned instead of the dict.
        # Raises IndexError(line_nb) when obi_errno is set after a read.
        cdef char* value
        cdef object value_in_result
        cdef dict result
        cdef index_t i
        cdef bint all_NA
        result = {}
        all_NA = True
        for i in range(self.nb_elements_per_line) :
            value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
            if obi_errno > 0 :
                raise IndexError(line_nb)
            if strcmp(value, OBISeq_NA) == 0 :
                value_in_result = None
            else :
                # Release the C buffer even if the conversion to str raises.
                # NOTE(review): free() is assumed to belong to this branch only - confirm.
                try :
                    value_in_result = bytes2str(value)
                finally :
                    free(value)
            result[self.elements_names[i]] = value_in_result
            if all_NA and (value_in_result is not None) :
                all_NA = False
        if all_NA :
            result = None
        return result

    cpdef set_item(self, index_t line_nb, str element_name, object value):
        # Store `value` in element `element_name` of line `line_nb`;
        # None is written as OBISeq_NA.
        # Raises Exception when the C setter returns a negative status.
        cdef bytes value_b
        if value is None :
            value_b = OBISeq_NA
        else :
            value_b = str2bytes(value)
        if obi_column_set_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), value_b) < 0:
            raise Exception("Problem setting a value in a column")

View File

@ -0,0 +1,41 @@
../../../src/obidmscolumn_str.c
../../../src/obidmscolumn_str.h
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -0,0 +1,14 @@
#cython: language_level=3
from .capi.obitypes cimport index_t
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
cdef class OBIDMS_column_str(OBIDMS_column):
    # Declaration of the character-string column extension type (subclass of
    # OBIDMS_column). Both accessors work on a whole line identified by its index.

    # Read the value stored at line `line_nb`.
    cpdef object get_line(self, index_t line_nb)

    # Store `value` at line `line_nb`.
    cpdef set_line(self, index_t line_nb, object value)
cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
    # Declaration of the character-string column extension type for columns
    # derived from OBIDMS_column_multi_elts; elements are addressed by name.

    # Read the element `element_name` of line `line_nb`.
    cpdef object get_item(self, index_t line_nb, str element_name)

    # Read the whole line `line_nb`.
    cpdef object get_line(self, index_t line_nb)

    # Store `value` in the element `element_name` of line `line_nb`.
    cpdef set_item(self, index_t line_nb, str element_name, object value)

View File

@ -0,0 +1,87 @@
#cython: language_level=3
from .capi.obiview cimport obi_column_get_obistr_with_elt_name_in_view, \
obi_column_get_obistr_with_elt_idx_in_view, \
obi_column_set_obistr_with_elt_name_in_view, \
obi_column_set_obistr_with_elt_idx_in_view
from .capi.obierrno cimport obi_errno
from .capi.obitypes cimport OBIStr_NA, const_char_p
from obitools3.utils cimport str2bytes, bytes2str
from libc.string cimport strcmp
cdef class OBIDMS_column_str(OBIDMS_column):
    # Character-string column accessed through a view, one value per line (element index 0).

    cpdef object get_line(self, index_t line_nb):
        # Return the string stored at line `line_nb`, or None when the stored
        # value compares equal to OBIStr_NA.
        # Raises IndexError(line_nb) when obi_errno is set after the read.
        cdef const_char_p raw
        raw = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
        if obi_errno > 0 :
            raise IndexError(line_nb)
        if strcmp(raw, OBIStr_NA) == 0 :
            return None
        # The buffer is deliberately not freed: per the original author's note
        # it points into a mmapped region of an AVL data file (TODO discuss).
        return bytes2str(raw)

    cpdef set_line(self, index_t line_nb, object value):
        # Store `value` at line `line_nb`; None is written as OBIStr_NA.
        # Raises Exception when the C setter returns a negative status.
        cdef bytes encoded
        cdef int status
        if value is not None :
            encoded = str2bytes(value)
        else :
            encoded = OBIStr_NA
        status = obi_column_set_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, encoded)
        if status < 0 :
            raise Exception("Problem setting a value in a column")
cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
    # Character-string column accessed through a view, with several named elements per line.

    cpdef object get_item(self, index_t line_nb, str element_name):
        # Return the string stored in element `element_name` of line `line_nb`,
        # or None when it compares equal to OBIStr_NA.
        # Raises IndexError(line_nb, element_name) when obi_errno is set.
        cdef const_char_p raw
        raw = obi_column_get_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
        if obi_errno > 0 :
            raise IndexError(line_nb, element_name)
        if strcmp(raw, OBIStr_NA) == 0 :
            return None
        # The buffer is deliberately not freed: per the original author's note
        # it points into a mmapped region of an AVL data file (TODO discuss).
        return bytes2str(raw)

    cpdef object get_line(self, index_t line_nb) :
        # Return a dict mapping each name in self.elements_names to the value
        # of that element at line `line_nb` (None for NA elements).
        # When every element is NA, None is returned instead of the dict.
        # Raises IndexError(line_nb) when obi_errno is set after a read.
        cdef const_char_p raw
        cdef index_t idx
        cdef dict elements = {}
        cdef bint has_value = False
        for idx in range(self.nb_elements_per_line) :
            raw = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, idx)
            if obi_errno > 0 :
                raise IndexError(line_nb)
            if strcmp(raw, OBIStr_NA) != 0 :
                # Buffer not freed: see the original author's note about the
                # pointer targeting a mmapped AVL data file (TODO discuss).
                elements[self.elements_names[idx]] = bytes2str(raw)
                has_value = True
            else :
                elements[self.elements_names[idx]] = None
        if not has_value :
            return None
        return elements

    cpdef set_item(self, index_t line_nb, str element_name, object value):
        # Store `value` in element `element_name` of line `line_nb`;
        # None is written as OBIStr_NA.
        # Raises Exception when the C setter returns a negative status.
        cdef bytes encoded
        cdef int status
        if value is not None :
            encoded = str2bytes(value)
        else :
            encoded = OBIStr_NA
        status = obi_column_set_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), encoded)
        if status < 0 :
            raise Exception("Problem setting a value in a column")

View File

@ -0,0 +1,39 @@
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -0,0 +1,30 @@
#cython: language_level=3
from ._obidms cimport OBIView_line
cdef class OBI_Seq(dict) :
    # Declaration of a dict-derived sequence record with three typed attributes.
    # No set_sequence is declared at this level; OBI_Nuc_Seq declares one.

    cdef str id
    cdef str description
    cdef str sequence

    # Accessors for the identifier.
    cpdef set_id(self, str id)
    cpdef get_id(self)

    # Accessors for the description.
    cpdef set_description(self, str description)
    cpdef get_description(self)

    # Read accessor for the sequence.
    cpdef get_sequence(self)
cdef class OBI_Nuc_Seq(OBI_Seq) :
    # Declaration of the OBI_Seq subclass that adds the sequence setter.

    # Not declared yet (disabled by the original author):
    #cpdef str reverse_complement(self)

    cpdef set_sequence(self, str sequence)
cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
    # Declaration of a sequence record derived from OBIView_line, exposing
    # getter/setter pairs for id, description and sequence.

    # Identifier.
    cpdef set_id(self, str id)
    cpdef get_id(self)

    # Description.
    cpdef set_description(self, str description)
    cpdef get_description(self)

    # Sequence.
    cpdef set_sequence(self, str sequence)
    cpdef get_sequence(self)

    # Not declared yet (disabled by the original author):
#    cpdef str reverse_complement(self)

View File

@ -0,0 +1,75 @@
#cython: language_level=3
from obitools3.utils cimport bytes2str, str2bytes
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
ID_COLUMN, \
DESCRIPTION_COLUMN
cdef class OBI_Seq(dict) :
    # In-memory sequence record. Each setter stores the value both in a typed
    # attribute and in the dict itself, under the matching column name.

    def __init__(self, str id, str seq, str description=None) :
        # set_sequence is not defined in this class body; it is looked up on
        # the instance, so it has to be supplied by the concrete type.
        self.set_id(id)
        self.set_sequence(seq)
        if description is not None :
            self.set_description(description)

    cpdef set_id(self, str id) :
        # Record the identifier as attribute and under the ID_COLUMN key.
        self.id = id
        key = bytes2str(ID_COLUMN)
        self[key] = id

    cpdef get_id(self) :
        # Identifier as last stored by set_id.
        return self.id

    cpdef set_description(self, str description) :
        # Record the description as attribute and under the DESCRIPTION_COLUMN key.
        self.description = description
        key = bytes2str(DESCRIPTION_COLUMN)
        self[key] = description

    cpdef get_description(self) :
        # Description attribute. The original author flagged this with "TODO no".
        return self.description

    cpdef get_sequence(self) :
        # Sequence attribute.
        return self.sequence

    def __str__(self) :
        # The string form is the sequence itself (original author's remark: "or not").
        return self.sequence
cdef class OBI_Nuc_Seq(OBI_Seq) :
    # OBI_Seq subclass that provides the sequence setter.

    cpdef set_sequence(self, str sequence) :
        # Record the sequence as attribute and under the NUC_SEQUENCE_COLUMN key.
        self.sequence = sequence
        key = bytes2str(NUC_SEQUENCE_COLUMN)
        self[key] = sequence

    # Disabled by the original author:
#    cpdef str reverse_complement(self) : TODO in C ?
#        pass
cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
    # Sequence record derived from OBIView_line: every accessor reads or writes
    # the item of `self` keyed by the corresponding column name.

    cpdef set_id(self, str id) :
        # Write the identifier under the ID_COLUMN key.
        column_name = bytes2str(ID_COLUMN)
        self[column_name] = id

    cpdef get_id(self) :
        # Read the identifier from the ID_COLUMN key.
        column_name = bytes2str(ID_COLUMN)
        return self[column_name]

    cpdef set_description(self, str description) :
        # Write the description under the DESCRIPTION_COLUMN key.
        column_name = bytes2str(DESCRIPTION_COLUMN)
        self[column_name] = description

    cpdef get_description(self) :
        # Read the description from the DESCRIPTION_COLUMN key.
        column_name = bytes2str(DESCRIPTION_COLUMN)
        return self[column_name]

    cpdef set_sequence(self, str sequence) :
        # Write the sequence under the NUC_SEQUENCE_COLUMN key.
        column_name = bytes2str(NUC_SEQUENCE_COLUMN)
        self[column_name] = sequence

    cpdef get_sequence(self) :
        # Read the sequence from the NUC_SEQUENCE_COLUMN key.
        column_name = bytes2str(NUC_SEQUENCE_COLUMN)
        return self[column_name]

    # Disabled by the original author:
#    def __str__(self) :
#        return self[bytes2str(NUC_SEQUENCE_COLUMN)]     # or not

#    cpdef str reverse_complement(self) : TODO in C ?
#        pass

    # TODO static method to import?

View File

@ -0,0 +1,39 @@
../../../src/bloom.h
../../../src/bloom.c
../../../src/char_str_indexer.h
../../../src/char_str_indexer.c
../../../src/crc64.h
../../../src/crc64.c
../../../src/dna_seq_indexer.h
../../../src/dna_seq_indexer.c
../../../src/encode.h
../../../src/encode.c
../../../src/MurmurHash2.h
../../../src/murmurhash2.c
../../../src/obiavl.h
../../../src/obiavl.c
../../../src/obiblob_indexer.h
../../../src/obiblob_indexer.c
../../../src/obiblob.h
../../../src/obiblob.c
../../../src/obidebug.h
../../../src/obidms_taxonomy.h
../../../src/obidms_taxonomy.c
../../../src/obidms.h
../../../src/obidms.c
../../../src/obidmscolumn_idx.h
../../../src/obidmscolumn_idx.c
../../../src/obidmscolumn.h
../../../src/obidmscolumn.c
../../../src/obidmscolumndir.h
../../../src/obidmscolumndir.c
../../../src/obierrno.h
../../../src/obierrno.c
../../../src/obilittlebigman.h
../../../src/obilittlebigman.c
../../../src/obitypes.h
../../../src/obitypes.c
../../../src/obiview.h
../../../src/obiview.c
../../../src/utils.h
../../../src/utils.c

View File

@ -0,0 +1,31 @@
#cython: language_level=3
from .capi.obitaxonomy cimport ecotx_t, OBIDMS_taxonomy_p
from libc.stdint cimport int32_t
cdef class OBI_Taxonomy :
    # Declaration of the extension type wrapping an OBIDMS_taxonomy_p.

    # Name given to the taxonomy.
    cdef str name
    # Pointer to the underlying C taxonomy structure.
    cdef OBIDMS_taxonomy_p pointer

    cpdef close(self)
cdef class OBI_Taxon :
    # Declaration of the extension type wrapping one ecotx_t taxon record.
    #
    # NOTE(review): the attributes taxid, rank, farest and parent carry the
    # same names as the cpdef methods declared below; Cython may reject such
    # redeclarations - confirm that this compiles, and rename the attributes
    # together with their uses in the implementation file if it does not.

    # Pointer to the wrapped C record.
    cdef ecotx_t* pointer

    # Values cached from the C record.
    cdef int32_t taxid
    cdef int32_t rank
    cdef int32_t farest
    cdef ecotx_t* parent
    cdef str name

    # Accessor methods.
    cpdef int32_t taxid(self)
    cpdef int32_t rank(self)
    cpdef int32_t farest(self)
    cpdef OBI_Taxon parent(self)

View File

@ -0,0 +1,65 @@
#cython: language_level=3
from obitools3.utils cimport bytes2str, str2bytes
from .capi.obitaxonomy cimport obi_read_taxonomy, \
obi_close_taxonomy, \
obi_taxo_get_taxon_with_taxid
from ._obidms cimport OBIDMS
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
cdef class OBI_Taxonomy :
    # Python-level handle on a taxonomy read from an OBIDMS.

    def __init__(self, OBIDMS dms, str name) :
        # dms  : the OBIDMS whose pointer is handed to obi_read_taxonomy.
        # name : name of the taxonomy to read.
        # Raises Exception when no taxonomy pointer is obtained, so that a
        # NULL pointer is never kept and dereferenced by later calls.
        self.name = name
        self.pointer = obi_read_taxonomy(dms.pointer, str2bytes(name), True) # TODO discuss
        if self.pointer == NULL :
            raise Exception("Error reading the taxonomy")

    def __getitem__(self, object ref):
        # Return the OBI_Taxon designated by `ref`; only int taxids are supported.
        # Raises TypeError for any other reference type (the previous code fell
        # through and yielded None), and KeyError(ref) when the lookup gives a
        # NULL taxon (PyCapsule_New does not accept a NULL pointer).
        cdef ecotx_t* taxon_p
        cdef object taxon_capsule
        if type(ref) != int :
            raise TypeError("Taxonomy references must be integer taxids")
        taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer, ref)
        if taxon_p == NULL :
            raise KeyError(ref)
        # The raw pointer is passed to the OBI_Taxon constructor inside a capsule.
        taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
        return OBI_Taxon(taxon_capsule)

    cpdef close(self) :
        # Close the underlying taxonomy.
        # Raises Exception when obi_close_taxonomy returns a negative status.
        if (obi_close_taxonomy(self.pointer) < 0) :
            raise Exception("Error closing the taxonomy")
cdef class OBI_Taxon :  # dict subclass?
    # Python-level view of one ecotx_t record; fields are copied at construction.
    #
    # NOTE(review): taxid, rank, farest and parent are used here both as
    # attribute names and as method names; confirm that Cython accepts this.

    def __init__(self, object taxon_capsule) :
        # taxon_capsule : unnamed PyCapsule holding an ecotx_t pointer.
        cdef ecotx_t* taxon
        taxon = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)
        self.pointer = taxon
        self.taxid = taxon.taxid
        self.rank = taxon.rank
        self.farest = taxon.farest
        self.parent = taxon.parent
        self.name = bytes2str(taxon.name)

    cpdef int32_t taxid(self):
        # Taxid copied from the C record.
        return self.taxid

    cpdef int32_t rank(self):
        # Rank copied from the C record.
        return self.rank

    cpdef int32_t farest(self):
        # `farest` field copied from the C record (spelling follows the C structure).
        return self.farest

    cpdef OBI_Taxon parent(self):
        # Return the parent taxon wrapped in a new OBI_Taxon, or None when the
        # record has no parent pointer: PyCapsule_New rejects NULL pointers, so
        # wrapping it unconditionally would fail on such a taxon.
        cdef object parent_capsule
        if self.parent == NULL :
            return None
        parent_capsule = PyCapsule_New(self.parent, NULL, NULL)
        return OBI_Taxon(parent_capsule)

View File

@ -8,106 +8,191 @@ from ..capi.obitypes cimport const_char_p, \
obibool_t, \
obichar_t, \
obifloat_t, \
obiidx_t
index_t, \
time_t
cdef extern from "obidmscolumn.h" nogil:
struct OBIDMS_column_t:
pass
struct OBIDMS_column_header_t:
size_t header_size
size_t data_size
index_t line_count
index_t lines_used
index_t nb_elements_per_line
const_char_p elements_names
OBIType_t returned_data_type
OBIType_t stored_data_type
time_t creation_date
obiversion_t version
obiversion_t cloned_from
const_char_p name
const_char_p indexer_name
const_char_p comments
ctypedef OBIDMS_column_header_t* OBIDMS_column_header_p
struct OBIDMS_column_t:
OBIDMS_p dms
OBIDMS_column_header_p header
bint writable
ctypedef OBIDMS_column_t* OBIDMS_column_p
OBIDMS_column_p obi_create_column(OBIDMS_p dms,
const_char_p column_name,
OBIType_t type,
size_t nb_lines,
size_t nb_elements_per_line,
const_char_p elements_names)
size_t obi_column_get_nb_lines_used(OBIDMS_column_p column)
const_char_p obi_column_get_elements_names(OBIDMS_column_p column)
void obi_column_make_unwritable(OBIDMS_column_p column)
index_t nb_lines,
index_t nb_elements_per_line,
const_char_p elements_names,
const_char_p indexer_name,
const_char_p comments)
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
const_char_p column_name,
obiversion_t version_number)
OBIType_t obi_column_get_type(OBIDMS_column_p column)
int obi_close_column(OBIDMS_column_p column)
OBIType_t obi_column_get_data_type_from_name(OBIDMS_p dms,
const_char_p column_name)
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
OBIDMS_column_p line_selection,
const_char_p column_name,
obiversion_t version_number,
bint clone_data)
int obi_truncate_and_close_column(OBIDMS_column_p column)
int obi_close_column(OBIDMS_column_p column)
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms,
const_char_p column_name)
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms,
const_char_p column_name,
obiversion_t version_number)
OBIType_t obi_column_get_data_type_from_name(OBIDMS_p dms,
const_char_p column_name)
size_t obi_column_get_line_count_from_name(OBIDMS_p dms,
const_char_p column_name)
int obi_close_header(OBIDMS_column_header_p header)
char* obi_column_format_date(time_t date)
int obi_select(OBIDMS_column_p line_selection_column, index_t line_to_grep)
cdef extern from "obidmscolumn_int.h" nogil:
int obi_column_set_obiint_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
index_t line_nb,
const_char_p element_name,
obiint_t value)
int obi_column_set_obiint_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
obiint_t value)
obiint_t obi_column_get_obiint_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
index_t line_nb,
const_char_p element_name)
obiint_t obi_column_get_obiint_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
cdef extern from "obidmscolumn_bool.h" nogil:
int obi_column_set_obibool_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
index_t line_nb,
const_char_p element_name,
obibool_t value)
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
obibool_t value)
obibool_t obi_column_get_obibool_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
index_t line_nb,
const_char_p element_name)
obibool_t obi_column_get_obibool_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
cdef extern from "obidmscolumn_char.h" nogil:
int obi_column_set_obichar_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
index_t line_nb,
const_char_p element_name,
obichar_t value)
int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
obichar_t value)
obichar_t obi_column_get_obichar_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
index_t line_nb,
const_char_p element_name)
obichar_t obi_column_get_obichar_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
cdef extern from "obidmscolumn_float.h" nogil:
int obi_column_set_obifloat_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
index_t line_nb,
const_char_p element_name,
obifloat_t value)
int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
obifloat_t value)
obifloat_t obi_column_get_obifloat_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
index_t line_nb,
const_char_p element_name)
cdef extern from "obidmscolumn_idx.h" nogil:
int obi_column_set_obiidx_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
const_char_p element_name,
obiidx_t value)
obiidx_t obi_column_get_obiidx_with_elt_name(OBIDMS_column_p column,
size_t line_nb,
const_char_p element_name)
obifloat_t obi_column_get_obifloat_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
cdef extern from "obidmscolumn_str.h" nogil:
int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
const_char_p value)
int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
const_char_p value)
const_char_p obi_column_get_obistr_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
const_char_p obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)
cdef extern from "obidmscolumn_seq.h" nogil:
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name,
const_char_p value)
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx,
const_char_p value)
char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column,
index_t line_nb,
const_char_p element_name)
char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column,
index_t line_nb,
index_t element_idx)

View File

@ -0,0 +1,42 @@
#cython: language_level=3
from .obitypes cimport const_char_p
from .obidms cimport OBIDMS_p
from libc.stdint cimport int32_t
# Cython declarations for the C taxonomy API of obidms_taxonomy.h.
cdef extern from "obidms_taxonomy.h" nogil:

    # Taxonomy handle; no field is exposed to Cython.
    struct OBIDMS_taxonomy_t
    ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p

    # Taxon record; only the fields listed here are exposed to Cython.
    struct ecotxnode :
        int32_t taxid
        int32_t rank
        int32_t farest
        ecotxnode* parent
        char* name
    ctypedef ecotxnode ecotx_t

    # Reading and closing a taxonomy.
    OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names)
    int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)

    # Taxon lookups; each returns a pointer to a taxon record.
    ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
    ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
    bint obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)
    ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
    ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
    ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
    ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
    ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)

View File

@ -1,12 +1,16 @@
#cython: language_level=3
from libc.stdint cimport int32_t
from libc.stdint cimport int32_t, int64_t
from posix.types cimport time_t
cdef extern from *:
ctypedef char* const_char_p "const char*"
cdef extern from "encode.h" nogil:
bint only_ATGC(const_char_p seq)
cdef extern from "obidmscolumn.h" nogil:
@ -15,13 +19,16 @@ cdef extern from "obidmscolumn.h" nogil:
cdef extern from "obitypes.h" nogil:
enum OBIType: # TODO je sais pas si ça sert de declarer le contenu de l'enum
OBI_VOID = 0,
enum OBIType:
OBI_VOID,
OBI_INT,
OBI_FLOAT,
OBI_BOOL,
OBI_CHAR,
OBI_STR,
OBI_SEQ,
OBI_IDX
ctypedef OBIType OBIType_t
@ -32,12 +39,14 @@ cdef extern from "obitypes.h" nogil:
ctypedef int32_t obiint_t
ctypedef double obifloat_t
ctypedef char obichar_t
ctypedef size_t obiidx_t
ctypedef int64_t index_t
extern obiint_t OBIInt_NA
extern obiidx_t OBIIdx_NA
extern obifloat_t OBIFloat_NA
extern obichar_t OBIChar_NA
extern obibool_t OBIBool_NA
extern obiint_t OBIInt_NA
extern index_t OBIIdx_NA
extern obifloat_t OBIFloat_NA
extern obichar_t OBIChar_NA
extern obibool_t OBIBool_NA
extern const_char_p OBISeq_NA
extern const_char_p OBIStr_NA
char* name_data_type(int data_type)
const_char_p name_data_type(int data_type)

View File

@ -0,0 +1,250 @@
#cython: language_level=3
from .obitypes cimport const_char_p, \
OBIType_t, \
obiversion_t, \
obiint_t, \
obibool_t, \
obichar_t, \
obifloat_t, \
index_t, \
time_t
from ..capi.obidms cimport OBIDMS_p
from ..capi.obidmscolumn cimport OBIDMS_column_p
# Cython declarations for the C view API of obiview.h.
cdef extern from "obiview.h" nogil:

    # String constants exported by the header.
    extern const_char_p VIEW_TYPE_NUC_SEQS
    extern const_char_p NUC_SEQUENCE_COLUMN
    extern const_char_p ID_COLUMN
    extern const_char_p DESCRIPTION_COLUMN

    # Reference to a column: its name and its version.
    struct Column_reference_t :
        const_char_p column_name
        obiversion_t version
    ctypedef Column_reference_t* Column_reference_p

    # An opened view.
    struct Obiview_t :
        OBIDMS_p dms
        const_char_p name
        OBIDMS_column_p line_selection
        OBIDMS_column_p new_line_selection
        OBIDMS_column_p columns
        bint read_only
        Column_reference_t line_selection_reference
        index_t line_count
        int column_count
        const_char_p comments
    ctypedef Obiview_t* Obiview_p

    # Informations stored about one view.
    struct Obiview_infos_t :
        int view_number
        int column_count
        index_t line_count
        const_char_p name
        const_char_p created_from
        time_t creation_date
        bint all_lines
        Column_reference_t line_selection
        Column_reference_p column_references
        const_char_p view_type
        const_char_p comments
    ctypedef Obiview_infos_t* Obiview_infos_p

    # Header of the stored views informations.
    struct Obiviews_header_t :
        size_t header_size
        size_t views_size
        int view_count
    ctypedef Obiviews_header_t* Obiviews_header_p

    # Header plus view informations, as returned by obi_read_views().
    struct Obiviews_infos_all_t :
        Obiviews_header_p header
        Obiview_infos_p view_infos
    ctypedef Obiviews_infos_all_t* Obiviews_infos_all_p

    # View creation and opening.
    Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
    Obiview_p obi_new_view(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
    Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
    Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
    Obiview_p obi_open_view(OBIDMS_p dms, const_char_p view_name)

    # Reading the informations about the views of a DMS.
    Obiviews_infos_all_p obi_read_views(OBIDMS_p dms)
    int obi_unmap_read_views(Obiviews_infos_all_p views)

    # Column management within a view.
    int obi_view_add_column(Obiview_p view,
                            const_char_p column_name,
                            obiversion_t version_number,
                            OBIType_t data_type,
                            index_t nb_lines,
                            index_t nb_elements_per_line,
                            const_char_p elements_names,
                            const_char_p indexer_name,
                            const_char_p comments,
                            bint create)
    int obi_view_delete_column(Obiview_p view, const_char_p column_name)

    # Line selection.
    int obi_select_line(Obiview_p view, index_t line_nb)
    int obi_select_lines(Obiview_p view, index_t* line_nbs)

    # Access to the columns of a view.
    OBIDMS_column_p obi_view_clone_column(Obiview_p view, const_char_p column_name)
    OBIDMS_column_p obi_view_get_column(Obiview_p view, const_char_p column_name)
    OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name)

    # Saving and closing.
    int obi_save_view(Obiview_p view)
    int obi_close_view(Obiview_p view)
    int obi_save_and_close_view(Obiview_p view)

    # obiint_t setters and getters, by element name or element index.
    int obi_column_set_obiint_with_elt_name_in_view(Obiview_p view,
                                                    OBIDMS_column_p column,
                                                    index_t line_nb,
                                                    const_char_p element_name,
                                                    obiint_t value)
    int obi_column_set_obiint_with_elt_idx_in_view(Obiview_p view,
                                                   OBIDMS_column_p column,
                                                   index_t line_nb,
                                                   index_t element_idx,
                                                   obiint_t value)
    obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view,
                                                         OBIDMS_column_p column,
                                                         index_t line_nb,
                                                         const_char_p element_name)
    obiint_t obi_column_get_obiint_with_elt_idx_in_view(Obiview_p view,
                                                        OBIDMS_column_p column,
                                                        index_t line_nb,
                                                        index_t element_idx)

    # obibool_t setters and getters, by element name or element index.
    int obi_column_set_obibool_with_elt_name_in_view(Obiview_p view,
                                                     OBIDMS_column_p column,
                                                     index_t line_nb,
                                                     const_char_p element_name,
                                                     obibool_t value)
    int obi_column_set_obibool_with_elt_idx_in_view(Obiview_p view,
                                                    OBIDMS_column_p column,
                                                    index_t line_nb,
                                                    index_t element_idx,
                                                    obibool_t value)
    obibool_t obi_column_get_obibool_with_elt_name_in_view(Obiview_p view,
                                                           OBIDMS_column_p column,
                                                           index_t line_nb,
                                                           const_char_p element_name)
    obibool_t obi_column_get_obibool_with_elt_idx_in_view(Obiview_p view,
                                                          OBIDMS_column_p column,
                                                          index_t line_nb,
                                                          index_t element_idx)

    # obichar_t setters and getters, by element name or element index.
    int obi_column_set_obichar_with_elt_name_in_view(Obiview_p view,
                                                     OBIDMS_column_p column,
                                                     index_t line_nb,
                                                     const_char_p element_name,
                                                     obichar_t value)
    int obi_column_set_obichar_with_elt_idx_in_view(Obiview_p view,
                                                    OBIDMS_column_p column,
                                                    index_t line_nb,
                                                    index_t element_idx,
                                                    obichar_t value)
    obichar_t obi_column_get_obichar_with_elt_name_in_view(Obiview_p view,
                                                           OBIDMS_column_p column,
                                                           index_t line_nb,
                                                           const_char_p element_name)
    obichar_t obi_column_get_obichar_with_elt_idx_in_view(Obiview_p view,
                                                          OBIDMS_column_p column,
                                                          index_t line_nb,
                                                          index_t element_idx)

    # obifloat_t setters and getters, by element name or element index.
    int obi_column_set_obifloat_with_elt_name_in_view(Obiview_p view,
                                                      OBIDMS_column_p column,
                                                      index_t line_nb,
                                                      const_char_p element_name,
                                                      obifloat_t value)
    int obi_column_set_obifloat_with_elt_idx_in_view(Obiview_p view,
                                                     OBIDMS_column_p column,
                                                     index_t line_nb,
                                                     index_t element_idx,
                                                     obifloat_t value)
    obifloat_t obi_column_get_obifloat_with_elt_name_in_view(Obiview_p view,
                                                             OBIDMS_column_p column,
                                                             index_t line_nb,
                                                             const_char_p element_name)
    obifloat_t obi_column_get_obifloat_with_elt_idx_in_view(Obiview_p view,
                                                            OBIDMS_column_p column,
                                                            index_t line_nb,
                                                            index_t element_idx)

    # "obistr" setters and getters (values passed as const char*).
    int obi_column_set_obistr_with_elt_name_in_view(Obiview_p view,
                                                    OBIDMS_column_p column,
                                                    index_t line_nb,
                                                    const_char_p element_name,
                                                    const_char_p value)
    int obi_column_set_obistr_with_elt_idx_in_view(Obiview_p view,
                                                   OBIDMS_column_p column,
                                                   index_t line_nb,
                                                   index_t element_idx,
                                                   const_char_p value)
    const_char_p obi_column_get_obistr_with_elt_name_in_view(Obiview_p view,
                                                             OBIDMS_column_p column,
                                                             index_t line_nb,
                                                             const_char_p element_name)
    const_char_p obi_column_get_obistr_with_elt_idx_in_view(Obiview_p view,
                                                            OBIDMS_column_p column,
                                                            index_t line_nb,
                                                            index_t element_idx)

    # "obiseq" setters and getters. Unlike the obistr getters, these getters
    # return a non-const char*.
    # NOTE(review): that probably means the caller owns the returned buffer; confirm in obiview.h.
    int obi_column_set_obiseq_with_elt_name_in_view(Obiview_p view,
                                                    OBIDMS_column_p column,
                                                    index_t line_nb,
                                                    const_char_p element_name,
                                                    const_char_p value)
    int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view,
                                                   OBIDMS_column_p column,
                                                   index_t line_nb,
                                                   index_t element_idx,
                                                   const_char_p value)
    char* obi_column_get_obiseq_with_elt_name_in_view(Obiview_p view,
                                                      OBIDMS_column_p column,
                                                      index_t line_nb,
                                                      const_char_p element_name)
    char* obi_column_get_obiseq_with_elt_idx_in_view(Obiview_p view,
                                                     OBIDMS_column_p column,
                                                     index_t line_nb,
                                                     index_t element_idx)

View File

@ -0,0 +1,199 @@
import sys
import argparse
import time
from obitools3.obidms._obidms import OBIDMS
def bufferedRead(fileobj,size=209715200): ## 200 MB
    """
    Yield the lines of `fileobj` one by one, reading them in batches.

    Each batch is obtained with ``fileobj.readlines(size)``, where `size` is
    the readlines() hint; iteration ends when a batch comes back empty.
    """
    # iter(callable, sentinel) keeps calling readlines() until it returns [].
    for batch in iter(lambda: fileobj.readlines(size), []):
        for line in batch:
            yield line
if __name__ == '__main__':
    # Command-line entry point: loads the records of a sequence file into a
    # new nucleotide-sequence view of the 'tdms' DMS.
    #
    # The parser below assumes 4-line records: line 0 is the header, line 1 is
    # the sequence, and lines 2 and 3 are ignored. The record id is the first
    # space-delimited word of the header, without its leading marker character.
    # (Earlier prototypes that also parsed "key=value" header attributes were
    # only ever present as commented-out code and have been removed.)

    parser = argparse.ArgumentParser(description='Convert a fasta file in an OBIDMS.')

    # required=True: without a file name, open() would fail with an opaque TypeError.
    parser.add_argument('-i', '--input', dest='input_file', type=str,
                        required=True,
                        help='Name of the file containing the sequences')

    args = parser.parse_args()

    d = OBIDMS('tdms')

    view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW")

    record_count = 0

    # 'with' guarantees that the input file is closed even if the import fails.
    with open(args.input_file, 'r') as input_file:

        for line_number, line in enumerate(bufferedRead(input_file)):

            position_in_record = line_number % 4

            if position_in_record == 0:
                # Progress report every 500000 records.
                if not (record_count % 500000):
                    print(str(time.time())+'\t'+str(record_count))

                # Strip the end of line first: a header without any space
                # would otherwise keep its newline in the id.
                seq_id = line.rstrip("\r\n").split(" ", 1)[0][1:]
                view[record_count].set_id(seq_id)

            elif position_in_record == 1:
                # rstrip() instead of [:-1]: the last line of a file may not
                # end with a newline, and [:-1] would then drop a nucleotide.
                view[record_count].set_sequence(line.rstrip("\r\n"))
                record_count += 1

    print(view.__repr__())

    view.save_and_close()
    d.close()

    print("Done.")

View File

@ -0,0 +1,311 @@
import os
import sys
import shutil
import unittest
from random import randint, uniform, choice
import string
from obitools3.obidms._obidms import OBIDMS
# Number of lines written by the tests on columns with one element per line.
LINE_COUNT_FOR_TEST_COLUMN = 10000 # TODO randomize?
# Number of lines written by the tests on columns with several elements per line.
SMALLER_LINE_COUNT_FOR_TEST_COLUMN = 1000 # TODO randomize?
# Number of elements per line of the multiple-elements test columns.
NB_ELEMENTS_PER_LINE = 10 # TODO randomize?
# Name of the DMS opened by create_test_obidms().
DMS_NAME = "unit_test_dms"
def create_test_obidms():
    """
    Open the DMS used by the unit tests.

    Returns a tuple (dms, dms_name, dms_dir_name) where dms_dir_name is the
    DMS name with the '.obidms' suffix.
    """
    directory_name = DMS_NAME + '.obidms'
    return (OBIDMS(DMS_NAME), DMS_NAME, directory_name)
def create_test_column(dms, data_type, multiple_elements_per_line=False):
    """
    Create the test column "unit_test_<data_type>" in `dms`.

    Returns (column, column_name) for a column with one element per line, or
    (column, column_name, elements_names) when `multiple_elements_per_line`
    is true.
    """
    col_name = "unit_test_"+data_type

    # Simple case first: one element per line, default column layout.
    if not multiple_elements_per_line :
        single_col = dms.open_column(col_name,
                                     create=True,
                                     type=data_type)
        return (single_col, col_name)

    names = elements_names()
    multi_col = dms.open_column(col_name,
                                create=True,
                                type=data_type,
                                nb_elements_per_line=NB_ELEMENTS_PER_LINE,
                                elements_names=names)
    return (multi_col, col_name, names)
def elements_names():
    """Return the element names "0", "1", ... used by multiple-elements test columns."""
    return list(map(str, range(NB_ELEMENTS_PER_LINE)))
def random_obivalue(data_type):
    """
    Return a random value suitable for a column of type `data_type`.

    Supported types: "OBI_INT", "OBI_FLOAT", "OBI_BOOL" (returned as 0 or 1),
    "OBI_CHAR" (one lowercase letter), "OBI_STR" (1 to 200 lowercase letters)
    and "OBI_SEQ" (1 to 200 letters among "atgcryswkmdbhvn").

    Raises ValueError for any other type name; the original silently returned
    None, which the tests would then have written to the column.
    """
    r = 1000000

    def random_word(alphabet):
        # Random word of 1 to 200 letters drawn from `alphabet`.
        length = randint(1,200)
        return ''.join(choice(alphabet) for i in range(length))

    if data_type == "OBI_INT" :
        return randint(-r,r)
    if data_type == "OBI_FLOAT" :
        return uniform(-r,r)
    if data_type == "OBI_BOOL" :
        return randint(0,1)
    if data_type == "OBI_CHAR" :
        return choice(string.ascii_lowercase)
    if data_type == "OBI_STR" :
        return random_word(string.ascii_lowercase)
    if data_type == "OBI_SEQ" :
        return random_word("atgcryswkmdbhvn")

    raise ValueError("Unsupported data type: %r" % (data_type,))
class OBIDMS_Column_TestCase(unittest.TestCase):
def tearDown(self):
self.col.close()
self.dms.close()
shutil.rmtree(self.dms_dir_name, ignore_errors=True)
def test_OBIDMS_column_type(self):
assert self.col.get_data_type() == self.data_type, 'Wrong data type associated with column'
def test_OBIDMS_column_cloning(self):
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
self.col[i]= random_obivalue(self.data_type)
self.col.close()
clone = self.dms.open_column(self.col_name, clone=True)
self.col = self.dms.open_column(self.col_name)
assert clone.get_nb_lines_used() == self.col.get_nb_lines_used(), "Cloned column doesn't have the same number of lines used"
i=0
for i in range(clone.get_nb_lines_used()) :
assert clone[i] == self.col[i], "Different value in original column and cloned column"
assert clone[i] is not None, "None value"
clone.close()
def test_OBIDMS_column_set_and_get(self):
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
v = random_obivalue(self.data_type)
self.col[i] = v
assert self.col[i] == v, "Different value than the set value"
assert self.col[i] is not None, "None value"
def test_OBIDMS_referring_column(self):
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
self.col[i] = random_obivalue(self.data_type)
ref_col = self.dms.open_column(self.col_name, referring=True)
j = 0
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
if i%2 : # TODO randomize
ref_col.grep_line(i)
assert ref_col[j] == self.col[i], "Different value in original column and returned by referring column"
assert ref_col[j] is not None, "None value"
j+=1
class OBIDMS_Column_multiple_elements_TestCase(OBIDMS_Column_TestCase):
    """
    Tests shared by all column types, for columns with several elements per line.

    In addition to the attributes expected by OBIDMS_Column_TestCase,
    subclasses set self.elts_names in setUp(). Lines are set and read as
    dictionaries keyed by element name.
    """

    def _random_line(self):
        """Return a dictionary with one random value per element name."""
        return {e: random_obivalue(self.data_type) for e in self.elts_names}

    def test_OBIDMS_column_cloning(self):
        """A clone has as many used lines as the original, with equal lines."""
        for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
            self.col[i] = self._random_line()
        # The column is closed, then cloned and reopened.
        self.col.close()
        clone = self.dms.open_column(self.col_name, clone=True)
        try:
            self.col = self.dms.open_column(self.col_name)
            self.assertEqual(clone.get_nb_lines_used(), self.col.get_nb_lines_used(),
                             "Cloned column doesn't have the same number of lines used")
            for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
                self.assertEqual(self.col[i], clone[i],
                                 "Different value in original column and cloned column")
                self.assertIsNotNone(self.col[i], "None value")
        finally:
            # Close the clone even when an assertion fails.
            clone.close()

    def test_OBIDMS_column_set_and_get_with_elements_names(self):
        """A value set for (line, element name) is read back unchanged."""
        for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
            for e in range(NB_ELEMENTS_PER_LINE) :
                v = random_obivalue(self.data_type)
                self.col.set_item(i, self.elts_names[e], v)
                self.assertEqual(self.col.get_item(i, self.elts_names[e]), v,
                                 "Different value than the set value")
                self.assertIsNotNone(self.col.get_item(i, self.elts_names[e]), "None value")

    def test_OBIDMS_column_set_and_get(self):
        """A whole line set as a dictionary is read back unchanged."""
        for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
            v = self._random_line()
            self.col[i] = v
            self.assertEqual(self.col[i], v, "Different value than the set value")
            self.assertIsNotNone(self.col[i], "None value")

    def test_OBIDMS_referring_column(self):
        """Lines grepped into a referring column give back the original lines."""
        for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
            self.col[i] = self._random_line()
        ref_col = self.dms.open_column(self.col_name, referring=True)
        try:
            j = 0
            for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
                if i%2 : # TODO randomize
                    ref_col.grep_line(i)
                    self.assertEqual(ref_col[j], self.col[i],
                                     "Different value in original column and returned by referring column")
                    self.assertIsNotNone(ref_col[j], "None value")
                    j+=1
        finally:
            # Close the referring column even when an assertion fails.
            ref_col.close()
def _set_up_column_test(test_case, data_type, multiple_elements_per_line=False):
    """
    Shared setUp body of the per-type test cases below.

    Opens the test DMS and creates the test column of type `data_type`, then
    stores on `test_case` the attributes read by the test methods: data_type,
    dms, dms_name, dms_dir_name, col, col_name and, for multiple-elements
    columns only, elts_names.
    """
    test_case.data_type = data_type
    test_case.dms, \
    test_case.dms_name, \
    test_case.dms_dir_name = create_test_obidms()
    if multiple_elements_per_line :
        test_case.col, \
        test_case.col_name, \
        test_case.elts_names = create_test_column(test_case.dms,
                                                  data_type,
                                                  multiple_elements_per_line=True)
    else :
        test_case.col, \
        test_case.col_name = create_test_column(test_case.dms,
                                                data_type)


class OBIDMS_Column_OBI_INT_TestCase(OBIDMS_Column_TestCase):
    """Column tests for OBI_INT, one element per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_INT')


class OBIDMS_Column_OBI_INT_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
    """Column tests for OBI_INT, several elements per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_INT', multiple_elements_per_line=True)


class OBIDMS_Column_OBI_FLOAT_TestCase(OBIDMS_Column_TestCase):
    """Column tests for OBI_FLOAT, one element per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_FLOAT')


class OBIDMS_Column_OBI_FLOAT_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
    """Column tests for OBI_FLOAT, several elements per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_FLOAT', multiple_elements_per_line=True)


class OBIDMS_Column_OBI_BOOL_TestCase(OBIDMS_Column_TestCase):
    """Column tests for OBI_BOOL, one element per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_BOOL')


class OBIDMS_Column_OBI_BOOL_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
    """Column tests for OBI_BOOL, several elements per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_BOOL', multiple_elements_per_line=True)


class OBIDMS_Column_OBI_CHAR_TestCase(OBIDMS_Column_TestCase):
    """Column tests for OBI_CHAR, one element per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_CHAR')


class OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
    """Column tests for OBI_CHAR, several elements per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_CHAR', multiple_elements_per_line=True)


class OBIDMS_Column_OBI_STR_TestCase(OBIDMS_Column_TestCase):
    """Column tests for OBI_STR, one element per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_STR')


class OBIDMS_Column_OBI_STR_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
    """Column tests for OBI_STR, several elements per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_STR', multiple_elements_per_line=True)


class OBIDMS_Column_OBI_SEQ_TestCase(OBIDMS_Column_TestCase):
    """Column tests for OBI_SEQ, one element per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_SEQ')


class OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
    """Column tests for OBI_SEQ, several elements per line."""
    def setUp(self):
        _set_up_column_test(self, 'OBI_SEQ', multiple_elements_per_line=True)
if __name__ == '__main__':
    # Test cases run by default, in this order, when no test is named on the
    # command line.
    default_test_cases = [
        "OBIDMS_Column_OBI_INT_TestCase",
        "OBIDMS_Column_OBI_INT_multiple_elements_TestCase",
        "OBIDMS_Column_OBI_FLOAT_TestCase",
        "OBIDMS_Column_OBI_FLOAT_multiple_elements_TestCase",
        "OBIDMS_Column_OBI_BOOL_TestCase",
        "OBIDMS_Column_OBI_BOOL_multiple_elements_TestCase",
        "OBIDMS_Column_OBI_CHAR_TestCase",
        "OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase",
        "OBIDMS_Column_OBI_STR_TestCase",
        "OBIDMS_Column_OBI_STR_multiple_elements_TestCase",
        "OBIDMS_Column_OBI_SEQ_TestCase",
        "OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase",
    ]
    unittest.main(verbosity=2, defaultTest=default_test_cases)

View File

@ -1,5 +1,5 @@
major = 1
minor = 1
serial= '16'
major = 0
minor = 0
serial= '0'
version ="%2d.%02d.%s" % (major,minor,serial)
version ="%d.%02d.%s" % (major,minor,serial)

Binary file not shown.

View File

@ -1,5 +1,5 @@
--extra-index-url https://pypi.python.org/simple/
Cython>=0.21
Cython==0.23.5
Sphinx>=1.2.0
ipython>=3.0.0
breathe>=4.0.0

64
src/MurmurHash2.c Executable file
View File

@ -0,0 +1,64 @@
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby
// Note - This code makes one assumption about how your machine behaves -
// 1. sizeof(int) == 4
// (The input is read with memcpy, so the key does not need to be aligned.)
// And it has a few limitations -
// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
// machines.

#include <string.h>

/*
 * Hash the `len` bytes at `key` with the given `seed` and return the 32-bit
 * MurmurHash2 value. Nothing is hashed beyond the seed/length mix when `len`
 * is 0 or negative.
 */
unsigned int murmurhash2(const void * key, int len, const unsigned int seed)
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value
    unsigned int h = seed ^ (unsigned int)len;

    // Mix 4 bytes at a time into the hash
    const unsigned char * data = (const unsigned char *)key;

    while (len >= 4)
    {
        // memcpy instead of *(unsigned int *)data: dereferencing a cast
        // pointer is undefined behavior when `data` is not suitably aligned
        // (and breaks strict aliasing). The bytes are still read in native
        // order, so hash values are unchanged.
        unsigned int k;
        memcpy(&k, data, sizeof k);

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Handle the last few bytes of the input array
    switch (len)
    {
    case 3:
        h ^= (unsigned int)data[2] << 16;
        /* fallthrough */
    case 2:
        h ^= (unsigned int)data[1] << 8;
        /* fallthrough */
    case 1:
        h ^= data[0];
        h *= m;
        break;
    default:
        break;
    }

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}

248
src/bloom.c Executable file
View File

@ -0,0 +1,248 @@
/*
* Copyright (c) 2012-2015, Jyri J. Virkki
* All rights reserved.
*
* This file is under BSD license. See LICENSE file.
*/
/*
* Refer to bloom.h for documentation on the public interfaces.
*/
#include <assert.h>
#include <fcntl.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "bloom.h"
#include "murmurhash2.h"
/* Two-level stringification: MAKESTRING(X) lets X be macro-expanded before
 * STRING turns the result into a string literal (used by bloom_version()). */
#define MAKESTRING(n) STRING(n)
#define STRING(n) #n
/* Only declared on Linux builds, where setup_buckets() calls it when no cache
 * size is given; the definition is not part of this file. */
#ifdef __linux__
unsigned detect_bucket_size(unsigned fallback_size);
#endif
/*
 * Test bit number `x` of the bit array stored in `buf`, viewed as an array
 * of 32-bit words.
 *
 * Returns 1 if the bit is already set. Otherwise returns 0, after setting
 * the bit when `set_bit` is non-zero.
 */
static int test_bit_set_bit(unsigned char * buf, unsigned int x, int set_bit)
{
    uint32_t * word_buf = (uint32_t *)buf;
    unsigned int offset = x >> 5;              /* index of the 32-bit word */
    uint32_t word = word_buf[offset];
    /* Unsigned constant: `1 << 31` on a signed int is undefined behavior. */
    uint32_t mask = UINT32_C(1) << (x & 31u);

    if (word & mask) {
        return 1;
    }

    if (set_bit) {
        word_buf[offset] = word | mask;
    }
    return 0;
}
/*
 * Look up the `len` bytes at `buffer` in the filter and, when `add` is
 * non-zero, set the bits that are not set yet.
 *
 * Returns -1 if the filter is not ready, 1 if every tested bit was already
 * set (element present, or collision), and 0 otherwise.
 */
static int bloom_check_add(struct bloom * bloom,
                           const void * buffer, int len, int add)
{
    if (bloom->ready == 0) {
        (void)printf("bloom at %p not initialized!\n", (void *)bloom);
        return -1;
    }

    /* Two base hashes; the second one is seeded with the first. */
    const unsigned int first_hash = murmurhash2(buffer, len, 0x9747b28c);
    const unsigned int second_hash = murmurhash2(buffer, len, first_hash);

    /* All the bits of one element live in a single bucket, chosen by the first hash. */
    unsigned bucket_index = (first_hash % bloom->buckets);
    unsigned char * bucket_ptr =
        (bloom->bf + (bucket_index << bloom->bucket_bytes_exponent));

    /* Every position is visited, even after a miss, so that all the bits
     * get set when adding. */
    int already_set = 0;
    for (int n = 0; n < bloom->hashes; n++) {
        unsigned int bit = (first_hash + n*second_hash) & bloom->bucket_bits_fast_mod_operand;
        already_set += test_bit_set_bit(bucket_ptr, bit, add);
    }

    return (already_set == bloom->hashes) ? 1 : 0; // 1 == element already in (or collision)
}
/*
 * Split the filter into buckets of `cache_size` bytes and round the filter
 * size up to a whole number of buckets.
 *
 * A `cache_size` of 0 means "not specified": the bucket size is then detected
 * (Linux builds) or set to BLOOM_BUCKET_SIZE_FALLBACK. Reads bloom->bytes and
 * bloom->entries; may update bytes, bits and bpe.
 */
static void setup_buckets(struct bloom * bloom, unsigned int cache_size)
{
    if (cache_size == 0) {
#ifdef __linux__
        cache_size = detect_bucket_size(BLOOM_BUCKET_SIZE_FALLBACK);
#else
        cache_size = BLOOM_BUCKET_SIZE_FALLBACK;
#endif
    }

    /* Values that depend on the bucket size only. */
    bloom->bucket_bytes = cache_size;
    bloom->bucket_bytes_exponent = __builtin_ctz(cache_size);
    bloom->bucket_bits_fast_mod_operand = (cache_size * 8 - 1);

    bloom->buckets = (bloom->bytes / cache_size);

    /* Grow the filter when its size is not a multiple of the bucket size. */
    int excess_bytes = (bloom->bytes % bloom->bucket_bytes);
    if (excess_bytes != 0) {
        bloom->bytes += (bloom->bucket_bytes - excess_bytes);
        assert((bloom->bytes % bloom->bucket_bytes) == 0); // Should get even

        /* Keep the derived fields consistent with the new size. */
        bloom->bits = bloom->bytes * 8;
        bloom->bpe = bloom->bits*1. / bloom->entries;

        /* The partial bucket has become a full one. */
        bloom->buckets++;
    }
}
/*
 * Return the number of bytes of the bit array for a filter of `entries`
 * elements with error rate `error`, rounded up to a multiple of
 * BLOOM_BUCKET_SIZE_FALLBACK. Does not touch any filter.
 */
int bloom_filter_size(int entries, double error)
{
    const double ln2_squared = 0.480453013918201; // ln(2)^2
    const double bits_per_entry = -(log(error) / ln2_squared);
    const int bits = (int)(((double)entries) * bits_per_entry);

    /* Whole bytes needed to hold `bits` bits. */
    int bytes = bits / 8;
    if (bits % 8) {
        bytes = bytes + 1;
    }

    /* Round up to a whole number of buckets. */
    const unsigned bucket_bytes = BLOOM_BUCKET_SIZE_FALLBACK;
    const int not_even_by = bytes % bucket_bytes;
    if (not_even_by) {
        bytes += (bucket_bytes - not_even_by);
    }

    return bytes;
}
/*
 * Sizes and clears a bloom filter for `entries` expected elements at
 * false-positive rate `error`.
 *
 * bloom      - filter to initialize; its bit array `bf` is cleared in place.
 * entries    - expected number of elements, must be >= 1.
 * error      - target false-positive rate, must satisfy 0 < error < 1.
 * cache_size - bucket size in bytes, or 0 to let setup_buckets() choose.
 *
 * Returns 0 on success, 1 if the parameters are rejected (the filter is
 * then left marked as not ready).
 */
int bloom_init_size(struct bloom * bloom, int entries, double error,
                    unsigned int cache_size)
{
  bloom->ready = 0;

  // log(error) must be finite and negative for the sizing formulas to make
  // sense. The range test is negated so that NaN is rejected as well.
  if (entries < 1 || !(error > 0.0 && error < 1.0)) {
    return 1;
  }

  bloom->entries = entries;
  bloom->error = error;

  double num = log(bloom->error);
  double denom = 0.480453013918201; // ln(2)^2
  bloom->bpe = -(num / denom);

  double dentries = (double)entries;
  bloom->bits = (int)(dentries * bloom->bpe);

  // Round the bit count up to whole bytes
  if (bloom->bits % 8) {
    bloom->bytes = (bloom->bits / 8) + 1;
  } else {
    bloom->bytes = bloom->bits / 8;
  }

  bloom->hashes = (int)ceil(0.693147180559945 * bloom->bpe); // ln(2)

  // May pad bloom->bytes up to a whole number of buckets
  setup_buckets(bloom, cache_size);

  // Nothing is allocated here: the bit array is the trailing `bf` member of
  // the struct. Assumes the caller reserved at least bloom->bytes bytes
  // after the struct header - TODO confirm against the allocation site.
  memset(bloom->bf, 0, bloom->bytes);

  bloom->ready = 1;
  return 0;
}
/*
 * Initializes a bloom filter for `entries` expected elements using the
 * built-in error rate BLOOM_FILTER_ERROR_RATE and an automatically chosen
 * bucket size. Returns whatever bloom_init_size() returns.
 */
int bloom_init(struct bloom * bloom, int entries) //, double error)
{
  const unsigned int auto_cache_size = 0; // 0 = let the library choose
  int status = bloom_init_size(bloom, entries, BLOOM_FILTER_ERROR_RATE,
                               auto_cache_size);
  return status;
}
/*
 * Tests whether the `len` bytes at `buffer` are (probably) in the filter,
 * without modifying it. Forwards to bloom_check_add() with add = 0 and
 * returns its result.
 */
int bloom_check(struct bloom * bloom, const void * buffer, int len)
{
  const int add = 0; // look up only
  int found = bloom_check_add(bloom, buffer, len, add);
  return found;
}
/*
 * Inserts the `len` bytes at `buffer` into the filter. Forwards to
 * bloom_check_add() with add = 1 and returns its result.
 */
int bloom_add(struct bloom * bloom, const void * buffer, int len)
{
  const int add = 1; // set the bits while checking them
  int already_in = bloom_check_add(bloom, buffer, len, add);
  return already_in;
}
void bloom_print(struct bloom * bloom)
{
(void)printf("bloom at %p\n", (void *)bloom);
(void)printf(" ->entries = %d\n", bloom->entries);
(void)printf(" ->error = %f\n", bloom->error);
(void)printf(" ->bits = %d\n", bloom->bits);
(void)printf(" ->bits per elem = %f\n", bloom->bpe);
(void)printf(" ->bytes = %d\n", bloom->bytes);
(void)printf(" ->buckets = %u\n", bloom->buckets);
(void)printf(" ->bucket_bytes = %u\n", bloom->bucket_bytes);
(void)printf(" ->bucket_bytes_exponent = %u\n",
bloom->bucket_bytes_exponent);
(void)printf(" ->bucket_bits_fast_mod_operand = 0%o\n",
bloom->bucket_bits_fast_mod_operand);
(void)printf(" ->hash functions = %d\n", bloom->hashes);
}
/*
 * Marks the filter as no longer usable.
 *
 * No memory is released here. The bit array `bf` is the flexible array
 * member at the end of struct bloom, so it lives inside the same allocation
 * as the struct itself; handing it to free() would pass a pointer that was
 * never returned by malloc/calloc, which is undefined behavior. Whoever
 * allocated (or mapped) the struct remains responsible for releasing it.
 */
void bloom_free(struct bloom * bloom)
{
  bloom->ready = 0;
}
/*
 * Returns the library version, i.e. BLOOM_VERSION turned into a string by
 * the MAKESTRING macro.
 */
const char * bloom_version()
{
  const char * version = MAKESTRING(BLOOM_VERSION);
  return version;
}

199
src/bloom.h Executable file
View File

@ -0,0 +1,199 @@
/*
* Copyright (c) 2012-2015, Jyri J. Virkki
* All rights reserved.
*
* This file is under BSD license. See LICENSE file.
*/
#ifndef _BLOOM_H
#define _BLOOM_H
/** ***************************************************************************
* False-positive rate that bloom_init() passes to bloom_init_size().
*
*/
#define BLOOM_FILTER_ERROR_RATE (0.001)
/** ***************************************************************************
* On Linux, the code attempts to compute a bucket size based on CPU cache
* size info, if available. If that fails for any reason, this fallback size
* is used instead.
*
* On non-Linux systems, this is the bucket size always used unless the
* caller overrides it (see bloom_init_size()).
*
*/
#define BLOOM_BUCKET_SIZE_FALLBACK (32 * 1024)
/** ***************************************************************************
* It was found that using multiplier x0.5 for CPU L1 cache size is
* more effective in terms of CPU usage and, surprisingly, collisions
* number.
*
* Feel free to tune this constant the way it will work for you.
*
* NOTE(review): the value defined below is 1, which does not correspond to
* the x0.5 multiplier described above - confirm which one is intended.
*
*/
#define BLOOM_L1_CACHE_SIZE_DIV 1
/** ***************************************************************************
* Structure to keep track of one bloom filter. Caller needs to
* allocate this and pass it to the functions below. First call for
* every struct must be to bloom_init().
*
* The bit array is the flexible array member `bf` at the end of the
* structure, so the allocation made by the caller has to be larger than
* sizeof(struct bloom).
* NOTE(review): presumably the extra room is the value returned by
* bloom_filter_size() - confirm against the allocation site.
*
*/
struct bloom
{
// These fields are part of the public interface of this structure.
// Client code may read these values if desired. Client code MUST NOT
// modify any of these.
// Expected number of elements, as given at initialization.
int entries;
// Requested false-positive rate, as given at initialization.
double error;
// Size of the bit array in bits.
int bits;
// Size of the bit array in bytes (padded to a whole number of buckets).
int bytes;
// Number of hash probes per element: ceil(ln(2) * bpe).
int hashes;
// Fields below are private to the implementation. These may go away or
// change incompatibly at any moment. Client code MUST NOT access or rely
// on these.
// Number of buckets the bit array is divided into.
unsigned buckets;
// Size of one bucket in bytes.
unsigned bucket_bytes;
// x86 CPU divide by/multiply by operation optimization helpers
// Count of trailing zero bits of the bucket size, used as a shift amount.
unsigned bucket_bytes_exponent;
// Number of bits in a bucket minus one, used as an AND mask.
unsigned bucket_bits_fast_mod_operand;
// Bits per element.
double bpe;
// Set to 1 once initialization succeeded, reset to 0 by bloom_free().
int ready;
// The bit array itself (flexible array member, storage follows the struct).
unsigned char bf[];
};
typedef struct bloom bloom_t;
/** ***************************************************************************
* Compute the size in bytes of the bit array needed by a bloom filter.
*
* The size is rounded up to a multiple of BLOOM_BUCKET_SIZE_FALLBACK and
* covers the bit array only, not the struct bloom header.
*
* Parameters:
* -----------
* entries - The expected number of entries which will be inserted.
* error - Probability of collision (as long as entries are not
* exceeded).
*
* Return: number of bytes needed for the bit array
*
*/
int bloom_filter_size(int entries, double error);
/** ***************************************************************************
* Initialize the bloom filter for use.
*
* The filter is initialized with a bit field and number of hash functions
* according to the computations from the wikipedia entry:
* http://en.wikipedia.org/wiki/Bloom_filter
*
* Optimal number of bits is:
* bits = -(entries * ln(error)) / ln(2)^2
*
* Optimal number of hash functions is:
* hashes = bpe * ln(2)
*
* The error rate is not a parameter of this function: the fixed value
* BLOOM_FILTER_ERROR_RATE is used. Call bloom_init_size() to choose it.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* entries - The expected number of entries which will be inserted.
*
* Return:
* -------
* 0 - on success
* 1 - on failure
*
*/
int bloom_init(struct bloom * bloom, int entries); //, double error);
/** ***************************************************************************
* Initialize the bloom filter for use.
*
* See comments above for general information.
*
* This is the same as bloom_init() but allows the caller to pass in a
* cache_size to override the internal value (which is either computed
* or the default of BLOOM_BUCKET_SIZE_FALLBACK). Mostly useful for
* experimenting.
*
* See misc/bucketsize for a script which can help identify a good value
* for cache_size.
*
*/
int bloom_init_size(struct bloom * bloom, int entries, double error,
unsigned int cache_size);
/** ***************************************************************************
* Check if the given element is in the bloom filter. Remember this may
* return a false positive if a collision occurred.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to check.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element is not present
* 1 - element is present (or false positive due to collision)
* -1 - bloom not initialized
*
*/
int bloom_check(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Add the given element to the bloom filter.
* The return code indicates if the element (or a collision) was already in,
* so for the common check+add use case, no need to call check separately.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to add.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element was not present and was added
* 1 - element (or a collision) had already been added previously
* -1 - bloom not initialized
*
*/
int bloom_add(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Print (to stdout) info about this bloom filter. Debugging aid.
*
*/
void bloom_print(struct bloom * bloom);
/** ***************************************************************************
* Mark the bloom filter as no longer in use.
*
* Upon return, the bloom struct is no longer usable. You may call bloom_init
* again on the same struct to reinitialize it again.
*
* NOTE(review): the bit array is the flexible array member `bf` of
* struct bloom, so it is part of the memory the caller allocated for the
* struct rather than separately allocated internal storage; the caller
* stays responsible for releasing that memory - confirm against the
* implementation.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
*
* Return: none
*
*/
void bloom_free(struct bloom * bloom);
/** ***************************************************************************
* Returns version string compiled into library.
*
* Return: version string
*
*/
const char * bloom_version();
#endif

80
src/char_str_indexer.c Normal file
View File

@ -0,0 +1,80 @@
/****************************************************************************
* Character string indexing functions *
****************************************************************************/
/**
* @file char_str_indexer.c
* @author Celine Mercier
* @date April 12th 2016
* @brief Functions handling the indexing and retrieval of character strings.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "obiblob.h"
#include "obiblob_indexer.h"
#include "obidebug.h"
#include "obitypes.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/*
 * Wraps a character string in a blob.
 *
 * The terminating '\0' is stored with the string, so the encoded and the
 * decoded lengths handed to obi_blob() are both strlen(value) + 1.
 *
 * Returns the blob built by obi_blob(), or NULL if it could not be built.
 */
Obi_blob_p obi_str_to_blob(const char* value)
{
    // Number of bytes to store, final '\0' included (makes retrieving faster)
    int32_t    n_bytes = strlen(value) + 1;
    Obi_blob_p blob    = obi_blob((byte_t*)value, ELEMENT_SIZE_STR, n_bytes, n_bytes);

    if (blob != NULL)
        return blob;

    obidebug(1, "\nError encoding a character string in a blob");
    return NULL;
}
/*
 * Gives access to the character string held by a blob.
 *
 * The returned pointer is the blob's own `value` field: nothing is copied.
 */
char* obi_blob_to_str(Obi_blob_p value_b)
{
    char* str = value_b->value;

    return str;
}
/*
 * Stores a character string in an indexer.
 *
 * The string is first wrapped in a temporary blob, which is released once
 * it has been handed to obi_indexer_add().
 *
 * Returns the value returned by obi_indexer_add(), or -1 if the string
 * could not be encoded.
 */
index_t obi_index_char_str(Obi_indexer_p indexer, const char* value)
{
    index_t    position;
    Obi_blob_p blob = obi_str_to_blob(value);

    if (blob == NULL)
        return -1;

    position = obi_indexer_add(indexer, blob);

    // The temporary blob is not needed once the add call has returned
    free(blob);

    return position;
}
/*
 * Fetches the character string stored at index `idx` of an indexer.
 *
 * Returns the string held by the blob found at that index (the blob's own
 * data, not a copy), or NULL if the indexer returned no blob.
 */
char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx)
{
    Obi_blob_p value_b;

    // Get encoded value
    value_b = obi_indexer_get(indexer, idx);

    // A missing blob must not be dereferenced by obi_blob_to_str()
    if (value_b == NULL)
    {
        obidebug(1, "\nError retrieving a character string from an indexer");
        return NULL;
    }

    // Return decoded character string
    return obi_blob_to_str(value_b);
}

61
src/char_str_indexer.h Normal file
View File

@ -0,0 +1,61 @@
/****************************************************************************
* Character string indexer header file *
****************************************************************************/
/**
* @file char_str_indexer.h
* @author Celine Mercier
* @date April 12th 2016
* @brief Header file for the functions handling the indexing of character strings.
*/
#ifndef CHAR_STR_INDEXER_H_
#define CHAR_STR_INDEXER_H_
#include <stdlib.h>
#include <stdio.h>
#include "obitypes.h"
#include "obiblob.h"
#include "obiblob_indexer.h"
/**
 * @brief Converts a character string to a blob.
 *
 * The string is stored with its terminating '\0'.
 *
 * @warning The blob must be freed by the caller.
 *
 * @param value The character string to convert. It is not modified.
 *
 * @returns A pointer to the blob created.
 * @retval NULL if an error occurred.
 *
 * @since October 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
Obi_blob_p obi_str_to_blob(const char* value);
/**
 * @brief Converts a blob to a character string.
 *
 * The string is not copied: the returned pointer refers to the data held
 * by the blob itself.
 *
 * @param value_b The blob to convert.
 *
 * @returns A pointer to the character string contained in the blob.
 *
 * @since October 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_blob_to_str(Obi_blob_p value_b);
/**
 * @brief Stores a character string in an indexer.
 *
 * @param indexer The indexer in which the string is added.
 * @param value The character string to index.
 *
 * @returns The index returned by the indexer for the string.
 * @retval -1 if the string could not be encoded in a blob.
 */
index_t obi_index_char_str(Obi_indexer_p indexer, const char* value);
/**
 * @brief Retrieves the character string stored at a given index of an indexer.
 *
 * The returned pointer refers to the data of the blob returned by the indexer
 * (no copy is made) - presumably it must not be freed by the caller; confirm
 * against the indexer implementation.
 *
 * @param indexer The indexer in which the string is looked up.
 * @param idx The index of the string in the indexer.
 *
 * @returns A pointer to the character string.
 */
char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx);
#endif /* CHAR_STR_INDEXER_H_ */

198
src/crc64.c Normal file
View File

@ -0,0 +1,198 @@
/* Redis uses the CRC64 variant with "Jones" coefficients and init value of 0.
*
* Specification of this CRC64 variant follows:
* Name: crc-64-jones
* Width: 64 bits
* Poly: 0xad93d23594c935a9
* Reflected In: True
* Xor_In: 0xffffffffffffffff
* Reflected_Out: True
* Xor_Out: 0x0
* Check("123456789"): 0xe9c6d914c4b8d9ca
*
* Copyright (c) 2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE. */
#include <stdint.h>
static const uint64_t crc64_tab[256] = {
UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979),
UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b),
UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6),
UINT64_C(0x358804e3f82aa47d), UINT64_C(0x4f50742bc81f2d04),
UINT64_C(0xab28ecb46814fe75), UINT64_C(0xd1f09c7c5821770c),
UINT64_C(0x5e980d24087fec87), UINT64_C(0x24407dec384a65fe),
UINT64_C(0x6b1009c7f05548fa), UINT64_C(0x11c8790fc060c183),
UINT64_C(0x9ea0e857903e5a08), UINT64_C(0xe478989fa00bd371),
UINT64_C(0x7d08ff3b88be6f81), UINT64_C(0x07d08ff3b88be6f8),
UINT64_C(0x88b81eabe8d57d73), UINT64_C(0xf2606e63d8e0f40a),
UINT64_C(0xbd301a4810ffd90e), UINT64_C(0xc7e86a8020ca5077),
UINT64_C(0x4880fbd87094cbfc), UINT64_C(0x32588b1040a14285),
UINT64_C(0xd620138fe0aa91f4), UINT64_C(0xacf86347d09f188d),
UINT64_C(0x2390f21f80c18306), UINT64_C(0x594882d7b0f40a7f),
UINT64_C(0x1618f6fc78eb277b), UINT64_C(0x6cc0863448deae02),
UINT64_C(0xe3a8176c18803589), UINT64_C(0x997067a428b5bcf0),
UINT64_C(0xfa11fe77117cdf02), UINT64_C(0x80c98ebf2149567b),
UINT64_C(0x0fa11fe77117cdf0), UINT64_C(0x75796f2f41224489),
UINT64_C(0x3a291b04893d698d), UINT64_C(0x40f16bccb908e0f4),
UINT64_C(0xcf99fa94e9567b7f), UINT64_C(0xb5418a5cd963f206),
UINT64_C(0x513912c379682177), UINT64_C(0x2be1620b495da80e),
UINT64_C(0xa489f35319033385), UINT64_C(0xde51839b2936bafc),
UINT64_C(0x9101f7b0e12997f8), UINT64_C(0xebd98778d11c1e81),
UINT64_C(0x64b116208142850a), UINT64_C(0x1e6966e8b1770c73),
UINT64_C(0x8719014c99c2b083), UINT64_C(0xfdc17184a9f739fa),
UINT64_C(0x72a9e0dcf9a9a271), UINT64_C(0x08719014c99c2b08),
UINT64_C(0x4721e43f0183060c), UINT64_C(0x3df994f731b68f75),
UINT64_C(0xb29105af61e814fe), UINT64_C(0xc849756751dd9d87),
UINT64_C(0x2c31edf8f1d64ef6), UINT64_C(0x56e99d30c1e3c78f),
UINT64_C(0xd9810c6891bd5c04), UINT64_C(0xa3597ca0a188d57d),
UINT64_C(0xec09088b6997f879), UINT64_C(0x96d1784359a27100),
UINT64_C(0x19b9e91b09fcea8b), UINT64_C(0x636199d339c963f2),
UINT64_C(0xdf7adabd7a6e2d6f), UINT64_C(0xa5a2aa754a5ba416),
UINT64_C(0x2aca3b2d1a053f9d), UINT64_C(0x50124be52a30b6e4),
UINT64_C(0x1f423fcee22f9be0), UINT64_C(0x659a4f06d21a1299),
UINT64_C(0xeaf2de5e82448912), UINT64_C(0x902aae96b271006b),
UINT64_C(0x74523609127ad31a), UINT64_C(0x0e8a46c1224f5a63),
UINT64_C(0x81e2d7997211c1e8), UINT64_C(0xfb3aa75142244891),
UINT64_C(0xb46ad37a8a3b6595), UINT64_C(0xceb2a3b2ba0eecec),
UINT64_C(0x41da32eaea507767), UINT64_C(0x3b024222da65fe1e),
UINT64_C(0xa2722586f2d042ee), UINT64_C(0xd8aa554ec2e5cb97),
UINT64_C(0x57c2c41692bb501c), UINT64_C(0x2d1ab4dea28ed965),
UINT64_C(0x624ac0f56a91f461), UINT64_C(0x1892b03d5aa47d18),
UINT64_C(0x97fa21650afae693), UINT64_C(0xed2251ad3acf6fea),
UINT64_C(0x095ac9329ac4bc9b), UINT64_C(0x7382b9faaaf135e2),
UINT64_C(0xfcea28a2faafae69), UINT64_C(0x8632586aca9a2710),
UINT64_C(0xc9622c4102850a14), UINT64_C(0xb3ba5c8932b0836d),
UINT64_C(0x3cd2cdd162ee18e6), UINT64_C(0x460abd1952db919f),
UINT64_C(0x256b24ca6b12f26d), UINT64_C(0x5fb354025b277b14),
UINT64_C(0xd0dbc55a0b79e09f), UINT64_C(0xaa03b5923b4c69e6),
UINT64_C(0xe553c1b9f35344e2), UINT64_C(0x9f8bb171c366cd9b),
UINT64_C(0x10e3202993385610), UINT64_C(0x6a3b50e1a30ddf69),
UINT64_C(0x8e43c87e03060c18), UINT64_C(0xf49bb8b633338561),
UINT64_C(0x7bf329ee636d1eea), UINT64_C(0x012b592653589793),
UINT64_C(0x4e7b2d0d9b47ba97), UINT64_C(0x34a35dc5ab7233ee),
UINT64_C(0xbbcbcc9dfb2ca865), UINT64_C(0xc113bc55cb19211c),
UINT64_C(0x5863dbf1e3ac9dec), UINT64_C(0x22bbab39d3991495),
UINT64_C(0xadd33a6183c78f1e), UINT64_C(0xd70b4aa9b3f20667),
UINT64_C(0x985b3e827bed2b63), UINT64_C(0xe2834e4a4bd8a21a),
UINT64_C(0x6debdf121b863991), UINT64_C(0x1733afda2bb3b0e8),
UINT64_C(0xf34b37458bb86399), UINT64_C(0x8993478dbb8deae0),
UINT64_C(0x06fbd6d5ebd3716b), UINT64_C(0x7c23a61ddbe6f812),
UINT64_C(0x3373d23613f9d516), UINT64_C(0x49aba2fe23cc5c6f),
UINT64_C(0xc6c333a67392c7e4), UINT64_C(0xbc1b436e43a74e9d),
UINT64_C(0x95ac9329ac4bc9b5), UINT64_C(0xef74e3e19c7e40cc),
UINT64_C(0x601c72b9cc20db47), UINT64_C(0x1ac40271fc15523e),
UINT64_C(0x5594765a340a7f3a), UINT64_C(0x2f4c0692043ff643),
UINT64_C(0xa02497ca54616dc8), UINT64_C(0xdafce7026454e4b1),
UINT64_C(0x3e847f9dc45f37c0), UINT64_C(0x445c0f55f46abeb9),
UINT64_C(0xcb349e0da4342532), UINT64_C(0xb1eceec59401ac4b),
UINT64_C(0xfebc9aee5c1e814f), UINT64_C(0x8464ea266c2b0836),
UINT64_C(0x0b0c7b7e3c7593bd), UINT64_C(0x71d40bb60c401ac4),
UINT64_C(0xe8a46c1224f5a634), UINT64_C(0x927c1cda14c02f4d),
UINT64_C(0x1d148d82449eb4c6), UINT64_C(0x67ccfd4a74ab3dbf),
UINT64_C(0x289c8961bcb410bb), UINT64_C(0x5244f9a98c8199c2),
UINT64_C(0xdd2c68f1dcdf0249), UINT64_C(0xa7f41839ecea8b30),
UINT64_C(0x438c80a64ce15841), UINT64_C(0x3954f06e7cd4d138),
UINT64_C(0xb63c61362c8a4ab3), UINT64_C(0xcce411fe1cbfc3ca),
UINT64_C(0x83b465d5d4a0eece), UINT64_C(0xf96c151de49567b7),
UINT64_C(0x76048445b4cbfc3c), UINT64_C(0x0cdcf48d84fe7545),
UINT64_C(0x6fbd6d5ebd3716b7), UINT64_C(0x15651d968d029fce),
UINT64_C(0x9a0d8ccedd5c0445), UINT64_C(0xe0d5fc06ed698d3c),
UINT64_C(0xaf85882d2576a038), UINT64_C(0xd55df8e515432941),
UINT64_C(0x5a3569bd451db2ca), UINT64_C(0x20ed197575283bb3),
UINT64_C(0xc49581ead523e8c2), UINT64_C(0xbe4df122e51661bb),
UINT64_C(0x3125607ab548fa30), UINT64_C(0x4bfd10b2857d7349),
UINT64_C(0x04ad64994d625e4d), UINT64_C(0x7e7514517d57d734),
UINT64_C(0xf11d85092d094cbf), UINT64_C(0x8bc5f5c11d3cc5c6),
UINT64_C(0x12b5926535897936), UINT64_C(0x686de2ad05bcf04f),
UINT64_C(0xe70573f555e26bc4), UINT64_C(0x9ddd033d65d7e2bd),
UINT64_C(0xd28d7716adc8cfb9), UINT64_C(0xa85507de9dfd46c0),
UINT64_C(0x273d9686cda3dd4b), UINT64_C(0x5de5e64efd965432),
UINT64_C(0xb99d7ed15d9d8743), UINT64_C(0xc3450e196da80e3a),
UINT64_C(0x4c2d9f413df695b1), UINT64_C(0x36f5ef890dc31cc8),
UINT64_C(0x79a59ba2c5dc31cc), UINT64_C(0x037deb6af5e9b8b5),
UINT64_C(0x8c157a32a5b7233e), UINT64_C(0xf6cd0afa9582aa47),
UINT64_C(0x4ad64994d625e4da), UINT64_C(0x300e395ce6106da3),
UINT64_C(0xbf66a804b64ef628), UINT64_C(0xc5bed8cc867b7f51),
UINT64_C(0x8aeeace74e645255), UINT64_C(0xf036dc2f7e51db2c),
UINT64_C(0x7f5e4d772e0f40a7), UINT64_C(0x05863dbf1e3ac9de),
UINT64_C(0xe1fea520be311aaf), UINT64_C(0x9b26d5e88e0493d6),
UINT64_C(0x144e44b0de5a085d), UINT64_C(0x6e963478ee6f8124),
UINT64_C(0x21c640532670ac20), UINT64_C(0x5b1e309b16452559),
UINT64_C(0xd476a1c3461bbed2), UINT64_C(0xaeaed10b762e37ab),
UINT64_C(0x37deb6af5e9b8b5b), UINT64_C(0x4d06c6676eae0222),
UINT64_C(0xc26e573f3ef099a9), UINT64_C(0xb8b627f70ec510d0),
UINT64_C(0xf7e653dcc6da3dd4), UINT64_C(0x8d3e2314f6efb4ad),
UINT64_C(0x0256b24ca6b12f26), UINT64_C(0x788ec2849684a65f),
UINT64_C(0x9cf65a1b368f752e), UINT64_C(0xe62e2ad306bafc57),
UINT64_C(0x6946bb8b56e467dc), UINT64_C(0x139ecb4366d1eea5),
UINT64_C(0x5ccebf68aecec3a1), UINT64_C(0x2616cfa09efb4ad8),
UINT64_C(0xa97e5ef8cea5d153), UINT64_C(0xd3a62e30fe90582a),
UINT64_C(0xb0c7b7e3c7593bd8), UINT64_C(0xca1fc72bf76cb2a1),
UINT64_C(0x45775673a732292a), UINT64_C(0x3faf26bb9707a053),
UINT64_C(0x70ff52905f188d57), UINT64_C(0x0a2722586f2d042e),
UINT64_C(0x854fb3003f739fa5), UINT64_C(0xff97c3c80f4616dc),
UINT64_C(0x1bef5b57af4dc5ad), UINT64_C(0x61372b9f9f784cd4),
UINT64_C(0xee5fbac7cf26d75f), UINT64_C(0x9487ca0fff135e26),
UINT64_C(0xdbd7be24370c7322), UINT64_C(0xa10fceec0739fa5b),
UINT64_C(0x2e675fb4576761d0), UINT64_C(0x54bf2f7c6752e8a9),
UINT64_C(0xcdcf48d84fe75459), UINT64_C(0xb71738107fd2dd20),
UINT64_C(0x387fa9482f8c46ab), UINT64_C(0x42a7d9801fb9cfd2),
UINT64_C(0x0df7adabd7a6e2d6), UINT64_C(0x772fdd63e7936baf),
UINT64_C(0xf8474c3bb7cdf024), UINT64_C(0x829f3cf387f8795d),
UINT64_C(0x66e7a46c27f3aa2c), UINT64_C(0x1c3fd4a417c62355),
UINT64_C(0x935745fc4798b8de), UINT64_C(0xe98f353477ad31a7),
UINT64_C(0xa6df411fbfb21ca3), UINT64_C(0xdc0731d78f8795da),
UINT64_C(0x536fa08fdfd90e51), UINT64_C(0x29b7d047efec8728),
};
/*
 * Computes the CRC64 of the first `l` bytes of `s`, starting from a
 * checksum of 0 and consuming one byte at a time through the crc64_tab
 * lookup table.
 */
uint64_t crc64(const char* s, uint64_t l)
{
    uint64_t crc = 0;
    uint64_t pos = 0;

    while (pos < l)
    {
        uint8_t in   = s[pos++];
        // Table slot: low byte of the running checksum mixed with the input byte
        uint8_t slot = (uint8_t)crc ^ in;

        crc = (crc >> 8) ^ crc64_tab[slot];
    }

    return crc;
}
/* Test main */
//#ifdef TEST_MAIN
//#include <stdio.h>
//int main(void) {
// printf("e9c6d914c4b8d9ca == %016llx\n",
// (unsigned long long) crc64(0,(unsigned char*)"123456789",9));
// return 0;
//}
//#endif

9
src/crc64.h Normal file
View File

@ -0,0 +1,9 @@
/**
 * @file crc64.h
 * @date March 24th 2016
 * @brief Header file for CRC64 function.
 */

#ifndef CRC64_H_
#define CRC64_H_

#include <stdint.h>

/**
 * @brief Computes the CRC64 checksum of a buffer.
 *
 * @param s The buffer to checksum.
 * @param l The number of bytes of the buffer to take into account.
 *
 * @returns The 64-bit checksum.
 */
uint64_t crc64(const char* s, uint64_t l);

#endif /* CRC64_H_ */

102
src/dna_seq_indexer.c Normal file
View File

@ -0,0 +1,102 @@
/****************************************************************************
* DNA sequence indexing functions *
****************************************************************************/
/**
* @file dna_seq_indexer.c
* @author Celine Mercier
* @date April 12th 2016
* @brief Functions handling the indexing and retrieval of DNA sequences.
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "obiblob.h"
#include "obiblob_indexer.h"
#include "obidebug.h"
#include "obitypes.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/*
 * Encodes a DNA sequence and wraps the result in a blob.
 *
 * Sequences made only of a/c/g/t (either case) are packed on 2 bits per
 * nucleotide, all the others on 4 bits per nucleotide.
 *
 * Returns the blob built by obi_blob(), or NULL if the sequence could not
 * be encoded.
 */
Obi_blob_p obi_seq_to_blob(const char* seq)
{
    Obi_blob_p blob;
    byte_t*    packed;
    int32_t    n_bytes;    // length of the encoded sequence in bytes
    int32_t    n_nuc;      // length of the sequence in nucleotides

    n_nuc = strlen(seq);

    if (!only_ATGC(seq))
    {
        // 4 bits per nucleotide: two nucleotides per byte
        packed = encode_seq_on_4_bits(seq, n_nuc);
        if (packed == NULL)
            return NULL;
        n_bytes = ceil((double) n_nuc / (double) 2.0);
        blob = obi_blob(packed, ELEMENT_SIZE_SEQ_4, n_bytes, n_nuc);
    }
    else
    {
        // 2 bits per nucleotide: four nucleotides per byte
        packed = encode_seq_on_2_bits(seq, n_nuc);
        if (packed == NULL)
            return NULL;
        n_bytes = ceil((double) n_nuc / (double) 4.0);
        blob = obi_blob(packed, ELEMENT_SIZE_SEQ_2, n_bytes, n_nuc);
    }

    // The packed buffer is a temporary of this function
    free(packed);

    return blob;
}
/*
 * Decodes the DNA sequence held by a blob.
 *
 * A blob whose element_size is 2 is decoded as a 2-bits-per-nucleotide
 * sequence, any other blob as a 4-bits-per-nucleotide sequence.
 *
 * Returns whatever the selected decoder returns.
 */
char* obi_blob_to_seq(Obi_blob_p value_b)
{
    return (value_b->element_size == 2)
               ? decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value)
               : decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);
}
/*
 * Stores a DNA sequence in an indexer.
 *
 * The sequence is first encoded in a temporary blob, which is released
 * once it has been handed to obi_indexer_add().
 *
 * Returns the value returned by obi_indexer_add(), or -1 if the sequence
 * could not be encoded.
 */
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value)
{
    index_t    position;
    Obi_blob_p blob = obi_seq_to_blob(value);

    if (blob == NULL)
        return -1;

    position = obi_indexer_add(indexer, blob);

    // The temporary blob is not needed once the add call has returned
    free(blob);

    return position;
}
/*
 * Fetches and decodes the DNA sequence stored at index `idx` of an indexer.
 *
 * Returns the decoded sequence produced by obi_blob_to_seq(), or NULL if
 * the indexer returned no blob.
 */
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
{
    Obi_blob_p value_b;

    // Get encoded value
    value_b = obi_indexer_get(indexer, idx);

    // A missing blob must not be dereferenced by obi_blob_to_seq()
    if (value_b == NULL)
    {
        obidebug(1, "\nError retrieving a DNA sequence from an indexer");
        return NULL;
    }

    // Return decoded sequence
    return obi_blob_to_seq(value_b);
}

63
src/dna_seq_indexer.h Normal file
View File

@ -0,0 +1,63 @@
/****************************************************************************
* DNA sequence indexer header file *
****************************************************************************/
/**
* @file dna_seq_indexer.h
* @author Celine Mercier
* @date April 12th 2016
* @brief Header file for the functions handling the indexing of DNA sequences.
*/
#ifndef DNA_SEQ_INDEXER_H_
#define DNA_SEQ_INDEXER_H_
#include <stdlib.h>
#include <stdio.h>
#include "obidms.h"
#include "obitypes.h"
#include "obiblob.h"
#include "obiblob_indexer.h"
/**
* @brief Converts a DNA sequence to a blob.
*
* @warning The blob must be freed by the caller.
*
* @param seq The DNA sequence to convert.
*
* @returns A pointer to the blob created.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p obi_seq_to_blob(const char* seq);
/**
* @brief Converts a blob to a DNA sequence.
*
* @warning The sequence is decoded into a newly allocated character string
* that must be freed by the caller.
*
* @param value_b The blob to convert.
*
* @returns A pointer to the decoded DNA sequence.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* obi_blob_to_seq(Obi_blob_p value_b);
/**
* @brief Stores a DNA sequence in an indexer.
*
* @param indexer The indexer in which the sequence is added.
* @param value The DNA sequence to index.
*
* @returns The index returned by the indexer for the sequence.
* @retval -1 if the sequence could not be encoded in a blob.
*/
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value);
/**
* @brief Retrieves the DNA sequence stored at a given index of an indexer.
*
* @warning The sequence is decoded into a newly allocated character string
* that must be freed by the caller.
*
* @param indexer The indexer in which the sequence is looked up.
* @param idx The index of the sequence in the indexer.
*
* @returns A pointer to the decoded DNA sequence.
*/
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx);
#endif /* DNA_SEQ_INDEXER_H_ */

373
src/encode.c Normal file
View File

@ -0,0 +1,373 @@
/****************************************************************************
* Encoding functions *
****************************************************************************/
/**
* @file encode.c
* @author Celine Mercier
* @date November 18th 2015
* @brief Functions encoding DNA sequences.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <math.h>
#include "encode.h"
#include "obierrno.h"
#include "obitypes.h" // For byte_t type
#include "obidebug.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// TODO: endianness problem?
/*
 * Tells whether a '\0'-terminated string is made only of the characters
 * a, t, g and c, in either case.
 *
 * Returns 1 if so (an empty string qualifies), 0 as soon as any other
 * character is met.
 */
bool only_ATGC(const char* seq)
{
    const char* p;

    for (p = seq; *p != '\0'; p++)
    {
        switch (*p)
        {
            case 'A': case 'T': case 'G': case 'C':
            case 'a': case 't': case 'g': case 'c':
                break;
            default:
                return 0;
        }
    }

    return 1;
}
/*
 * Packs a DNA sequence on 2 bits per nucleotide, four nucleotides per byte,
 * the first nucleotide of each group in the most significant bits. When the
 * length is not a multiple of 4, the last byte is padded with zero bits on
 * its least significant side.
 *
 * seq    - the sequence, made only of a/c/g/t in either case.
 * length - the number of nucleotides to encode.
 *
 * Returns a newly allocated buffer that the caller must free, or NULL
 * (with the obi errno set) if the allocation fails or if a character
 * other than a/c/g/t is met.
 */
byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
{
    byte_t*  seq_b;
    uint8_t  modulo;
    int32_t  length_b;
    int32_t  i;

    length_b = ceil((double) length / (double) 4.0);

    // calloc returns zeroed memory, so all the bits start at 0
    seq_b = (byte_t*) calloc(length_b, sizeof(byte_t));
    if (seq_b == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR); // TODO
        obidebug(1, "\nError allocating memory for an encoded DNA sequence");
        return NULL;
    }

    for (i=0; i<length; i++)
    {
        // Shift of 2 to make place for new nucleotide
        seq_b[i/4] <<= 2;

        // Add new nucleotide
        switch (seq[i])
        {
            case 'a':
            case 'A':
                seq_b[i/4] |= NUC_A_2b;
                break;
            case 'c':
            case 'C':
                seq_b[i/4] |= NUC_C_2b;
                break;
            case 'g':
            case 'G':
                seq_b[i/4] |= NUC_G_2b;
                break;
            case 't':
            case 'T':
                seq_b[i/4] |= NUC_T_2b;
                break;
            default:
                obi_set_errno(OBI_ENCODE_ERROR); // TODO
                obidebug(1, "\nInvalid nucleotide base when encoding (not [atgcATGC])");
                // The partially filled buffer is not handed to the caller
                free(seq_b);
                return NULL;
        }
    }

    // Final shift for the last byte if needed
    modulo = (length % 4);
    if (modulo)
        seq_b[(i-1)/4] <<= (2*(4 - modulo));

    return seq_b;
}
/*
 * Unpacks a DNA sequence stored on 2 bits per nucleotide into a lowercase,
 * '\0'-terminated character string.
 *
 * seq_b      - the packed sequence.
 * length_seq - the number of nucleotides to decode.
 *
 * Returns a newly allocated string that the caller must free, or NULL
 * (with the obi errno set) on error.
 */
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq)
{
    char*    seq;
    int32_t  i;
    uint8_t  shift;
    uint8_t  mask;
    uint8_t  nuc;

    seq = (char*) malloc((length_seq+1) * sizeof(char));
    if (seq == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR); // TODO
        obidebug(1, "\nError allocating memory for a decoded DNA sequence");
        return NULL;
    }

    for (i=0; i<length_seq; i++)
    {
        // Nucleotide i sits in byte i/4, the first one in the 2 highest bits
        shift = 6 - 2*(i % 4);
        mask = NUC_MASK_2B << shift;
        nuc = (seq_b[i/4] & mask) >> shift;

        switch (nuc)
        {
            case NUC_A_2b:
                seq[i] = 'a';
                break;
            case NUC_C_2b:
                seq[i] = 'c';
                break;
            case NUC_G_2b:
                seq[i] = 'g';
                break;
            case NUC_T_2b:
                seq[i] = 't';
                break;
            default:
                obi_set_errno(OBI_DECODE_ERROR); // TODO
                obidebug(1, "\nInvalid nucleotide base when decoding");
                // Do not leak the partially decoded string
                free(seq);
                return NULL;
        }
    }

    seq[length_seq] = '\0';

    return seq;
}
/*
 * Packs a DNA sequence on 4 bits per nucleotide, two nucleotides per byte,
 * the first nucleotide of each pair in the most significant bits. When the
 * length is odd, the last byte is padded with zero bits on its least
 * significant side.
 *
 * seq    - the sequence, made only of the IUPAC codes handled below, in
 *          either case.
 * length - the number of nucleotides to encode.
 *
 * Returns a newly allocated buffer that the caller must free, or NULL
 * (with the obi errno set) if the allocation fails or if an unsupported
 * character is met.
 */
byte_t* encode_seq_on_4_bits(const char* seq, int32_t length)
{
    byte_t*  seq_b;
    uint8_t  modulo;
    int32_t  length_b;
    int32_t  i;

    length_b = ceil((double) length / (double) 2.0);

    // calloc returns zeroed memory, so all the bits start at 0
    seq_b = (byte_t*) calloc(length_b, sizeof(byte_t));
    if (seq_b == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR); // TODO
        obidebug(1, "\nError allocating memory for an encoded DNA sequence");
        return NULL;
    }

    for (i=0; i<length; i++)
    {
        // Shift of 4 to make place for new nucleotide
        seq_b[i/2] <<= 4;

        // Add new nucleotide
        switch (seq[i])
        {
            case 'a':
            case 'A':
                seq_b[i/2] |= NUC_A_4b;
                break;
            case 'c':
            case 'C':
                seq_b[i/2] |= NUC_C_4b;
                break;
            case 'g':
            case 'G':
                seq_b[i/2] |= NUC_G_4b;
                break;
            case 't':
            case 'T':
                seq_b[i/2] |= NUC_T_4b;
                break;
            case 'r':
            case 'R':
                seq_b[i/2] |= NUC_R_4b;
                break;
            case 'y':
            case 'Y':
                seq_b[i/2] |= NUC_Y_4b;
                break;
            case 's':
            case 'S':
                seq_b[i/2] |= NUC_S_4b;
                break;
            case 'w':
            case 'W':
                seq_b[i/2] |= NUC_W_4b;
                break;
            case 'k':
            case 'K':
                seq_b[i/2] |= NUC_K_4b;
                break;
            case 'm':
            case 'M':
                seq_b[i/2] |= NUC_M_4b;
                break;
            case 'b':
            case 'B':
                seq_b[i/2] |= NUC_B_4b;
                break;
            case 'd':
            case 'D':
                seq_b[i/2] |= NUC_D_4b;
                break;
            case 'h':
            case 'H':
                seq_b[i/2] |= NUC_H_4b;
                break;
            case 'v':
            case 'V':
                seq_b[i/2] |= NUC_V_4b;
                break;
            case 'n':
            case 'N':
                seq_b[i/2] |= NUC_N_4b;
                break;
            default:
                obi_set_errno(OBI_ENCODE_ERROR); // TODO
                obidebug(1, "\nInvalid nucleotide base when encoding (not IUPAC)");
                // The partially filled buffer is not handed to the caller
                free(seq_b);
                return NULL;
        }
    }

    // Final shift for the last byte if needed
    modulo = (length % 2);
    if (modulo)
        seq_b[(i-1)/2] <<= (4*modulo);

    return seq_b;
}
/*
 * Unpacks a DNA sequence stored on 4 bits per nucleotide into a lowercase,
 * '\0'-terminated character string.
 *
 * seq_b      - the packed sequence.
 * length_seq - the number of nucleotides to decode.
 *
 * Returns a newly allocated string that the caller must free, or NULL
 * (with the obi errno set) if the allocation fails or if a 4-bit value
 * matching none of the nucleotide codes is met.
 */
char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
{
    char*    seq;
    int32_t  i;
    uint8_t  shift;
    uint8_t  mask;
    uint8_t  nuc;

    seq = (char*) malloc((length_seq+1) * sizeof(char));
    if (seq == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR); // TODO
        obidebug(1, "\nError allocating memory for a decoded DNA sequence");
        return NULL;
    }

    for (i=0; i<length_seq; i++)
    {
        // Nucleotide i sits in byte i/2, the first one in the 4 highest bits
        shift = 4 - 4*(i % 2);
        mask = NUC_MASK_4B << shift;
        nuc = (seq_b[i/2] & mask) >> shift;

        switch (nuc)
        {
            case NUC_A_4b:
                seq[i] = 'a';
                break;
            case NUC_C_4b:
                seq[i] = 'c';
                break;
            case NUC_G_4b:
                seq[i] = 'g';
                break;
            case NUC_T_4b:
                seq[i] = 't';
                break;
            case NUC_R_4b:
                seq[i] = 'r';
                break;
            case NUC_Y_4b:
                seq[i] = 'y';
                break;
            case NUC_S_4b:
                seq[i] = 's';
                break;
            case NUC_W_4b:
                seq[i] = 'w';
                break;
            case NUC_K_4b:
                seq[i] = 'k';
                break;
            case NUC_M_4b:
                seq[i] = 'm';
                break;
            case NUC_B_4b:
                seq[i] = 'b';
                break;
            case NUC_D_4b:
                seq[i] = 'd';
                break;
            case NUC_H_4b:
                seq[i] = 'h';
                break;
            case NUC_V_4b:
                seq[i] = 'v';
                break;
            case NUC_N_4b:
                seq[i] = 'n';
                break;
            default:
                obi_set_errno(OBI_DECODE_ERROR); // TODO
                obidebug(1, "\nInvalid nucleotide base when decoding");
                // Do not leak the partially decoded string
                free(seq);
                return NULL;
        }
    }

    seq[length_seq] = '\0';

    return seq;
}
///////////////////// FOR DEBUGGING ///////////////////////////

/*
 * Writes the `size` bytes starting at `ptr` to stderr as groups of 8 binary
 * digits separated by spaces, framed by newlines.
 *
 * The first byte is printed first (left-most), and within each byte the
 * most significant bit comes first.
 */
void print_bits(void* ptr, int32_t size)
{
    const uint8_t* bytes = (const uint8_t*) ptr;
    int32_t        byte_idx;
    int32_t        bit;

    fprintf(stderr, "\n");

    for (byte_idx = 0; byte_idx < size; byte_idx++)
    {
        for (bit = 7; bit >= 0; bit--)
            fprintf(stderr, "%u", (unsigned) ((bytes[byte_idx] >> bit) & 1));
        fprintf(stderr, " ");
    }

    fprintf(stderr, "\n");
}

190
src/encode.h Normal file
View File

@ -0,0 +1,190 @@
/****************************************************************************
* Encoding header file *
****************************************************************************/
/**
* @file encode.h
* @author Celine Mercier
* @date November 18th 2015
* @brief Header file for encoding DNA sequences.
*/
#ifndef ENCODE_H_
#define ENCODE_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include "obitypes.h"
#define NUC_MASK_2B 0x3 /**< Binary: 11 to use when decoding 2 bits sequences
*/
#define NUC_MASK_4B 0xF /**< Binary: 1111 to use when decoding 4 bits sequences
*/
/**
* @brief enum for the 2-bits codes for each of the 4 nucleotides.
*
* The codes follow the alphabetical order of the nucleotides (A, C, G, T);
* note that G is 10 and T is 11.
*/
enum
{
NUC_A_2b = 0x0, /* binary: 00 */
NUC_C_2b = 0x1, /* binary: 01 */
NUC_G_2b = 0x2, /* binary: 10 */
NUC_T_2b = 0x3, /* binary: 11 */
};
/**
* @brief enum for the 4-bits codes for each of the 15 IUPAC nucleotides.
*
* The value 0x0 is not assigned to any nucleotide.
*/
enum
{
NUC_A_4b = 0x1, /* binary: 0001 */
NUC_C_4b = 0x2, /* binary: 0010 */
NUC_G_4b = 0x3, /* binary: 0011 */
NUC_T_4b = 0x4, /* binary: 0100 */
NUC_R_4b = 0x5, /* binary: 0101 */
NUC_Y_4b = 0x6, /* binary: 0110 */
NUC_S_4b = 0x7, /* binary: 0111 */
NUC_W_4b = 0x8, /* binary: 1000 */
NUC_K_4b = 0x9, /* binary: 1001 */
NUC_M_4b = 0xA, /* binary: 1010 */
NUC_B_4b = 0xB, /* binary: 1011 */
NUC_D_4b = 0xC, /* binary: 1100 */
NUC_H_4b = 0xD, /* binary: 1101 */
NUC_V_4b = 0xE, /* binary: 1110 */
NUC_N_4b = 0xF, /* binary: 1111 */
};
/**
* @brief Checks if there are only 'atgcATGC' characters in a
* character string.
*
* @param seq The sequence to check.
*
* @returns A boolean value indicating if there are only
* 'atgcATGC' characters in a character string.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
bool only_ATGC(const char* seq);
/**
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
*
* A or a : 00
* C or c : 01
* G or g : 10
* T or t : 11
*
* @warning The DNA sequence must contain only 'atgcATGC' characters.
*
* @param seq The sequence to encode.
* @param length The length of the sequence to encode.
*
* @returns The encoded sequence.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
byte_t* encode_seq_on_2_bits(const char* seq, int32_t length);
/**
* @brief Decodes a DNA sequence that is coded with each nucleotide on 2 bits.
*
* 00 -> a
* 01 -> c
* 10 -> g
* 11 -> t
*
* @param seq_b The encoded sequence to decode.
* @param length_seq The initial length of the sequence before it was encoded.
*
* @returns The decoded sequence ended with '\0'.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
/**
* @brief Encodes a DNA sequence with each nucleotide coded on 4 bits.
*
* A or a : 0001
* C or c : 0010
* G or g : 0011
* T or t : 0100
* R or r : 0101
* Y or y : 0110
* S or s : 0111
* W or w : 1000
* K or k : 1001
* M or m : 1010
* B or b : 1011
* D or d : 1100
* H or h : 1101
* V or v : 1110
* N or n : 1111
*
* @warning The DNA sequence must contain only IUPAC characters.
*
* @param seq The sequence to encode.
* @param length The length of the sequence to encode.
*
* @returns The encoded sequence.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
byte_t* encode_seq_on_4_bits(const char* seq, int32_t length);
/**
* @brief Decodes a DNA sequence that is coded with each nucleotide on 4 bits.
*
* A or a : 0001
* C or c : 0010
* G or g : 0011
* T or t : 0100
* R or r : 0101
* Y or y : 0110
* S or s : 0111
* W or w : 1000
* K or k : 1001
* M or m : 1010
* B or b : 1011
* D or d : 1100
* H or h : 1101
* V or v : 1110
* N or n : 1111
*
* @param seq_b The encoded sequence to decode.
* @param length_seq The initial length of the sequence before it was encoded.
*
* @returns The decoded sequence ended with '\0'.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq);
////////// FOR DEBUGGING ///////////
// little endian
void print_bits(void* ptr, int32_t length);
#endif /* ENCODE_H_ */

7
src/murmurhash2.h Executable file
View File

@ -0,0 +1,7 @@
#ifndef _BLOOM_MURMURHASH2
#define _BLOOM_MURMURHASH2
unsigned int murmurhash2(const void * key, int len, const unsigned int seed);
#endif

2284
src/obiavl.c Normal file

File diff suppressed because it is too large Load Diff

404
src/obiavl.h Normal file
View File

@ -0,0 +1,404 @@
/****************************************************************************
* OBIDMS AVL tree header file *
****************************************************************************/
/**
* @file obiavl.h
* @author Celine Mercier
* @date December 3rd 2015
* @brief Header file for handling AVL trees for storing and retrieving blobs (i.e. coding for character strings).
*/
#ifndef OBIAVL_H_
#define OBIAVL_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <time.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdbool.h>
#include "obidms.h"
#include "obiblob.h"
#include "obitypes.h"
#include "bloom.h"
#include "utils.h"
#include "encode.h"
#define MAX_NB_OF_AVLS_IN_GROUP (100) /**< The maximum number of AVL trees in a group
                                       *   (size of the sub_avls array of an AVL group). // TODO discuss
                                       */
#define MAX_NODE_COUNT_PER_AVL (10000000) /**< The maximum number of nodes in an AVL tree.
                                           *   Only used to decide when to create a new AVL in a group,
                                           *   and to initialize the bloom filter. // TODO discuss
                                           */
#define MAX_DATA_SIZE_PER_AVL (1073741824) /**< The maximum size, in bytes, of the data referred to by an AVL tree in a group.
                                            *   Only used to decide when to create a new AVL in a group.
                                            *   Should not be greater than int32_t max (2,147,483,647),
                                            *   as indexes will have to be stored on 32 bits.
                                            *   Here 1073741824 B = 2^30 B = 1 GB.
                                            */
#define AVL_MAX_DEPTH (1024) /**< The maximum depth of an AVL tree. Used to save paths through the tree
                              *   (size of the path_idx and path_dir arrays of an AVL tree structure).
                              */
#define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name, not counting the terminating
                             *   null character (name buffers are declared with AVL_MAX_NAME+1 characters).
                             */
#define AVL_GROWTH_FACTOR (2) /**< The growth factor when an AVL tree is enlarged.
                               */
/**
 * Pointer to the left child of a node in an AVL tree.
 *
 * The expansion refers to a variable named `avl` (pointer to a structure with a `tree`
 * member) that must be in scope where the macro is used. `node` is a pointer to a node.
 * The argument and the whole expansion are parenthesized so that the macro can be used
 * safely inside larger expressions (comparisons, member accesses, ...).
 */
#define LEFT_CHILD(node) ((avl->tree)+((node)->left_child))
/**
 * Pointer to the right child of a node in an AVL tree.
 *
 * Same requirements as LEFT_CHILD: a variable named `avl` must be in scope.
 */
#define RIGHT_CHILD(node) ((avl->tree)+((node)->right_child))
/**
 * @brief AVL tree node structure.
 *
 * Children are referred to by index and not by pointer: the LEFT_CHILD and RIGHT_CHILD
 * macros resolve them by adding the index to the `tree` pointer of the AVL structure.
 */
typedef struct AVL_node {
index_t left_child; /**< Index of the left (lesser) child node. */
index_t right_child; /**< Index of the right (greater) child node. */
int8_t balance_factor; /**< Balance factor of the node. */
index_t value; /**< Index of the value associated with the node in the data array. */
uint64_t crc64; /**< Cyclic Redundancy Check code on 64 bits associated with the value. */
} AVL_node_t, *AVL_node_p;
/**
 * @brief OBIDMS AVL tree data header structure.
 *
 * Header describing the data referred to by an AVL tree.
 */
typedef struct OBIDMS_avl_data_header {
int header_size; /**< Size of the header in bytes. */
index_t data_size_used; /**< Size of the data used in bytes. */
index_t data_size_max; /**< Max size of the data in bytes. */
index_t nb_items; /**< Number of items. */
char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a null-terminated string. */
time_t creation_date; /**< Date of creation of the file. */
} OBIDMS_avl_data_header_t, *OBIDMS_avl_data_header_p;
/**
 * @brief OBIDMS AVL tree data structure.
 *
 * Gives access to the file containing the data referred to by an AVL tree.
 */
typedef struct OBIDMS_avl_data {
OBIDMS_avl_data_header_p header; /**< A pointer to the header of the AVL tree data. */
byte_t* data; /**< A pointer to the beginning of the data. */
int data_fd; /**< File descriptor of the file containing the data. */
} OBIDMS_avl_data_t, *OBIDMS_avl_data_p;
/**
 * @brief OBIDMS AVL tree header structure.
 *
 * Header describing the AVL tree itself (the nodes), as opposed to
 * OBIDMS_avl_data_header which describes the data referred to by the tree.
 */
typedef struct OBIDMS_avl_header {
int header_size; /**< Size of the header in bytes. */
size_t avl_size; /**< Size of the AVL tree in bytes. */
index_t nb_items; /**< Number of items in the AVL tree. */
index_t nb_items_max; /**< Maximum number of items in the AVL tree before it has to be enlarged. */
index_t root_idx; /**< Index of the root of the AVL tree. */
char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a null-terminated string. */
time_t creation_date; /**< Date of creation of the file. */
bloom_t bloom_filter; /**< Bloom filter associated with the AVL tree, enabling to know if a value
                       *   might already be stored in the data associated with the tree.
                       */
} OBIDMS_avl_header_t, *OBIDMS_avl_header_p;
/**
 * @brief OBIDMS AVL tree structure.
 *
 * Refers to an AVL tree as a whole: the tree itself (header and nodes) and the
 * data that the tree references (through the OBIDMS_avl_data structure).
 */
typedef struct OBIDMS_avl {
OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the AVL tree belongs. */
OBIDMS_avl_header_p header; /**< A pointer to the header of the AVL tree. */
struct AVL_node* tree; /**< A pointer to the nodes of the AVL tree. Node indices (children indices
                        *   used by LEFT_CHILD and RIGHT_CHILD) are offsets from this pointer.
                        *   NOTE(review): the index of the root is stored in header->root_idx,
                        *   so this presumably points to the first node of the node array and
                        *   not necessarily to the root -- to be confirmed in obiavl.c.
                        */
index_t path_idx[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of node indices. */
int8_t path_dir[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of directions
                                 *   (0 for left, -1 for right).
                                 */
OBIDMS_avl_data_p data; /**< A pointer to the structure containing the data
                         *   that the AVL tree references.
                         */
DIR* directory; /**< A directory entry usable to
                 *   refer and scan the AVL tree directory.
                 */
int dir_fd; /**< The file descriptor of the directory entry
             *   usable to refer and scan the AVL tree directory.
             */
int avl_fd; /**< The file descriptor of the file containing the AVL tree. */
} OBIDMS_avl_t, *OBIDMS_avl_p;
/**
 * @brief OBIDMS AVL tree group structure.
 *
 * A group is composed of several AVL trees sharing the same base name and
 * differentiated by an index.
 */
typedef struct OBIDMS_avl_group {
OBIDMS_avl_p sub_avls[MAX_NB_OF_AVLS_IN_GROUP]; /**< Array containing the pointers to the AVL trees of the group. */
int current_avl_idx; /**< Index in the sub_avls array of the AVL tree currently being filled. */
char name[AVL_MAX_NAME+1]; /**< Base name of the AVL group. The AVL trees in it have names of the form basename_idx. */
OBIDMS_p dms; /**< Pointer to the OBIDMS structure to which the AVL group belongs. */
bool writable; /**< Indicates whether the AVL group is writable (true) or read-only (false). */
size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL group is used. */
} OBIDMS_avl_group_t, *OBIDMS_avl_group_p;
/**
* @brief Checks if an AVL tree or AVL tree group already exists or not.
*
* @param dms The OBIDMS to which the AVL tree or AVL tree group belongs.
* @param avl_name The name of the AVL tree or the base name of the AVL tree group.
*
* @returns A value indicating whether the AVL tree or AVL tree group exists or not.
* @retval 1 if the AVL tree or AVL tree group exists.
* @retval 0 if the AVL tree or AVL tree group does not exist.
* @retval -1 if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_avl_exists(OBIDMS_p dms, const char* avl_name);
/**
* @brief Creates an AVL tree. Fails if it already exists.
*
* Note: An AVL tree is made of two files (referred to by two structures).
* One file contains the indices referring to the data, and the other
* file contains the data itself. The AVL tree as a whole is referred
* to via the OBIDMS_avl structure. An AVL tree is stored in a directory
* with the same name, or with the base name of the AVL group if it is
* part of an AVL group.
*
* @param dms The OBIDMS to which the AVL tree belongs.
* @param avl_name The name of the AVL tree.
* @param avl_idx The index of the AVL tree if it is part of an AVL group.
*
* @returns A pointer to the newly created AVL tree structure.
* @retval NULL if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
/**
* @brief Opens an AVL tree. Fails if it does not already exist.
*
* Note: An AVL tree is made of two files (referred to by two structures).
* One file contains the indices referring to the data, and the other
* file contains the data itself. The AVL tree as a whole is referred
* to via the OBIDMS_avl structure.
*
* @param dms The OBIDMS to which the AVL tree belongs.
* @param avl_name The name of the AVL tree.
* @param avl_idx The index of the AVL tree if it is part of an AVL group.
*
* @returns A pointer to the AVL tree structure.
* @retval NULL if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
/**
* @brief Opens an AVL tree group and creates it if it does not already exist.
*
* Note: An AVL tree group is composed of multiple AVL trees that all have the
* same base name, and an index differentiating them.
*
* @param dms The OBIDMS to which the AVL tree group belongs.
* @param avl_name The base name of the AVL tree group.
*
* @returns A pointer to the AVL tree group structure.
* @retval NULL if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name);
/**
* @brief Creates an AVL tree group.
*
* Note: An AVL tree group is composed of multiple AVL trees that all have the
* same base name, and an index differentiating them.
*
* @param dms The OBIDMS to which the AVL tree group belongs.
* @param avl_name The base name of the AVL tree group.
*
* @returns A pointer to the AVL tree group structure.
* @retval NULL if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name);
/**
* @brief Opens an AVL tree group.
*
* Note: An AVL tree group is composed of multiple AVL trees that all have the
* same base name, and an index differentiating them.
*
* @param dms The OBIDMS to which the AVL tree group belongs.
* @param avl_name The base name of the AVL tree group.
*
* @returns A pointer to the AVL tree group structure.
* @retval NULL if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_avl_group_p obi_open_avl_group(OBIDMS_p dms, const char* avl_name);
/**
* @brief Closes an AVL tree.
*
* @param avl A pointer to the AVL tree structure to close and free.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_avl(OBIDMS_avl_p avl);
/**
* @brief Closes an AVL tree group.
*
* @param avl_group A pointer to the AVL tree group structure to close and free.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_avl_group(OBIDMS_avl_group_p avl_group);
/**
* @brief Recovers a value (blob) in an AVL tree.
*
* @warning The blob recovered must be decoded to get the original value.
*
* @param avl A pointer to the AVL tree.
* @param index The index of the value in the data array.
*
* @returns A pointer to the blob recovered.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
/**
* @brief Adds a value (blob) in an AVL tree NOT checking first if it is already in it. // TODO to discuss
*
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
*
* @param avl A pointer to the AVL tree.
* @param value The blob to add in the AVL tree.
*
* @returns The index of the value newly added in the AVL tree.
* @retval -1 if an error occurred.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value);
/**
* @brief Finds a value (blob) in an AVL tree.
*
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
*
* @param avl A pointer to the AVL tree.
* @param value The blob to find in the AVL tree.
*
* @returns The data index of the value.
* @retval -1 if the value is not in the tree.
*
* @since December 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value);
/**
* @brief Recovers a value (blob) in an AVL tree group.
*
* @warning The blob recovered must be decoded to get the original value.
*
* @param avl_group A pointer to the AVL tree group.
* @param idx The index of the value in the data array.
*
* @returns A pointer to the blob recovered.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
Obi_blob_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
/**
* @brief Adds a value (blob) in an AVL tree group, checking if it is already in it.
*
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
*
* @param avl_group A pointer to the AVL tree group.
* @param value The blob to add in the AVL tree group.
*
* @returns The index of the value newly added in the AVL tree group.
* @retval -1 if an error occurred.
*
* @since April 2016
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value);
#endif /* OBIAVL_H_ */

57
src/obiblob.c Normal file
View File

@ -0,0 +1,57 @@
/****************************************************************************
* Obiblob functions *
****************************************************************************/
/**
* @file obiblob.c
* @author Celine Mercier
* @date April 11th 2016
* @brief Functions handling Obiblob structures.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h> // For memcpy
#include <math.h>
#include "obiblob.h"
#include "obierrno.h"
#include "obitypes.h" // For byte_t type
#include "obidebug.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// TODO: endianness problem?
/**
 * Builds a blob from an already encoded value.
 *
 * The blob is allocated with malloc() as a single chunk holding the Obi_blob_t
 * header followed by a copy of the encoded bytes.
 *
 * @param encoded_value The encoded value to copy into the blob.
 * @param element_size The number of bits on which each element is encoded.
 * @param length_encoded_value The length (in bytes) of the encoded value.
 * @param length_decoded_value The initial length (in bytes) of the decoded value.
 *
 * @returns A pointer to the newly allocated blob.
 * @retval NULL if the encoded length is negative, if encoded_value is NULL while
 *         the encoded length is not 0, or if the memory could not be allocated.
 */
Obi_blob_p obi_blob(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value)
{
    Obi_blob_p blob;

    // Refuse values that can not be copied: a negative length would be converted to a
    // huge unsigned size in the allocation and in the copy below
    if ((length_encoded_value < 0) || ((encoded_value == NULL) && (length_encoded_value > 0)))
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError creating a blob: invalid encoded value or length");
        return NULL;
    }

    // Allocate the memory for the blob structure and the encoded value that follows it
    blob = (Obi_blob_p) malloc(sizeof(Obi_blob_t) + (size_t) length_encoded_value);
    if (blob == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a blob");
        return NULL;
    }

    // Store the number of bits on which each element is encoded
    blob->element_size = element_size;

    // Store the length (in bytes) of the encoded value
    blob->length_encoded_value = length_encoded_value;

    // Store the initial length (in bytes) of the decoded value
    blob->length_decoded_value = length_decoded_value;

    // Store the encoded value (memcpy must not be given a NULL source, even for 0 bytes)
    if (length_encoded_value > 0)
        memcpy(blob->value, encoded_value, (size_t) length_encoded_value);

    return blob;
}

54
src/obiblob.h Normal file
View File

@ -0,0 +1,54 @@
/****************************************************************************
* Obiblob header file *
****************************************************************************/
/**
* @file obiblob.h
* @author Celine Mercier
* @date November 18th 2015
* @brief Header file for handling Obi_blob structures.
*/
#ifndef OBIBLOB_H_
#define OBIBLOB_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include "obitypes.h"
#define ELEMENT_SIZE_STR (8) /**< The size, in bits, of an element from a value of type character string.
                              */
#define ELEMENT_SIZE_SEQ_2 (2) /**< The size, in bits, of an element from a value of type DNA sequence encoded on 2 bits.
                                */
#define ELEMENT_SIZE_SEQ_4 (4) /**< The size, in bits, of an element from a value of type DNA sequence encoded on 4 bits.
                                */
/**
 * @brief Blob structure.
 *
 * A blob is a small header describing an encoded value, immediately followed by
 * the encoded bytes themselves (flexible array member). obi_blob() allocates
 * the header and the encoded value as a single chunk.
 */
typedef struct Obi_blob {
uint8_t element_size; /**< Size in bits of one element from the value. */
int32_t length_encoded_value; /**< Length in bytes of the encoded value (number of bytes stored in value[]). */
int32_t length_decoded_value; /**< Length in bytes of the decoded value. */
byte_t value[]; /**< Encoded value. */
} Obi_blob_t, *Obi_blob_p;
/**
 * @brief Builds a blob structure from an encoded value.
 *
 * The blob is allocated with malloc() and contains a copy of the encoded value.
 * NOTE(review): the blob is presumably to be freed by the caller -- confirm against the callers.
 *
 * @param encoded_value The encoded value to copy into the blob.
 * @param element_size The number of bits on which each element is encoded.
 * @param length_encoded_value The length (in bytes) of the encoded value.
 * @param length_decoded_value The initial length (in bytes) of the decoded value.
 *
 * @returns A pointer to the newly allocated blob.
 * @retval NULL if an error occurred.
 */
Obi_blob_p obi_blob(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value);
#endif /* OBIBLOB_H_ */

37
src/obiblob_indexer.c Normal file
View File

@ -0,0 +1,37 @@
/****************************************************************************
* Obiblob indexer functions *
****************************************************************************/
/**
* @file obiblob_indexer.c
* @author Celine Mercier
* @date April 12th 2016
* @brief Functions handling the indexing and retrieval of blob structures.
*/
#include <stdlib.h>
#include <stdio.h>
#include "obiblob_indexer.h"
#include "obidms.h"
#include "obiavl.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// The inline definitions of the indexer functions are in obiblob_indexer.h.
// With the C99/C11 inline semantics, a translation unit in which every declaration of a
// function is a plain 'inline' one only contains an inline definition, and no external
// symbol is emitted. Declaring the functions 'extern inline' in exactly one source file
// (this one) makes it provide the external definitions needed by the linker whenever a
// call is not actually inlined (e.g. when compiling without optimization).
// NOTE(review): this assumes the C99 inline model (default of current GCC and Clang),
// not the old GNU89 one (-std=gnu89 / -fgnu89-inline) -- confirm against the build flags.
extern inline int obi_indexer_exists(OBIDMS_p dms, const char* name);
extern inline Obi_indexer_p obi_indexer(OBIDMS_p dms, const char* name);
extern inline Obi_indexer_p obi_create_indexer(OBIDMS_p dms, const char* name);
extern inline Obi_indexer_p obi_open_indexer(OBIDMS_p dms, const char* name);
extern inline int obi_close_indexer(Obi_indexer_p indexer);
extern inline index_t obi_indexer_add(Obi_indexer_p indexer, Obi_blob_p value);
extern inline Obi_blob_p obi_indexer_get(Obi_indexer_p indexer, index_t idx);

80
src/obiblob_indexer.h Normal file
View File

@ -0,0 +1,80 @@
/****************************************************************************
* Blob indexer header file *
****************************************************************************/
/**
* @file obiblob_indexer.h
* @author Celine Mercier
* @date April 12th 2016
* @brief Header file for the functions handling the indexing of values.
*/
#ifndef OBIBLOB_INDEXER_H_
#define OBIBLOB_INDEXER_H_
#include <stdlib.h>
#include <stdio.h>
#include "obidms.h"
#include "obiavl.h"
#include "obitypes.h"
#include "obiblob.h"
#define INDEXER_MAX_NAME AVL_MAX_NAME /**< Macro to refer to the maximum size of the name of an indexer structure
                                       *   (same limit as for AVL tree names).
                                       */
typedef struct OBIDMS_avl_group Obi_indexer; /**< Typedef to refer to the used indexer structure
                                              *   (an indexer is implemented as an AVL tree group).
                                              */
typedef OBIDMS_avl_group_p Obi_indexer_p; /**< Typedef to refer to the pointer of the used indexer structure
                                           *   (pointer to an AVL tree group).
                                           */
/**
 * @brief Checks if an indexer already exists or not.
 *
 * Forwards the call to obi_avl_exists().
 *
 * @param dms The OBIDMS to which the indexer belongs.
 * @param name The name of the indexer.
 *
 * @returns The value returned by obi_avl_exists().
 * @retval 1 if the indexer exists.
 * @retval 0 if the indexer does not exist.
 * @retval -1 if an error occurred.
 */
inline int obi_indexer_exists(OBIDMS_p dms, const char* name)
{
return obi_avl_exists(dms, name);
}
/**
 * @brief Opens an indexer and creates it if it does not already exist.
 *
 * Forwards the call to obi_avl_group().
 *
 * @param dms The OBIDMS to which the indexer belongs.
 * @param name The name of the indexer.
 *
 * @returns A pointer to the indexer structure.
 * @retval NULL if an error occurred.
 */
inline Obi_indexer_p obi_indexer(OBIDMS_p dms, const char* name)
{
return obi_avl_group(dms, name);
}
/**
 * @brief Creates an indexer.
 *
 * Forwards the call to obi_create_avl_group().
 *
 * @param dms The OBIDMS to which the indexer belongs.
 * @param name The name of the indexer.
 *
 * @returns A pointer to the indexer structure.
 * @retval NULL if an error occurred.
 */
inline Obi_indexer_p obi_create_indexer(OBIDMS_p dms, const char* name)
{
return obi_create_avl_group(dms, name);
}
/**
 * @brief Opens an indexer.
 *
 * Forwards the call to obi_open_avl_group().
 *
 * @param dms The OBIDMS to which the indexer belongs.
 * @param name The name of the indexer.
 *
 * @returns A pointer to the indexer structure.
 * @retval NULL if an error occurred.
 */
inline Obi_indexer_p obi_open_indexer(OBIDMS_p dms, const char* name)
{
return obi_open_avl_group(dms, name);
}
/**
 * @brief Closes an indexer.
 *
 * Forwards the call to obi_close_avl_group().
 *
 * @param indexer A pointer to the indexer structure to close.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
inline int obi_close_indexer(Obi_indexer_p indexer)
{
return obi_close_avl_group(indexer);
}
/**
 * @brief Adds a value (blob) in an indexer.
 *
 * Forwards the call to obi_avl_group_add().
 *
 * @warning The value given must already be encoded into a blob structure (Obi_blob_t).
 *
 * @param indexer A pointer to the indexer.
 * @param value The blob to add in the indexer.
 *
 * @returns The index of the value in the indexer, as returned by obi_avl_group_add().
 * @retval -1 if an error occurred.
 */
inline index_t obi_indexer_add(Obi_indexer_p indexer, Obi_blob_p value)
{
return obi_avl_group_add(indexer, value);
}
/**
 * @brief Recovers a value (blob) from an indexer.
 *
 * Forwards the call to obi_avl_group_get().
 *
 * @warning The blob recovered must be decoded to get the original value.
 *
 * @param indexer A pointer to the indexer.
 * @param idx The index of the value to recover.
 *
 * @returns A pointer to the blob recovered.
 */
inline Obi_blob_p obi_indexer_get(Obi_indexer_p indexer, index_t idx)
{
return obi_avl_group_get(indexer, idx);
}
#endif /* OBIBLOB_INDEXER_H_ */

View File

@ -1,8 +1,12 @@
/*
* obidebug.h
*
* Created on: June 25th 2015
* Author: Celine Mercier (celine.mercier@metabarcoding.org)
/****************************************************************************
* Header file for the debugging code *
****************************************************************************/
/**
* @file obidebug.h
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date 25 June 2015
* @brief Header file for the debugging code.
*/
@ -12,7 +16,7 @@
#include <limits.h>
//#ifndef DEBUG_LEVEL
//#ifndef DEBUG_LEVEL // TODO
//#define DEBUG_LEVEL MAXINT
//#endif

View File

@ -1,14 +1,21 @@
/*
* obidms.c
*
* @date 23 May 2015
* @Author: coissac
/********************************************************************
* OBIDMS functions *
********************************************************************/
/**
* @file obidms.c
* @author Eric Coissac (eric.coissac@metabarcoding.org)
* @date 23 May 2015
* @brief OBIDMS functions.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/types.h>
#include <dirent.h>
@ -16,9 +23,12 @@
#include "obierrno.h"
#include "obidebug.h"
#include "obidmscolumn.h"
#include "obiblob_indexer.h"
#include "utils.h"
#include "obilittlebigman.h"
#define DEBUG_LEVEL 0
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**************************************************************************
@ -29,27 +39,60 @@
/**
* Internal function building the directory name from an OBIDMS name.
* Internal function building the OBIDMS directory name from an OBIDMS name.
*
* The function builds the directory name corresponding to an OBIDMS.
* It checks also that the name is not too long.
* It also checks that the name is not too long.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param dms_name the name of the OBIDMS
* @param dms_name The name of the OBIDMS.
*
* @return a pointer to the directory name
* @retvalue <directory_name> if everything is ok
* @retvalue NULL if an error occurs
*
* ###Error values
* - OBIDMS_MEMORY_ERROR : something wrong occurred during memory allocation.
* - OBIDMS_LONG_NAME_ERROR : the database name exceeds the limit.
* @returns A pointer to the directory name.
* @retval NULL if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
static char *build_directory_name(const char *dms_name);
static char* build_directory_name(const char* dms_name);
/**
* Internal function building the informations file name from an OBIDMS name.
*
* The function builds the file name for the informations file of an OBIDMS.
*
* @warning The returned pointer has to be freed by the caller.
*
* @param dms_name The name of the OBIDMS.
*
* @returns A pointer to the file name.
* @retval NULL if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
static char* build_infos_file_name(const char* dms_name);
/**
* Internal function creating the file containing basic informations on the OBIDMS.
*
* This file contains:
* - The endianness of the platform
*
* @warning The file is created exclusively (O_EXCL): the call fails if the informations file already exists.
*
* @param dms_file_descriptor The file descriptor for the OBIDMS directory.
* @param dms_name The name of the OBIDMS.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since November 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int create_dms_infos_file(int dms_file_descriptor, const char* dms_name);
/************************************************************************
@ -58,12 +101,13 @@ static char *build_directory_name(const char *dms_name);
*
************************************************************************/
static char *build_directory_name(const char *dms_name)
static char* build_directory_name(const char* dms_name)
{
char *directory_name;
char* directory_name;
// Build the database directory name
if (asprintf(&directory_name, "%s.obidms", dms_name) < 0)
directory_name = (char*) malloc((strlen(dms_name) + 8)*sizeof(char));
if (sprintf(directory_name, "%s.obidms", dms_name) < 0)
{
obi_set_errno(OBIDMS_MEMORY_ERROR);
obidebug(1, "\nProblem building an OBIDMS directory name");
@ -83,6 +127,75 @@ static char *build_directory_name(const char *dms_name)
}
/**
 * Builds the name of the informations file of an OBIDMS: "<dms_name>_infos".
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms_name The name of the OBIDMS.
 *
 * @returns A pointer to the newly allocated file name.
 * @retval NULL if the memory could not be allocated or if the name could not be built.
 */
static char* build_infos_file_name(const char* dms_name)
{
    char*  file_name;
    size_t file_name_size;

    // Room for the DMS name, the "_infos" suffix (6 characters) and the terminating '\0'
    file_name_size = strlen(dms_name) + 7;

    // Build file name
    file_name = (char*) malloc(file_name_size * sizeof(char));
    if (file_name == NULL)
    {
        obi_set_errno(OBIDMS_MEMORY_ERROR);
        obidebug(1, "\nError allocating the memory for an informations file name");
        return NULL;
    }

    if (snprintf(file_name, file_name_size, "%s_infos", dms_name) < 0)
    {
        obi_set_errno(OBIDMS_MEMORY_ERROR);
        obidebug(1, "\nProblem building an informations file name");
        free(file_name);    // Do not leak the buffer when the name can not be built
        return NULL;
    }

    return file_name;
}
/**
 * Creates the informations file of an OBIDMS in the OBIDMS directory and
 * writes in it the endianness of the platform (one bool).
 *
 * The file must not already exist (it is opened with O_CREAT | O_EXCL).
 *
 * @param dms_file_descriptor The file descriptor for the OBIDMS directory.
 * @param dms_name The name of the OBIDMS.
 *
 * @retval 0 if the operation was successfully completed.
 * @retval -1 if an error occurred.
 */
int create_dms_infos_file(int dms_file_descriptor, const char* dms_name)
{
    char* infos_name;
    int   infos_fd;
    int   status;
    bool  platform_is_little_endian;
    off_t infos_size = sizeof(bool);    // The file only holds the endianness flag

    // Name of the file to create
    infos_name = build_infos_file_name(dms_name);
    if (infos_name == NULL)
        return -1;

    // Create the file; the name is not needed anymore once the file is opened (or not)
    infos_fd = openat(dms_file_descriptor, infos_name, O_RDWR | O_CREAT | O_EXCL, 0777);
    if (infos_fd < 0)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError creating an informations file");
        free(infos_name);
        return -1;
    }
    free(infos_name);

    // From here on the file descriptor is closed once, whatever the outcome
    status = -1;

    if (ftruncate(infos_fd, infos_size) < 0)
    {
        // The file could not be given its final size
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError truncating an informations file");
    }
    else
    {
        // Write endianness
        platform_is_little_endian = obi_is_little_endian();
        if (write(infos_fd, &platform_is_little_endian, sizeof(bool)) < ((ssize_t) sizeof(bool)))
        {
            obi_set_errno(OBIDMS_UNKNOWN_ERROR);
            obidebug(1, "\nError writing the endianness in an informations file");
        }
        else
        {
            status = 0;
        }
    }

    close(infos_fd);

    return status;
}
/**********************************************************************
*
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
@ -92,8 +205,8 @@ static char *build_directory_name(const char *dms_name)
int obi_dms_exists(const char* dms_name)
{
struct stat buffer;
char *directory_name;
int check_dir;
char* directory_name;
int check_dir;
// Build and check the directory name
directory_name = build_directory_name(dms_name);
@ -113,7 +226,9 @@ int obi_dms_exists(const char* dms_name)
OBIDMS_p obi_create_dms(const char* dms_name)
{
char *directory_name;
char* directory_name;
DIR* dms_dir;
int dms_file_descriptor;
// Build and check the directory name
directory_name = build_directory_name(dms_name);
@ -124,7 +239,10 @@ OBIDMS_p obi_create_dms(const char* dms_name)
if (mkdir(directory_name, 00777) < 0)
{
if (errno == EEXIST)
{
obi_set_errno(OBIDMS_EXIST_ERROR);
obidebug(1, "\nAn OBIDMS directory with the same name already exists in this directory.");
}
else
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nProblem creating an OBIDMS directory");
@ -132,8 +250,38 @@ OBIDMS_p obi_create_dms(const char* dms_name)
return NULL;
}
// Get file descriptor of DMS directory to create the indexer directory
dms_dir = opendir(directory_name);
if (dms_dir == NULL)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nProblem opening a newly created OBIDMS directory");
free(directory_name);
return NULL;
}
free(directory_name);
dms_file_descriptor = dirfd(dms_dir);
if (dms_file_descriptor < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nProblem getting the file descriptor of a newly created OBIDMS directory");
return NULL;
}
// Create the indexer directory
if (mkdirat(dms_file_descriptor, INDEXER_DIR_NAME, 00777) < 0)
{
obi_set_errno(OBI_INDEXER_ERROR);
obidebug(1, "\nProblem creating an indexer directory");
return NULL;
}
// Create the informations file
if (create_dms_infos_file(dms_file_descriptor, dms_name) < 0)
return NULL;
return obi_open_dms(dms_name);
}
@ -142,18 +290,36 @@ OBIDMS_p obi_open_dms(const char* dms_name)
{
OBIDMS_p dms;
char* directory_name;
DIR* directory;
char* infos_file_name;
int infos_file_descriptor;
bool little_endian_dms;
bool little_endian_platform;
dms = NULL;
// Allocate the data structure
dms = (OBIDMS_p) malloc(sizeof(OBIDMS_t));
if (dms == NULL)
{
obi_set_errno(OBIDMS_MEMORY_ERROR);
obidebug(1, "\nError allocating the memory for the OBIDMS structure");
return NULL;
}
// Build and check the directory name
directory_name = build_directory_name(dms_name);
if (directory_name == NULL)
{
free(dms);
return NULL;
}
strncpy(dms->directory_name, directory_name, OBIDMS_MAX_NAME);
free(directory_name);
// Try to open the directory
directory = opendir(directory_name);
if (directory == NULL)
dms->directory = opendir(dms->directory_name);
if (dms->directory == NULL)
{
switch (errno)
{
@ -173,25 +339,90 @@ OBIDMS_p obi_open_dms(const char* dms_name)
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
}
obidebug(1, "\nCan't open OBIDMS directory");
free(directory_name);
free(dms);
return NULL;
}
// Allocate the data structure
dms = (OBIDMS_p) malloc(sizeof(OBIDMS_t));
if (dms == NULL)
// Get and store file descriptor of DMS directory to open the informations file
dms->dir_fd = dirfd(dms->directory);
if (dms->dir_fd < 0)
{
obi_set_errno(OBIDMS_MEMORY_ERROR);
obidebug(1, "\nError allocating the memory for the OBIDMS structure");
free(directory_name);
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError getting the file descriptor for a newly created OBIDMS directory");
closedir(dms->directory);
free(dms);
return NULL;
}
// Initialize the data structure
strcpy(dms->directory_name, directory_name);
dms->directory = directory;
// Open informations file to check endianness
infos_file_name = build_infos_file_name(dms_name);
infos_file_descriptor = openat(dms->dir_fd, infos_file_name, O_RDONLY, 0777);
if (infos_file_descriptor < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError opening an informations file");
closedir(dms->directory);
free(dms);
return NULL;
}
free(directory_name);
free(infos_file_name);
// Check endianness of the platform and DMS
little_endian_platform = obi_is_little_endian();
if (read(infos_file_descriptor, &little_endian_dms, sizeof(bool)) < ((ssize_t) sizeof(bool)))
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError reading the endianness in an informations file");
close(infos_file_descriptor);
closedir(dms->directory);
free(dms);
return NULL;
}
if (little_endian_platform != little_endian_dms)
{
obi_set_errno(OBIDMS_BAD_ENDIAN_ERROR);
obidebug(1, "\nError: The DMS and the platform have different endianness");
close(infos_file_descriptor);
closedir(dms->directory);
free(dms);
return NULL;
}
close(infos_file_descriptor);
dms->little_endian = little_endian_dms;
// Open the indexer directory
dms->indexer_directory = opendir_in_dms(dms, INDEXER_DIR_NAME);
if (dms->indexer_directory == NULL)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError opening the indexer directory");
closedir(dms->directory);
free(dms);
return NULL;
}
// Store the indexer directory's file descriptor
dms->indexer_dir_fd = dirfd(dms->indexer_directory);
if (dms->indexer_dir_fd < 0)
{
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
obidebug(1, "\nError getting the file descriptor of the indexer directory");
closedir(dms->indexer_directory);
closedir(dms->directory);
free(dms);
return NULL;
}
// Initialize the list of opened columns
dms->opened_columns = (Opened_columns_list_p) malloc(sizeof(Opened_columns_list_t));
(dms->opened_columns)->nb_opened_columns = 0;
// Initialize the list of opened indexers // TODO should be handled somewhere else?
dms->opened_indexers = (Opened_indexers_list_p) malloc(sizeof(Opened_indexers_list_t));
(dms->opened_indexers)->nb_opened_indexers = 0;
return dms;
}
@ -220,10 +451,22 @@ int obi_close_dms(OBIDMS_p dms)
{
if (dms != NULL)
{
// Close all columns
while ((dms->opened_columns)->nb_opened_columns > 0)
obi_close_column(*((dms->opened_columns)->columns));
// Close dms and indexer directories
if (closedir(dms->directory) < 0)
{
obi_set_errno(OBIDMS_MEMORY_ERROR);
obidebug(1, "\nError closing an OBIDSM directory");
obidebug(1, "\nError closing an OBIDMS directory");
free(dms);
return -1;
}
if (closedir(dms->indexer_directory) < 0) // TODO should be handled somewhere else?
{
obi_set_errno(OBI_INDEXER_ERROR);
obidebug(1, "\nError closing an indexer directory");
free(dms);
return -1;
}
@ -233,3 +476,175 @@ int obi_close_dms(OBIDMS_p dms)
return 0;
}
/**
 * Looks for a column name in the list of columns currently opened in a DMS.
 *
 * Only the name is compared: the version of the column is not considered.
 *
 * @param dms         The DMS whose list of opened columns is searched.
 * @param column_name The column name to look for.
 *
 * @retval 0 if an opened column has this name.
 * @retval 1 if no opened column has this name.
 */
int obi_dms_is_column_name_in_list(OBIDMS_p dms, const char* column_name)
{
    Opened_columns_list_p opened = dms->opened_columns;
    int                   idx    = 0;

    while (idx < opened->nb_opened_columns)
    {
        if (strcmp(opened->columns[idx]->header->name, column_name) == 0)
            return 0;    // Found it
        idx++;
    }

    return 1;
}
/**
 * Retrieves an already opened column from the list of opened columns of a DMS.
 *
 * A column matches when both its name and its version are the requested ones.
 *
 * @param dms         The DMS whose list of opened columns is searched.
 * @param column_name The name of the column.
 * @param version     The version of the column.
 *
 * @returns The matching opened column.
 * @retval NULL if no opened column has this name and version.
 */
OBIDMS_column_p obi_dms_get_column_from_list(OBIDMS_p dms, const char* column_name, obiversion_t version)
{
    Opened_columns_list_p opened = dms->opened_columns;
    OBIDMS_column_p       candidate;
    int                   idx;

    for (idx = 0; idx < opened->nb_opened_columns; idx++)
    {
        candidate = opened->columns[idx];

        if (strcmp(candidate->header->name, column_name) != 0)
            continue;

        if (candidate->header->version == version)
            return candidate;    // Column already opened
    }

    // No opened column with this name and version
    return NULL;
}
/**
 * Appends a column to the list of columns opened in a DMS.
 *
 * The list is a fixed-size array of MAX_NB_OPENED_COLUMNS entries. When it is
 * already full the column is NOT listed: obi_errno is set and an error message
 * is emitted, instead of writing past the end of the array.
 *
 * @param dms    The DMS in which the column is opened.
 * @param column The column to add to the list.
 */
void obi_dms_list_column(OBIDMS_p dms, OBIDMS_column_p column)
{
    Opened_columns_list_p columns_list;

    columns_list = dms->opened_columns;

    if (columns_list->nb_opened_columns >= MAX_NB_OPENED_COLUMNS)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError listing an opened column: too many columns are already opened");
        return;
    }

    (columns_list->columns)[columns_list->nb_opened_columns] = column;
    (columns_list->nb_opened_columns)++;
}
/**
 * Removes a column from the list of columns opened in a DMS.
 *
 * The column is identified by its name and version. The freed slot is filled
 * with the last column of the list, so the order of the list is not preserved.
 *
 * @param dms    The DMS in which the column is opened.
 * @param column The column to remove from the list.
 *
 * @retval 0 if the column was found and removed.
 * @retval -1 if the column was not in the list.
 */
int obi_dms_unlist_column(OBIDMS_p dms, OBIDMS_column_p column)
{
    Opened_columns_list_p opened = dms->opened_columns;
    OBIDMS_column_p*      slot;
    int                   idx;

    for (idx = 0; idx < opened->nb_opened_columns; idx++)
    {
        slot = opened->columns + idx;

        if ((strcmp((*slot)->header->name, column->header->name) == 0)
            && ((*slot)->header->version == column->header->version))
        {
            // Shrink the list and move its last entry into the freed slot
            opened->nb_opened_columns -= 1;
            *slot = opened->columns[opened->nb_opened_columns];
            return 0;
        }
    }

    obidebug(1, "\nCould not find the column to delete from list of open columns");
    return -1;
}
/**
 * Retrieves an already opened indexer from the list of opened indexers of a DMS.
 *
 * @param dms          The DMS whose list of opened indexers is searched.
 * @param indexer_name The name of the indexer.
 *
 * @returns The opened indexer with this name.
 * @retval NULL if no opened indexer has this name.
 */
Obi_indexer_p obi_dms_get_indexer_from_list(OBIDMS_p dms, const char* indexer_name)
{
    Opened_indexers_list_p opened = dms->opened_indexers;
    Obi_indexer_p          candidate;
    int                    idx = 0;

    while (idx < opened->nb_opened_indexers)
    {
        candidate = opened->indexers[idx];

        // TODO the name field belongs to the AVL group structure
        if (strcmp(candidate->name, indexer_name) == 0)
            return candidate;    // Indexer already opened

        idx++;
    }

    // No opened indexer with this name
    return NULL;
}
/**
 * Appends an indexer to the list of indexers opened in a DMS.
 *
 * The list is a fixed-size array of MAX_NB_OPENED_INDEXERS entries. When it is
 * already full the indexer is NOT listed: obi_errno is set and an error message
 * is emitted, instead of writing past the end of the array.
 *
 * @param dms     The DMS in which the indexer is opened.
 * @param indexer The indexer to add to the list.
 */
void obi_dms_list_indexer(OBIDMS_p dms, Obi_indexer_p indexer)
{
    Opened_indexers_list_p indexers_list;

    indexers_list = dms->opened_indexers;

    if (indexers_list->nb_opened_indexers >= MAX_NB_OPENED_INDEXERS)
    {
        obi_set_errno(OBI_INDEXER_ERROR);
        obidebug(1, "\nError listing an opened indexer: too many indexers are already opened");
        return;
    }

    (indexers_list->indexers)[indexers_list->nb_opened_indexers] = indexer;
    (indexers_list->nb_opened_indexers)++;
}
/**
 * Removes an indexer from the list of indexers opened in a DMS.
 *
 * The indexer is identified by its name. The freed slot is filled with the
 * last indexer of the list, so the order of the list is not preserved.
 *
 * @param dms     The DMS in which the indexer is opened.
 * @param indexer The indexer to remove from the list.
 *
 * @retval 0 if the indexer was found and removed.
 * @retval -1 if the indexer was not in the list.
 */
int obi_dms_unlist_indexer(OBIDMS_p dms, Obi_indexer_p indexer)
{
    Opened_indexers_list_p opened = dms->opened_indexers;
    Obi_indexer_p*         slot;
    int                    idx;

    for (idx = 0; idx < opened->nb_opened_indexers; idx++)
    {
        slot = opened->indexers + idx;

        // TODO the name field belongs to the AVL group structure
        if (strcmp((*slot)->name, indexer->name) == 0)
        {
            // Shrink the list and move its last entry into the freed slot
            opened->nb_opened_indexers -= 1;
            *slot = opened->indexers[opened->nb_opened_indexers];
            return 0;
        }
    }

    obidebug(1, "\nCould not find the indexer to delete from list of open indexers");
    return -1;
}
/**
 * Builds the path of a DMS directory as "<current working directory>/<DMS directory name>".
 *
 * @warning The returned buffer is MAX_PATH_LEN bytes long and has to be freed by the caller.
 *
 * @param dms The DMS whose directory path is wanted.
 *
 * @returns A newly allocated character string containing the path.
 * @retval NULL if the allocation failed, if the working directory could not be
 *              read, or if the path does not fit in MAX_PATH_LEN bytes.
 */
char* obi_dms_get_dms_path(OBIDMS_p dms)
{
    char* full_path;

    full_path = (char*) malloc((MAX_PATH_LEN)*sizeof(char));
    if (full_path == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for the char* path to a file or directory");
        return NULL;
    }

    if (getcwd(full_path, MAX_PATH_LEN) == NULL)    // TODO not sure at all about this because the DMS must be in the working directory.
    {                                               // Maybe better to store when opening, but opening function seems to assume that too.
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "\nError getting the path to a file or directory");
        free(full_path);
        return NULL;
    }

    // Room is needed for the '/' separator and the terminating null character
    if ((strlen(full_path) + strlen(dms->directory_name) + 2) > MAX_PATH_LEN)
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "\nError getting the path to a file or directory: the path is too long");
        free(full_path);
        return NULL;
    }

    strcat(full_path, "/");
    strcat(full_path, dms->directory_name);

    return full_path;
}
/**
 * Builds the path of a file or directory located in a DMS, as
 * "<DMS directory path>/<path_name>".
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms       The DMS containing the file or directory.
 * @param path_name The path of the file or directory, relative to the DMS directory.
 *
 * @returns A newly allocated character string containing the path.
 * @retval NULL if the DMS path could not be built or if the resulting path
 *              does not fit in MAX_PATH_LEN bytes.
 */
char* obi_dms_get_full_path(OBIDMS_p dms, const char* path_name)
{
    char* full_path;

    full_path = obi_dms_get_dms_path(dms);
    if (full_path == NULL)
        return NULL;

    // The buffer returned by obi_dms_get_dms_path() is MAX_PATH_LEN bytes long.
    // Room is needed for the '/' separator and the terminating null character.
    if ((strlen(full_path) + strlen(path_name) + 2) > MAX_PATH_LEN)
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "\nError getting the path to a file or directory: the path is too long");
        free(full_path);
        return NULL;
    }

    strcat(full_path, "/");
    strcat(full_path, path_name);

    return full_path;
}
/**
 * Opens a directory located in a DMS.
 *
 * The path given is relative to the DMS directory; it is turned into a full
 * path with obi_dms_get_full_path() before being opened.
 *
 * @param dms       The DMS containing the directory.
 * @param path_name The path of the directory, relative to the DMS directory.
 *
 * @returns The opened directory stream.
 * @retval NULL if the full path could not be built or the directory could not be opened.
 */
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name)
{
    DIR*  opened_dir    = NULL;
    char* absolute_path = obi_dms_get_full_path(dms, path_name);

    if (absolute_path != NULL)
    {
        opened_dir = opendir(absolute_path);
        if (opened_dir == NULL)
        {
            obi_set_errno(OBI_UTILS_ERROR);
            obidebug(1, "\nError opening a directory");
        }

        // The path is only needed to open the directory
        free(absolute_path);
    }

    return opened_dir;
}

View File

@ -1,54 +1,111 @@
/*
* obidms.h
*
* Created on: 23 mai 2015
* Author: coissac
/********************************************************************
* OBIDMS header file *
********************************************************************/
/**
* @file obidms.h
* @author Eric Coissac (eric.coissac@metabarcoding.org)
* @date 23 May 2015
* @brief Header file for the OBIDMS functions and structures.
*/
#ifndef OBIDMS_H_
#define OBIDMS_H_
#include <stdlib.h>
#include <sys/stat.h>
#include <errno.h>
#include <dirent.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
#include "obierrno.h"
//#include "obidmscolumn.h"
//#include "obiblob_indexer.h"
#define OBIDMS_MAX_NAME (2048) /**< The maximum length of an OBIDMS name
*/
/** @brief A structure describing an OBIDMS instance
#define OBIDMS_MAX_NAME (2048) /**< The maximum length of an OBIDMS name.
*/
#define INDEXER_DIR_NAME "OBIBLOB_INDEXERS" /**< The name of the Obiblob indexer directory.
*/
#define TAXONOMY_DIR_NAME "TAXONOMY" /**< The name of the taxonomy directory.
*/
#define MAX_NB_OPENED_COLUMNS (100) /**< The maximum number of columns open at the same time.
*/
#define MAX_NB_OPENED_INDEXERS (1000) /**< The maximum number of indexers open at the same time.
*/
#define MAX_PATH_LEN 4096 /**< Maximum length for the character string defining a
* file or directory path.
*/
typedef int32_t obiversion_t; /**< TODO double
*/
struct OBIDMS_column; // TODO
typedef struct OBIDMS_column* OBIDMS_column_p;
/**
 * @brief Fixed-capacity list of the columns currently opened in a DMS.
 *
 * Entries 0 to nb_opened_columns-1 of the array are the valid ones.
 */
typedef struct Opened_columns_list { // TODO Handle the problem linked to columns with the same name + means only one version
// Number of valid entries in the columns array.
int nb_opened_columns;
// Pointers to the opened columns; at most MAX_NB_OPENED_COLUMNS of them.
OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS];
} Opened_columns_list_t, *Opened_columns_list_p;
struct OBIDMS_avl_group; // TODO
typedef struct OBIDMS_avl_group* OBIDMS_avl_group_p;
typedef OBIDMS_avl_group_p Obi_indexer_p;
/**
 * @brief Fixed-capacity list of the indexers currently opened in a DMS.
 *
 * Entries 0 to nb_opened_indexers-1 of the array are the valid ones.
 */
typedef struct Opened_indexers_list {
// Number of valid entries in the indexers array.
int nb_opened_indexers;
// Pointers to the opened indexers; at most MAX_NB_OPENED_INDEXERS of them.
Obi_indexer_p indexers[MAX_NB_OPENED_INDEXERS];
} Opened_indexers_list_t, *Opened_indexers_list_p;
/**
* @brief A structure describing an OBIDMS instance
*
* A pointer to this structure is returned on creation
* and opening of an OBITools Data Management System (DMS)
*/
typedef struct OBIDMS {
char directory_name[OBIDMS_MAX_NAME+1]; /**< The name of the directory
* containing the DMS
*/
DIR* directory; /**< A directory entry usable to
* refer and scan the database directory
*/
char directory_name[OBIDMS_MAX_NAME+1]; /**< The name of the directory
* containing the DMS.
*/
DIR* directory; /**< A directory entry usable to
* refer and scan the database directory.
*/
int dir_fd; /**< The file descriptor of the directory entry
* usable to refer and scan the database directory.
*/
DIR* indexer_directory; /**< A directory entry usable to
* refer and scan the indexer directory.
*/
int indexer_dir_fd; /**< The file descriptor of the directory entry
* usable to refer and scan the indexer directory.
*/
bool little_endian; /**< Endianness of the database.
*/
Opened_columns_list_p opened_columns; /**< List of opened columns.
*/
Opened_indexers_list_p opened_indexers; /**< List of opened indexers.
*/
} OBIDMS_t, *OBIDMS_p;
/*@
* @brief Checks if an OBIDMS exists
/**
* @brief Checks if an OBIDMS exists.
*
* @param dms_name a pointer to a C string containing the name of the database.
* The actual directory name used to store the DMS will be
* `<dms_name>.obidms`.
* @param dms_name A pointer to a C string containing the name of the database.
*
* @return an integer value indicating the status of the database
* @retvalue 1 the database exist
* @retvalue 0 the database does not exist
* @retvalue -1 an error occurred
* @returns An integer value indicating the status of the database
* @retval 1 if the database exists.
* @retval 0 if the database does not exist.
* @retval -1 if an error occurred.
*
* @see obi_close_dms()
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
@ -62,79 +119,64 @@ int obi_dms_exists(const char* dms_name);
* if a directory with this name does not already exist
* before creating the new database.
*
* @param dms_name a pointer to a C string containing the name of the database.
* The actual directory name used to store the DMS will be
* `<name>.obidms`
* A directory to store Obiblob indexers is also created.
*
* @return a pointer to an OBIDMS structure describing the newly created DMS
* @retval NULL on error and the `obi_errno` variable is set.
* @param dms_name A pointer to a C string containing the name of the database.
* The actual directory name used to store the DMS will be
* `<dms_name>.obidms`.
*
* ###Error values
* - OBIDMS_EXIST_ERROR : a database with the same name already exists.
* - OBIDMS_LONG_NAME_ERROR : the database name exceeds the limit.
* - OBIDMS_MEMORY_ERROR : something wrong occurred during memory allocation.
* @returns A pointer to an OBIDMS structure describing the newly created DMS.
* @retval NULL if an error occurred.
*
* @see obi_close_dms()
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
OBIDMS_p obi_create_dms(const char *dms_name);
OBIDMS_p obi_create_dms(const char* dms_name);
/**
* @brief Opens an existing OBITools Data Management instance (OBIDMS).
*
* @param dms_name a pointer to a C string containing the name of the database.
* The actual directory name used to store the DMS will be
* `<name>.obidms`
* @param dms_name A pointer to a C string containing the name of the database.
*
* @return a pointer to an OBIDMS structure describing the newly created DMS
* @retval NULL on error and the `obi_errno`variable is set.
*
* ###Error values
* - OBIDMS_LONG_NAME_ERROR : the database name exceeds the limit.
* - OBIDMS_MEMORY_ERROR : something wrong occurred during memory allocation.
* @returns A pointer to an OBIDMS structure describing the opened DMS.
* @retval NULL if an error occurred.
*
* @see obi_close_dms()
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
OBIDMS_p obi_open_dms(const char *dms_name);
OBIDMS_p obi_open_dms(const char* dms_name);
/**
* @brief Creates a new OBIDMS instance.
* @brief Creates or opens a new OBIDMS instance.
*
* If the database already exists, this function opens it, otherwise it
* creates a new database.
*
* @param dms_name a pointer to a C string containing the name of the database.
* The actual directory name used to store the DMS is
* `<name>.obidms`
* @param dms_name A pointer to a C string containing the name of the database.
*
* @return a pointer to an OBIDMS structure describing the newly created DMS
* @retval NULL on error and the `obi_errno`variable is set.
*
* ###Error values
* - OBIDMS_LONG_NAME_ERROR : the database name exceeds the limit.
* - OBIDMS_MEMORY_ERROR : something wrong occurred during memory allocation.
* @returns A pointer to an OBIDMS structure describing the OBIDMS.
* @retval NULL if an error occurred.
*
* @see obi_close_dms()
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
OBIDMS_p obi_dms(const char *dms_name);
OBIDMS_p obi_dms(const char* dms_name);
/**
* @brief Closes an opened OBITools Data Management instance (OBIDMS).
*
* @param dms a pointer as returned by obi_create_dms() or obi_open_dms()
* @param dms A pointer as returned by obi_create_dms() or obi_open_dms().
*
* @return an integer value indicating the success of the operation. Even on
* error, the `OBIDMS` structure is freed
* @retvalue 0 on success
* @retvalue -1 on failure and the `obi_errno` variable is set.
* @returns An integer value indicating the success of the operation. Even on
* error, the `OBIDMS` structure is freed.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @see obi_create_dms()
* @see obi_open_dms()
@ -144,4 +186,63 @@ OBIDMS_p obi_dms(const char *dms_name);
int obi_close_dms(OBIDMS_p dms);
// TODO complete the documentation of the functions below

// Checks by name only (version ignored) whether a column is in the list of opened columns.
// Returns 0 if a column with this name is listed, 1 otherwise.
int obi_dms_is_column_name_in_list(OBIDMS_p dms, const char* column_name);
// Returns the opened column with this name and version, or NULL if it is not in the list.
OBIDMS_column_p obi_dms_get_column_from_list(OBIDMS_p dms, const char* column_name, obiversion_t version);
// Appends a column at the end of the list of opened columns.
void obi_dms_list_column(OBIDMS_p dms, OBIDMS_column_p column);
// Removes the column with the same name and version from the list of opened columns.
// Returns 0 on success, -1 if the column is not in the list.
int obi_dms_unlist_column(OBIDMS_p dms, OBIDMS_column_p column);
// Returns the opened indexer with this name, or NULL if it is not in the list.
Obi_indexer_p obi_dms_get_indexer_from_list(OBIDMS_p dms, const char* indexer_name);
// Appends an indexer at the end of the list of opened indexers.
void obi_dms_list_indexer(OBIDMS_p dms, Obi_indexer_p indexer);
// Removes the indexer with the same name from the list of opened indexers.
// Returns 0 on success, -1 if the indexer is not in the list.
int obi_dms_unlist_indexer(OBIDMS_p dms, Obi_indexer_p indexer);
// NOTE(review): the function defined in obidms.c is named obi_dms_get_dms_path();
// confirm which of the two names is intended and make them agree.
char* obi_dms_get_path(OBIDMS_p dms);
/** TODO
 * @brief Internal function getting the full path of a file or a directory from its
 * path relative to the directory of a DMS.
 *
 * The path is built as "<current working directory>/<DMS directory name>/<path_name>".
 *
 * @warning The returned pointer has to be freed by the caller.
 *
 * @param dms The DMS to whose directory path_name is relative.
 * @param path_name The path name for the file or directory, relative to the DMS directory.
 *
 * @returns A pointer to the full path.
 * @retval NULL if an error occurred.
 *
 * @since June 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
char* obi_dms_get_full_path(OBIDMS_p dms, const char* path_name);
/**
 * @brief Replacement function for opendirat() : open a directory relative to the directory of a DMS.
 *
 * @param dms The DMS in whose directory the directory should be opened.
 * @param path_name The path name for the directory to be opened, relative to the DMS directory.
 *
 * @returns A pointer to the directory stream of the opened directory.
 * @retval NULL if an error occurred.
 *
 * @since June 2015
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 */
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name);
#endif /* OBIDMS_H_ */

682
src/obidms_taxonomy.c Normal file
View File

@ -0,0 +1,682 @@
/********************************************************************
* OBIDMS taxonomy functions *
********************************************************************/
/**
* @file obidms_taxonomy.c
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date March 2nd 2016
* @brief Functions for reading binary taxonomy files.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <fcntl.h>
#include "obidms_taxonomy.h"
#include "obidms.h"
#include "obidebug.h"
#include "obierrno.h"
#include "utils.h"
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
// TODO : the malloc aren't checked but won't exist for long because mapping instead
/**
 * Comparison function used to search a rank label with bsearch().
 *
 * @param label1 The searched label, a C string (the bsearch key).
 * @param label2 A pointer to an element of the array of labels, i.e. a pointer
 *               to a C string pointer.
 *
 * @returns The result of strcmp() between the searched label and the array element.
 */
int compareRankLabel(const void *label1, const void *label2)
{
    const char*        searched_label = (const char*) label1;
    const char* const* array_element  = (const char* const*) label2;

    return strcmp(searched_label, *array_element);
}
/**
 * Finds the index of a rank label in a rank index.
 *
 * The label is searched with bsearch(), which requires the labels of the rank
 * index to be sorted in strcmp() order.
 *
 * @param label The rank label to look for (e.g. "species").
 * @param ranks The rank index to search.
 *
 * @returns The position of the label in the array of labels of the rank index.
 * @retval -1 if the label is not found.
 */
int32_t rank_index(const char* label, ecorankidx_t* ranks)
{
    char** match;

    match = bsearch(label, ranks->label, ranks->count, sizeof(char*), compareRankLabel);
    if (match == NULL)
        return -1;

    // bsearch() returns a pointer to the matching element: its distance to the
    // beginning of the array is the index of the rank.
    return match - ranks->label;
}
/**
 * Reads the next record of a binary taxonomy file.
 *
 * A record is stored as a 32 bits integer giving its size in bytes, followed by
 * the record itself. The record is read in an internal buffer that is reused
 * (and enlarged when needed) by the following calls.
 *
 * @warning The returned pointer refers to a static buffer: it must not be freed
 *          and its content is overwritten by the next call.
 *
 * @param f           The taxonomy file, positioned at the beginning of a record.
 * @param record_size A pointer where the size of the record read is stored.
 *
 * @returns A pointer to the record read.
 * @retval NULL at the end of the file or if an error occurred.
 */
void* read_ecorecord(FILE* f, int32_t* record_size)
{
    static void*   buffer      = NULL;
    static int32_t buffer_size = 0;    // static like the buffer, so that the buffer is only reallocated when too small
    void*          enlarged_buffer;
    int32_t        nb_read;

    if (!record_size)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: record_size can not be NULL");
        return NULL;
    }

    nb_read = fread(record_size,
                    1,
                    sizeof(int32_t),
                    f);

    if (feof(f))
        return NULL;

    if (nb_read != sizeof(int32_t))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: error reading record size");
        return NULL;
    }

//  if (is_big_endian())        // TODO
//      *recordSize=swap_int32_t(*recordSize);

    // A negative size would be converted to a huge unsigned size by fread()
    if (*record_size < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: negative record size");
        return NULL;
    }

    if (buffer_size < *record_size)
    {
        // realloc() behaves like malloc() when the buffer is still NULL.
        // Its result is kept apart so that the old buffer is not lost on failure.
        enlarged_buffer = realloc(buffer, *record_size);
        if (enlarged_buffer == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError reading a taxonomy file: error allocating memory");
            return NULL;
        }
        buffer      = enlarged_buffer;
        buffer_size = *record_size;
    }

    nb_read = fread(buffer,
                    1,
                    *record_size,
                    f);

    if (nb_read != *record_size)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: error reading a record %d, %d", nb_read, *record_size);
        return NULL;
    }

    return buffer;
}
/**
 * Reads the next taxon of a binary taxa file and stores it in a taxon structure.
 *
 * The parent of the taxon is stored in the file as an integer. That integer is
 * stored as is in the parent pointer field: the caller is expected to convert
 * it to an actual pointer afterwards. The 'farest' field is initialized to -1.
 *
 * @param f     The taxa file, positioned at the beginning of a taxon record.
 * @param taxon The structure to fill. Its name is newly allocated.
 *
 * @returns The taxon structure given as parameter.
 * @retval NULL at the end of the file or if an error occurred.
 */
ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon)
{
    ecotxformat_t* raw;
    int32_t        record_length;

    raw = read_ecorecord(f, &record_length);
    if (!raw)
        return NULL;

//  if (is_big_endian())        // TODO
//  {
//      raw->namelength = swap_int32_t(raw->namelength);
//      raw->parent     = swap_int32_t(raw->parent);
//      raw->rank       = swap_int32_t(raw->rank);
//      raw->taxid      = swap_int32_t(raw->taxid);
//  }

    taxon->parent = (ecotx_t*) ((size_t) raw->parent);
    taxon->taxid  = raw->taxid;
    taxon->rank   = raw->rank;
    taxon->farest = -1;

    taxon->name = malloc((raw->name_length+1) * sizeof(char));
    if (taxon->name == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: error allocating memory for a taxon name");
        return NULL;
    }

    strncpy(taxon->name, raw->name, raw->name_length);
    // strncpy() does not write the final null character when the source is at
    // least name_length characters long, and malloc() does not zero the memory
    taxon->name[raw->name_length] = 0;

    return taxon;
}
/**
 * Opens a binary taxonomy file and reads the number of records it contains.
 *
 * The file begins with a 32 bits integer giving the number of records. On
 * success, the file is positioned right after that count.
 *
 * @param file_name           The name of the file to open.
 * @param count               A pointer where the number of records is stored.
 * @param abort_on_open_error If not 0, failing to open the file sets obi_errno.
 *                            If 0, failing to open the file only sets *count to 0.
 *
 * @returns The opened file, to be closed by the caller.
 * @retval NULL if the file could not be opened or the count could not be read.
 */
FILE* open_ecorecorddb(const char* file_name,
                       int32_t*    count,
                       int32_t     abort_on_open_error)
{
    FILE*  f;
    size_t nb_read;

    fprintf(stderr, "\n%s\n", file_name);

    f = fopen(file_name, "rb");

    if (!f)
    {
        if (abort_on_open_error)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nCouldn't open a taxonomy file");
            return NULL;
        }
        else
        {
            *count = 0;
            return NULL;
        }
    }

    nb_read = fread(count,
                    1,
                    sizeof(int32_t),
                    f);

    if (nb_read != sizeof(int32_t))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading taxonomy record size");
        // The caller only gets NULL and can not close the file itself
        fclose(f);
        return NULL;
    }

//  if (!obi_is_little_endian())        // TODO
//      *count = swap_int32_t(*count);

    return f;
}
ecorankidx_t* read_rankidx(const char* ranks_file_name)
{
int32_t count;
FILE* ranks_file;
ecorankidx_t* ranks_index;
int32_t i;
int32_t rank_length;
char* buffer;
ranks_file = open_ecorecorddb(ranks_file_name, &count, 0);
if (ranks_file==NULL)
return NULL;
ranks_index = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * (count-1));
ranks_index->count = count;
for (i=0; i < count; i++)
{
buffer = read_ecorecord(ranks_file, &rank_length);
ranks_index->label[i] = (char*) malloc(rank_length+1);
strncpy(ranks_index->label[i], buffer, rank_length);
}
return ranks_index;
}
ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name)
{
int32_t count_taxa;
int32_t count_local_taxa;
FILE* f_taxa;
FILE* f_local_taxa;
ecotxidx_t* taxa_index;
struct ecotxnode* t;
int32_t i;
int32_t j;
f_taxa = open_ecorecorddb(taxa_file_name, &count_taxa,0);
if (f_taxa == NULL)
{
obidebug(1, "\nError reading taxonomy taxa file");
return NULL;
}
f_local_taxa = open_ecorecorddb(local_taxa_file_name, &count_local_taxa, 0);
taxa_index = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count_taxa + count_local_taxa - 1));
taxa_index->count = count_taxa + count_local_taxa;
taxa_index->buffer_size = taxa_index->count;
taxa_index->max_taxid = 0;
printf("Reading %d taxa...\n", count_taxa);
for (i=0; i<count_taxa; i++)
{
readnext_ecotaxon(f_taxa, &(taxa_index->taxon[i]));
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
taxa_index->taxon[i].parent->farest = 0;
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
}
if (count_local_taxa > 0)
printf("Reading %d local taxa...\n", count_local_taxa);
else
printf("No local taxa\n");
count_taxa = taxa_index->count;
for (; i < count_taxa; i++){
readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i]));
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
taxa_index->taxon[i].parent->farest=0;
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
}
printf("Computing longest branches...\n");
for (i=0; i < count_taxa; i++)
{
t = taxa_index->taxon+i;
if (t->farest == -1)
{
t->farest=0;
while (t->parent != t)
{
j = t->farest + 1;
if (j > t->parent->farest)
{
t->parent->farest = j;
t=t->parent;
}
else
t = taxa_index->taxon;
}
}
}
return taxa_index;
}
/**
 * Reads the next name of a binary names file and stores it in a name structure.
 *
 * In the record, the name is directly followed by the name of its class; both
 * are copied as newly allocated, null-terminated strings. The taxon associated
 * with the name is found in the taxa index of the taxonomy, at the position
 * stored in the record.
 *
 * @param f        The names file, positioned at the beginning of a name record.
 * @param name     The structure to fill.
 * @param taxonomy The taxonomy whose taxa index was already read.
 *
 * @returns The name structure given as parameter.
 * @retval NULL at the end of the file or if an error occurred.
 */
econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
{
    econameformat_t* raw;
    int32_t          record_length;

    raw = read_ecorecord(f, &record_length);
    if (!raw)
        return NULL;

//  if (is_big_endian())        // TODO
//  {
//      raw->is_scientificname = swap_int32_t(raw->is_scientificname);
//      raw->namelength        = swap_int32_t(raw->namelength);
//      raw->classlength       = swap_int32_t(raw->classlength);
//      raw->taxid             = swap_int32_t(raw->taxid);
//  }

    name->is_scientific_name = raw->is_scientific_name;

    name->name       = malloc((raw->name_length + 1) * sizeof(char));
    name->class_name = malloc((raw->class_length+1) * sizeof(char));
    if ((name->name == NULL) || (name->class_name == NULL))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: error allocating memory for a name");
        free(name->name);
        free(name->class_name);
        name->name       = NULL;
        name->class_name = NULL;
        return NULL;
    }

    strncpy(name->name, raw->names, raw->name_length);
    name->name[raw->name_length] = 0;

    // The class name directly follows the name in the record
    strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length);
    name->class_name[raw->class_length] = 0;

    name->taxon = taxonomy->taxa->taxon + raw->taxid;

    return name;
}
/**
 * Reads a binary names file and builds the corresponding names index.
 *
 * @param file_name The name of the names file.
 * @param taxonomy  The taxonomy whose taxa index was already read.
 *
 * @returns A newly allocated names index.
 * @retval NULL if the file could not be opened or read, or if an allocation failed.
 */
econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
{
    int32_t       count;
    FILE*         f;
    econameidx_t* index_names;
    int32_t       i;
    int32_t       j;

    f = open_ecorecorddb(file_name, &count, 0);
    if (f == NULL)
        return NULL;

    if (count < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: negative number of names");
        fclose(f);
        return NULL;
    }

    // The structure already contains room for one name
    index_names = (econameidx_t*) malloc(sizeof(econameidx_t) + sizeof(econame_t) * ((count > 0) ? (count-1) : 0));
    if (index_names == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: error allocating memory");
        fclose(f);
        return NULL;
    }

    index_names->count = count;

    for (i=0; i < count; i++)
    {
        if (readnext_econame(f, (index_names->names)+i, taxonomy) == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError reading a taxonomy names file: error reading a name");
            // Free the names read so far
            for (j=0; j < i; j++)
            {
                free(index_names->names[j].name);
                free(index_names->names[j].class_name);
            }
            free(index_names);
            fclose(f);
            return NULL;
        }
    }

    fclose(f);

    return index_names;
}
/**
 * Comparison function used to search a taxon by taxid with bsearch().
 *
 * @param ptaxid The searched taxid, stored directly in the pointer value
 *               (it is not the address of a taxid).
 * @param ptaxon A pointer to a taxon of the searched array.
 *
 * @returns The difference between the searched taxid and the taxid of the taxon.
 */
static int bcomptaxon (const void* ptaxid, const void* ptaxon)
{
    int32_t  searched_taxid = (int32_t) ((size_t) ptaxid);
    ecotx_t* candidate      = (ecotx_t*) ptaxon;

    return searched_taxid - candidate->taxid;
}
/////// PUBLIC /////////
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names)
{
OBIDMS_taxonomy_p tax;
char* main_taxonomy_dir_path;
char* taxonomy_path;
char* ranks_file_name;
char* taxa_file_name;
char* local_taxa_file_name;
char* alter_names_file_name;
int buffer_size;
tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
tax->ranks = NULL;
tax->taxa = NULL;
tax->names = NULL;
buffer_size = 2048; // TODO
main_taxonomy_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME);
taxonomy_path = (char*) malloc((strlen(main_taxonomy_dir_path) + strlen(taxonomy_name) + strlen(taxonomy_name) + 3)*sizeof(char));
if (sprintf(taxonomy_path, "%s/%s/%s", main_taxonomy_dir_path, taxonomy_name, taxonomy_name) < 0)
{
free(main_taxonomy_dir_path);
obi_close_taxonomy(tax);
return NULL;
}
free(main_taxonomy_dir_path);
// Read ranks
ranks_file_name = (char*) malloc(buffer_size*sizeof(char));
if (ranks_file_name == NULL)
{
free(taxonomy_path);
obi_close_taxonomy(tax);
return NULL;
}
if (snprintf(ranks_file_name, buffer_size, "%s.rdx", taxonomy_path) < 0)
{
free(taxonomy_path);
free(ranks_file_name);
obi_close_taxonomy(tax);
return NULL;
}
tax->ranks = read_rankidx(ranks_file_name);
if (tax->ranks == NULL)
{
free(ranks_file_name);
obi_close_taxonomy(tax);
return NULL;
}
free(ranks_file_name);
// Read taxa
taxa_file_name = (char*) malloc(buffer_size*sizeof(char));
if (taxa_file_name == NULL)
{
free(taxonomy_path);
obi_close_taxonomy(tax);
return NULL;
}
if (snprintf(taxa_file_name, buffer_size,"%s.tdx", taxonomy_path) < 0)
{
free(taxonomy_path);
free(taxa_file_name);
obi_close_taxonomy(tax);
return NULL;
}
local_taxa_file_name = (char*) malloc(buffer_size*sizeof(char));
if (local_taxa_file_name == NULL)
{
free(taxonomy_path);
free(taxa_file_name);
obi_close_taxonomy(tax);
return NULL;
}
if (snprintf(local_taxa_file_name, buffer_size,"%s.ldx", taxonomy_path) < 0)
{
free(taxonomy_path);
free(taxa_file_name);
free(local_taxa_file_name);
obi_close_taxonomy(tax);
return NULL;
}
tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name);
if (tax->taxa == NULL)
{
free(taxonomy_path);
free(taxa_file_name);
free(local_taxa_file_name);
obi_close_taxonomy(tax);
return NULL;
}
free(taxa_file_name);
free(local_taxa_file_name);
// Read alternative names
if (read_alternative_names)
{
alter_names_file_name = (char*) malloc(buffer_size*sizeof(char));
if (alter_names_file_name == NULL)
{
free(taxonomy_path);
obi_close_taxonomy(tax);
return NULL;
}
if (snprintf(alter_names_file_name, buffer_size,"%s.ndx", taxonomy_path) < 0)
{
free(taxonomy_path);
free(alter_names_file_name);
obi_close_taxonomy(tax);
return NULL;
}
tax->names = read_nameidx(alter_names_file_name, tax);
if (tax->names == NULL)
{
free(alter_names_file_name);
obi_close_taxonomy(tax);
return NULL;
}
free(alter_names_file_name);
}
free(taxonomy_path);
return tax;
}
/**
 * Frees a taxonomy structure and its rank, name and taxa indices.
 *
 * @param taxonomy The taxonomy to free; may be NULL.
 *
 * @retval 0 if the taxonomy was freed.
 * @retval 1 if the taxonomy given was NULL.
 */
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
{
    // TODO no closing files?
    if (taxonomy == NULL)
        return 1;

    // TODO those don't free everything but mapping will replace anyway
    // (freeing a NULL index does nothing)
    free(taxonomy->ranks);
    free(taxonomy->names);
    free(taxonomy->taxa);

    free(taxonomy);

    return 0;
}
//////////////////////////////////////////////////////////////////////////
/**
 * Finds the first taxon with a given rank among a taxon and its ancestors.
 *
 * The search starts with the taxon itself and goes up until the root, which is
 * the taxon that is its own parent.
 *
 * @param taxon   The taxon from which the search starts.
 * @param rankidx The index of the searched rank.
 *
 * @returns The taxon itself or its closest ancestor having the rank.
 * @retval NULL if neither the taxon nor any of its ancestors has the rank.
 */
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
{
    ecotx_t* node = taxon;

    for (;;)
    {
        if (node->rank == rankidx)
            return node;

        if (node->parent == node)   // root node
            return NULL;

        node = node->parent;
    }
}
/**
 * Finds the taxon with a given taxid in a taxonomy.
 *
 * The taxa index is searched with bsearch(), which requires its taxa to be
 * sorted by increasing taxid.
 *
 * @param taxonomy The taxonomy to search.
 * @param taxid    The searched taxid.
 *
 * @returns The taxon with this taxid.
 * @retval NULL if no taxon with this taxid is found.
 */
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
{
    ecotxidx_t* taxa = taxonomy->taxa;
    // The taxid itself is passed as the key, not its address (see bcomptaxon())
    const void* key  = (const void *) ((size_t) taxid);

    return (ecotx_t*) bsearch(key,
                              (const void *) taxa->taxon,
                              taxa->count,
                              sizeof(ecotx_t),
                              bcomptaxon);
}
/**
 * Tells whether a taxon has an ancestor with a given taxid.
 *
 * The search starts with the parent of the taxon (the taxon itself is not
 * tested) and stops at the first ancestor named "root".
 *
 * @param taxon       The taxon whose ancestors are checked.
 * @param other_taxid The taxid searched among the ancestors.
 *
 * @retval true if an ancestor of the taxon has this taxid.
 * @retval false otherwise.
 */
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)
{
    ecotx_t* ancestor;

    for (ancestor = taxon->parent; ancestor->taxid != other_taxid; ancestor = ancestor->parent)
    {
        // The root was reached without finding the taxid
        if (strcmp(ancestor->name, "root") == 0)
            return false;
    }

    return true;
}
/**
 * Finds the species of a taxon: the taxon itself or its closest ancestor
 * having the "species" rank.
 *
 * The index of the "species" rank is kept between calls and only looked up
 * again when a different, non NULL taxonomy is given. With a NULL taxonomy,
 * the taxonomy of the previous call is used.
 *
 * @param taxon    The taxon whose species is wanted.
 * @param taxonomy The taxonomy to use, or NULL to reuse the previous one.
 *
 * @returns The species of the taxon.
 * @retval NULL if there is none, if no taxonomy was ever given or if the
 *              taxonomy has no "species" rank.
 */
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy = NULL;
    static int32_t           species_rank    = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_taxonomy = taxonomy;
        species_rank    = rank_index("species", taxonomy->ranks);
    }

    if ((cached_taxonomy != NULL) && (species_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, species_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the species associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * Finds the genus of a taxon: the taxon itself or its closest ancestor
 * having the "genus" rank.
 *
 * The index of the "genus" rank is kept between calls and only looked up
 * again when a different, non NULL taxonomy is given. With a NULL taxonomy,
 * the taxonomy of the previous call is used.
 *
 * @param taxon    The taxon whose genus is wanted.
 * @param taxonomy The taxonomy to use, or NULL to reuse the previous one.
 *
 * @returns The genus of the taxon.
 * @retval NULL if there is none, if no taxonomy was ever given or if the
 *              taxonomy has no "genus" rank.
 */
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy = NULL;
    static int32_t           genus_rank      = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_taxonomy = taxonomy;
        genus_rank      = rank_index("genus", taxonomy->ranks);
    }

    if ((cached_taxonomy != NULL) && (genus_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, genus_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the genus associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * Finds the family of a taxon: the taxon itself or its closest ancestor
 * having the "family" rank.
 *
 * The index of the "family" rank is kept between calls and only looked up
 * again when a different, non NULL taxonomy is given. With a NULL taxonomy,
 * the taxonomy of the previous call is used.
 *
 * @param taxon    The taxon whose family is wanted.
 * @param taxonomy The taxonomy to use, or NULL to reuse the previous one.
 *
 * @returns The family of the taxon.
 * @retval NULL if there is none, if no taxonomy was ever given or if the
 *              taxonomy has no "family" rank.
 */
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy = NULL;
    static int32_t           family_rank     = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_taxonomy = taxonomy;
        family_rank     = rank_index("family", taxonomy->ranks);
    }

    if ((cached_taxonomy != NULL) && (family_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, family_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the family associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * Finds the kingdom of a taxon: the taxon itself or its closest ancestor
 * having the "kingdom" rank.
 *
 * The index of the "kingdom" rank is kept between calls and only looked up
 * again when a different, non NULL taxonomy is given. With a NULL taxonomy,
 * the taxonomy of the previous call is used.
 *
 * @param taxon    The taxon whose kingdom is wanted.
 * @param taxonomy The taxonomy to use, or NULL to reuse the previous one.
 *
 * @returns The kingdom of the taxon.
 * @retval NULL if there is none, if no taxonomy was ever given or if the
 *              taxonomy has no "kingdom" rank.
 */
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy = NULL;
    static int32_t           kingdom_rank    = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_taxonomy = taxonomy;
        kingdom_rank    = rank_index("kingdom", taxonomy->ranks);
    }

    if ((cached_taxonomy != NULL) && (kingdom_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, kingdom_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the kingdom associated with a taxon: No taxonomy defined");
    return NULL;
}
/**
 * Finds the superkingdom of a taxon: the taxon itself or its closest ancestor
 * having the "superkingdom" rank.
 *
 * The index of the "superkingdom" rank is kept between calls and only looked
 * up again when a different, non NULL taxonomy is given. With a NULL taxonomy,
 * the taxonomy of the previous call is used.
 *
 * @param taxon    The taxon whose superkingdom is wanted.
 * @param taxonomy The taxonomy to use, or NULL to reuse the previous one.
 *
 * @returns The superkingdom of the taxon.
 * @retval NULL if there is none, if no taxonomy was ever given or if the
 *              taxonomy has no "superkingdom" rank.
 */
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy   = NULL;
    static int32_t           superkingdom_rank = -1;

    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_taxonomy   = taxonomy;
        superkingdom_rank = rank_index("superkingdom", taxonomy->ranks);
    }

    if ((cached_taxonomy != NULL) && (superkingdom_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, superkingdom_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the superkingdom associated with a taxon: No taxonomy defined");
    return NULL;
}

101
src/obidms_taxonomy.h Normal file
View File

@ -0,0 +1,101 @@
/********************************************************************
 * OBIDMS taxonomy header file                                      *
 ********************************************************************/

/**
 * @file obidms_taxonomy.h
 * @author Celine Mercier (celine.mercier@metabarcoding.org)
 * @date March 2nd 2016
 * @brief Header file for the functions handling the reading of binary taxonomy files.
 */


#ifndef OBIDMS_TAXONOMY_H_
#define OBIDMS_TAXONOMY_H_


#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#include "obidms.h"


/**
 * @brief A taxon record as it is stored in a binary taxa file.
 *
 * The records read from the file are directly interpreted through this
 * structure, so its layout must not be changed.
 */
typedef struct {
    int32_t taxid;          /**< Taxid of the taxon. */
    int32_t rank;           /**< Rank of the taxon. */
    int32_t parent;         /**< Position of the parent taxon in the taxa index. */
    int32_t name_length;    /**< Number of characters of the name. */
    char    name[1];        /**< First character of the name (name_length characters are read). */
} ecotxformat_t;


/**
 * @brief A taxon in memory.
 */
typedef struct ecotxnode {
    int32_t           taxid;     /**< Taxid of the taxon. */
    int32_t           rank;      /**< Rank of the taxon, compared to indices returned for rank labels. */
    int32_t           farest;    /**< -1 when the taxon is read, then computed when "longest branches" are computed. */
    struct ecotxnode* parent;    /**< Parent taxon. The root is its own parent. */
    char*             name;      /**< Name of the taxon (allocated). */
} ecotx_t;


/**
 * @brief An index of taxa.
 *
 * The structure is allocated with room for 'count' taxa: the taxon array is
 * declared with one element and the count-1 others follow it in memory.
 */
typedef struct {
    int32_t count;          /**< Number of taxa. */
    int32_t max_taxid;      /**< Greatest taxid among the taxa. */
    int32_t buffer_size;    /**< Set to the number of taxa when the index is read. */
    ecotx_t taxon[1];       /**< The taxa. */
} ecotxidx_t;


/**
 * @brief An index of rank labels.
 *
 * The structure is allocated with room for 'count' labels: the label array is
 * declared with one element and the count-1 others follow it in memory.
 */
typedef struct {
    int32_t count;       /**< Number of ranks. */
    char*   label[1];    /**< The rank labels (allocated strings). */
} ecorankidx_t;


/**
 * @brief A name record as it is stored in a binary names file.
 *
 * The records read from the file are directly interpreted through this
 * structure, so its layout must not be changed.
 */
typedef struct {
    int32_t is_scientific_name;    /**< Whether the name is a scientific name. */
    int32_t name_length;           /**< Number of characters of the name. */
    int32_t class_length;          /**< Number of characters of the name of the class. */
    int32_t taxid;                 /**< Position of the associated taxon in the taxa index. */
    char    names[1];              /**< The name, directly followed by the name of the class. */
} econameformat_t;


/**
 * @brief A name in memory.
 */
typedef struct {
    char*             name;                  /**< The name (allocated). */
    char*             class_name;            /**< The name of the class of the name (allocated). */
    int32_t           is_scientific_name;    /**< Whether the name is a scientific name. */
    struct ecotxnode* taxon;                 /**< The taxon associated with the name. */
} econame_t;


/**
 * @brief An index of names.
 *
 * The structure is allocated with room for 'count' names: the names array is
 * declared with one element and the count-1 others follow it in memory.
 */
typedef struct {
    int32_t   count;       /**< Number of names. */
    econame_t names[1];    /**< The names. */
} econameidx_t;


/**
 * @brief A taxonomy read from the binary taxonomy files of a DMS.
 */
typedef struct OBIDMS_taxonomy_t {
    ecorankidx_t* ranks;    /**< Index of the ranks. */
    econameidx_t* names;    /**< Index of the alternative names, NULL if they were not read. */
    ecotxidx_t*   taxa;     /**< Index of the taxa. */
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;


/**
 * @brief Reads a taxonomy stored in the taxonomy directory of a DMS.
 *
 * @param dms The DMS containing the taxonomy.
 * @param taxonomy_name The name of the taxonomy.
 * @param read_alternative_names Whether the alternative names should be read.
 *
 * @returns The taxonomy read, to be closed with obi_close_taxonomy().
 * @retval NULL if an error occurred.
 */
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);


/**
 * @brief Frees a taxonomy structure and its indices.
 *
 * @param taxonomy The taxonomy to free.
 *
 * @retval 0 if the taxonomy was freed.
 * @retval 1 if the taxonomy given was NULL.
 */
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);


/**
 * @brief Finds the first taxon having a given rank among a taxon and its ancestors.
 *
 * @param taxon The taxon from which the search starts.
 * @param rankidx The index of the searched rank.
 *
 * @returns The taxon found.
 * @retval NULL if neither the taxon nor any of its ancestors has the rank.
 */
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);


/**
 * @brief Finds the taxon with a given taxid in a taxonomy.
 *
 * @param taxonomy The taxonomy to search.
 * @param taxid The searched taxid.
 *
 * @returns The taxon found.
 * @retval NULL if no taxon with this taxid is found.
 */
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);


/**
 * @brief Tells whether a taxon has an ancestor with a given taxid.
 *
 * @param taxon The taxon whose ancestors are checked.
 * @param other_taxid The taxid searched among the ancestors.
 *
 * @returns Whether an ancestor of the taxon has this taxid.
 */
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);


/**
 * @brief Functions finding the taxon of a given rank (species, genus, family,
 *        kingdom, superkingdom) among a taxon and its ancestors.
 *
 * Each function remembers the last taxonomy it was given: the taxonomy
 * parameter can be NULL to reuse it.
 *
 * @param taxon The taxon from which the search starts.
 * @param taxonomy The taxonomy to use, or NULL to reuse the previous one.
 *
 * @returns The taxon found.
 * @retval NULL if there is none, if no taxonomy was ever given or if the rank
 *              does not exist in the taxonomy.
 */
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);


#endif /* OBIDMS_TAXONOMY_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +1,15 @@
/****************************************************************************
* OBIDMS_column header file *
* OBIDMS columns header file *
****************************************************************************/
/**
* @file obidmscolumn.h
* @author Celine Mercier
* @author Celine Mercier (celine.mercier@metabarcoding.org)
* @date 12 May 2015
* @brief Header file for the shared elements of all the OBIDMS column structures.
* @brief Header file for the functions and structures shared by all the OBIDMS columns.
*/
#ifndef OBIDMSCOLUMN_H_
#define OBIDMSCOLUMN_H_
@ -24,63 +25,83 @@
#include "obierrno.h"
#include "obilittlebigman.h"
#include "obidmscolumndir.h"
#include "obiblob_indexer.h"
#define ELEMENTS_NAMES_MAX (2048)
#define GROWTH_FACTOR (2)
#define MAXIMUM_LINE_COUNT (1000000)
#define ELEMENTS_NAMES_MAX (2048) /**< The maximum length of the list of elements names.
*/
#define COLUMN_GROWTH_FACTOR (2) /**< The growth factor when a column is enlarged.
*/
#define MAXIMUM_LINE_COUNT (1000000000) /**< The maximum line count for the data of a column. //TODO
*/
#define FORMATTED_TIME_LENGTH (1024) /**< The length allocated for the character string containing a formatted date.
*/
#define COMMENTS_MAX_LENGTH (2048) /**< The maximum length for comments.
*/
typedef int32_t obiversion_t; /**< Used to store the column version number
*/
/**
* @brief OBIColumnHeader structure.
*/
/**
* @brief OBIDMS column header structure.
*/
typedef struct OBIDMS_column_header {
bool little_endian; /**< endianness of the column:
* - `true` on little endian platforms
* - `false` on big endian platforms
*
* @see obi_is_little_endian()
*/
int header_size; /**< size of the header in bytes */
size_t line_count; /**< number of lines of data */
size_t lines_used; /**< number of lines of data used */
size_t nb_elements_per_line; /**< number of elements per line (default : 1) */
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< names of the line elements (default : "["column_name"]") */
OBIType_t data_type; /**< type of the data */
time_t creation_date; /**< date of creation of the file */
obiversion_t version; /**< version of the OBIColumn */
obiversion_t cloned_from; /**< version of the OBIColumn from which the column was cloned from (-1 if it does not come from cloning).*/
char name[OBIDMS_MAX_COLNAME+1]; /**< The column name as a NULL
* terminated string.
*/
char comments[1]; /**< comments stored as a classical
zero end C string.
The size of the comment is only limited
by the header size
*/
size_t header_size; /**< Size of the header in bytes.
*/
size_t data_size; /**< Size of the data in bytes.
*/
index_t line_count; /**< Number of lines of data allocated.
*/
index_t lines_used; /**< Number of lines of data used.
*/
index_t nb_elements_per_line; /**< Number of elements per line.
*/
char elements_names[ELEMENTS_NAMES_MAX+1]; /**< Names of the line elements with ';' as separator
* (should be the column name if one element per line).
*/
OBIType_t returned_data_type; /**< Type of the data that is returned when getting an
* element from the column.
*/
OBIType_t stored_data_type; /**< Type of the data that is actually stored in the data
* part of the column.
*/
time_t creation_date; /**< Date of creation of the file.
*/
obiversion_t version; /**< Version of the column.
*/
obiversion_t cloned_from; /**< Version of the column from which this column
* was cloned (-1 if it was not created by cloning
* another column).
*/
char name[OBIDMS_COLUMN_MAX_NAME+1]; /**< The column name as a NULL terminated string.
*/
char indexer_name[INDEXER_MAX_NAME+1]; /**< If there is one, the indexer name as a NULL terminated string.
*/
char comments[COMMENTS_MAX_LENGTH+1]; /**< Comments stored as a classical zero end C string.
*/
} OBIDMS_column_header_t, *OBIDMS_column_header_p;
/**
* @brief Structure describing a Column of the OBITools DMS
* @brief OBIDMS column structure.
*
* A data structure of this type is returned by the functions
* creating, opening or cloning an OBIDMS_column.
* creating, opening or cloning an OBIDMS column.
*/
typedef struct OBIDMS_column {
OBIDMS_p dms; /**< A pointer to a DMS instance */
OBIDMS_column_directory_p column_directory; /**< A pointer to an OBIDMS column directory instance */
OBIDMS_column_header_p header; /**< A pointer to the header of the column */
void* data; /**< A `void` pointer to the beginning of the
* data.
OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the column belongs.
*/
OBIDMS_column_directory_p column_directory; /**< A pointer to the OBIDMS column directory structure to which the column belongs.
*/
OBIDMS_column_header_p header; /**< A pointer to the header of the column.
*/
Obi_indexer_p indexer; /**< A pointer to the blob indexer associated with the column if there is one.
*/
void* data; /**< A `void` pointer to the beginning of the data.
*
* @warning never use this member directly
* outside of the code of the
* low level functions
* of the OBITools DMS
* @warning Never use this member directly outside of the code of the
* low level functions of the OBIDMS.
*/
bool writable; /**< Indicates if the column is writable or not.
* - `true` the column is writable
@ -89,27 +110,36 @@ typedef struct OBIDMS_column {
* A column is writable only by its creator
* until it closes it.
*/
size_t counter; /**< Indicates by how many threads/programs (TODO) the column is used.
*/
} OBIDMS_column_t, *OBIDMS_column_p;
/**
* @brief Returns the latest version of a column in a column directory
* @brief Returns the latest version number of a column in a column directory using the column directory structure.
*
* @param column_directory
* @param column_directory A pointer as returned by obi_create_column_directory() or obi_open_column_directory().
*
* @return the latest version number kept in the version file
* @return -1 if an error occurred
* @returns The latest version number kept in the version file.
* @retval -1 if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
obiversion_t obi_get_latest_version_number(OBIDMS_column_directory_p column_directory);
/**
* @brief Returns the latest version of a column in a column directory
* @brief Returns the latest version of a column in a column directory using the column name.
*
* @param column_name
* @param dms A pointer on an OBIDMS.
* @param column_name The column name.
*
* @return the latest version number kept in the version file
* @return -1 if an error occurred
* @returns The latest version number kept in the version file.
* @retval -1 if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* column_name);
@ -118,9 +148,9 @@ obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms, const char* c
* @brief Returns the header size in bytes of a column on this platform.
*
* The header size is defined as a multiple of the memory page size.
* Up to now the header size is defined as one time the page size.
* As of now the header size is exactly one memory page.
*
* @return a `size_t` value corresponding to the header size in bytes
* @returns The header size in bytes.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
@ -131,30 +161,46 @@ size_t obi_get_platform_header_size();
/**
* @brief Creates a column.
*
* @param dms a pointer on an OBIDMS
* @param column_name the name of the new column
* @param type the OBIType code used to create the column
* @param nb_lines the number of lines to be stored
* The minimum data size allocated is one memory page, and the data is initialized to the NA value of the OBIType.
* If there is an indexer associated with the column, it is opened or created if it does not already exist.
*
* @warning If there is one element per line, elements_names should be equal to column_name. // TODO change this condition?
*
* @param dms A pointer on an OBIDMS.
* @param column_name The name of the new column.
* @param data_type The OBIType code of the data.
* @param nb_lines The number of lines to be stored.
* @param nb_elements_per_line The number of elements per line. // TODO talk about default values
* @param elements_names The names of the elements with ';' as separator.
* @param indexer_name The name of the indexer if there is one associated with the column.
* @param comments Optional comments associated with the column.
*
* @returns A pointer on the newly created column structure.
* @retval NULL if an error occurred.
*
* @since May 2015
* @author Eric Coissac (eric.coissac@metabarcoding.org)
*/
OBIDMS_column_p obi_create_column(OBIDMS_p dms,
const char* column_name,
OBIType_t type,
size_t nb_lines,
size_t nb_elements_per_line,
const char* elements_names);
const char* column_name,
OBIType_t data_type,
index_t nb_lines,
index_t nb_elements_per_line,
const char* elements_names,
const char* indexer_name,
const char* comments
);
/**
* @brief Opens a column in read-only mode.
*
* @param dms a pointer on an OBIDMS
* @param column_name the name of the column
* @param version_number the version of the column that should be opened (if -1, the latest version number is retrieved)
* @param dms A pointer on an OBIDMS.
* @param column_name The name of the column.
* @param version_number The version of the column that should be opened (if -1, the latest version is retrieved).
*
* @return a pointer to the opened column
* @returns A pointer on the opened column structure.
* @retval NULL if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -165,26 +211,27 @@ OBIDMS_column_p obi_open_column(OBIDMS_p dms, const char* column_name, obiversio
/**
* @brief Clones a column, and returns a pointer to the writable new column.
*
* @param dms a pointer on an OBIDMS
* @param column_name the name of the column to clone
* @param version_number the version of the column that should be cloned (if -1, the latest version number is retrieved)
* @param clone_data whether the data should be copied or not
* @param dms A pointer on an OBIDMS.
* @param column_name The name of the column to clone.
* @param version_number The version of the column that should be cloned (if -1, the latest version is retrieved).
* @param clone_data Whether the data should be copied or not.
*
* @return a pointer to the created column
* @returns A pointer to the created column.
* @retval NULL if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_p obi_clone_column(OBIDMS_p dms, const char* column_name, obiversion_t version_number, bool clone_data);
OBIDMS_column_p obi_clone_column(OBIDMS_p dms, OBIDMS_column_p line_selection, const char* column_name, obiversion_t version_number, bool clone_data);
/**
* @brief Closes a column.
* @brief Truncates a column to the number of lines used if it is not read-only and closes it.
*
* @param column a pointer on an OBIDMS column
* @param column A pointer on an OBIDMS column.
*
* @return 0 if the operation was successfully completed
* @return -1 if an error occurred
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -193,26 +240,27 @@ int obi_close_column(OBIDMS_column_p column);
/**
* @brief Truncates a column file to the number of lines used.
* @brief Truncates a column file to the number of lines used, rounded up to the
* next multiple of the page size.
*
* @param column a pointer on an OBIDMS column
* @param column A pointer on an OBIDMS column.
*
* @return 0 if the operation was successfully completed
* @return -1 if an error occurred
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_truncate_column_to_lines_used(OBIDMS_column_p column);
int obi_truncate_column(OBIDMS_column_p column);
/**
* @brief Enlarges a column file.
*
* @param column a pointer on an OBIDMS column
* @param column A pointer on an OBIDMS column.
*
* @return 0 if the operation was successfully completed
* @return -1 if an error occurred
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
@ -220,149 +268,88 @@ int obi_truncate_column_to_lines_used(OBIDMS_column_p column);
int obi_enlarge_column(OBIDMS_column_p column);
/**
* @brief Truncates a column file to the number of lines used and closes it.
*
* @param column a pointer on an OBIDMS column
*
* @return 0 if the operation was successfully completed
* @return -1 if an error occurred
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_truncate_and_close_column(OBIDMS_column_p column);
/**
* @brief Sets the data in a column to the NA value of the data type.
* @brief Sets the data in a column to the NA value of the data OBIType.
*
* @param column a pointer on an OBIDMS column
* @param start the first line number of the block that should be set
* @param nb_lines the number of lines that should be set
* @param column A pointer on an OBIDMS column.
* @param start The first line number of the block that should be set.
* @param nb_lines The number of lines that should be set.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void obi_ini_to_NA_values(OBIDMS_column_p column, size_t start, size_t nb_lines);
void obi_ini_to_NA_values(OBIDMS_column_p column, index_t first_line_nb, index_t nb_lines); // TODO make private?
/**
* @brief Sets the 'writable' header attribute of an OBIDMS column to False.
* @brief Recovers the header of an OBIDMS column from the column name.
*
* @param column a pointer on an OBIDMS column
* @warning The header structure has to be munmapped by the caller.
*
* @param dms A pointer on an OBIDMS.
* @param column_name The name of an OBIDMS column.
* @param version_number The version of the column from which the header should be
* retrieved (-1: latest version).
*
* @returns A pointer on the mmapped header of the column.
* @retval NULL if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms, const char* column_name, obiversion_t version_number);
/**
* @brief Unmaps a memory-mapped header as returned by obi_column_get_header_from_name().
*
* @param header A pointer on the mmapped header structure.
*
* @retval 0 if the operation was successfully completed.
* @retval -1 if an error occurred.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_close_header(OBIDMS_column_header_p header);
/**
* @brief Recovers the index of an element in an OBIDMS column from the element's name.
*
* @param column A pointer on an OBIDMS column.
* @param element_name The name of the element.
*
* @returns The index of the element in a line of the column.
* @retval OBIIdx_NA if an error occurred. // TODO not sure if this is "clean".
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
void obi_column_make_unwritable(OBIDMS_column_p column);
index_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
/**
* @brief Recovers the line count of an OBIDMS column.
// TODO doc
int obi_column_prepare_to_set_value(OBIDMS_column_p column, index_t line_nb);
// TODO doc
int obi_column_prepare_to_get_value(OBIDMS_column_p column, index_t line_nb);
/** TODO put in utils.c
* @brief Formats a date in a way that is easy to read.
*
* @param column a pointer on an OBIDMS column
* @warning The pointer returned must be freed by the caller.
*
* @return the line count of the column
* @param date A date.
*
* @since July 2015
* @returns The date formatted in a way that is easy to read.
*
* @since October 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t obi_column_get_line_count(OBIDMS_column_p column);
/**
* @brief Recovers the number of lines used in an OBIDMS column.
*
* @param column a pointer on an OBIDMS column
*
* @return the number of lines used in the column
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t obi_column_get_nb_lines_used(OBIDMS_column_p column);
/**
* @brief Recovers the data type of an OBIDMS column.
*
* @param column a pointer on an OBIDMS column
*
* @return the data type of the column
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIType_t obi_column_get_data_type(OBIDMS_column_p column);
/**
* @brief Recovers the data type of an OBIDMS column from the column name.
*
* @param dms a pointer on an OBIDMS
* @param column_name the name of an OBIDMS column
*
* @return the data type of the column
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
OBIType_t obi_column_get_data_type_from_name(OBIDMS_p dms, const char* column_name);
/**
* @brief Recovers the line count of an OBIDMS column from the column name.
*
* @param dms a pointer on an OBIDMS
* @param column_name the name of an OBIDMS column
*
* @return the line count of the column
*
* @since September 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t obi_column_get_line_count_from_name(OBIDMS_p dms, const char* column_name);
/**
* @brief Recovers the elements names of an OBIDMS column.
*
* @param column a pointer on an OBIDMS column
*
* @return the elements names
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
const char* obi_column_get_elements_names(OBIDMS_column_p column);
/**
* @brief Recovers the index of an element in an OBIDMS column from its name.
*
* @param column a pointer on an OBIDMS column
* @param element_name the name of the element
*
* @return the index of the element in a line of the column
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t obi_column_get_element_index_from_name(OBIDMS_column_p column, const char* element_name);
/**
* @brief Recovers the number of elements per line in an OBIDMS column.
*
* @param column a pointer on an OBIDMS column
*
* @return the number of elements per line
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
size_t obi_column_get_nb_elements_per_line(OBIDMS_column_p column);
char* obi_column_format_date(time_t date);
#endif /* OBIDMSCOLUMN_H_ */

View File

@ -1,5 +1,5 @@
/****************************************************************************
* OBIDMS_column_bool functions *
* OBIDMS_column_bool functions *
****************************************************************************/
/**
@ -19,7 +19,7 @@
#include "obidebug.h"
#define DEBUG_LEVEL 0
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**********************************************************************
@ -28,27 +28,11 @@
*
**********************************************************************/
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx, obibool_t value)
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obibool_t value)
{
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
// Set the value
*(((obibool_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
@ -57,67 +41,30 @@ int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, size_t line_nb,
}
obibool_t obi_column_get_obibool_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx)
obibool_t obi_column_get_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
if ((line_nb+1) > (column->header)->lines_used)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBIBool_NA;
}
return *(((obibool_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
}
int obi_column_set_obibool_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name, obibool_t value)
int obi_column_set_obibool_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obibool_t value)
{
size_t element_idx;
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return -1;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return -1;
}
obi_column_set_obibool_with_elt_idx(column, line_nb, element_idx, value);
return 0;
return obi_column_set_obibool_with_elt_idx(column, line_nb, element_idx, value);
}
obibool_t obi_column_get_obibool_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name)
obibool_t obi_column_get_obibool_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
size_t element_idx;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return OBIBool_NA;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return OBIBool_NA;
}
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIBool_NA;
return obi_column_get_obibool_with_elt_idx(column, line_nb, element_idx);
}

View File

@ -10,6 +10,10 @@
*/
#ifndef OBIDMSCOLUMN_BOOL_H_
#define OBIDMSCOLUMN_BOOL_H_
#include <stdlib.h>
#include <stdio.h>
@ -20,82 +24,76 @@
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_BOOL, using the index of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param line_nb the number of the line where the value should be set
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set.
*
* @param element_idx the index of the element that should be set in the line
*
* @param value the value that should be set
*
* @return an integer value indicating the success of the operation.
*
* @retvalue 0 on success
* @retvalue -1 on failure and the `obi_errno` variable is set.
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx, obibool_t value);
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obibool_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_BOOL.
*
* @param column a pointer as returned by obi_create_column()
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @param line_nb the number of the line where the value should be recovered
*
* @param element_idx the index of the element that should be recovered in the line
*
* @return the recovered value
* @returns The recovered value.
* @retval OBIBool_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obibool_t obi_column_get_obibool_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx);
obibool_t obi_column_get_obibool_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_BOOL, using the name of the element in the line.
* @brief Sets a value in an OBIDMS column containing data with the type OBI_BOOL,
* using the name of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param line_nb the number of the line where the value should be set
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set.
*
* @param element_name the name of the element that should be set in the line
* If empty, it is checked that there is only one element per line
*
* @param value the value that should be set
*
* @return an integer value indicating the success of the operation.
*
* @retvalue 0 on success
* @retvalue -1 on failure and the `obi_errno` variable is set.
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obibool_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name, obibool_t value);
int obi_column_set_obibool_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obibool_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_BOOL, using the name of the element in the line.
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_BOOL,
* using the name of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @param line_nb the number of the line where the value should be recovered
*
* @param element_name the name of the element that should be recovered in the line
* If empty, it is checked that there is only one element per line
*
* @return the recovered value
* @retvalue -1 on failure and the `obi_errno` variable is set. TODO an other value must be chosen
* @returns The recovered value.
* @retval OBIBool_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obibool_t obi_column_get_obibool_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name);
obibool_t obi_column_get_obibool_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
#endif /* OBIDMSCOLUMN_BOOL_H_ */

View File

@ -19,7 +19,7 @@
#include "obidebug.h"
#define DEBUG_LEVEL 0
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**********************************************************************
@ -28,27 +28,11 @@
*
**********************************************************************/
int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx, obichar_t value)
int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obichar_t value)
{
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
// Set the value
*(((obichar_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
@ -57,69 +41,30 @@ int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column, size_t line_nb,
}
obichar_t obi_column_get_obichar_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx)
obichar_t obi_column_get_obichar_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
if ((line_nb+1) > (column->header)->lines_used)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBIChar_NA;
}
return *(((obichar_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
}
int obi_column_set_obichar_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name, obichar_t value)
int obi_column_set_obichar_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obichar_t value)
{
size_t element_idx;
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return -1;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return -1;
}
obi_column_set_obichar_with_elt_idx(column, line_nb, element_idx, value);
return 0;
return obi_column_set_obichar_with_elt_idx(column, line_nb, element_idx, value);
}
obichar_t obi_column_get_obichar_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name)
obichar_t obi_column_get_obichar_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
size_t element_idx;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return OBIChar_NA;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return OBIChar_NA;
}
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIChar_NA;
return obi_column_get_obichar_with_elt_idx(column, line_nb, element_idx);
}

View File

@ -10,6 +10,10 @@
*/
#ifndef OBIDMSCOLUMN_CHAR_H_
#define OBIDMSCOLUMN_CHAR_H_
#include <stdlib.h>
#include <stdio.h>
@ -20,82 +24,76 @@
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_CHAR, using the index of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param line_nb the number of the line where the value should be set
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set.
*
* @param element_idx the index of the element that should be set in the line
*
* @param value the value that should be set
*
* @return an integer value indicating the success of the operation.
*
* @retvalue 0 on success
* @retvalue -1 on failure and the `obi_errno` variable is set.
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx, obichar_t* value);
int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obichar_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_CHAR.
*
* @param column a pointer as returned by obi_create_column()
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @param line_nb the number of the line where the value should be recovered
*
* @param element_idx the index of the element that should be recovered in the line
*
* @return the recovered value
* @returns The recovered value.
* @retval OBIChar_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obichar_t* obi_column_get_obichar_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx);
obichar_t obi_column_get_obichar_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_CHAR, using the name of the element in the line.
* @brief Sets a value in an OBIDMS column containing data with the type OBI_CHAR,
* using the name of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param line_nb the number of the line where the value should be set
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set.
*
* @param element_name the name of the element that should be set in the line
* If empty, it is checked that there is only one element per line
*
* @param value the value that should be set
*
* @return an integer value indicating the success of the operation.
*
* @retvalue 0 on success
* @retvalue -1 on failure and the `obi_errno` variable is set.
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obichar_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name, obichar_t value);
int obi_column_set_obichar_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obichar_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_CHAR, using the name of the element in the line.
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_CHAR,
* using the name of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @param line_nb the number of the line where the value should be recovered
*
* @param element_name the name of the element that should be recovered in the line
* If empty, it is checked that there is only one element per line
*
* @return the recovered value
* @retvalue NULL on failure and the `obi_errno` variable is set. TODO an other value should be chosen maybe
* @returns The recovered value.
* @retval OBIChar_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obichar_t obi_column_get_obichar_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name);
obichar_t obi_column_get_obichar_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
#endif /* OBIDMSCOLUMN_CHAR_H_ */

View File

@ -19,7 +19,7 @@
#include "obidebug.h"
#define DEBUG_LEVEL 0
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**********************************************************************
@ -28,27 +28,10 @@
*
**********************************************************************/
int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx, obifloat_t value)
int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obifloat_t value)
{
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
// Set the value
*(((obifloat_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
@ -57,67 +40,30 @@ int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column, size_t line_nb,
}
obifloat_t obi_column_get_obifloat_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx)
obifloat_t obi_column_get_obifloat_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
if ((line_nb+1) > (column->header)->lines_used)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBIFloat_NA;
}
return *(((obifloat_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
}
int obi_column_set_obifloat_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name, obifloat_t value)
int obi_column_set_obifloat_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obifloat_t value)
{
size_t element_idx;
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return -1;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return -1;
}
obi_column_set_obifloat_with_elt_idx(column, line_nb, element_idx, value);
return 0;
return obi_column_set_obifloat_with_elt_idx(column, line_nb, element_idx, value);
}
obifloat_t obi_column_get_obifloat_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name)
obifloat_t obi_column_get_obifloat_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
size_t element_idx;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return OBIFloat_NA;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return OBIFloat_NA;
}
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIFloat_NA;
return obi_column_get_obifloat_with_elt_idx(column, line_nb, element_idx);
}

View File

@ -10,6 +10,10 @@
*/
#ifndef OBIDMSCOLUMN_FLOAT_H_
#define OBIDMSCOLUMN_FLOAT_H_
#include <stdlib.h>
#include <stdio.h>
@ -20,82 +24,76 @@
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_FLOAT, using the index of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param line_nb the number of the line where the value should be set
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_idx The index of the element that should be set in the line.
* @param value The value that should be set.
*
* @param element_idx the index of the element that should be set in the line
*
* @param value the value that should be set
*
* @return an integer value indicating the success of the operation.
*
* @retvalue 0 on success
* @retvalue -1 on failure and the `obi_errno` variable is set.
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx, obifloat_t value);
int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obifloat_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_FLOAT.
*
* @param column a pointer as returned by obi_create_column()
* @param column A pointer as returned by obi_create_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_idx The index of the element that should be recovered in the line.
*
* @param line_nb the number of the line where the value should be recovered
*
* @param element_idx the index of the element that should be recovered in the line
*
* @return the recovered value
* @returns The recovered value.
* @retval OBIFloat_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obifloat_t obi_column_get_obifloat_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx);
obifloat_t obi_column_get_obifloat_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_FLOAT, using the name of the element in the line.
* @brief Sets a value in an OBIDMS column containing data with the type OBI_FLOAT,
* using the name of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param line_nb the number of the line where the value should be set
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be set.
* @param element_name The name of the element that should be set in the line.
* @param value The value that should be set.
*
* @param element_name the name of the element that should be set in the line
* If empty, it is checked that there is only one element per line
*
* @param value the value that should be set
*
* @return an integer value indicating the success of the operation.
*
* @retvalue 0 on success
* @retvalue -1 on failure and the `obi_errno` variable is set.
* @returns An integer value indicating the success of the operation.
* @retval 0 on success.
* @retval -1 if an error occurred.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obifloat_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name, obifloat_t value);
int obi_column_set_obifloat_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obifloat_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_FLOAT, using the name of the element in the line.
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_FLOAT,
* using the name of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @param column A pointer as returned by obi_create_column() or obi_clone_column().
* @param line_nb The number of the line where the value should be recovered.
* @param element_name The name of the element that should be recovered in the line.
*
* @param line_nb the number of the line where the value should be recovered
*
* @param element_name the name of the element that should be recovered in the line
* If empty, it is checked that there is only one element per line
*
* @return the recovered value
* @retvalue -1 on failure and the `obi_errno` variable is set. TODO an other value must be chosen
* @returns The recovered value.
* @retval OBIFloat_NA the NA value of the type if an error occurred and obi_errno is set.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obifloat_t obi_column_get_obifloat_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name);
obifloat_t obi_column_get_obifloat_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name);
#endif /* OBIDMSCOLUMN_FLOAT_H_ */

View File

@ -5,8 +5,8 @@
/**
* @file obidmscolumn_idx.c
* @author Celine Mercier
* @date August 10th 2015
* @brief Functions handling OBIColumns containing data with the OBIType OBI_IDX.
* @date February 14th 2016
* @brief Functions handling OBIColumns containing data with the index_t type.
*/
@ -19,7 +19,7 @@
#include "obidebug.h"
#define DEBUG_LEVEL 0
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**********************************************************************
@ -28,97 +28,24 @@
*
**********************************************************************/
int obi_column_set_obiidx_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx, obiidx_t value)
int obi_column_set_index(OBIDMS_column_p column, index_t line_nb, index_t value)
{
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
// Set the value
*(((obiidx_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
*(((index_t*) (column->data)) + line_nb) = value;
return 0;
}
obiidx_t obi_column_get_obiidx_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx)
index_t obi_column_get_index(OBIDMS_column_p column, index_t line_nb)
{
if ((line_nb+1) > (column->header)->lines_used)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBIIdx_NA;
}
return *(((obiidx_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
}
int obi_column_set_obiidx_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name, obiidx_t value)
{
size_t element_idx;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return -1;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return -1;
}
obi_column_set_obiidx_with_elt_idx(column, line_nb, element_idx, value);
return 0;
}
obiidx_t obi_column_get_obiidx_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name)
{
size_t element_idx;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return OBIIdx_NA;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return OBIIdx_NA;
}
return obi_column_get_obiidx_with_elt_idx(column, line_nb, element_idx);
return *(((index_t*) (column->data)) + line_nb);
}

View File

@ -5,97 +5,27 @@
/**
* @file obidmscolumn_idx.h
* @author Celine Mercier
* @date August 10th 2015
* @date February 14th 2016
* @brief Header file for the functions handling OBIColumns containing data with the OBIType OBI_IDX.
*/
#ifndef OBIDMSCOLUMN_IDX_H_
#define OBIDMSCOLUMN_IDX_H_
#include <stdlib.h>
#include <stdio.h>
#include "obidmscolumn.h"
#include "obitypes.h"
// TODO doc
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_IDX, using the index of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param line_nb the number of the line where the value should be set
*
* @param element_idx the index of the element that should be set in the line
*
* @param value the value that should be set
*
* @return an integer value indicating the success of the operation.
*
* @retvalue 0 on success
* @retvalue -1 on failure and the `obi_errno` variable is set.
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiidx_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx, obiidx_t value);
int obi_column_set_index(OBIDMS_column_p column, index_t line_nb, index_t value);
index_t obi_column_get_index(OBIDMS_column_p column, index_t line_nb);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_IDX.
*
* @param column a pointer as returned by obi_create_column()
*
* @param line_nb the number of the line where the value should be recovered
*
* @param element_idx the index of the element that should be recovered in the line
*
* @return the recovered value
*
* @since July 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obiidx_t obi_column_get_obiidx_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx);
/**
* @brief Sets a value in an OBIDMS column containing data with the type OBI_IDX, using the name of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
* @warning Pointers returned by obi_open_column() don't allow writing.
*
* @param line_nb the number of the line where the value should be set
*
* @param element_name the name of the element that should be set in the line
* If empty, it is checked that there is only one element per line
*
* @param value the value that should be set
*
* @return an integer value indicating the success of the operation.
*
* @retvalue 0 on success
* @retvalue -1 on failure and the `obi_errno` variable is set.
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
int obi_column_set_obiidx_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name, obiidx_t value);
/**
* @brief Recovers a value in an OBIDMS column containing data with the type OBI_IDX, using the name of the element in the line.
*
* @param column a pointer as returned by obi_create_column()
*
* @param line_nb the number of the line where the value should be recovered
*
* @param element_name the name of the element that should be recovered in the line
* If empty, it is checked that there is only one element per line
*
* @return the recovered value
* @retvalue -1 on failure and the `obi_errno` variable is set. TODO an other value must be chosen
*
* @since August 2015
* @author Celine Mercier (celine.mercier@metabarcoding.org)
*/
obiidx_t obi_column_get_obiidx_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name);
#endif /* OBIDMSCOLUMN_IDX_H_ */

View File

@ -19,7 +19,7 @@
#include "obidebug.h"
#define DEBUG_LEVEL 0
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
/**********************************************************************
@ -28,27 +28,10 @@
*
**********************************************************************/
int obi_column_set_obiint_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx, obiint_t value)
int obi_column_set_obiint_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx, obiint_t value)
{
// Check that the line number is not greater than the maximum allowed
if (line_nb >= MAXIMUM_LINE_COUNT)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to set a value at a line number greater than the maximum allowed");
return -1;
}
// Check if the file needs to be enlarged
while ((line_nb+1) > (column->header)->line_count)
{
// Enlarge the file
if (obi_enlarge_column(column) < 0)
return -1;
}
// Update lines used
if ((line_nb+1) > (column->header)->lines_used)
(column->header)->lines_used = line_nb+1;
if (obi_column_prepare_to_set_value(column, line_nb) < 0)
return -1;
// Set the value
*(((obiint_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx) = value;
@ -57,67 +40,30 @@ int obi_column_set_obiint_with_elt_idx(OBIDMS_column_p column, size_t line_nb, s
}
obiint_t obi_column_get_obiint_with_elt_idx(OBIDMS_column_p column, size_t line_nb, size_t element_idx)
obiint_t obi_column_get_obiint_with_elt_idx(OBIDMS_column_p column, index_t line_nb, index_t element_idx)
{
if ((line_nb+1) > (column->header)->lines_used)
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nError trying to get a value that is beyond the current number of lines used");
if (obi_column_prepare_to_get_value(column, line_nb) < 0)
return OBIInt_NA;
}
return *(((obiint_t*) (column->data)) + (line_nb * ((column->header)->nb_elements_per_line)) + element_idx);
}
int obi_column_set_obiint_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name, obiint_t value)
int obi_column_set_obiint_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name, obiint_t value)
{
size_t element_idx;
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return -1;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return -1;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return -1;
}
obi_column_set_obiint_with_elt_idx(column, line_nb, element_idx, value);
return 0;
return obi_column_set_obiint_with_elt_idx(column, line_nb, element_idx, value);
}
obiint_t obi_column_get_obiint_with_elt_name(OBIDMS_column_p column, size_t line_nb, char* element_name)
obiint_t obi_column_get_obiint_with_elt_name(OBIDMS_column_p column, index_t line_nb, const char* element_name)
{
size_t element_idx;
if (!strcmp(element_name, "\0")) // element name is empty
{
if (obi_column_get_nb_elements_per_line(column) == 1) // check that there is only one element per line
element_idx = 0;
else // there is more than one element per line
{
obi_set_errno(OBICOL_UNKNOWN_ERROR);
obidebug(1, "\nAn element name must be specified");
return OBIInt_NA;
}
}
else
{
element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == -1)
return OBIInt_NA;
}
index_t element_idx = obi_column_get_element_index_from_name(column, element_name);
if (element_idx == OBIIdx_NA)
return OBIInt_NA;
return obi_column_get_obiint_with_elt_idx(column, line_nb, element_idx);
}

Some files were not shown because too many files have changed in this diff Show More