Compare commits
139 Commits
cython_bug
...
luke_tests
Author | SHA1 | Date | |
---|---|---|---|
0eca86107e | |||
0de953a3ef | |||
f3b20b809d | |||
d159b921eb | |||
4e4cf46b16 | |||
6b61533650 | |||
419885485b | |||
0c8504b6db | |||
654c34a1a6 | |||
a6c8d35491 | |||
366264828e | |||
d3a6ff6043 | |||
5ca84b91dc | |||
87935c6678 | |||
92980508c0 | |||
65880db422 | |||
767d9c7804 | |||
2566377e2a | |||
1fbbdd43f9 | |||
8cdfbb379e | |||
0a55e26520 | |||
68a8509c12 | |||
5f98d2ed5c | |||
ef1be141c1 | |||
bbfd40d56d | |||
5d08da46a2 | |||
66045acf1d | |||
6977c4315c | |||
839b3000a8 | |||
ffa4557928 | |||
003cd11362 | |||
c87227b65a | |||
c07e75f2ac | |||
6b394a5cf7 | |||
2416b8ccd8 | |||
b9921e111d | |||
8f5aa8841d | |||
900d67de87 | |||
22e3c3eeed | |||
4ead37ee48 | |||
bce360bbd5 | |||
2a68cb26f8 | |||
043e70ff49 | |||
66021367f6 | |||
e69f44ae3d | |||
1941a3785e | |||
c7b8db6a2e | |||
1dc4a3be49 | |||
09597016fd | |||
1a2fa0923c | |||
00f2f2cc51 | |||
7a88ca619a | |||
eddd19a245 | |||
2aafecc3b5 | |||
094b2371e9 | |||
c1034d300d | |||
02d67c257f | |||
e04ea85d1e | |||
527d3555f0 | |||
71492ad229 | |||
73d64e5aff | |||
4cb52e1632 | |||
9d042f7bd0 | |||
5ec2d8842e | |||
04c9470f7d | |||
be05c889e2 | |||
04e3a7b5a9 | |||
d8107533d8 | |||
cd4e65e190 | |||
375bfcce8a | |||
c225cfd8b6 | |||
6fe4c6134a | |||
966b1325ed | |||
019dfc01b4 | |||
45c9c5075c | |||
20b97c972b | |||
efc4a4a3c6 | |||
ce6ea89c21 | |||
4207db7c17 | |||
1cd35b3359 | |||
f51a6df5b2 | |||
94417e1330 | |||
2e17dbce55 | |||
a9eed1f5d9 | |||
2dfab3f378 | |||
e583098a96 | |||
b926ca3997 | |||
aacfefad26 | |||
edc4fd7b3e | |||
ff6c27acf2 | |||
69856f18dd | |||
2c084c8cf7 | |||
58ac860cc7 | |||
d44117d625 | |||
6bd42132c4 | |||
4085904362 | |||
b04b4b5902 | |||
383e738ab7 | |||
3681cecb4d | |||
545ed8111a | |||
86071d30c9 | |||
21d1b2ed3e | |||
6157633137 | |||
a08def47e6 | |||
fc5a12bad7 | |||
e323d8e702 | |||
b350ea0393 | |||
8e9e21a02e | |||
4df313c54a | |||
ffc68d448f | |||
a8f03248a8 | |||
cfaf069095 | |||
a6144eabe2 | |||
c139367555 | |||
1586956d57 | |||
b45b496b0e | |||
2cf10cb6f0 | |||
5a5516303d | |||
d6a99bafea | |||
08f2657e18 | |||
6aa2f92930 | |||
87044b41d8 | |||
6ab1c83302 | |||
e371248567 | |||
dbf9463238 | |||
eb12af4da4 | |||
e8417b4f6f | |||
6579566c6e | |||
410e2e02a0 | |||
8ce4f264aa | |||
d885eb48ff | |||
661fe3606a | |||
c4b7e579cf | |||
f6ec8ba963 | |||
0e3d6ed2d7 | |||
01bfc14503 | |||
65c1b1e8b2 | |||
b37bd8f21c | |||
05e3956a0c |
518
LICENSE
Normal file
518
LICENSE
Normal file
@ -0,0 +1,518 @@
|
||||
|
||||
CeCILL FREE SOFTWARE LICENSE AGREEMENT
|
||||
|
||||
Version 2.1 dated 2013-06-21
|
||||
|
||||
|
||||
Notice
|
||||
|
||||
This Agreement is a Free Software license agreement that is the result
|
||||
of discussions between its authors in order to ensure compliance with
|
||||
the two main principles guiding its drafting:
|
||||
|
||||
* firstly, compliance with the principles governing the distribution
|
||||
of Free Software: access to source code, broad rights granted to users,
|
||||
* secondly, the election of a governing law, French law, with which it
|
||||
is conformant, both as regards the law of torts and intellectual
|
||||
property law, and the protection that it offers to both authors and
|
||||
holders of the economic rights over software.
|
||||
|
||||
The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
|
||||
license are:
|
||||
|
||||
Commissariat à l'énergie atomique et aux énergies alternatives - CEA, a
|
||||
public scientific, technical and industrial research establishment,
|
||||
having its principal place of business at 25 rue Leblanc, immeuble Le
|
||||
Ponant D, 75015 Paris, France.
|
||||
|
||||
Centre National de la Recherche Scientifique - CNRS, a public scientific
|
||||
and technological establishment, having its principal place of business
|
||||
at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
|
||||
|
||||
Institut National de Recherche en Informatique et en Automatique -
|
||||
Inria, a public scientific and technological establishment, having its
|
||||
principal place of business at Domaine de Voluceau, Rocquencourt, BP
|
||||
105, 78153 Le Chesnay cedex, France.
|
||||
|
||||
|
||||
Preamble
|
||||
|
||||
The purpose of this Free Software license agreement is to grant users
|
||||
the right to modify and redistribute the software governed by this
|
||||
license within the framework of an open source distribution model.
|
||||
|
||||
The exercising of this right is conditional upon certain obligations for
|
||||
users so as to preserve this status for all subsequent redistributions.
|
||||
|
||||
In consideration of access to the source code and the rights to copy,
|
||||
modify and redistribute granted by the license, users are provided only
|
||||
with a limited warranty and the software's author, the holder of the
|
||||
economic rights, and the successive licensors only have limited liability.
|
||||
|
||||
In this respect, the risks associated with loading, using, modifying
|
||||
and/or developing or reproducing the software by the user are brought to
|
||||
the user's attention, given its Free Software status, which may make it
|
||||
complicated to use, with the result that its use is reserved for
|
||||
developers and experienced professionals having in-depth computer
|
||||
knowledge. Users are therefore encouraged to load and test the
|
||||
suitability of the software as regards their requirements in conditions
|
||||
enabling the security of their systems and/or data to be ensured and,
|
||||
more generally, to use and operate it in the same conditions of
|
||||
security. This Agreement may be freely reproduced and published,
|
||||
provided it is not altered, and that no provisions are either added or
|
||||
removed herefrom.
|
||||
|
||||
This Agreement may apply to any or all software for which the holder of
|
||||
the economic rights decides to submit the use thereof to its provisions.
|
||||
|
||||
Frequently asked questions can be found on the official website of the
|
||||
CeCILL licenses family (http://www.cecill.info/index.en.html) for any
|
||||
necessary clarification.
|
||||
|
||||
|
||||
Article 1 - DEFINITIONS
|
||||
|
||||
For the purpose of this Agreement, when the following expressions
|
||||
commence with a capital letter, they shall have the following meaning:
|
||||
|
||||
Agreement: means this license agreement, and its possible subsequent
|
||||
versions and annexes.
|
||||
|
||||
Software: means the software in its Object Code and/or Source Code form
|
||||
and, where applicable, its documentation, "as is" when the Licensee
|
||||
accepts the Agreement.
|
||||
|
||||
Initial Software: means the Software in its Source Code and possibly its
|
||||
Object Code form and, where applicable, its documentation, "as is" when
|
||||
it is first distributed under the terms and conditions of the Agreement.
|
||||
|
||||
Modified Software: means the Software modified by at least one
|
||||
Contribution.
|
||||
|
||||
Source Code: means all the Software's instructions and program lines to
|
||||
which access is required so as to modify the Software.
|
||||
|
||||
Object Code: means the binary files originating from the compilation of
|
||||
the Source Code.
|
||||
|
||||
Holder: means the holder(s) of the economic rights over the Initial
|
||||
Software.
|
||||
|
||||
Licensee: means the Software user(s) having accepted the Agreement.
|
||||
|
||||
Contributor: means a Licensee having made at least one Contribution.
|
||||
|
||||
Licensor: means the Holder, or any other individual or legal entity, who
|
||||
distributes the Software under the Agreement.
|
||||
|
||||
Contribution: means any or all modifications, corrections, translations,
|
||||
adaptations and/or new functions integrated into the Software by any or
|
||||
all Contributors, as well as any or all Internal Modules.
|
||||
|
||||
Module: means a set of sources files including their documentation that
|
||||
enables supplementary functions or services in addition to those offered
|
||||
by the Software.
|
||||
|
||||
External Module: means any or all Modules, not derived from the
|
||||
Software, so that this Module and the Software run in separate address
|
||||
spaces, with one calling the other when they are run.
|
||||
|
||||
Internal Module: means any or all Module, connected to the Software so
|
||||
that they both execute in the same address space.
|
||||
|
||||
GNU GPL: means the GNU General Public License version 2 or any
|
||||
subsequent version, as published by the Free Software Foundation Inc.
|
||||
|
||||
GNU Affero GPL: means the GNU Affero General Public License version 3 or
|
||||
any subsequent version, as published by the Free Software Foundation Inc.
|
||||
|
||||
EUPL: means the European Union Public License version 1.1 or any
|
||||
subsequent version, as published by the European Commission.
|
||||
|
||||
Parties: mean both the Licensee and the Licensor.
|
||||
|
||||
These expressions may be used both in singular and plural form.
|
||||
|
||||
|
||||
Article 2 - PURPOSE
|
||||
|
||||
The purpose of the Agreement is the grant by the Licensor to the
|
||||
Licensee of a non-exclusive, transferable and worldwide license for the
|
||||
Software as set forth in Article 5 <#scope> hereinafter for the whole
|
||||
term of the protection granted by the rights over said Software.
|
||||
|
||||
|
||||
Article 3 - ACCEPTANCE
|
||||
|
||||
3.1 The Licensee shall be deemed as having accepted the terms and
|
||||
conditions of this Agreement upon the occurrence of the first of the
|
||||
following events:
|
||||
|
||||
* (i) loading the Software by any or all means, notably, by
|
||||
downloading from a remote server, or by loading from a physical medium;
|
||||
* (ii) the first time the Licensee exercises any of the rights granted
|
||||
hereunder.
|
||||
|
||||
3.2 One copy of the Agreement, containing a notice relating to the
|
||||
characteristics of the Software, to the limited warranty, and to the
|
||||
fact that its use is restricted to experienced users has been provided
|
||||
to the Licensee prior to its acceptance as set forth in Article 3.1
|
||||
<#accepting> hereinabove, and the Licensee hereby acknowledges that it
|
||||
has read and understood it.
|
||||
|
||||
|
||||
Article 4 - EFFECTIVE DATE AND TERM
|
||||
|
||||
|
||||
4.1 EFFECTIVE DATE
|
||||
|
||||
The Agreement shall become effective on the date when it is accepted by
|
||||
the Licensee as set forth in Article 3.1 <#accepting>.
|
||||
|
||||
|
||||
4.2 TERM
|
||||
|
||||
The Agreement shall remain in force for the entire legal term of
|
||||
protection of the economic rights over the Software.
|
||||
|
||||
|
||||
Article 5 - SCOPE OF RIGHTS GRANTED
|
||||
|
||||
The Licensor hereby grants to the Licensee, who accepts, the following
|
||||
rights over the Software for any or all use, and for the term of the
|
||||
Agreement, on the basis of the terms and conditions set forth hereinafter.
|
||||
|
||||
Besides, if the Licensor owns or comes to own one or more patents
|
||||
protecting all or part of the functions of the Software or of its
|
||||
components, the Licensor undertakes not to enforce the rights granted by
|
||||
these patents against successive Licensees using, exploiting or
|
||||
modifying the Software. If these patents are transferred, the Licensor
|
||||
undertakes to have the transferees subscribe to the obligations set
|
||||
forth in this paragraph.
|
||||
|
||||
|
||||
5.1 RIGHT OF USE
|
||||
|
||||
The Licensee is authorized to use the Software, without any limitation
|
||||
as to its fields of application, with it being hereinafter specified
|
||||
that this comprises:
|
||||
|
||||
1. permanent or temporary reproduction of all or part of the Software
|
||||
by any or all means and in any or all form.
|
||||
|
||||
2. loading, displaying, running, or storing the Software on any or all
|
||||
medium.
|
||||
|
||||
3. entitlement to observe, study or test its operation so as to
|
||||
determine the ideas and principles behind any or all constituent
|
||||
elements of said Software. This shall apply when the Licensee
|
||||
carries out any or all loading, displaying, running, transmission or
|
||||
storage operation as regards the Software, that it is entitled to
|
||||
carry out hereunder.
|
||||
|
||||
|
||||
5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
|
||||
|
||||
The right to make Contributions includes the right to translate, adapt,
|
||||
arrange, or make any or all modifications to the Software, and the right
|
||||
to reproduce the resulting software.
|
||||
|
||||
The Licensee is authorized to make any or all Contributions to the
|
||||
Software provided that it includes an explicit notice that it is the
|
||||
author of said Contribution and indicates the date of the creation thereof.
|
||||
|
||||
|
||||
5.3 RIGHT OF DISTRIBUTION
|
||||
|
||||
In particular, the right of distribution includes the right to publish,
|
||||
transmit and communicate the Software to the general public on any or
|
||||
all medium, and by any or all means, and the right to market, either in
|
||||
consideration of a fee, or free of charge, one or more copies of the
|
||||
Software by any means.
|
||||
|
||||
The Licensee is further authorized to distribute copies of the modified
|
||||
or unmodified Software to third parties according to the terms and
|
||||
conditions set forth hereinafter.
|
||||
|
||||
|
||||
5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
|
||||
|
||||
The Licensee is authorized to distribute true copies of the Software in
|
||||
Source Code or Object Code form, provided that said distribution
|
||||
complies with all the provisions of the Agreement and is accompanied by:
|
||||
|
||||
1. a copy of the Agreement,
|
||||
|
||||
2. a notice relating to the limitation of both the Licensor's warranty
|
||||
and liability as set forth in Articles 8 and 9,
|
||||
|
||||
and that, in the event that only the Object Code of the Software is
|
||||
redistributed, the Licensee allows effective access to the full Source
|
||||
Code of the Software for a period of at least three years from the
|
||||
distribution of the Software, it being understood that the additional
|
||||
acquisition cost of the Source Code shall not exceed the cost of the
|
||||
data transfer.
|
||||
|
||||
|
||||
5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
|
||||
|
||||
When the Licensee makes a Contribution to the Software, the terms and
|
||||
conditions for the distribution of the resulting Modified Software
|
||||
become subject to all the provisions of this Agreement.
|
||||
|
||||
The Licensee is authorized to distribute the Modified Software, in
|
||||
source code or object code form, provided that said distribution
|
||||
complies with all the provisions of the Agreement and is accompanied by:
|
||||
|
||||
1. a copy of the Agreement,
|
||||
|
||||
2. a notice relating to the limitation of both the Licensor's warranty
|
||||
and liability as set forth in Articles 8 and 9,
|
||||
|
||||
and, in the event that only the object code of the Modified Software is
|
||||
redistributed,
|
||||
|
||||
3. a note stating the conditions of effective access to the full source
|
||||
code of the Modified Software for a period of at least three years
|
||||
from the distribution of the Modified Software, it being understood
|
||||
that the additional acquisition cost of the source code shall not
|
||||
exceed the cost of the data transfer.
|
||||
|
||||
|
||||
5.3.3 DISTRIBUTION OF EXTERNAL MODULES
|
||||
|
||||
When the Licensee has developed an External Module, the terms and
|
||||
conditions of this Agreement do not apply to said External Module, that
|
||||
may be distributed under a separate license agreement.
|
||||
|
||||
|
||||
5.3.4 COMPATIBILITY WITH OTHER LICENSES
|
||||
|
||||
The Licensee can include a code that is subject to the provisions of one
|
||||
of the versions of the GNU GPL, GNU Affero GPL and/or EUPL in the
|
||||
Modified or unmodified Software, and distribute that entire code under
|
||||
the terms of the same version of the GNU GPL, GNU Affero GPL and/or EUPL.
|
||||
|
||||
The Licensee can include the Modified or unmodified Software in a code
|
||||
that is subject to the provisions of one of the versions of the GNU GPL,
|
||||
GNU Affero GPL and/or EUPL and distribute that entire code under the
|
||||
terms of the same version of the GNU GPL, GNU Affero GPL and/or EUPL.
|
||||
|
||||
|
||||
Article 6 - INTELLECTUAL PROPERTY
|
||||
|
||||
|
||||
6.1 OVER THE INITIAL SOFTWARE
|
||||
|
||||
The Holder owns the economic rights over the Initial Software. Any or
|
||||
all use of the Initial Software is subject to compliance with the terms
|
||||
and conditions under which the Holder has elected to distribute its work
|
||||
and no one shall be entitled to modify the terms and conditions for the
|
||||
distribution of said Initial Software.
|
||||
|
||||
The Holder undertakes that the Initial Software will remain ruled at
|
||||
least by this Agreement, for the duration set forth in Article 4.2 <#term>.
|
||||
|
||||
|
||||
6.2 OVER THE CONTRIBUTIONS
|
||||
|
||||
The Licensee who develops a Contribution is the owner of the
|
||||
intellectual property rights over this Contribution as defined by
|
||||
applicable law.
|
||||
|
||||
|
||||
6.3 OVER THE EXTERNAL MODULES
|
||||
|
||||
The Licensee who develops an External Module is the owner of the
|
||||
intellectual property rights over this External Module as defined by
|
||||
applicable law and is free to choose the type of agreement that shall
|
||||
govern its distribution.
|
||||
|
||||
|
||||
6.4 JOINT PROVISIONS
|
||||
|
||||
The Licensee expressly undertakes:
|
||||
|
||||
1. not to remove, or modify, in any manner, the intellectual property
|
||||
notices attached to the Software;
|
||||
|
||||
2. to reproduce said notices, in an identical manner, in the copies of
|
||||
the Software modified or not.
|
||||
|
||||
The Licensee undertakes not to directly or indirectly infringe the
|
||||
intellectual property rights on the Software of the Holder and/or
|
||||
Contributors, and to take, where applicable, vis-à-vis its staff, any
|
||||
and all measures required to ensure respect of said intellectual
|
||||
property rights of the Holder and/or Contributors.
|
||||
|
||||
|
||||
Article 7 - RELATED SERVICES
|
||||
|
||||
7.1 Under no circumstances shall the Agreement oblige the Licensor to
|
||||
provide technical assistance or maintenance services for the Software.
|
||||
|
||||
However, the Licensor is entitled to offer this type of services. The
|
||||
terms and conditions of such technical assistance, and/or such
|
||||
maintenance, shall be set forth in a separate instrument. Only the
|
||||
Licensor offering said maintenance and/or technical assistance services
|
||||
shall incur liability therefor.
|
||||
|
||||
7.2 Similarly, any Licensor is entitled to offer to its licensees, under
|
||||
its sole responsibility, a warranty, that shall only be binding upon
|
||||
itself, for the redistribution of the Software and/or the Modified
|
||||
Software, under terms and conditions that it is free to decide. Said
|
||||
warranty, and the financial terms and conditions of its application,
|
||||
shall be subject of a separate instrument executed between the Licensor
|
||||
and the Licensee.
|
||||
|
||||
|
||||
Article 8 - LIABILITY
|
||||
|
||||
8.1 Subject to the provisions of Article 8.2, the Licensee shall be
|
||||
entitled to claim compensation for any direct loss it may have suffered
|
||||
from the Software as a result of a fault on the part of the relevant
|
||||
Licensor, subject to providing evidence thereof.
|
||||
|
||||
8.2 The Licensor's liability is limited to the commitments made under
|
||||
this Agreement and shall not be incurred as a result of in particular:
|
||||
(i) loss due the Licensee's total or partial failure to fulfill its
|
||||
obligations, (ii) direct or consequential loss that is suffered by the
|
||||
Licensee due to the use or performance of the Software, and (iii) more
|
||||
generally, any consequential loss. In particular the Parties expressly
|
||||
agree that any or all pecuniary or business loss (i.e. loss of data,
|
||||
loss of profits, operating loss, loss of customers or orders,
|
||||
opportunity cost, any disturbance to business activities) or any or all
|
||||
legal proceedings instituted against the Licensee by a third party,
|
||||
shall constitute consequential loss and shall not provide entitlement to
|
||||
any or all compensation from the Licensor.
|
||||
|
||||
|
||||
Article 9 - WARRANTY
|
||||
|
||||
9.1 The Licensee acknowledges that the scientific and technical
|
||||
state-of-the-art when the Software was distributed did not enable all
|
||||
possible uses to be tested and verified, nor for the presence of
|
||||
possible defects to be detected. In this respect, the Licensee's
|
||||
attention has been drawn to the risks associated with loading, using,
|
||||
modifying and/or developing and reproducing the Software which are
|
||||
reserved for experienced users.
|
||||
|
||||
The Licensee shall be responsible for verifying, by any or all means,
|
||||
the suitability of the product for its requirements, its good working
|
||||
order, and for ensuring that it shall not cause damage to either persons
|
||||
or properties.
|
||||
|
||||
9.2 The Licensor hereby represents, in good faith, that it is entitled
|
||||
to grant all the rights over the Software (including in particular the
|
||||
rights set forth in Article 5 <#scope>).
|
||||
|
||||
9.3 The Licensee acknowledges that the Software is supplied "as is" by
|
||||
the Licensor without any other express or tacit warranty, other than
|
||||
that provided for in Article 9.2 <#good-faith> and, in particular,
|
||||
without any warranty as to its commercial value, its secured, safe,
|
||||
innovative or relevant nature.
|
||||
|
||||
Specifically, the Licensor does not warrant that the Software is free
|
||||
from any error, that it will operate without interruption, that it will
|
||||
be compatible with the Licensee's own equipment and software
|
||||
configuration, nor that it will meet the Licensee's requirements.
|
||||
|
||||
9.4 The Licensor does not either expressly or tacitly warrant that the
|
||||
Software does not infringe any third party intellectual property right
|
||||
relating to a patent, software or any other property right. Therefore,
|
||||
the Licensor disclaims any and all liability towards the Licensee
|
||||
arising out of any or all proceedings for infringement that may be
|
||||
instituted in respect of the use, modification and redistribution of the
|
||||
Software. Nevertheless, should such proceedings be instituted against
|
||||
the Licensee, the Licensor shall provide it with technical and legal
|
||||
expertise for its defense. Such technical and legal expertise shall be
|
||||
decided on a case-by-case basis between the relevant Licensor and the
|
||||
Licensee pursuant to a memorandum of understanding. The Licensor
|
||||
disclaims any and all liability as regards the Licensee's use of the
|
||||
name of the Software. No warranty is given as regards the existence of
|
||||
prior rights over the name of the Software or as regards the existence
|
||||
of a trademark.
|
||||
|
||||
|
||||
Article 10 - TERMINATION
|
||||
|
||||
10.1 In the event of a breach by the Licensee of its obligations
|
||||
hereunder, the Licensor may automatically terminate this Agreement
|
||||
thirty (30) days after notice has been sent to the Licensee and has
|
||||
remained ineffective.
|
||||
|
||||
10.2 A Licensee whose Agreement is terminated shall no longer be
|
||||
authorized to use, modify or distribute the Software. However, any
|
||||
licenses that it may have granted prior to termination of the Agreement
|
||||
shall remain valid subject to their having been granted in compliance
|
||||
with the terms and conditions hereof.
|
||||
|
||||
|
||||
Article 11 - MISCELLANEOUS
|
||||
|
||||
|
||||
11.1 EXCUSABLE EVENTS
|
||||
|
||||
Neither Party shall be liable for any or all delay, or failure to
|
||||
perform the Agreement, that may be attributable to an event of force
|
||||
majeure, an act of God or an outside cause, such as defective
|
||||
functioning or interruptions of the electricity or telecommunications
|
||||
networks, network paralysis following a virus attack, intervention by
|
||||
government authorities, natural disasters, water damage, earthquakes,
|
||||
fire, explosions, strikes and labor unrest, war, etc.
|
||||
|
||||
11.2 Any failure by either Party, on one or more occasions, to invoke
|
||||
one or more of the provisions hereof, shall under no circumstances be
|
||||
interpreted as being a waiver by the interested Party of its right to
|
||||
invoke said provision(s) subsequently.
|
||||
|
||||
11.3 The Agreement cancels and replaces any or all previous agreements,
|
||||
whether written or oral, between the Parties and having the same
|
||||
purpose, and constitutes the entirety of the agreement between said
|
||||
Parties concerning said purpose. No supplement or modification to the
|
||||
terms and conditions hereof shall be effective as between the Parties
|
||||
unless it is made in writing and signed by their duly authorized
|
||||
representatives.
|
||||
|
||||
11.4 In the event that one or more of the provisions hereof were to
|
||||
conflict with a current or future applicable act or legislative text,
|
||||
said act or legislative text shall prevail, and the Parties shall make
|
||||
the necessary amendments so as to comply with said act or legislative
|
||||
text. All other provisions shall remain effective. Similarly, invalidity
|
||||
of a provision of the Agreement, for any reason whatsoever, shall not
|
||||
cause the Agreement as a whole to be invalid.
|
||||
|
||||
|
||||
11.5 LANGUAGE
|
||||
|
||||
The Agreement is drafted in both French and English and both versions
|
||||
are deemed authentic.
|
||||
|
||||
|
||||
Article 12 - NEW VERSIONS OF THE AGREEMENT
|
||||
|
||||
12.1 Any person is authorized to duplicate and distribute copies of this
|
||||
Agreement.
|
||||
|
||||
12.2 So as to ensure coherence, the wording of this Agreement is
|
||||
protected and may only be modified by the authors of the License, who
|
||||
reserve the right to periodically publish updates or new versions of the
|
||||
Agreement, each with a separate number. These subsequent versions may
|
||||
address new issues encountered by Free Software.
|
||||
|
||||
12.3 Any Software distributed under a given version of the Agreement may
|
||||
only be subsequently distributed under the same version of the Agreement
|
||||
or a subsequent version, subject to the provisions of Article 5.3.4
|
||||
<#compatibility>.
|
||||
|
||||
|
||||
Article 13 - GOVERNING LAW AND JURISDICTION
|
||||
|
||||
13.1 The Agreement is governed by French law. The Parties agree to
|
||||
endeavor to seek an amicable solution to any disagreements or disputes
|
||||
that may arise during the performance of the Agreement.
|
||||
|
||||
13.2 Failing an amicable solution within two (2) months as from their
|
||||
occurrence, and unless emergency proceedings are necessary, the
|
||||
disagreements or disputes shall be referred to the Paris Courts having
|
||||
jurisdiction, by the more diligent Party.
|
@ -7,107 +7,109 @@ Created on 13 fevr. 2014
|
||||
from distutils import log
|
||||
import os
|
||||
|
||||
from Cython.Distutils import build_ext as ori_build_ext # @UnresolvedImport
|
||||
|
||||
from Cython.Compiler import Options as cython_options # @UnresolvedImport
|
||||
|
||||
from distutils.errors import DistutilsSetupError
|
||||
|
||||
class build_ext(ori_build_ext):
|
||||
|
||||
try:
|
||||
from Cython.Distutils import build_ext as ori_build_ext # @UnresolvedImport
|
||||
from Cython.Compiler import Options as cython_options # @UnresolvedImport
|
||||
class build_ext(ori_build_ext):
|
||||
|
||||
def modifyDocScripts(self):
    """
    Records the library build directory for the documentation scripts.

    Writes the value of ``self.build_lib`` followed by a newline to
    ``doc/sphinx/build_dir.txt`` (path relative to the current working
    directory), overwriting any previous content.
    """
    # The context manager guarantees the file is closed even if the
    # write fails, which the explicit open()/close() pair did not.
    with open("doc/sphinx/build_dir.txt", "w") as build_dir_file:
        print(self.build_lib, file=build_dir_file)
|
||||
|
||||
def initialize_options(self):
    """
    Declares the options of the command and gives them their
    "undefined" value.

    The parent command declares its own options first; the two extra
    options ``littlebigman`` and ``built_files`` are then set to None.
    """
    ori_build_ext.initialize_options(self)  # @UndefinedVariable

    # None marks both options as "not set yet".
    self.littlebigman = self.built_files = None
|
||||
|
||||
|
||||
def finalize_options(self):
    """
    Computes the final value of every option of the command.

    After the parent command finalized its own options:

      - ``littlebigman`` is taken from the ``littlebigman`` option of
        the ``littlebigman`` command when it is still undefined,
      - ``built_files`` is taken from the ``files`` option of the
        ``build_files`` command when it is still undefined,
      - ``cython_c_in_temp`` is forced to 1,
      - the ``LITTLE_END`` macro is added to ``self.define`` when
        ``littlebigman`` is equal to ``'-DLITTLE_END'``.
    """
    ori_build_ext.finalize_options(self)  # @UndefinedVariable

    self.set_undefined_options('littlebigman',
                               ('littlebigman', 'littlebigman'))

    self.set_undefined_options('build_files',
                               ('files', 'built_files'))

    # NOTE(review): presumably asks Cython to write the generated C
    # files in the temporary build directory -- confirm against
    # Cython.Distutils.
    self.cython_c_in_temp = 1

    if self.littlebigman == '-DLITTLE_END':
        macro = ('LITTLE_END', None)
        if self.define is None:
            self.define = [macro]
        else:
            # list.append() accepts a single argument: the macro has to
            # be appended as one (name, value) tuple.  Passing the name
            # and the value separately raised a TypeError.
            self.define.append(macro)
|
||||
|
||||
def substitute_sources(self, exe_name, sources):
    """
    Substitutes source file name starting by an @ by the actual
    name of the built file (see --> build_files)

    @param exe_name: name of the program or extension owning the
                     sources, only used in log and error messages
    @param sources:  iterable of source file names
    @return: a new list of file names; the ``sources`` argument itself
             is left untouched
    @raise DistutilsSetupError: when an @-prefixed name is neither a key
             of ``self.built_files`` nor an existing file in
             ``self.build_temp``
    """
    # built_files may still be None: treat it as "nothing built" so that
    # the lookup falls back on the temporary build directory instead of
    # failing with a TypeError.
    built_files = self.built_files or {}

    resolved = list(sources)
    for position, source in enumerate(resolved):
        message = "%s :-> %s" % (exe_name, source)

        # startswith() also copes with an empty file name, on which
        # indexing the first character raised an IndexError.
        if not source.startswith('@'):
            log.info("%s ok", message)
            continue

        target = source[1:]
        try:
            filename = built_files[target]
        except KeyError:
            # Not registered by build_files: accept a file of that name
            # already present in the temporary build directory.
            filename = os.path.join(self.build_temp, target)
            if not os.path.isfile(filename):
                raise DistutilsSetupError(
                    'The %s filename declared in the source '
                    'files of the program %s have not been '
                    'built by the installation process' % (source,
                                                           exe_name))

        resolved[position] = filename
        log.info("%s changed to %s", message, filename)

    return resolved
|
||||
|
||||
def build_extensions(self):
|
||||
# First, sanity-check the 'extensions' list
|
||||
|
||||
for ext in self.extensions:
|
||||
ext.sources = self.substitute_sources(ext.name,ext.sources)
|
||||
def modifyDocScripts(self):
    """
    Records the library build directory for the documentation scripts.

    Writes the value of ``self.build_lib`` followed by a newline to
    ``doc/sphinx/build_dir.txt`` (path relative to the current working
    directory), overwriting any previous content.
    """
    # The context manager guarantees the file is closed even if the
    # write fails, which the explicit open()/close() pair did not.
    with open("doc/sphinx/build_dir.txt", "w") as build_dir_file:
        print(self.build_lib, file=build_dir_file)
|
||||
|
||||
self.check_extensions_list(self.extensions)
|
||||
|
||||
for ext in self.extensions:
|
||||
log.info("%s :-> %s",ext.name,ext.sources)
|
||||
ext.sources = self.cython_sources(ext.sources, ext)
|
||||
self.build_extension(ext)
|
||||
|
||||
|
||||
def run(self):
    """
    Runs the command.

    In order: writes the build directory for the documentation
    (modifyDocScripts), runs every sub-command returned by
    get_sub_commands(), switches the Cython ``annotate`` option on,
    then hands over to the run() method of the parent command.
    """
    self.modifyDocScripts()

    for sub_command in self.get_sub_commands():
        self.run_command(sub_command)

    # Set on the Cython options module itself, not on this command.
    cython_options.annotate = True
    ori_build_ext.run(self)  # @UndefinedVariable
|
||||
|
||||
|
||||
def has_files(self):
    """
    Returns whatever the has_files() method of the distribution
    attached to the command returns.
    """
    distribution = self.distribution
    return distribution.has_files()
|
||||
|
||||
def has_executables(self):
    """
    Returns whatever the has_executables() method of the distribution
    attached to the command returns.
    """
    distribution = self.distribution
    return distribution.has_executables()
|
||||
def initialize_options(self):
    """
    Declares the options of the command and gives them their
    "undefined" value.

    The parent command declares its own options first; the two extra
    options ``littlebigman`` and ``built_files`` are then set to None.
    """
    ori_build_ext.initialize_options(self)  # @UndefinedVariable

    # None marks both options as "not set yet".
    self.littlebigman = self.built_files = None
|
||||
|
||||
# Sub-commands of the command as (command name, predicate) pairs: the two
# local ones come first, followed by those declared by the parent command.
sub_commands = ([('build_files', has_files),
                 ('build_cexe', has_executables)]
                + ori_build_ext.sub_commands)
|
||||
|
||||
|
||||
def finalize_options(self):
    """
    Computes the final value of every option of the command.

    After the parent command finalized its own options:

      - ``littlebigman`` is taken from the ``littlebigman`` option of
        the ``littlebigman`` command when it is still undefined,
      - ``built_files`` is taken from the ``files`` option of the
        ``build_files`` command when it is still undefined,
      - ``cython_c_in_temp`` is forced to 1,
      - the ``LITTLE_END`` macro is added to ``self.define`` when
        ``littlebigman`` is equal to ``'-DLITTLE_END'``.
    """
    ori_build_ext.finalize_options(self)  # @UndefinedVariable

    self.set_undefined_options('littlebigman',
                               ('littlebigman', 'littlebigman'))

    self.set_undefined_options('build_files',
                               ('files', 'built_files'))

    # NOTE(review): presumably asks Cython to write the generated C
    # files in the temporary build directory -- confirm against
    # Cython.Distutils.
    self.cython_c_in_temp = 1

    if self.littlebigman == '-DLITTLE_END':
        macro = ('LITTLE_END', None)
        if self.define is None:
            self.define = [macro]
        else:
            # list.append() accepts a single argument: the macro has to
            # be appended as one (name, value) tuple.  Passing the name
            # and the value separately raised a TypeError.
            self.define.append(macro)
|
||||
|
||||
def substitute_sources(self,exe_name,sources):
|
||||
"""
|
||||
Substitutes source file name starting by an @ by the actual
|
||||
name of the built file (see --> build_files)
|
||||
"""
|
||||
sources = list(sources)
|
||||
for i in range(len(sources)):
|
||||
message = "%s :-> %s" % (exe_name,sources[i])
|
||||
if sources[i][0]=='@':
|
||||
try:
|
||||
filename = self.built_files[sources[i][1:]]
|
||||
except KeyError:
|
||||
tmpfilename = os.path.join(self.build_temp,sources[i][1:])
|
||||
if os.path.isfile (tmpfilename):
|
||||
filename = tmpfilename
|
||||
else:
|
||||
raise DistutilsSetupError(
|
||||
'The %s filename declared in the source '
|
||||
'files of the program %s have not been '
|
||||
'built by the installation process' % (sources[i],
|
||||
exe_name))
|
||||
sources[i]=filename
|
||||
log.info("%s changed to %s",message,filename)
|
||||
else:
|
||||
log.info("%s ok",message)
|
||||
|
||||
return sources
|
||||
|
||||
def build_extensions(self):
|
||||
# First, sanity-check the 'extensions' list
|
||||
|
||||
for ext in self.extensions:
|
||||
ext.sources = self.substitute_sources(ext.name,ext.sources)
|
||||
|
||||
self.check_extensions_list(self.extensions)
|
||||
|
||||
for ext in self.extensions:
|
||||
log.info("%s :-> %s",ext.name,ext.sources)
|
||||
ext.sources = self.cython_sources(ext.sources, ext)
|
||||
self.build_extension(ext)
|
||||
|
||||
|
||||
def run(self):
|
||||
self.modifyDocScripts()
|
||||
|
||||
for cmd_name in self.get_sub_commands():
|
||||
self.run_command(cmd_name)
|
||||
|
||||
cython_options.annotate = True
|
||||
ori_build_ext.run(self) # @UndefinedVariable
|
||||
|
||||
|
||||
def has_files(self):
|
||||
return self.distribution.has_files()
|
||||
|
||||
def has_executables(self):
|
||||
return self.distribution.has_executables()
|
||||
|
||||
sub_commands = [('build_files',has_files),
|
||||
('build_cexe', has_executables)
|
||||
] + \
|
||||
ori_build_ext.sub_commands
|
||||
|
||||
except ImportError:
|
||||
from distutils.command import build_ext # @UnusedImport
|
||||
|
||||
|
||||
|
||||
|
36
distutils.ext/obidistutils/serenity/bootstrappip.py
Normal file
36
distutils.ext/obidistutils/serenity/bootstrappip.py
Normal file
@ -0,0 +1,36 @@
|
||||
'''
|
||||
Created on 22 janv. 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import sys
|
||||
from urllib import request
|
||||
import os.path
|
||||
|
||||
from obidistutils.serenity.util import get_serenity_dir
|
||||
from obidistutils.serenity.rerun import rerun_with_anothe_python
|
||||
from obidistutils.serenity.checkpython import is_a_virtualenv_python
|
||||
|
||||
getpipurl="https://bootstrap.pypa.io/get-pip.py"
|
||||
|
||||
def bootstrap():
    """
    Install pip for the running interpreter.

    Downloads ``get-pip.py`` from ``getpipurl`` into the serenity
    directory, runs it with the current interpreter (adding ``--user``
    when the interpreter is not a virtualenv one) and finally calls
    ``rerun_with_anothe_python`` with that same interpreter.
    """
    import subprocess

    getpipfile = os.path.join(get_serenity_dir(), "get-pip.py")

    with request.urlopen(getpipurl) as getpip:
        with open(getpipfile, "wb") as out:
            for l in getpip:
                out.write(l)

    python = sys.executable

    # An argument list (no shell) keeps interpreter or script paths that
    # contain spaces or shell metacharacters intact.
    command = [python, getpipfile]
    if not is_a_virtualenv_python():
        command.append("--user")

    subprocess.call(command)

    rerun_with_anothe_python(python)
|
||||
|
@ -7,8 +7,13 @@ Created on 2 oct. 2014
|
||||
import re
|
||||
import os
|
||||
|
||||
import pip # @UnresolvedImport
|
||||
from pip.utils import get_installed_distributions # @UnresolvedImport
|
||||
try:
|
||||
import pip # @UnresolvedImport
|
||||
from pip.utils import get_installed_distributions # @UnresolvedImport
|
||||
except ImportError:
|
||||
from .bootstrappip import bootstrap
|
||||
bootstrap()
|
||||
|
||||
from distutils.version import StrictVersion # @UnusedImport
|
||||
from distutils.errors import DistutilsError
|
||||
from distutils import log
|
||||
|
@ -59,7 +59,7 @@ def serenity_virtualenv(envname,package,version,minversion='3.4',maxversion=None
|
||||
clear=True,
|
||||
symlinks=False,
|
||||
with_pip=True)
|
||||
|
||||
|
||||
# check the newly created virtualenv
|
||||
return serenity_virtualenv(envname,package,version)
|
||||
|
||||
|
50
python/obi.py
Normal file
50
python/obi.py
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/local/bin/python3.4
|
||||
'''
|
||||
obi -- shortdesc
|
||||
|
||||
obi is a description
|
||||
|
||||
It defines classes_and_methods
|
||||
|
||||
@author: user_name
|
||||
|
||||
@copyright: 2014 organization_name. All rights reserved.
|
||||
|
||||
@license: license
|
||||
|
||||
@contact: user_email
|
||||
@deffield updated: Updated
|
||||
'''
|
||||
|
||||
|
||||
default_config = { 'software' : "The OBITools",
|
||||
'log' : False,
|
||||
'loglevel' : 'INFO',
|
||||
'progress' : True,
|
||||
'defaultdms' : None
|
||||
}
|
||||
|
||||
root_config_name='obi'
|
||||
|
||||
from obitools3.apps.config import getConfiguration # @UnresolvedImport
|
||||
from obitools3.version import version
|
||||
|
||||
__all__ = []
|
||||
__version__ = version
|
||||
__date__ = '2014-09-28'
|
||||
__updated__ = '2014-09-28'
|
||||
|
||||
DEBUG = 1
|
||||
TESTRUN = 0
|
||||
PROFILE = 0
|
||||
|
||||
|
||||
if __name__ =="__main__":
|
||||
|
||||
config = getConfiguration(root_config_name,
|
||||
default_config)
|
||||
|
||||
config[root_config_name]['module'].run(config)
|
||||
|
||||
|
||||
|
Binary file not shown.
0
python/obitools3/apps/__init__.py
Normal file
0
python/obitools3/apps/__init__.py
Normal file
3
python/obitools3/apps/arguments.pxd
Normal file
3
python/obitools3/apps/arguments.pxd
Normal file
@ -0,0 +1,3 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cpdef buildArgumentParser(str configname, str softname)
|
61
python/obitools3/apps/arguments.pyx
Normal file
61
python/obitools3/apps/arguments.pyx
Normal file
@ -0,0 +1,61 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 27 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
|
||||
from .command import getCommandsList
|
||||
|
||||
class ObiParser(argparse.ArgumentParser):
    '''
    Argument parser printing the complete help after an error message.
    '''

    def error(self, message):
        '''
        Report a command line error and exit with status 2.

        @param message: description of the parsing error
        @type message: str
        '''
        sys.stderr.write('error: %s\n' % message)
        # The help goes to the same stream as the error message so that
        # the diagnostic is not split between stdout and stderr.
        self.print_help(sys.stderr)
        sys.exit(2)
|
||||
|
||||
cpdef buildArgumentParser(str configname,
                          str softname):
    '''
    Build the command line parser of the main command.

    Global options are stored under `<configname>:<option>` destinations.
    One sub-parser is declared for every command module exposing a `run`
    attribute; the module itself is stored as the `<configname>:module`
    default of its sub-parser.

    @param configname: name of the root configuration section
    @type configname: str
    @param softname: software name shown in the `--version` help
    @type softname: str

    @return: the configured parser
    @rtype: ObiParser
    '''
    cli = ObiParser()

    cli.add_argument('--version',
                     dest='%s:version' % configname,
                     action='store_true',
                     default=False,
                     help='Print the version of %s' % softname)

    cli.add_argument('--log',
                     dest='%s:log' % configname,
                     action='store',
                     type=str,
                     default=None,
                     help='Create a logfile')

    cli.add_argument('--no-progress',
                     dest='%s:progress' % configname,
                     action='store_false',
                     default=None,
                     help='Do not print the progress bar during analyzes')

    subcommands = cli.add_subparsers(title='subcommands',
                                     description='valid subcommands',
                                     help='additional help')

    for cmdname, cmdmodule in getCommandsList().items():
        # A module without a `run` entry point is not a command
        if not hasattr(cmdmodule, "run"):
            continue

        if hasattr(cmdmodule, "__title__"):
            cmdparser = subcommands.add_parser(cmdname,
                                               help=cmdmodule.__title__)
        else:
            cmdparser = subcommands.add_parser(cmdname)

        if hasattr(cmdmodule, "addOptions"):
            cmdmodule.addOptions(cmdparser)

        cmdparser.set_defaults(**{'%s:module' % configname : cmdmodule})

    return cli
|
3
python/obitools3/apps/command.pxd
Normal file
3
python/obitools3/apps/command.pxd
Normal file
@ -0,0 +1,3 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cdef object loadCommand(str name,loader)
|
44
python/obitools3/apps/command.pyx
Normal file
44
python/obitools3/apps/command.pyx
Normal file
@ -0,0 +1,44 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 27 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import pkgutil
|
||||
|
||||
from obitools3 import commands
|
||||
|
||||
cdef object loadCommand(str name,loader):
    '''
    Import a command module through the object that located it.

    Internal helper, not meant to be called from outside this module.

    @param name: name of the module to load
    @type name: str
    @param loader: the module loader
    @type loader: ImpLoader

    @return: the loaded module
    @rtype: module
    '''
    module_loader = loader.find_module(name)
    return module_loader.load_module(name)
|
||||
|
||||
def getCommandsList():
    '''
    List the sub-commands available to the main `obi` command.

    Every non-package module found in the `obitools3.commands` package
    is loaded.

    @return: a dict mapping each command name to its loaded module
    @rtype: dict
    '''
    cdef dict cmds = {}

    for finder, modname, ispkg in pkgutil.iter_modules(commands.__path__):
        if not ispkg:
            cmds[modname] = loadCommand(modname, finder)

    return cmds
|
10
python/obitools3/apps/config.pxd
Normal file
10
python/obitools3/apps/config.pxd
Normal file
@ -0,0 +1,10 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cpdef str setRootConfigName(str rootname)
|
||||
cpdef str getRootConfigName()
|
||||
|
||||
cdef dict buildDefaultConfiguration(str root_config_name,
|
||||
dict config)
|
||||
|
||||
cpdef dict getConfiguration(str root_config_name=?,
|
||||
dict config=?)
|
103
python/obitools3/apps/config.pyx
Normal file
103
python/obitools3/apps/config.pyx
Normal file
@ -0,0 +1,103 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 27 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import sys
|
||||
|
||||
from .command import getCommandsList
|
||||
from .logging cimport getLogger
|
||||
from .arguments cimport buildArgumentParser
|
||||
|
||||
# Package version stored in the root configuration section.
# (A second `version` import from `_curses` used to shadow this name.)
from ..version import version
|
||||
|
||||
cdef dict __default_config__ = {}
|
||||
|
||||
|
||||
cpdef str setRootConfigName(str rootname):
    '''
    Register `rootname` as the name of the root configuration section.

    When a root section already exists under a previous name, it is moved
    under the new name.

    @param rootname: new name of the root section
    @type rootname: str

    @return: the registered root name
    @rtype: str
    '''
    global __default_config__

    if '__root_config__' in __default_config__:
        oldname = __default_config__["__root_config__"]
        # Moving a section onto itself would delete it: only rename when
        # the name actually changes.
        if oldname != rootname and oldname in __default_config__:
            __default_config__[rootname] = __default_config__.pop(oldname)

    __default_config__['__root_config__'] = rootname
    return rootname
|
||||
|
||||
cpdef str getRootConfigName():
    '''
    Return the registered root section name.

    @return: the root name, or None when no root name is registered
    @rtype: str
    '''
    global __default_config__

    if '__root_config__' in __default_config__:
        return __default_config__['__root_config__']

    return None
|
||||
|
||||
cdef dict buildDefaultConfiguration(str root_config_name,
                                    dict config):
    '''
    Reset the module-level default configuration and fill it again.

    The root section receives `config` (completed with the package
    version) and each command module exposing a `run` attribute gets a
    section holding its `default_config` dict, or an empty dict when the
    module declares none.

    @param root_config_name: name of the root section
    @type root_config_name: str
    @param config: content of the root section
    @type config: dict

    @return: the module-level default configuration
    @rtype: dict
    '''
    global __default_config__

    __default_config__.clear()
    setRootConfigName(root_config_name)

    __default_config__[root_config_name]=config

    config['version']=version

    commands = getCommandsList()

    for c in commands:
        module = commands[c]

        # Same rule as the argument parser: a module without `run` is not
        # a command and is skipped (an assert would vanish under -O).
        if not hasattr(module, "run"):
            continue

        __default_config__[c] = getattr(module, 'default_config', {})

    return __default_config__
|
||||
|
||||
|
||||
cpdef dict getConfiguration(str root_config_name="__default__",
                            dict config={}):
    '''
    Return the program configuration, building it on the first call.

    The first call builds the defaults, parses the command line, copies
    every option whose value is not None into its `section:key` slot and
    installs the logger. Later calls return the stored configuration.
    Exits with status 0 after printing the version when requested, and
    with status 2 when no command is given.

    @param root_config_name: name of the root section
    @type root_config_name: str
    @param config: default content of the root section
    @type config: dict

    @return: the complete configuration
    @rtype: dict
    '''
    global __default_config__

    if '__done__' in __default_config__:
        return __default_config__

    if root_config_name=="__default__":
        raise RuntimeError("No root_config_name specified")

    if not config:
        raise RuntimeError("Base configuration is empty")

    config = buildDefaultConfiguration(root_config_name, config)
    rootsection = config[root_config_name]

    parser = buildArgumentParser(root_config_name,
                                 rootsection['software'])

    options = vars(parser.parse_args())

    if options['%s:version' % root_config_name]:
        print("%s - Version %s" % (rootsection['software'],
                                   rootsection['version']))
        sys.exit(0)

    for optname, optvalue in options.items():
        section, key = optname.split(':')
        target = config[section]
        # None means "not given on the command line": keep the default
        if optvalue is not None:
            target[key] = optvalue

    if 'module' not in rootsection:
        print('\nError: No command specified',file=sys.stderr)
        parser.print_help()
        sys.exit(2)

    getLogger(config)

    config['__done__']=True

    return config
|
3
python/obitools3/apps/logging.pxd
Normal file
3
python/obitools3/apps/logging.pxd
Normal file
@ -0,0 +1,3 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cpdef getLogger(dict config)
|
46
python/obitools3/apps/logging.pyx
Normal file
46
python/obitools3/apps/logging.pyx
Normal file
@ -0,0 +1,46 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 27 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
cpdef getLogger(dict config):
    '''
    Configure the root logger from the root configuration section.

    A stderr handler is always added; a file handler is added when the
    `log` entry of the root section is set. The configured logger is
    stored under the `logger` key of the root section.

    :param config: the program configuration; must contain the
                   `__root_config__` key and the matching root section
                   with its `loglevel` and `log` entries
    :return: the root logger
    '''
    root = config["__root_config__"]

    level   = config[root]['loglevel']
    logfile = config[root]['log']

    rootlogger   = logging.getLogger()
    logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")

    stderrHandler = logging.StreamHandler(sys.stderr)
    stderrHandler.setFormatter(logFormatter)

    rootlogger.addHandler(stderrHandler)

    if logfile:
        fileHandler = logging.FileHandler(logfile)
        fileHandler.setFormatter(logFormatter)
        rootlogger.addHandler(fileHandler)

    # Only a genuine numeric level constant is accepted. An unknown name,
    # a non-string value or the name of another attribute of the logging
    # module (a function, a flag...) falls back to INFO.
    loglevel = getattr(logging, level, None) if isinstance(level, str) else None
    if isinstance(loglevel, bool) or not isinstance(loglevel, int):
        loglevel = logging.INFO

    rootlogger.setLevel(loglevel)

    config[root]['logger']=rootlogger

    return rootlogger
|
65
python/obitools3/apps/progress.pxd
Normal file
65
python/obitools3/apps/progress.pxd
Normal file
@ -0,0 +1,65 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from ..utils cimport str2bytes
|
||||
|
||||
cdef extern from "stdio.h":
|
||||
struct FILE
|
||||
int fprintf(FILE *stream, char *format, ...)
|
||||
FILE* stderr
|
||||
ctypedef unsigned int off_t "unsigned long long"
|
||||
|
||||
cdef extern from "unistd.h":
|
||||
int fsync(int fd);
|
||||
|
||||
cdef extern from "time.h":
|
||||
struct tm :
|
||||
int tm_yday
|
||||
int tm_hour
|
||||
int tm_min
|
||||
int tm_sec
|
||||
|
||||
enum: CLOCKS_PER_SEC
|
||||
|
||||
ctypedef int time_t
|
||||
ctypedef int clock_t
|
||||
ctypedef int suseconds_t
|
||||
|
||||
struct timeval:
|
||||
time_t tv_sec # seconds */
|
||||
suseconds_t tv_usec # microseconds */
|
||||
|
||||
|
||||
struct timezone :
|
||||
int tz_minuteswest; # minutes west of Greenwich
|
||||
int tz_dsttime; # type of DST correction
|
||||
|
||||
|
||||
int gettimeofday(timeval *tv, timezone *tz)
|
||||
|
||||
|
||||
tm *gmtime_r(time_t *clock, tm *result)
|
||||
time_t time(time_t *tloc)
|
||||
clock_t clock()
|
||||
|
||||
cdef class ProgressBar:
|
||||
cdef off_t maxi
|
||||
cdef clock_t starttime
|
||||
cdef clock_t lasttime
|
||||
cdef clock_t tickcount
|
||||
cdef int freq
|
||||
cdef int cycle
|
||||
cdef int arrow
|
||||
cdef int lastlog
|
||||
cdef bint ontty
|
||||
cdef int fd
|
||||
|
||||
cdef bytes head
|
||||
cdef char *chead
|
||||
|
||||
cdef object logger
|
||||
|
||||
cdef char *wheel
|
||||
cdef char *spaces
|
||||
cdef char* diese
|
||||
|
||||
cdef clock_t clock(self)
|
138
python/obitools3/apps/progress.pyx
Normal file
138
python/obitools3/apps/progress.pyx
Normal file
@ -0,0 +1,138 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 27 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import sys
|
||||
from ..utils cimport bytes2str
|
||||
|
||||
cdef class ProgressBar:
    '''
    Progress bar written on stderr.

    An instance is called with the current position (or with a callable
    returning it). The display is refreshed every `freq` calls, `freq`
    being doubled or halved according to the time elapsed between two
    refreshes. Each time a new twentieth of `maxi` is reached, a line is
    also sent to the logger found in the configuration.
    '''

    cdef clock_t clock(self):
        '''
        Current `gettimeofday` time expressed in CLOCKS_PER_SEC units.
        '''
        cdef clock_t t
        cdef timeval tp
        cdef clock_t s

        <void> gettimeofday(&tp,NULL)
        s = <clock_t> (<double> tp.tv_usec * 1.e-6 * <double> CLOCKS_PER_SEC)
        t = tp.tv_sec * CLOCKS_PER_SEC + s

        return t

    def __init__(self,
                 off_t maxi,
                 dict config,
                 str head="",
                 double seconde=0.1):
        '''
        @param maxi: position corresponding to 100 %; 0 is replaced by 1
        @param config: program configuration, the logger is read from
                       its root section
        @param head: text printed in front of the bar
        @param seconde: targeted delay, in seconds, between two refreshes
        '''
        self.starttime = self.clock()
        self.lasttime  = self.starttime
        self.tickcount = <clock_t> (seconde * CLOCKS_PER_SEC)
        self.freq      = 1
        self.cycle     = 0
        self.arrow     = 0
        self.lastlog   = 0

        self.ontty = sys.stderr.isatty()

        if (maxi<=0):
            maxi=1

        self.maxi  = maxi
        self.head  = str2bytes(head)
        # chead points into self.head, which keeps the buffer alive
        self.chead = self.head

        self.logger=config[config["__root_config__"]]["logger"]

        # Read-only templates: 4 wheel characters, 50 spaces, 50 '#'
        self.wheel = b'|/-\\'
        self.spaces=b'          ' \
                    b'          ' \
                    b'          ' \
                    b'          ' \
                    b'          '
        self.diese =b'##########' \
                    b'##########' \
                    b'##########' \
                    b'##########' \
                    b'##########'

    def __call__(self,object pos):
        '''
        Signal a new position to the progress bar.

        @param pos: the current position, or a callable returning it
        '''
        cdef off_t    ipos
        cdef clock_t  elapsed
        cdef clock_t  newtime
        cdef clock_t  delta
        cdef clock_t  more
        cdef double   percent
        cdef tm       remain
        cdef int      days,hour,minu,sec
        cdef int      fraction
        cdef int      twentyth

        self.cycle+=1

        if self.cycle % self.freq == 0:
            self.cycle=1
            newtime  = self.clock()
            delta    = newtime - self.lasttime
            self.lasttime = newtime
            elapsed  = newtime - self.starttime

            # Adapt the refresh period to the observed call rate
            if delta < self.tickcount / 5 :
                self.freq*=2
            elif delta > self.tickcount * 5 and self.freq>1:
                self.freq//=2

            if callable(pos):
                ipos=pos()
            else:
                ipos=pos

            # Avoids a division by zero in the remaining time estimate
            if ipos==0:
                ipos=1

            percent = <double>ipos/<double>self.maxi
            more    = <time_t>((<double>elapsed / percent * (1. - percent))/CLOCKS_PER_SEC)
            <void>gmtime_r(&more, &remain)
            days    = remain.tm_yday
            hour    = remain.tm_hour
            minu    = remain.tm_min
            sec     = remain.tm_sec

            if self.ontty:
                # pos may exceed maxi: keep the bar within its 50 columns
                if percent >= 1.:
                    fraction = 50
                else:
                    fraction = <int>(percent * 50.)

                self.arrow=(self.arrow+1) % 4

                # The '%.*s' precisions truncate the templates at print
                # time, so the string buffers are never written to and
                # their terminating NUL cannot be lost.
                if days:
                    <void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %d days %02d:%02d:%02d',
                                  self.chead,
                                  percent*100,
                                  fraction,self.diese,
                                  self.wheel[self.arrow],
                                  50 - fraction,self.spaces,
                                  days,hour,minu,sec)
                else:
                    <void>fprintf(stderr,b'\r%s %5.1f %% |%.*s%c%.*s] remain : %02d:%02d:%02d',
                                  self.chead,
                                  percent*100.,
                                  fraction,self.diese,
                                  self.wheel[self.arrow],
                                  50 - fraction,self.spaces,
                                  hour,minu,sec)

            twentyth = int(percent * 20)
            if twentyth != self.lastlog:

                if self.ontty:
                    <void>fprintf(stderr,b'\n')

                self.logger.info('%s %5.1f %% remain : %02d:%02d:%02d' % (
                                    bytes2str(self.head),
                                    percent*100.,
                                    hour,minu,sec))
                self.lastlog=twentyth
        else:
            self.cycle+=1
|
||||
|
0
python/obitools3/commands/__init__.py
Normal file
0
python/obitools3/commands/__init__.py
Normal file
44
python/obitools3/commands/count.py
Normal file
44
python/obitools3/commands/count.py
Normal file
@ -0,0 +1,44 @@
|
||||
'''
|
||||
Created on 8 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from obitools3.apps.progress import ProgressBar # @UnresolvedImport
|
||||
import time
|
||||
|
||||
__title__="Counts sequences in a sequence set"
|
||||
|
||||
|
||||
default_config = { 'countmode' : None
|
||||
}
|
||||
|
||||
def addOptions(parser):
    '''
    Declare the command line options of the count command.

    @param parser: the parser receiving the options
    '''
    parser.add_argument(dest='obi:input',
                        metavar='obi:input',
                        nargs='?',
                        default=None,
                        help='input data set')

    count_group = parser.add_argument_group('Obicount specific options')

    # (option names, destination, help text) of the boolean switches
    switches = (
        (('-s', '--sequence'),
         "count:sequence",
         "Prints only the number of sequence records."),
        (('-a', '--all'),
         "count:all",
         "Prints only the total count of sequence records (if a sequence has no `count` attribute, its default count is 1) (default: False)."),
    )

    for names, destination, text in switches:
        count_group.add_argument(*names,
                                 action="store_true",
                                 dest=destination,
                                 default=False,
                                 help=text)
|
||||
|
||||
|
||||
|
||||
def run(config):
    '''
    Body of the command: drives a 1000-step progress bar, sleeping
    10 ms after each step.

    @param config: the program configuration, handed to the progress bar
    '''
    total = 1000
    progress = ProgressBar(total, config, seconde=1)

    step = 1
    while step <= total:
        progress(step)
        time.sleep(0.01)
        step += 1
|
||||
|
136
python/obitools3/commands/import.pyx
Normal file
136
python/obitools3/commands/import.pyx
Normal file
@ -0,0 +1,136 @@
|
||||
from obitools3.apps.progress cimport ProgressBar # @UnresolvedImport
|
||||
from obitools3.files.universalopener cimport uopen
|
||||
from obitools3.parsers.fasta import fastaIterator
|
||||
from obitools3.parsers.fastq import fastqIterator
|
||||
from obitools3.obidms._obidms import OBIDMS
|
||||
|
||||
|
||||
import time
|
||||
|
||||
__title__="Counts sequences in a sequence set"
|
||||
|
||||
|
||||
default_config = { 'destview' : None,
|
||||
'skip' : 0,
|
||||
'only' : None,
|
||||
'skiperror' : False,
|
||||
'seqinformat' : None,
|
||||
'moltype' : 'nuc',
|
||||
'filename' : None
|
||||
}
|
||||
|
||||
def addOptions(parser):
    '''
    Declare the command line options of the import command.

    Every option defaults to None so that only options actually given on
    the command line carry a value after parsing.

    @param parser: the parser receiving the options
    '''
    parser.add_argument(dest='import:filename',
                        metavar='<FILENAME>',
                        nargs='?',
                        default=None,
                        help='sequence file name to be imported' )

    # A single group: two groups declared with the same title would print
    # the same heading twice in the help.
    group=parser.add_argument_group('obi import specific options')

    group.add_argument('--default-dms','-d',
                       action="store", dest="obi:defaultdms",
                       metavar='<DMS NAME>',
                       default=None,
                       type=str,
                       help="Name of the default DMS for reading and writing data")

    group.add_argument('--destination-view','-v',
                       action="store", dest="import:destview",
                       metavar='<VIEW NAME>',
                       default=None,
                       type=str,
                       required=True,
                       help="Name of the view receiving the imported sequences")

    group.add_argument('--skip',
                       action="store", dest="import:skip",
                       metavar='<N>',
                       default=None,
                       type=int,
                       help="skip the N first sequences")

    group.add_argument('--only',
                       action="store", dest="import:only",
                       metavar='<N>',
                       default=None,
                       type=int,
                       help="treat only N sequences")

    group.add_argument('--skip-on-error',
                       action="store_true", dest="import:skiperror",
                       default=None,
                       help="Skip sequence entries with parse error")

    group.add_argument('--fasta',
                       action="store_const", dest="import:seqinformat",
                       default=None,
                       const='fasta',
                       help="Input file is in fasta nucleic format (including obitools fasta extensions)")

    group.add_argument('--fastq',
                       action="store_const", dest="import:seqinformat",
                       default=None,
                       const='fastq',
                       help="Input file is in sanger fastq nucleic format (standard fastq)")

    group.add_argument('--nuc',
                       action="store_const", dest="import:moltype",
                       default=None,
                       const='nuc',
                       help="Input file contains nucleic sequences")

    group.add_argument('--prot',
                       action="store_const", dest="import:moltype",
                       default=None,
                       const='pep',
                       help="Input file contains protein sequences")
|
||||
|
||||
|
||||
|
||||
def run(config):
    '''
    Import the sequences of a fasta or fastq file into a new view.

    Reads the `filename`, `seqinformat` and `destview` entries of the
    `import` section and the `defaultdms` entry of the `obi` section.

    @param config: the program configuration

    @raise RuntimeError: when no input file or no file format is given
    '''
    pb = ProgressBar(35000000,config,seconde=5)

    filename = config['import']['filename']
    if filename is None:
        raise RuntimeError('No input file specified')

    inputs = uopen(filename)

    try:
        if config['import']['seqinformat']=='fasta':
            iseq = fastaIterator(inputs)
            view_type="NUC_SEQS_VIEW"
        elif config['import']['seqinformat']=='fastq':
            iseq = fastqIterator(inputs)
            view_type="NUC_SEQS_VIEW"
        else:
            raise RuntimeError('No file format specified')

        # Temporary way to handle NA values
        #NA_list = ["nan"]

        # Create DMS
        d = OBIDMS(config['obi']['defaultdms'])

        # Create view
        view = d.new_view(config['import']['destview'], view_type=view_type)

        i = 0
        for seq in iseq:
            pb(i)
            view[i].set_id(seq['id'])
            view[i].set_definition(seq['definition'])
            view[i].set_sequence(seq['sequence'])
            for tag in seq['tags'] :
                #print(tag, seq['tags'][tag])
                #if seq['tags'][tag] not in NA_list :
                view[i][tag] = seq['tags'][tag]
            i+=1

        #print(view)
        print(view.__repr__())

        view.save_and_close()
        d.close()
    finally:
        # The input stream is released whatever happens above
        inputs.close()

    print("Done.")
|
||||
|
0
python/obitools3/files/__init__.py
Normal file
0
python/obitools3/files/__init__.py
Normal file
6
python/obitools3/files/linebuffer.pxd
Normal file
6
python/obitools3/files/linebuffer.pxd
Normal file
@ -0,0 +1,6 @@
|
||||
#cython: language_level=3
|
||||
|
||||
|
||||
cdef class LineBuffer:
|
||||
cdef object fileobj
|
||||
cdef int size
|
23
python/obitools3/files/linebuffer.pyx
Normal file
23
python/obitools3/files/linebuffer.pyx
Normal file
@ -0,0 +1,23 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 30 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
cdef class LineBuffer:
    '''
    Iterates over the lines of a file object, fetching them by batches
    through `readlines(size)`.
    '''

    def __init__(self,object fileobj,int size=100000000):
        '''
        @param fileobj: the object providing the `readlines` method
        @param size: hint passed to each `readlines` call
        '''
        self.size=size
        self.fileobj=fileobj

    def __iter__(self):
        cdef list chunk
        cdef str  line

        while True:
            chunk = self.fileobj.readlines(self.size)
            # An empty batch marks the end of the data
            if not chunk:
                break
            for line in chunk:
                yield line
|
||||
|
17
python/obitools3/files/uncompress.pxd
Normal file
17
python/obitools3/files/uncompress.pxd
Normal file
@ -0,0 +1,17 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cdef class MagicKeyFile:
|
||||
cdef object stream
|
||||
cdef str stream_mode
|
||||
cdef object binary
|
||||
cdef bytes key
|
||||
cdef int keylength
|
||||
cdef int pos
|
||||
|
||||
cpdef bytes read(self,int size=?)
|
||||
cpdef int tell(self)
|
||||
|
||||
|
||||
cdef class CompressedFile:
|
||||
cdef object accessor
|
||||
|
114
python/obitools3/files/uncompress.pyx
Normal file
114
python/obitools3/files/uncompress.pyx
Normal file
@ -0,0 +1,114 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 28 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import zipfile
|
||||
import bz2
|
||||
import gzip
|
||||
|
||||
import io
|
||||
|
||||
cdef class MagicKeyFile:
    '''
    Wraps a stream after reading its first bytes (the magic key), and
    serves these bytes again to the readers of the wrapper.

    Attributes not defined here are delegated to the binary stream.
    '''

    def __init__(self,stream,length=2):
        '''
        @param stream: a stream exposing a `mode` attribute, or a
                       `headers` mapping containing 'Content-type'
        @param length: number of bytes to read as the magic key

        @raise TypeError: when the stream offers none of these interfaces
        '''
        binary=stream
        self.stream = stream
        self.stream_mode = None
        if hasattr(stream, "mode"):
            self.stream_mode = stream.mode
            # A text stream is read through its underlying binary buffer
            if (not 'b' in stream.mode and
                hasattr(stream, "buffer") and
                'b' in stream.buffer.mode):
                binary=stream.buffer

        if (self.stream_mode is None and
            not (hasattr(stream, 'headers') and
                 hasattr(stream.headers, "keys") and
                 'Content-type' in stream.headers)):
            raise TypeError("stream does not present the good interface")

        self.binary=binary
        self.key=binary.read(length)
        # The stream may hold fewer than `length` bytes: the positions
        # are tracked against the bytes actually read.
        self.keylength=len(self.key)
        self.pos=0

    cpdef bytes read(self,int size=-1):
        '''
        Read up to `size` bytes (everything when negative), serving the
        stored key bytes before the rest of the stream.
        '''
        cdef bytes r

        if self.pos < self.keylength:
            if size > (self.keylength - self.pos):
                # End of the key followed by the missing bytes
                size = size - self.keylength + self.pos
                r = self.key[self.pos:] + self.binary.read(size)
                self.pos=self.keylength + 1
            elif size >=0 :
                # The request is entirely served from the key
                r = self.key[self.pos:(self.pos+size)]
                self.pos+=size
            else:
                r = self.key[self.pos:] + self.binary.read(size)
                self.pos=self.keylength + 1
        else:
            r = self.binary.read(size)

        return r

    cpdef int tell(self):
        '''
        Current position: the offset in the key while it is being served,
        the position of the binary stream afterwards.
        '''
        cdef int p

        if self.pos < self.keylength:
            p = self.pos
        else:
            # Ask the wrapped stream; calling self.tell() here would
            # recurse forever.
            p = self.binary.tell()

        return p

    def __getattr__(self,name):
        return getattr(self.binary, name)
|
||||
|
||||
|
||||
|
||||
cdef class CompressedFile:
    '''
    File-like wrapper selecting a zip, bz2 or gzip reader from the first
    bytes of a stream, the stream being read as is when no signature
    matches.

    The result is wrapped in an `io.TextIOWrapper` when the stream
    declares a `text/` content type or was not opened in binary mode.
    Attributes not defined here are delegated to the selected accessor.
    '''

    def __init__(self,stream):
        '''
        @param stream: the stream to read, as accepted by MagicKeyFile
        '''
        cdef int          keylength
        cdef MagicKeyFile magic
        cdef str          compressor
        cdef bytes        k
        cdef object       c

        # format name -> (magic signature, reader factory)
        cdef dict compress = { 'zip' : (b'\x50\x4b\x03\x04',zipfile.ZipFile),
                               'bz2' : (b'\x42\x5a\x68',bz2.BZ2File),
                               'gz'  : (b'\x1f\x8b\x08',gzip.open)
                             }

        keylength = max([len(x[0]) for x in compress.values()])
        magic=MagicKeyFile(stream,keylength)

        self.accessor = None

        for compressor in compress:
            k,c = compress[compressor]
            if magic.key.startswith(k):
                self.accessor = c(magic)
                # Signatures are mutually exclusive: stop at the first hit
                break

        if self.accessor is None:
            self.accessor = magic

        # stream_mode is None for streams without a `mode` attribute:
        # the mode test only applies when a mode is known.
        if ((hasattr(stream, 'headers') and
             hasattr(stream.headers, "keys") and
             'Content-type' in stream.headers and
             stream.headers['Content-type'].startswith('text/')) or
            (magic.stream_mode is not None and
             'b' not in magic.stream_mode)):
            self.accessor = io.TextIOWrapper(self.accessor)

    def __getattr__(self,name):
        return getattr(self.accessor, name)

    def __iter__(self):
        for x in self.accessor:
            yield x
|
5
python/obitools3/files/universalopener.pxd
Normal file
5
python/obitools3/files/universalopener.pxd
Normal file
@ -0,0 +1,5 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .uncompress cimport CompressedFile
|
||||
|
||||
cpdef CompressedFile uopen(str name, mode=?)
|
23
python/obitools3/files/universalopener.pyx
Normal file
23
python/obitools3/files/universalopener.pyx
Normal file
@ -0,0 +1,23 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 25 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
from urllib.request import urlopen
|
||||
|
||||
|
||||
cpdef CompressedFile uopen(str name, mode='r'):
    '''
    Open a URL or a local file and wrap it in a CompressedFile.

    `name` is first tried as a URL; when that fails it is opened as a
    local file with `mode`.

    @param name: URL or path of the data to read
    @type name: str
    @param mode: mode used when `name` is opened as a local file

    @return: the wrapped stream
    @rtype: CompressedFile
    '''
    cdef CompressedFile c

    try:
        f = urlopen(name)
    except Exception:
        # Not something urllib can open: fall back to a local file.
        # Catching Exception rather than everything lets
        # KeyboardInterrupt and SystemExit propagate.
        f = open(name,mode)

    c = CompressedFile(f)

    return c
|
||||
|
@ -1,5 +1,28 @@
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
@ -10,7 +33,19 @@
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/obiarray.h
|
||||
../../../src/obiarray.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
||||
|
@ -2,46 +2,93 @@
|
||||
|
||||
from .capi.obidms cimport OBIDMS_p
|
||||
from .capi.obidmscolumn cimport OBIDMS_column_p
|
||||
from .capi.obiview cimport Obiview_p
|
||||
from .capi.obitypes cimport obiversion_t, OBIType_t, index_t
|
||||
from ._obitaxo cimport OBI_Taxonomy
|
||||
|
||||
cdef class OBIDMS_column:
|
||||
|
||||
cdef OBIDMS_column_p* pointer
|
||||
cdef OBIDMS dms
|
||||
cdef Obiview_p view
|
||||
cdef str data_type
|
||||
cdef str dms_name
|
||||
cdef str column_name
|
||||
cdef index_t nb_elements_per_line
|
||||
cdef list elements_names
|
||||
|
||||
cpdef update_pointer(self)
|
||||
cpdef list get_elements_names(self)
|
||||
cpdef str get_data_type(self)
|
||||
cpdef index_t get_nb_lines_used(self)
|
||||
cpdef str get_creation_date(self)
|
||||
cpdef str get_comments(self)
|
||||
cpdef close(self)
|
||||
@staticmethod
|
||||
cdef object get_subclass_type(OBIDMS_column_p column_p)
|
||||
|
||||
|
||||
cdef class OBIDMS_column
|
||||
cdef class OBIDMS_column_multi_elts(OBIDMS_column):
|
||||
|
||||
cpdef set_line(self, index_t line_nb, dict values)
|
||||
|
||||
|
||||
cdef class OBIDMS_column_line:
|
||||
|
||||
cdef OBIDMS_column column
|
||||
cdef index_t index
|
||||
|
||||
|
||||
cdef class OBIView:
|
||||
|
||||
cdef Obiview_p pointer
|
||||
cdef str name
|
||||
cdef str comments
|
||||
cdef dict columns
|
||||
cdef dict columns_pp # TODO this dict might be unnecessary
|
||||
cdef OBIDMS dms
|
||||
|
||||
cpdef delete_column(self, str column_name)
|
||||
cpdef add_column(self,
|
||||
str column_name,
|
||||
obiversion_t version_number=*,
|
||||
str type=*,
|
||||
index_t nb_lines=*,
|
||||
index_t nb_elements_per_line=*,
|
||||
list elements_names=*,
|
||||
str indexer_name=*,
|
||||
str comments=*,
|
||||
bint create=*
|
||||
)
|
||||
cpdef select_line(self, index_t line_nb)
|
||||
cpdef select_lines(self, list line_selection)
|
||||
cpdef save_and_close(self)
|
||||
|
||||
|
||||
cdef class OBIView_NUC_SEQS(OBIView):
|
||||
|
||||
cdef OBIDMS_column ids
|
||||
cdef OBIDMS_column sequences
|
||||
cdef OBIDMS_column definitions
|
||||
|
||||
cpdef delete_column(self, str column_name)
|
||||
|
||||
|
||||
cdef class OBIView_line :
|
||||
|
||||
cdef index_t index
|
||||
cdef OBIView view
|
||||
|
||||
|
||||
|
||||
cdef class OBIDMS:
|
||||
|
||||
cdef OBIDMS_p pointer
|
||||
cdef str dms_name
|
||||
|
||||
cpdef dict list(self)
|
||||
cpdef close(self)
|
||||
cpdef OBIDMS_column open_column(self,
|
||||
str column_name,
|
||||
bint create=*,
|
||||
bint clone=*, bint clone_data=*,
|
||||
obiversion_t version_number=*,
|
||||
OBIType_t data_type=*,
|
||||
index_t nb_lines=*,
|
||||
index_t nb_elements_per_line=*,
|
||||
list elements_names=*,
|
||||
str array_name=*)
|
||||
|
||||
|
||||
cdef class OBIDMS_column:
|
||||
|
||||
cdef OBIDMS_column_p pointer
|
||||
cdef OBIDMS dms
|
||||
cdef str data_type # TODO keep as OBIType_t? both?
|
||||
cdef str dms_name
|
||||
cdef str column_name
|
||||
cdef index_t nb_elements_per_line
|
||||
cdef list elements_names
|
||||
|
||||
# cpdef object get_item(self, index_t line_nb, str element_name) TODO can't declare because not the same in all subclasses
|
||||
# cpdef set_item(self, index_t line_nb, str element_name, object value) TODO can't declare because object value
|
||||
cpdef list get_elements_names(self)
|
||||
cpdef str get_data_type(self)
|
||||
cpdef index_t get_nb_lines_used(self)
|
||||
cpdef str get_creation_date(self)
|
||||
cpdef close(self)
|
||||
cpdef OBI_Taxonomy open_taxonomy(self, str taxo_name)
|
||||
cpdef OBIView open_view(self, str view_name)
|
||||
cpdef OBIView new_view(self, str view_name, object view_to_clone=*, list line_selection=*, str view_type=*, str comments=*)
|
||||
cpdef dict read_view_infos(self, str view_name)
|
||||
cpdef dict read_views(self)
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,18 +1,51 @@
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/obiarray.h
|
||||
../../../src/obiarray.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
||||
|
@ -1,25 +1,14 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obitypes cimport obibool_t, index_t
|
||||
from ._obidms cimport OBIDMS_column
|
||||
from .capi.obitypes cimport index_t
|
||||
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
|
||||
|
||||
|
||||
cdef class OBIDMS_column_bool(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_bool_writable(OBIDMS_column_bool):
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_bool_multi_elts(OBIDMS_column_bool):
|
||||
cdef class OBIDMS_column_multi_elts_bool(OBIDMS_column_multi_elts):
|
||||
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obibool_t value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
|
||||
cdef class OBIDMS_column_bool_multi_elts_writable(OBIDMS_column_bool_multi_elts):
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obibool_t value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
cpdef close(self)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value)
|
||||
|
@ -1,13 +1,11 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obidmscolumn cimport obi_close_column,\
|
||||
obi_truncate_and_close_column, \
|
||||
obi_column_get_obibool_with_elt_name, \
|
||||
obi_column_get_obibool_with_elt_idx, \
|
||||
obi_column_set_obibool_with_elt_name, \
|
||||
obi_column_set_obibool_with_elt_idx
|
||||
from .capi.obiview cimport obi_column_get_obibool_with_elt_name_in_view, \
|
||||
obi_column_get_obibool_with_elt_idx_in_view, \
|
||||
obi_column_set_obibool_with_elt_name_in_view, \
|
||||
obi_column_set_obibool_with_elt_idx_in_view
|
||||
from .capi.obierrno cimport obi_errno
|
||||
from .capi.obitypes cimport OBIBool_NA
|
||||
from .capi.obitypes cimport OBIBool_NA, obibool_t
|
||||
|
||||
from obitools3.utils cimport str2bytes
|
||||
|
||||
@ -19,7 +17,7 @@ cdef class OBIDMS_column_bool(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb):
|
||||
cdef obibool_t value
|
||||
cdef object result
|
||||
value = obi_column_get_obibool_with_elt_idx(self.pointer, line_nb, 0)
|
||||
value = obi_column_get_obibool_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIBool_NA :
|
||||
@ -27,32 +25,20 @@ cdef class OBIDMS_column_bool(OBIDMS_column):
|
||||
else :
|
||||
result = PyBool_FromLong(value)
|
||||
return result
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_bool_writable(OBIDMS_column_bool):
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
if obi_column_set_obibool_with_elt_idx(self.pointer, line_nb, 0, <obibool_t> value) < 0:
|
||||
if value is None :
|
||||
value = OBIBool_NA
|
||||
if obi_column_set_obibool_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, <obibool_t> value) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_bool_multi_elts(OBIDMS_column_bool):
|
||||
cdef class OBIDMS_column_multi_elts_bool(OBIDMS_column_multi_elts):
|
||||
|
||||
cpdef object get_item(self, index_t line_nb, str element_name):
|
||||
cdef obibool_t value
|
||||
cdef object result
|
||||
value = obi_column_get_obibool_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
|
||||
value = obi_column_get_obibool_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb, element_name)
|
||||
if value == OBIBool_NA :
|
||||
@ -63,42 +49,29 @@ cdef class OBIDMS_column_bool_multi_elts(OBIDMS_column_bool):
|
||||
|
||||
cpdef object get_line(self, index_t line_nb) :
|
||||
cdef obibool_t value
|
||||
cdef object result
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef index_t i
|
||||
cdef bint all_NA
|
||||
result = {}
|
||||
all_NA = True
|
||||
for i in range(self.nb_elements_per_line) :
|
||||
value = obi_column_get_obibool_with_elt_idx(self.pointer, line_nb, i)
|
||||
value = obi_column_get_obibool_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
result[self.elements_names[i]] = PyBool_FromLong(value)
|
||||
if all_NA and (value != OBIBool_NA) :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIBool_NA :
|
||||
value_in_result = None
|
||||
else :
|
||||
value_in_result = PyBool_FromLong(value)
|
||||
result[self.elements_names[i]] = value_in_result
|
||||
if all_NA and (value_in_result is not None) :
|
||||
all_NA = False
|
||||
if all_NA :
|
||||
result = None
|
||||
return result
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obibool_t value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_bool_multi_elts_writable(OBIDMS_column_bool_multi_elts):
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obibool_t value):
|
||||
if obi_column_set_obibool_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value):
|
||||
if value is None :
|
||||
value = OBIBool_NA
|
||||
if obi_column_set_obibool_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), <obibool_t> value) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
cdef obibool_t value
|
||||
for element_name in values :
|
||||
value = <obibool_t> values[element_name]
|
||||
self.set_item(line_nb, element_name, value)
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
@ -1,18 +1,51 @@
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/obiarray.h
|
||||
../../../src/obiarray.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
||||
|
@ -1,25 +1,14 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obitypes cimport obichar_t, index_t
|
||||
from ._obidms cimport OBIDMS_column
|
||||
from .capi.obitypes cimport index_t
|
||||
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
|
||||
|
||||
|
||||
cdef class OBIDMS_column_char(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_char_writable(OBIDMS_column_char):
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_char_multi_elts(OBIDMS_column_char):
|
||||
cdef class OBIDMS_column_multi_elts_char(OBIDMS_column_multi_elts):
|
||||
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, bytes value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
|
||||
cdef class OBIDMS_column_char_multi_elts_writable(OBIDMS_column_char_multi_elts):
|
||||
cpdef set_item(self, index_t line_nb, str element_name, bytes value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
cpdef close(self)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value)
|
||||
|
@ -1,15 +1,13 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obidmscolumn cimport obi_close_column,\
|
||||
obi_truncate_and_close_column, \
|
||||
obi_column_get_obichar_with_elt_name, \
|
||||
obi_column_get_obichar_with_elt_idx, \
|
||||
obi_column_set_obichar_with_elt_name, \
|
||||
obi_column_set_obichar_with_elt_idx
|
||||
from .capi.obiview cimport obi_column_get_obichar_with_elt_name_in_view, \
|
||||
obi_column_get_obichar_with_elt_idx_in_view, \
|
||||
obi_column_set_obichar_with_elt_name_in_view, \
|
||||
obi_column_set_obichar_with_elt_idx_in_view
|
||||
from .capi.obierrno cimport obi_errno
|
||||
from .capi.obitypes cimport OBIChar_NA
|
||||
from .capi.obitypes cimport OBIChar_NA, obichar_t
|
||||
|
||||
from obitools3.utils cimport str2bytes
|
||||
from obitools3.utils cimport str2bytes, bytes2str
|
||||
|
||||
|
||||
cdef class OBIDMS_column_char(OBIDMS_column):
|
||||
@ -17,122 +15,62 @@ cdef class OBIDMS_column_char(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb):
|
||||
cdef obichar_t value
|
||||
cdef object result
|
||||
value = obi_column_get_obichar_with_elt_idx(self.pointer, line_nb, 0)
|
||||
value = obi_column_get_obichar_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIChar_NA :
|
||||
result = None
|
||||
else :
|
||||
result = <bytes> value
|
||||
result = bytes2str(value)
|
||||
return result
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_char_writable(OBIDMS_column_char):
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
if obi_column_set_obichar_with_elt_idx(self.pointer, line_nb, 0, <bytes> value[0]) < 0:
|
||||
if value is None :
|
||||
value = OBIChar_NA
|
||||
if obi_column_set_obichar_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, str2bytes(value)[0]) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_char_multi_elts(OBIDMS_column_char):
|
||||
cdef class OBIDMS_column_multi_elts_char(OBIDMS_column_multi_elts):
|
||||
|
||||
cpdef object get_item(self, index_t line_nb, str element_name):
|
||||
cdef obichar_t value
|
||||
cdef object result
|
||||
value = obi_column_get_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
|
||||
value = obi_column_get_obichar_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb, element_name)
|
||||
if value == OBIChar_NA :
|
||||
result = None
|
||||
else :
|
||||
result = <bytes> value
|
||||
result = bytes2str(value)
|
||||
return result
|
||||
|
||||
|
||||
cpdef object get_line(self, index_t line_nb) :
|
||||
cdef obichar_t value
|
||||
cdef object result
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef index_t i
|
||||
cdef bint all_NA
|
||||
result = {}
|
||||
all_NA = True
|
||||
for i in range(self.nb_elements_per_line) :
|
||||
value = obi_column_get_obichar_with_elt_idx(self.pointer, line_nb, i)
|
||||
value = obi_column_get_obichar_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
result[self.elements_names[i]] = <bytes> value
|
||||
if all_NA and (value != OBIChar_NA) :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIChar_NA :
|
||||
value_in_result = None
|
||||
else :
|
||||
value_in_result = bytes2str(value)
|
||||
result[self.elements_names[i]] = value_in_result
|
||||
if all_NA and (value_in_result is not None) :
|
||||
all_NA = False
|
||||
if all_NA :
|
||||
result = None
|
||||
return result
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, bytes value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_char_multi_elts_writable(OBIDMS_column_char_multi_elts):
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, bytes value):
|
||||
if obi_column_set_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value[0]) < 0:
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value):
|
||||
if value is None :
|
||||
value = OBIChar_NA
|
||||
if obi_column_set_obichar_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), str2bytes(value)[0]) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
cdef bytes value
|
||||
for element_name in values :
|
||||
value = <bytes> values[element_name]
|
||||
self.set_item(line_nb, element_name, value)
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# cdef class OBIDMS_column_char(OBIDMS_column) :
|
||||
#
|
||||
# cpdef object get_item(self, index_t line_nb, str element_name):
|
||||
# cdef char value
|
||||
# cdef object result
|
||||
# value = obi_column_get_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
|
||||
# if obi_errno > 0 :
|
||||
# raise IndexError(line_nb, element_name)
|
||||
# if value == OBIChar_NA :
|
||||
# result = None
|
||||
# else :
|
||||
# result = <bytes> value
|
||||
# return result
|
||||
#
|
||||
# cpdef set_item(self, index_t line_nb, str element_name, bytes value):
|
||||
# raise Exception("Column is read-only")
|
||||
#
|
||||
# cpdef close(self):
|
||||
# if obi_close_column(self.pointer) < 0 :
|
||||
# raise Exception("Problem closing a column")
|
||||
#
|
||||
#
|
||||
# cdef class OBIDMS_column_char_writable(OBIDMS_column_char) :
|
||||
#
|
||||
# cpdef set_item(self, index_t line_nb, str element_name, bytes value):
|
||||
# if obi_column_set_obichar_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value[0]) < 0:
|
||||
# raise Exception("Problem setting a value in a column")
|
||||
#
|
||||
# cpdef close(self):
|
||||
# if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
# raise Exception("Problem closing a column")
|
||||
#
|
||||
|
@ -1,18 +1,51 @@
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/obiarray.h
|
||||
../../../src/obiarray.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
||||
|
@ -1,25 +1,14 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obitypes cimport obifloat_t, index_t
|
||||
from ._obidms cimport OBIDMS_column
|
||||
from .capi.obitypes cimport index_t
|
||||
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
|
||||
|
||||
|
||||
cdef class OBIDMS_column_float(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_float_writable(OBIDMS_column_float):
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_float_multi_elts(OBIDMS_column_float):
|
||||
cdef class OBIDMS_column_multi_elts_float(OBIDMS_column_multi_elts):
|
||||
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obifloat_t value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
|
||||
cdef class OBIDMS_column_float_multi_elts_writable(OBIDMS_column_float_multi_elts):
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obifloat_t value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
cpdef close(self)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value)
|
||||
|
@ -1,13 +1,11 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obidmscolumn cimport obi_close_column,\
|
||||
obi_truncate_and_close_column, \
|
||||
obi_column_get_obifloat_with_elt_name, \
|
||||
obi_column_get_obifloat_with_elt_idx, \
|
||||
obi_column_set_obifloat_with_elt_name, \
|
||||
obi_column_set_obifloat_with_elt_idx
|
||||
from .capi.obiview cimport obi_column_get_obifloat_with_elt_name_in_view, \
|
||||
obi_column_get_obifloat_with_elt_idx_in_view, \
|
||||
obi_column_set_obifloat_with_elt_name_in_view, \
|
||||
obi_column_set_obifloat_with_elt_idx_in_view
|
||||
from .capi.obierrno cimport obi_errno
|
||||
from .capi.obitypes cimport OBIFloat_NA
|
||||
from .capi.obitypes cimport OBIFloat_NA, obifloat_t
|
||||
|
||||
from obitools3.utils cimport str2bytes
|
||||
|
||||
@ -17,7 +15,7 @@ cdef class OBIDMS_column_float(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb):
|
||||
cdef obifloat_t value
|
||||
cdef object result
|
||||
value = obi_column_get_obifloat_with_elt_idx(self.pointer, line_nb, 0)
|
||||
value = obi_column_get_obifloat_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIFloat_NA :
|
||||
@ -27,30 +25,18 @@ cdef class OBIDMS_column_float(OBIDMS_column):
|
||||
return result
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_float_writable(OBIDMS_column_float):
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
if obi_column_set_obifloat_with_elt_idx(self.pointer, line_nb, 0, <obifloat_t> value) < 0:
|
||||
if value is None :
|
||||
value = OBIFloat_NA
|
||||
if obi_column_set_obifloat_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, <obifloat_t> value) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_float_multi_elts(OBIDMS_column_float):
|
||||
cdef class OBIDMS_column_multi_elts_float(OBIDMS_column_multi_elts):
|
||||
|
||||
cpdef object get_item(self, index_t line_nb, str element_name):
|
||||
cdef obifloat_t value
|
||||
cdef object result
|
||||
value = obi_column_get_obifloat_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
|
||||
value = obi_column_get_obifloat_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb, element_name)
|
||||
if value == OBIFloat_NA :
|
||||
@ -58,46 +44,33 @@ cdef class OBIDMS_column_float_multi_elts(OBIDMS_column_float):
|
||||
else :
|
||||
result = <double> value
|
||||
return result
|
||||
|
||||
|
||||
cpdef object get_line(self, index_t line_nb) :
|
||||
cdef obifloat_t value
|
||||
cdef object result
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef index_t i
|
||||
cdef bint all_NA
|
||||
result = {}
|
||||
all_NA = True
|
||||
for i in range(self.nb_elements_per_line) :
|
||||
value = obi_column_get_obifloat_with_elt_idx(self.pointer, line_nb, i)
|
||||
value = obi_column_get_obifloat_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
result[self.elements_names[i]] = <double> value
|
||||
if all_NA and (value != OBIFloat_NA) :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIFloat_NA :
|
||||
value_in_result = None
|
||||
else :
|
||||
value_in_result = <double> value
|
||||
result[self.elements_names[i]] = value_in_result
|
||||
if all_NA and (value_in_result is not None) :
|
||||
all_NA = False
|
||||
if all_NA :
|
||||
result = None
|
||||
return result
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obifloat_t value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_float_multi_elts_writable(OBIDMS_column_float_multi_elts):
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obifloat_t value):
|
||||
if obi_column_set_obifloat_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value):
|
||||
if value is None :
|
||||
value = OBIFloat_NA
|
||||
if obi_column_set_obifloat_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), <obifloat_t> value) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
cdef obifloat_t value
|
||||
for element_name in values :
|
||||
value = <obifloat_t> values[element_name]
|
||||
self.set_item(line_nb, element_name, value)
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
@ -1,18 +1,51 @@
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/obiarray.h
|
||||
../../../src/obiarray.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
||||
|
@ -1,25 +1,14 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obitypes cimport obiint_t, index_t
|
||||
from ._obidms cimport OBIDMS_column
|
||||
from .capi.obitypes cimport index_t
|
||||
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
|
||||
|
||||
|
||||
cdef class OBIDMS_column_int(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_int_writable(OBIDMS_column_int):
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_int_multi_elts(OBIDMS_column_int):
|
||||
cdef class OBIDMS_column_multi_elts_int(OBIDMS_column_multi_elts):
|
||||
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obiint_t value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
|
||||
cdef class OBIDMS_column_int_multi_elts_writable(OBIDMS_column_int_multi_elts):
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obiint_t value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
cpdef close(self)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value)
|
||||
|
@ -1,13 +1,11 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obidmscolumn cimport obi_close_column,\
|
||||
obi_truncate_and_close_column, \
|
||||
obi_column_get_obiint_with_elt_name, \
|
||||
obi_column_get_obiint_with_elt_idx, \
|
||||
obi_column_set_obiint_with_elt_name, \
|
||||
obi_column_set_obiint_with_elt_idx
|
||||
from .capi.obiview cimport obi_column_get_obiint_with_elt_name_in_view, \
|
||||
obi_column_get_obiint_with_elt_idx_in_view, \
|
||||
obi_column_set_obiint_with_elt_name_in_view, \
|
||||
obi_column_set_obiint_with_elt_idx_in_view
|
||||
from .capi.obierrno cimport obi_errno
|
||||
from .capi.obitypes cimport OBIInt_NA
|
||||
from .capi.obitypes cimport OBIInt_NA, obiint_t
|
||||
|
||||
from obitools3.utils cimport str2bytes
|
||||
|
||||
@ -19,7 +17,7 @@ cdef class OBIDMS_column_int(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb):
|
||||
cdef obiint_t value
|
||||
cdef object result
|
||||
value = obi_column_get_obiint_with_elt_idx(self.pointer, line_nb, 0)
|
||||
value = obi_column_get_obiint_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIInt_NA :
|
||||
@ -27,32 +25,20 @@ cdef class OBIDMS_column_int(OBIDMS_column):
|
||||
else :
|
||||
result = PyInt_FromLong(value)
|
||||
return result
|
||||
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_int_writable(OBIDMS_column_int):
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
if obi_column_set_obiint_with_elt_idx(self.pointer, line_nb, 0, <obiint_t> value) < 0:
|
||||
if value is None :
|
||||
value = OBIInt_NA
|
||||
if obi_column_set_obiint_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, <obiint_t> value) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_int_multi_elts(OBIDMS_column_int):
|
||||
cdef class OBIDMS_column_multi_elts_int(OBIDMS_column_multi_elts):
|
||||
|
||||
cpdef object get_item(self, index_t line_nb, str element_name):
|
||||
cdef obiint_t value
|
||||
cdef object result
|
||||
value = obi_column_get_obiint_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
|
||||
value = obi_column_get_obiint_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb, element_name)
|
||||
if value == OBIInt_NA :
|
||||
@ -63,42 +49,30 @@ cdef class OBIDMS_column_int_multi_elts(OBIDMS_column_int):
|
||||
|
||||
cpdef object get_line(self, index_t line_nb) :
|
||||
cdef obiint_t value
|
||||
cdef object result
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef index_t i
|
||||
cdef bint all_NA
|
||||
result = {}
|
||||
all_NA = True
|
||||
for i in range(self.nb_elements_per_line) :
|
||||
value = obi_column_get_obiint_with_elt_idx(self.pointer, line_nb, i)
|
||||
value = obi_column_get_obiint_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
result[self.elements_names[i]] = PyInt_FromLong(value)
|
||||
if all_NA and (value != OBIInt_NA) :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIInt_NA :
|
||||
value_in_result = None
|
||||
else :
|
||||
value_in_result = PyInt_FromLong(value)
|
||||
result[self.elements_names[i]] = value_in_result
|
||||
if all_NA and (value_in_result is not None) :
|
||||
all_NA = False
|
||||
if all_NA :
|
||||
result = None
|
||||
result = None # TODO discuss
|
||||
return result
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obiint_t value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_int_multi_elts_writable(OBIDMS_column_int_multi_elts):
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, obiint_t value):
|
||||
if obi_column_set_obiint_with_elt_name(self.pointer, line_nb, str2bytes(element_name), value) < 0:
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value):
|
||||
if value is None :
|
||||
value = OBIInt_NA
|
||||
if obi_column_set_obiint_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), <obiint_t> value) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
cdef obiint_t value
|
||||
for element_name in values :
|
||||
value = <obiint_t> values[element_name]
|
||||
self.set_item(line_nb, element_name, value)
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
51
python/obitools3/obidms/_obidmscolumn_seq.cfiles
Normal file
51
python/obitools3/obidms/_obidmscolumn_seq.cfiles
Normal file
@ -0,0 +1,51 @@
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
14
python/obitools3/obidms/_obidmscolumn_seq.pxd
Normal file
14
python/obitools3/obidms/_obidmscolumn_seq.pxd
Normal file
@ -0,0 +1,14 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obitypes cimport index_t
|
||||
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
|
||||
|
||||
|
||||
cdef class OBIDMS_column_seq(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
|
||||
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
||||
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value)
|
88
python/obitools3/obidms/_obidmscolumn_seq.pyx
Normal file
88
python/obitools3/obidms/_obidmscolumn_seq.pyx
Normal file
@ -0,0 +1,88 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obiview cimport obi_column_get_obiseq_with_elt_name_in_view, \
|
||||
obi_column_get_obiseq_with_elt_idx_in_view, \
|
||||
obi_column_set_obiseq_with_elt_name_in_view, \
|
||||
obi_column_set_obiseq_with_elt_idx_in_view
|
||||
from .capi.obierrno cimport obi_errno
|
||||
from .capi.obitypes cimport OBISeq_NA, const_char_p
|
||||
|
||||
from obitools3.utils cimport str2bytes, bytes2str
|
||||
|
||||
from libc.stdlib cimport free
|
||||
from libc.string cimport strcmp
|
||||
|
||||
|
||||
cdef class OBIDMS_column_seq(OBIDMS_column):
|
||||
|
||||
cpdef object get_line(self, index_t line_nb):
|
||||
cdef char* value
|
||||
cdef object result
|
||||
value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if strcmp(value, OBISeq_NA) == 0 :
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
free(value)
|
||||
return result
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
cdef bytes value_b
|
||||
if value is None :
|
||||
value_b = OBISeq_NA
|
||||
else :
|
||||
value_b = str2bytes(value)
|
||||
if obi_column_set_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, value_b) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_multi_elts_seq(OBIDMS_column_multi_elts):
|
||||
|
||||
cpdef object get_item(self, index_t line_nb, str element_name):
|
||||
cdef char* value
|
||||
cdef object result
|
||||
value = obi_column_get_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb, element_name)
|
||||
if strcmp(value, OBISeq_NA) == 0 :
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
free(value)
|
||||
return result
|
||||
|
||||
cpdef object get_line(self, index_t line_nb) :
|
||||
cdef char* value
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef index_t i
|
||||
cdef bint all_NA
|
||||
result = {}
|
||||
all_NA = True
|
||||
for i in range(self.nb_elements_per_line) :
|
||||
value = obi_column_get_obiseq_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if strcmp(value, OBISeq_NA) == 0 :
|
||||
value_in_result = None
|
||||
else :
|
||||
value_in_result = bytes2str(value)
|
||||
free(value)
|
||||
result[self.elements_names[i]] = value_in_result
|
||||
if all_NA and (value_in_result is not None) :
|
||||
all_NA = False
|
||||
if all_NA :
|
||||
result = None
|
||||
return result
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value):
|
||||
cdef bytes value_b
|
||||
if value is None :
|
||||
value_b = OBISeq_NA
|
||||
else :
|
||||
value_b = str2bytes(value)
|
||||
if obi_column_set_obiseq_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), value_b) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
@ -1,18 +1,51 @@
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/private_at_functions.h
|
||||
../../../src/private_at_functions.c
|
||||
../../../src/obiarray.h
|
||||
../../../src/obiarray.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
||||
|
@ -1,25 +1,14 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obitypes cimport index_t
|
||||
from ._obidms cimport OBIDMS_column
|
||||
from ._obidms cimport OBIDMS_column, OBIDMS_column_multi_elts
|
||||
|
||||
|
||||
cdef class OBIDMS_column_str(OBIDMS_column):
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_str_writable(OBIDMS_column_str):
|
||||
cpdef set_line(self, index_t line_nb, object value)
|
||||
cpdef close(self)
|
||||
|
||||
cdef class OBIDMS_column_str_multi_elts(OBIDMS_column_str):
|
||||
cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
|
||||
cpdef object get_item(self, index_t line_nb, str element_name)
|
||||
cpdef object get_line(self, index_t line_nb)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, str value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
|
||||
cdef class OBIDMS_column_str_multi_elts_writable(OBIDMS_column_str_multi_elts):
|
||||
cpdef set_item(self, index_t line_nb, str element_name, str value)
|
||||
cpdef set_line(self, index_t line_nb, object values)
|
||||
cpdef close(self)
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value)
|
||||
|
@ -1,103 +1,87 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obidmscolumn cimport obi_close_column,\
|
||||
obi_truncate_and_close_column, \
|
||||
obi_column_get_obistr_with_elt_name, \
|
||||
obi_column_get_obistr_with_elt_idx, \
|
||||
obi_column_set_obistr_with_elt_name, \
|
||||
obi_column_set_obistr_with_elt_idx
|
||||
from .capi.obiview cimport obi_column_get_obistr_with_elt_name_in_view, \
|
||||
obi_column_get_obistr_with_elt_idx_in_view, \
|
||||
obi_column_set_obistr_with_elt_name_in_view, \
|
||||
obi_column_set_obistr_with_elt_idx_in_view
|
||||
from .capi.obierrno cimport obi_errno
|
||||
from .capi.obitypes cimport OBIIdx_NA, const_char_p
|
||||
from .capi.obitypes cimport OBIStr_NA, const_char_p
|
||||
|
||||
from obitools3.utils cimport str2bytes, bytes2str
|
||||
|
||||
from libc.string cimport strcmp
|
||||
|
||||
|
||||
cdef class OBIDMS_column_str(OBIDMS_column):
|
||||
|
||||
cpdef object get_line(self, index_t line_nb):
|
||||
cdef bytes value
|
||||
cdef object result
|
||||
value = <bytes> obi_column_get_obistr_with_elt_idx(self.pointer, line_nb, 0)
|
||||
cdef const_char_p value
|
||||
cdef object result
|
||||
value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
if value == OBIIdx_NA :
|
||||
if strcmp(value, OBIStr_NA) == 0 :
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
||||
return result
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_str_writable(OBIDMS_column_str):
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object value):
|
||||
if obi_column_set_obistr_with_elt_idx(self.pointer, line_nb, 0, str2bytes(value)) < 0:
|
||||
cdef bytes value_b
|
||||
if value is None :
|
||||
value_b = OBIStr_NA
|
||||
else :
|
||||
value_b = str2bytes(value)
|
||||
if obi_column_set_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, 0, value_b) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_str_multi_elts(OBIDMS_column_str):
|
||||
cdef class OBIDMS_column_multi_elts_str(OBIDMS_column_multi_elts):
|
||||
|
||||
cpdef object get_item(self, index_t line_nb, str element_name):
|
||||
cdef bytes value
|
||||
cdef object result
|
||||
value = <bytes> obi_column_get_obistr_with_elt_name(self.pointer, line_nb, str2bytes(element_name))
|
||||
cdef const_char_p value
|
||||
cdef object result
|
||||
value = obi_column_get_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name))
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb, element_name)
|
||||
if value == OBIIdx_NA :
|
||||
if strcmp(value, OBIStr_NA) == 0 :
|
||||
result = None
|
||||
else :
|
||||
result = bytes2str(value)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
||||
return result
|
||||
|
||||
|
||||
cpdef object get_line(self, index_t line_nb) :
|
||||
cdef bytes value
|
||||
cdef object result
|
||||
cdef index_t i
|
||||
cdef bint all_NA
|
||||
cdef const_char_p value
|
||||
cdef object value_in_result
|
||||
cdef dict result
|
||||
cdef index_t i
|
||||
cdef bint all_NA
|
||||
result = {}
|
||||
all_NA = True
|
||||
for i in range(self.nb_elements_per_line) :
|
||||
value = <bytes> obi_column_get_obistr_with_elt_idx(self.pointer, line_nb, i)
|
||||
value = obi_column_get_obistr_with_elt_idx_in_view(self.view, (self.pointer)[0], line_nb, i)
|
||||
if obi_errno > 0 :
|
||||
raise IndexError(line_nb)
|
||||
result[self.elements_names[i]] = bytes2str(value)
|
||||
if all_NA and (value != OBIIdx_NA) :
|
||||
raise IndexError(line_nb)
|
||||
if strcmp(value, OBIStr_NA) == 0 :
|
||||
value_in_result = None
|
||||
else :
|
||||
value_in_result = bytes2str(value)
|
||||
# NOTE: value is not freed because the pointer points to a mmapped region in an AVL data file. (TODO discuss)
|
||||
result[self.elements_names[i]] = value_in_result
|
||||
if all_NA and (value_in_result is not None) :
|
||||
all_NA = False
|
||||
if all_NA :
|
||||
result = None
|
||||
return result
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, str value):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
raise Exception("Column is read-only")
|
||||
|
||||
|
||||
cdef class OBIDMS_column_str_multi_elts_writable(OBIDMS_column_str_multi_elts):
|
||||
|
||||
cpdef set_item(self, index_t line_nb, str element_name, str value):
|
||||
if obi_column_set_obistr_with_elt_name(self.pointer, line_nb, str2bytes(element_name), str2bytes(value)) < 0:
|
||||
cpdef set_item(self, index_t line_nb, str element_name, object value):
|
||||
cdef bytes value_b
|
||||
if value is None :
|
||||
value_b = OBIStr_NA
|
||||
else :
|
||||
value_b = str2bytes(value)
|
||||
if obi_column_set_obistr_with_elt_name_in_view(self.view, (self.pointer)[0], line_nb, str2bytes(element_name), value_b) < 0:
|
||||
raise Exception("Problem setting a value in a column")
|
||||
|
||||
cpdef set_line(self, index_t line_nb, object values):
|
||||
cdef str value
|
||||
for element_name in values :
|
||||
value = values[element_name]
|
||||
self.set_item(line_nb, element_name, value)
|
||||
|
||||
cpdef close(self):
|
||||
if obi_truncate_and_close_column(self.pointer) < 0 :
|
||||
raise Exception("Problem closing a column")
|
||||
|
||||
|
51
python/obitools3/obidms/_obiseq.cfiles
Normal file
51
python/obitools3/obidms/_obiseq.cfiles
Normal file
@ -0,0 +1,51 @@
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
30
python/obitools3/obidms/_obiseq.pxd
Normal file
30
python/obitools3/obidms/_obiseq.pxd
Normal file
@ -0,0 +1,30 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from ._obidms cimport OBIView_line
|
||||
|
||||
|
||||
cdef class OBI_Seq(dict) :
|
||||
cdef str id
|
||||
cdef str definition
|
||||
cdef str sequence
|
||||
|
||||
cpdef set_id(self, str id)
|
||||
cpdef get_id(self)
|
||||
cpdef set_definition(self, str definition)
|
||||
cpdef get_definition(self)
|
||||
cpdef get_sequence(self)
|
||||
|
||||
|
||||
cdef class OBI_Nuc_Seq(OBI_Seq) :
|
||||
#cpdef str reverse_complement(self)
|
||||
cpdef set_sequence(self, str sequence)
|
||||
|
||||
|
||||
cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
|
||||
cpdef set_id(self, str id)
|
||||
cpdef get_id(self)
|
||||
cpdef set_definition(self, str definition)
|
||||
cpdef get_definition(self)
|
||||
cpdef set_sequence(self, str sequence)
|
||||
cpdef get_sequence(self)
|
||||
# cpdef str reverse_complement(self)
|
75
python/obitools3/obidms/_obiseq.pyx
Normal file
75
python/obitools3/obidms/_obiseq.pyx
Normal file
@ -0,0 +1,75 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.utils cimport bytes2str, str2bytes
|
||||
|
||||
from .capi.obiview cimport NUC_SEQUENCE_COLUMN, \
|
||||
ID_COLUMN, \
|
||||
DEFINITION_COLUMN
|
||||
|
||||
cdef class OBI_Seq(dict) :
|
||||
|
||||
def __init__(self, str id, str seq, str definition=None) :
|
||||
self.set_id(id)
|
||||
self.set_sequence(seq)
|
||||
if definition is not None :
|
||||
self.set_definition(definition)
|
||||
|
||||
cpdef set_id(self, str id) :
|
||||
self.id = id
|
||||
self[bytes2str(ID_COLUMN)] = id
|
||||
|
||||
cpdef get_id(self) :
|
||||
return self.id
|
||||
|
||||
cpdef set_definition(self, str definition) :
|
||||
self.definition = definition
|
||||
self[bytes2str(DEFINITION_COLUMN)] = definition
|
||||
|
||||
cpdef get_definition(self) :
|
||||
return self.definition
|
||||
|
||||
cpdef get_sequence(self) :
|
||||
return self.sequence
|
||||
|
||||
def __str__(self) :
|
||||
return self.sequence # or not
|
||||
|
||||
|
||||
cdef class OBI_Nuc_Seq(OBI_Seq) :
|
||||
|
||||
cpdef set_sequence(self, str sequence) :
|
||||
self.sequence = sequence
|
||||
self[bytes2str(NUC_SEQUENCE_COLUMN)] = sequence
|
||||
|
||||
# cpdef str reverse_complement(self) : TODO in C ?
|
||||
# pass
|
||||
|
||||
|
||||
cdef class OBI_Nuc_Seq_Stored(OBIView_line) :
|
||||
|
||||
cpdef set_id(self, str id) :
|
||||
self[bytes2str(ID_COLUMN)] = id
|
||||
|
||||
cpdef get_id(self) :
|
||||
return self[bytes2str(ID_COLUMN)]
|
||||
|
||||
cpdef set_definition(self, str definition) :
|
||||
self[bytes2str(DEFINITION_COLUMN)] = definition
|
||||
|
||||
cpdef get_definition(self) :
|
||||
return self[bytes2str(DEFINITION_COLUMN)]
|
||||
|
||||
cpdef set_sequence(self, str sequence) :
|
||||
self[bytes2str(NUC_SEQUENCE_COLUMN)] = sequence
|
||||
|
||||
cpdef get_sequence(self) :
|
||||
return self[bytes2str(NUC_SEQUENCE_COLUMN)]
|
||||
|
||||
# def __str__(self) :
|
||||
# return self[bytes2str(NUC_SEQUENCE_COLUMN)] # or not
|
||||
|
||||
# cpdef str reverse_complement(self) : TODO in C ?
|
||||
# pass
|
||||
|
||||
# TODO static method to import?
|
||||
|
51
python/obitools3/obidms/_obitaxo.cfiles
Normal file
51
python/obitools3/obidms/_obitaxo.cfiles
Normal file
@ -0,0 +1,51 @@
|
||||
../../../src/bloom.h
|
||||
../../../src/bloom.c
|
||||
../../../src/char_str_indexer.h
|
||||
../../../src/char_str_indexer.c
|
||||
../../../src/crc64.h
|
||||
../../../src/crc64.c
|
||||
../../../src/dna_seq_indexer.h
|
||||
../../../src/dna_seq_indexer.c
|
||||
../../../src/encode.h
|
||||
../../../src/encode.c
|
||||
../../../src/murmurhash2.h
|
||||
../../../src/murmurhash2.c
|
||||
../../../src/obiavl.h
|
||||
../../../src/obiavl.c
|
||||
../../../src/obiblob_indexer.h
|
||||
../../../src/obiblob_indexer.c
|
||||
../../../src/obiblob.h
|
||||
../../../src/obiblob.c
|
||||
../../../src/obidebug.h
|
||||
../../../src/obidms_taxonomy.h
|
||||
../../../src/obidms_taxonomy.c
|
||||
../../../src/obidms.h
|
||||
../../../src/obidms.c
|
||||
../../../src/obidmscolumn_idx.h
|
||||
../../../src/obidmscolumn_idx.c
|
||||
../../../src/obidmscolumn.h
|
||||
../../../src/obidmscolumn.c
|
||||
../../../src/obidmscolumndir.h
|
||||
../../../src/obidmscolumndir.c
|
||||
../../../src/obierrno.h
|
||||
../../../src/obierrno.c
|
||||
../../../src/obilittlebigman.h
|
||||
../../../src/obilittlebigman.c
|
||||
../../../src/obitypes.h
|
||||
../../../src/obitypes.c
|
||||
../../../src/obiview.h
|
||||
../../../src/obiview.c
|
||||
../../../src/utils.h
|
||||
../../../src/utils.c
|
||||
../../../src/obidmscolumn_bool.c
|
||||
../../../src/obidmscolumn_bool.h
|
||||
../../../src/obidmscolumn_char.c
|
||||
../../../src/obidmscolumn_char.h
|
||||
../../../src/obidmscolumn_float.c
|
||||
../../../src/obidmscolumn_float.h
|
||||
../../../src/obidmscolumn_int.c
|
||||
../../../src/obidmscolumn_int.h
|
||||
../../../src/obidmscolumn_seq.c
|
||||
../../../src/obidmscolumn_seq.h
|
||||
../../../src/obidmscolumn_str.c
|
||||
../../../src/obidmscolumn_str.h
|
31
python/obitools3/obidms/_obitaxo.pxd
Normal file
31
python/obitools3/obidms/_obitaxo.pxd
Normal file
@ -0,0 +1,31 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .capi.obitaxonomy cimport ecotx_t, OBIDMS_taxonomy_p
|
||||
|
||||
from libc.stdint cimport int32_t
|
||||
|
||||
|
||||
cdef class OBI_Taxonomy :
|
||||
|
||||
cdef str name
|
||||
cdef OBIDMS_taxonomy_p pointer
|
||||
|
||||
cpdef close(self)
|
||||
|
||||
|
||||
cdef class OBI_Taxon :
|
||||
|
||||
cdef ecotx_t* pointer
|
||||
cdef int32_t taxid
|
||||
cdef int32_t rank
|
||||
cdef int32_t farest
|
||||
cdef ecotx_t* parent
|
||||
cdef str name
|
||||
|
||||
cpdef int32_t taxid(self)
|
||||
|
||||
cpdef int32_t rank(self)
|
||||
|
||||
cpdef int32_t farest(self)
|
||||
|
||||
cpdef OBI_Taxon parent(self)
|
65
python/obitools3/obidms/_obitaxo.pyx
Normal file
65
python/obitools3/obidms/_obitaxo.pyx
Normal file
@ -0,0 +1,65 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from obitools3.utils cimport bytes2str, str2bytes
|
||||
|
||||
from .capi.obitaxonomy cimport obi_read_taxonomy, \
|
||||
obi_close_taxonomy, \
|
||||
obi_taxo_get_taxon_with_taxid
|
||||
|
||||
from ._obidms cimport OBIDMS
|
||||
|
||||
from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer
|
||||
|
||||
|
||||
cdef class OBI_Taxonomy :
    """
    Handle on a taxonomy read from an OBIDMS.

    The taxonomy is read with obi_read_taxonomy() when the object is built
    and must be released explicitly with close().
    """

    def __init__(self, OBIDMS dms, str name) :
        """
        Read the taxonomy called `name` from `dms`.

        Raises Exception if the C layer hands back a NULL taxonomy pointer.
        """
        self.name = name
        # The third argument is read_alternative_names.
        self.pointer = obi_read_taxonomy(dms.pointer, str2bytes(name), True) # TODO discuss
        # NOTE(review): presumably NULL signals a read failure - confirm against
        # obi_read_taxonomy(); a NULL pointer is never usable by the calls below.
        if self.pointer == NULL :
            raise Exception("Error reading the taxonomy")


    def __getitem__(self, object ref):
        """
        Return the OBI_Taxon whose taxid is `ref`.

        Raises TypeError when `ref` is not an int (only taxids are supported)
        and KeyError when no taxon pointer is returned for that taxid.
        """
        cdef ecotx_t* taxon_p
        cdef object   taxon_capsule

        # Previously any non-int key fell through and silently returned None.
        if type(ref) != int :
            raise TypeError("Taxonomy items must be referred to by an int taxid, not "+type(ref).__name__)

        taxon_p = obi_taxo_get_taxon_with_taxid(self.pointer, ref)
        # PyCapsule_New() refuses NULL pointers: report the missing taxon as a key error instead.
        if taxon_p == NULL :
            raise KeyError(ref)

        taxon_capsule = PyCapsule_New(taxon_p, NULL, NULL)
        return OBI_Taxon(taxon_capsule)


    cpdef close(self) :
        """
        Close the taxonomy; raises Exception when obi_close_taxonomy() returns a negative value.
        """
        if (obi_close_taxonomy(self.pointer) < 0) :
            raise Exception("Error closing the taxonomy")
|
||||
|
||||
|
||||
cdef class OBI_Taxon : # dict subclass?
    """
    Python view of one ecotx_t taxon node.

    The node pointer is received wrapped in a PyCapsule; its taxid, rank,
    farest, parent and name fields are copied into the object at construction.
    """

    # NOTE(review): the matching .pxd declares both attributes and cpdef methods
    # named taxid, rank, farest and parent on this class; that looks like a name
    # clash Cython would reject - confirm and rename one side in both files.

    def __init__(self, object taxon_capsule) :
        """
        Build the taxon from a capsule (no name) holding an ecotx_t pointer.
        """
        cdef ecotx_t* taxon

        taxon = <ecotx_t*> PyCapsule_GetPointer(taxon_capsule, NULL)

        self.pointer = taxon
        self.taxid   = taxon.taxid
        self.rank    = taxon.rank
        self.farest  = taxon.farest
        self.parent  = taxon.parent
        self.name    = bytes2str(taxon.name)


    cpdef int32_t taxid(self):
        """Return the taxid copied from the node."""
        return self.taxid


    cpdef int32_t rank(self):
        """Return the rank value copied from the node."""
        return self.rank


    cpdef int32_t farest(self):
        """Return the farest value copied from the node."""
        return self.farest


    cpdef OBI_Taxon parent(self):
        """
        Return the parent node as an OBI_Taxon, or None when the node has no parent pointer.
        """
        cdef object parent_capsule

        # PyCapsule_New() raises ValueError on a NULL pointer, so a node without
        # a parent is reported as None instead of failing inside the capsule call.
        if self.parent == NULL :
            return None

        parent_capsule = PyCapsule_New(self.parent, NULL, NULL)
        return OBI_Taxon(parent_capsule)
|
||||
|
@ -15,25 +15,28 @@ from ..capi.obitypes cimport const_char_p, \
|
||||
cdef extern from "obidmscolumn.h" nogil:
|
||||
|
||||
struct OBIDMS_column_header_t:
|
||||
bint little_endian
|
||||
int header_size
|
||||
size_t header_size
|
||||
size_t data_size
|
||||
index_t line_count
|
||||
index_t lines_used
|
||||
index_t nb_elements_per_line
|
||||
const_char_p elements_names
|
||||
OBIType_t data_type
|
||||
OBIType_t returned_data_type
|
||||
OBIType_t stored_data_type
|
||||
time_t creation_date
|
||||
obiversion_t version
|
||||
obiversion_t cloned_from
|
||||
const_char_p name
|
||||
const_char_p array_name
|
||||
const_char_p indexer_name
|
||||
const_char_p comments
|
||||
|
||||
ctypedef OBIDMS_column_header_t* OBIDMS_column_header_p
|
||||
|
||||
struct OBIDMS_column_t:
|
||||
OBIDMS_p dms
|
||||
OBIDMS_column_header_p header
|
||||
|
||||
bint writable
|
||||
|
||||
ctypedef OBIDMS_column_t* OBIDMS_column_p
|
||||
|
||||
OBIDMS_column_p obi_create_column(OBIDMS_p dms,
|
||||
@ -42,7 +45,8 @@ cdef extern from "obidmscolumn.h" nogil:
|
||||
index_t nb_lines,
|
||||
index_t nb_elements_per_line,
|
||||
const_char_p elements_names,
|
||||
const_char_p array_name)
|
||||
const_char_p indexer_name,
|
||||
const_char_p comments)
|
||||
|
||||
OBIDMS_column_p obi_open_column(OBIDMS_p dms,
|
||||
const_char_p column_name,
|
||||
@ -51,24 +55,27 @@ cdef extern from "obidmscolumn.h" nogil:
|
||||
int obi_close_column(OBIDMS_column_p column)
|
||||
|
||||
OBIDMS_column_p obi_clone_column(OBIDMS_p dms,
|
||||
OBIDMS_column_p line_selection,
|
||||
const_char_p column_name,
|
||||
obiversion_t version_number,
|
||||
bint clone_data)
|
||||
|
||||
int obi_truncate_and_close_column(OBIDMS_column_p column)
|
||||
int obi_close_column(OBIDMS_column_p column)
|
||||
|
||||
obiversion_t obi_column_get_latest_version_from_name(OBIDMS_p dms,
|
||||
const_char_p column_name)
|
||||
|
||||
OBIDMS_column_header_p obi_column_get_header_from_name(OBIDMS_p dms,
|
||||
const_char_p column_name)
|
||||
const_char_p column_name,
|
||||
obiversion_t version_number)
|
||||
|
||||
int obi_unmap_header(OBIDMS_column_header_p header)
|
||||
|
||||
char* obi_column_format_date(time_t date)
|
||||
int obi_close_header(OBIDMS_column_header_p header)
|
||||
|
||||
int obi_select(OBIDMS_column_p line_selection_column, index_t line_to_grep)
|
||||
|
||||
|
||||
|
||||
cdef extern from "obidmscolumn_int.h" nogil:
|
||||
|
||||
int obi_column_set_obiint_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
@ -77,7 +84,7 @@ cdef extern from "obidmscolumn_int.h" nogil:
|
||||
int obi_column_set_obiint_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
obiint_t value)
|
||||
obiint_t value)
|
||||
|
||||
obiint_t obi_column_get_obiint_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
@ -87,14 +94,13 @@ cdef extern from "obidmscolumn_int.h" nogil:
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
|
||||
cdef extern from "obidmscolumn_bool.h" nogil:
|
||||
|
||||
int obi_column_set_obibool_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
obibool_t value)
|
||||
|
||||
|
||||
int obi_column_set_obibool_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
@ -114,7 +120,7 @@ cdef extern from "obidmscolumn_char.h" nogil:
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
obichar_t value)
|
||||
|
||||
|
||||
int obi_column_set_obichar_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
@ -134,7 +140,7 @@ cdef extern from "obidmscolumn_float.h" nogil:
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
obifloat_t value)
|
||||
|
||||
|
||||
int obi_column_set_obifloat_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
@ -153,18 +159,38 @@ cdef extern from "obidmscolumn_str.h" nogil:
|
||||
int obi_column_set_obistr_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
char* value)
|
||||
|
||||
const_char_p value)
|
||||
|
||||
int obi_column_set_obistr_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
char* value)
|
||||
const_char_p value)
|
||||
|
||||
const_char_p obi_column_get_obistr_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
const_char_p obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
cdef extern from "obidmscolumn_seq.h" nogil:
|
||||
|
||||
int obi_column_set_obiseq_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
const_char_p value)
|
||||
|
||||
int obi_column_set_obiseq_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
const_char_p value)
|
||||
|
||||
char* obi_column_get_obiseq_with_elt_name(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
const_char_p obi_column_get_obistr_with_elt_idx(OBIDMS_column_p column,
|
||||
char* obi_column_get_obiseq_with_elt_idx(OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
|
42
python/obitools3/obidms/capi/obitaxonomy.pxd
Normal file
42
python/obitools3/obidms/capi/obitaxonomy.pxd
Normal file
@ -0,0 +1,42 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .obitypes cimport const_char_p
|
||||
from .obidms cimport OBIDMS_p
|
||||
from libc.stdint cimport int32_t
|
||||
|
||||
|
||||
cdef extern from "obidms_taxonomy.h" nogil:
|
||||
|
||||
struct OBIDMS_taxonomy_t
|
||||
ctypedef OBIDMS_taxonomy_t* OBIDMS_taxonomy_p
|
||||
|
||||
struct ecotxnode :
|
||||
int32_t taxid
|
||||
int32_t rank
|
||||
int32_t farest
|
||||
ecotxnode* parent
|
||||
char* name
|
||||
|
||||
ctypedef ecotxnode ecotx_t
|
||||
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const_char_p taxonomy_name, bint read_alternative_names)
|
||||
|
||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
|
||||
|
||||
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
|
||||
|
||||
bint obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)
|
||||
|
||||
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
|
||||
|
@ -9,20 +9,22 @@ cdef extern from *:
|
||||
ctypedef char* const_char_p "const char*"
|
||||
|
||||
|
||||
cdef extern from "obidmscolumn.h" nogil:
|
||||
|
||||
ctypedef int32_t obiversion_t
|
||||
cdef extern from "encode.h" nogil:
|
||||
bint only_ATGC(const_char_p seq)
|
||||
|
||||
|
||||
cdef extern from "obitypes.h" nogil:
|
||||
|
||||
enum OBIType: # TODO je sais pas si ça sert de declarer le contenu de l'enum
|
||||
OBI_VOID = 0,
|
||||
enum OBIType:
|
||||
OBI_VOID,
|
||||
OBI_INT,
|
||||
OBI_FLOAT,
|
||||
OBI_BOOL,
|
||||
OBI_CHAR,
|
||||
OBI_STR,
|
||||
OBI_SEQ,
|
||||
OBI_IDX
|
||||
|
||||
|
||||
ctypedef OBIType OBIType_t
|
||||
|
||||
@ -31,14 +33,18 @@ cdef extern from "obitypes.h" nogil:
|
||||
|
||||
ctypedef OBIBool obibool_t
|
||||
ctypedef int32_t obiint_t
|
||||
ctypedef double obifloat_t
|
||||
ctypedef char obichar_t
|
||||
ctypedef double obifloat_t
|
||||
ctypedef char obichar_t
|
||||
ctypedef int64_t index_t
|
||||
|
||||
extern obiint_t OBIInt_NA
|
||||
extern index_t OBIIdx_NA
|
||||
extern obifloat_t OBIFloat_NA
|
||||
extern obichar_t OBIChar_NA
|
||||
extern obibool_t OBIBool_NA
|
||||
ctypedef int32_t obiversion_t
|
||||
|
||||
char* name_data_type(int data_type)
|
||||
extern obiint_t OBIInt_NA
|
||||
extern index_t OBIIdx_NA
|
||||
extern obifloat_t OBIFloat_NA
|
||||
extern obichar_t OBIChar_NA
|
||||
extern obibool_t OBIBool_NA
|
||||
extern const_char_p OBISeq_NA
|
||||
extern const_char_p OBIStr_NA
|
||||
|
||||
const_char_p name_data_type(int data_type)
|
||||
|
12
python/obitools3/obidms/capi/obiutils.pxd
Normal file
12
python/obitools3/obidms/capi/obiutils.pxd
Normal file
@ -0,0 +1,12 @@
|
||||
#cython: language_level=3
|
||||
|
||||
|
||||
from posix.types cimport time_t
|
||||
|
||||
from ..capi.obitypes cimport const_char_p
|
||||
|
||||
|
||||
cdef extern from "utils.h" nogil:
|
||||
|
||||
const_char_p obi_format_date(time_t date)
|
||||
|
249
python/obitools3/obidms/capi/obiview.pxd
Normal file
249
python/obitools3/obidms/capi/obiview.pxd
Normal file
@ -0,0 +1,249 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .obitypes cimport const_char_p, \
|
||||
OBIType_t, \
|
||||
obiversion_t, \
|
||||
obiint_t, \
|
||||
obibool_t, \
|
||||
obichar_t, \
|
||||
obifloat_t, \
|
||||
index_t, \
|
||||
time_t
|
||||
from ..capi.obidms cimport OBIDMS_p
|
||||
from ..capi.obidmscolumn cimport OBIDMS_column_p
|
||||
|
||||
|
||||
cdef extern from "obiview.h" nogil:
|
||||
|
||||
extern const_char_p VIEW_TYPE_NUC_SEQS
|
||||
extern const_char_p NUC_SEQUENCE_COLUMN
|
||||
extern const_char_p ID_COLUMN
|
||||
extern const_char_p DEFINITION_COLUMN
|
||||
|
||||
struct Obiview_t :
|
||||
OBIDMS_p dms
|
||||
const_char_p name
|
||||
const_char_p created_from
|
||||
const_char_p view_type
|
||||
bint read_only
|
||||
OBIDMS_column_p line_selection
|
||||
OBIDMS_column_p new_line_selection
|
||||
index_t line_count
|
||||
int column_count
|
||||
OBIDMS_column_p columns
|
||||
const_char_p comments
|
||||
|
||||
ctypedef Obiview_t* Obiview_p
|
||||
|
||||
|
||||
struct Column_reference_t :
|
||||
const_char_p column_name
|
||||
obiversion_t version
|
||||
|
||||
ctypedef Column_reference_t* Column_reference_p
|
||||
|
||||
|
||||
struct Obiview_infos_t :
|
||||
int view_number
|
||||
time_t creation_date
|
||||
const_char_p name
|
||||
const_char_p created_from
|
||||
const_char_p view_type
|
||||
bint all_lines
|
||||
Column_reference_t line_selection
|
||||
index_t line_count
|
||||
int column_count
|
||||
Column_reference_p column_references
|
||||
const_char_p comments
|
||||
|
||||
ctypedef Obiview_infos_t* Obiview_infos_p
|
||||
|
||||
|
||||
struct Obiviews_header_t :
|
||||
size_t header_size
|
||||
size_t views_size
|
||||
int view_count
|
||||
|
||||
ctypedef Obiviews_header_t* Obiviews_header_p
|
||||
|
||||
|
||||
struct Obiviews_infos_all_t :
|
||||
Obiviews_header_p header
|
||||
Obiview_infos_p view_infos
|
||||
|
||||
ctypedef Obiviews_infos_all_t* Obiviews_infos_all_p
|
||||
|
||||
|
||||
Obiview_p obi_new_view_nuc_seqs(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
|
||||
|
||||
Obiview_p obi_new_view(OBIDMS_p dms, const_char_p view_name, Obiview_p view_to_clone, index_t* line_selection, const_char_p comments)
|
||||
|
||||
Obiview_p obi_new_view_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
|
||||
|
||||
Obiview_p obi_new_view_nuc_seqs_cloned_from_name(OBIDMS_p dms, const_char_p view_name, const_char_p view_to_clone_name, index_t* line_selection, const_char_p comments)
|
||||
|
||||
Obiview_p obi_open_view(OBIDMS_p dms, const_char_p view_name)
|
||||
|
||||
int obi_view_add_column(Obiview_p view,
|
||||
const_char_p column_name,
|
||||
obiversion_t version_number,
|
||||
OBIType_t data_type,
|
||||
index_t nb_lines,
|
||||
index_t nb_elements_per_line,
|
||||
const_char_p elements_names,
|
||||
const_char_p indexer_name,
|
||||
const_char_p comments,
|
||||
bint create)
|
||||
|
||||
int obi_view_delete_column(Obiview_p view, const_char_p column_name)
|
||||
|
||||
int obi_select_line(Obiview_p view, index_t line_nb)
|
||||
|
||||
int obi_select_lines(Obiview_p view, index_t* line_nbs)
|
||||
|
||||
OBIDMS_column_p obi_view_get_column(Obiview_p view, const_char_p column_name)
|
||||
|
||||
OBIDMS_column_p* obi_view_get_pointer_on_column_in_view(Obiview_p view, const_char_p column_name)
|
||||
|
||||
int obi_save_view(Obiview_p view)
|
||||
|
||||
int obi_close_view(Obiview_p view)
|
||||
|
||||
int obi_save_and_close_view(Obiview_p view)
|
||||
|
||||
Obiviews_infos_all_p obi_read_view_infos(OBIDMS_p dms)
|
||||
|
||||
int obi_close_view_infos(Obiviews_infos_all_p views)
|
||||
|
||||
int obi_column_set_obiint_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
obiint_t value)
|
||||
|
||||
int obi_column_set_obiint_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
obiint_t value)
|
||||
|
||||
obiint_t obi_column_get_obiint_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
obiint_t obi_column_get_obiint_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
int obi_column_set_obibool_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
obibool_t value)
|
||||
|
||||
int obi_column_set_obibool_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
obibool_t value)
|
||||
|
||||
obibool_t obi_column_get_obibool_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
obibool_t obi_column_get_obibool_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
int obi_column_set_obichar_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
obichar_t value)
|
||||
|
||||
int obi_column_set_obichar_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
obichar_t value)
|
||||
|
||||
obichar_t obi_column_get_obichar_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
obichar_t obi_column_get_obichar_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
int obi_column_set_obifloat_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
obifloat_t value)
|
||||
|
||||
int obi_column_set_obifloat_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
obifloat_t value)
|
||||
|
||||
obifloat_t obi_column_get_obifloat_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
obifloat_t obi_column_get_obifloat_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
int obi_column_set_obistr_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
const_char_p value)
|
||||
|
||||
int obi_column_set_obistr_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
const_char_p value)
|
||||
|
||||
const_char_p obi_column_get_obistr_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
const_char_p obi_column_get_obistr_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
||||
int obi_column_set_obiseq_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name,
|
||||
const_char_p value)
|
||||
|
||||
int obi_column_set_obiseq_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx,
|
||||
const_char_p value)
|
||||
|
||||
char* obi_column_get_obiseq_with_elt_name_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
const_char_p element_name)
|
||||
|
||||
char* obi_column_get_obiseq_with_elt_idx_in_view(Obiview_p view,
|
||||
OBIDMS_column_p column,
|
||||
index_t line_nb,
|
||||
index_t element_idx)
|
||||
|
51
python/obitools3/obigrep.py
Normal file
51
python/obitools3/obigrep.py
Normal file
@ -0,0 +1,51 @@
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from obitools3.obidms._obidms import OBIDMS
|
||||
|
||||
if __name__ == '__main__':

    # Pseudo obigrep: clone a view of the 'tdms' DMS, keeping only the lines
    # whose 'score' is greater than 350.
    # TODO: the key, comparison and value are hard-coded; they are meant to
    # become command line options (key / comparison / value).

    parser = argparse.ArgumentParser(description='Pseudo obigrep.')

    # Both names are needed below, so a missing option is reported by argparse
    # instead of reaching the DMS calls as None.
    parser.add_argument('-V', '--view', dest='view', type=str, required=True,
                        help='Name of the view that should be considered')

    parser.add_argument('-N', '--new_view', dest='new_view', type=str, required=True,
                        help='Name of the new view that should be created')

    args = parser.parse_args()

    d = OBIDMS('tdms')

    v = d.open_view(args.view)

    # Indices (in iteration order) of the lines that pass the filter.
    line_selec = [i for i, l in enumerate(v) if l['score'] > 350]

    new_v = d.new_view(args.new_view,
                       view_to_clone=v,
                       line_selection=line_selec,
                       view_type="NUC_SEQS_VIEW",
                       comments="obigrep "+args.view+" to "+args.new_view)

    print("\n")
    print(new_v.__repr__())

    v.save_and_close()
    new_v.save_and_close()

    d.close()

    print("\nDone.")
|
||||
|
43
python/obitools3/obihead.py
Normal file
43
python/obitools3/obihead.py
Normal file
@ -0,0 +1,43 @@
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
from obitools3.obidms._obidms import OBIDMS
|
||||
|
||||
if __name__ == '__main__':

    # Pseudo obihead: clone a view of the 'tdms' DMS, keeping only its
    # first nb_lines lines.

    parser = argparse.ArgumentParser(description='Pseudo obihead.')

    # All three options are used unconditionally below (a missing -n used to
    # end in range(0, None) raising TypeError), so argparse now enforces them.
    parser.add_argument('-V', '--view', dest='view', type=str, required=True,
                        help='Name of the view that should be considered')

    parser.add_argument('-N', '--new_view', dest='new_view', type=str, required=True,
                        help='Name of the new view that should be created')

    parser.add_argument('-n', '--nb', dest='nb_lines', type=int, required=True,
                        help='Number of lines that should be taken')

    args = parser.parse_args()

    d = OBIDMS('tdms')

    v = d.open_view(args.view)

    # Select line indices 0 .. nb_lines-1.
    line_selec = list(range(args.nb_lines))

    new_v = d.new_view(args.new_view,
                       view_to_clone=v,
                       line_selection=line_selec,
                       view_type="NUC_SEQS_VIEW",
                       comments="obihead "+str(args.nb_lines)+", "+args.view+" to "+args.new_view)

    print("\n")
    print(new_v.__repr__())

    v.save_and_close()
    new_v.save_and_close()

    d.close()

    print("\nDone.")
|
||||
|
199
python/obitools3/obiimport.py
Normal file
199
python/obitools3/obiimport.py
Normal file
@ -0,0 +1,199 @@
|
||||
import sys
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from obitools3.obidms._obidms import OBIDMS
|
||||
|
||||
|
||||
def bufferedRead(fileobj,size=209715200): ## 200 MB
    """
    Yield the lines of `fileobj` one by one, reading them in batches.

    Each batch is obtained with fileobj.readlines(size), `size` being the
    size hint passed to readlines; iteration stops at the first empty batch.
    """
    while True:
        batch = fileobj.readlines(size)
        if not batch:
            return
        yield from batch
|
||||
|
||||
|
||||
if __name__ == '__main__':

    # Import the identifiers and sequences of a sequence file into a new
    # NUC_SEQS_VIEW view called 'uniq view' of the 'tdms' DMS.
    #
    # The input is read as records of 4 lines: line 0 of each record is the
    # header (its first space-separated word, minus the leading character,
    # is the identifier), line 1 is the sequence, lines 2 and 3 are ignored.
    # NOTE(review): that is the FASTQ layout although the description below
    # says fasta - confirm which one is intended.
    #
    # The commented-out prototype code (multi-line fasta reading, header tag
    # parsing, debugging prints) that used to live here has been removed.

    parser = argparse.ArgumentParser(description='Convert a fasta file in an OBIDMS.')

    parser.add_argument('-i', '--input', dest='input_file', type=str, required=True,
                        help='Name of the file containing the sequences')

    args = parser.parse_args()

    d = OBIDMS('tdms')

    view = d.new_view('uniq view', view_type="NUC_SEQS_VIEW")

    i = 0   # index of the view line currently being filled

    # The context manager closes the file even if a line fails to import.
    with open(args.input_file, 'r') as input_file :

        for l, line in enumerate(bufferedRead(input_file)) :

            if l%4 == 0 :
                # Progress report every 500000 records.
                if (not (i%500000)) :
                    print(str(time.time())+'\t'+str(i))

                # Strip the end of line first: a header without any space
                # used to keep its trailing newline inside the identifier.
                seq_id = line.rstrip('\r\n').split(" ", 1)[0][1:]
                view[i].set_id(seq_id)

            elif l%4 == 1 :
                # rstrip instead of [:-1] so that a last line without newline
                # does not lose its final nucleotide.
                seq = line.rstrip('\r\n')
                view[i].set_sequence(seq)
                i+=1

    print(view.__repr__())

    view.save_and_close()
    d.close()

    print("Done.")
|
0
python/obitools3/parsers/__init__.py
Normal file
0
python/obitools3/parsers/__init__.py
Normal file
8
python/obitools3/parsers/fasta.pxd
Normal file
8
python/obitools3/parsers/fasta.pxd
Normal file
@ -0,0 +1,8 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .header cimport parseHeader
|
||||
from ..files.universalopener cimport uopen
|
||||
from ..files.linebuffer cimport LineBuffer
|
||||
|
||||
|
||||
|
46
python/obitools3/parsers/fasta.pyx
Normal file
46
python/obitools3/parsers/fasta.pyx
Normal file
@ -0,0 +1,46 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 30 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
def fastaIterator(lineiterator, int buffersize=100000000):
    """
    Iterate over the records of a fasta source.

    `lineiterator` may be a file name (str or bytes, opened with uopen),
    a LineBuffer, or any iterable of lines (wrapped in a LineBuffer using
    `buffersize`).

    Yields one dict per record with the keys "id", "definition", "sequence",
    "quality" (always None for fasta), "tags" and "annotation" (empty dict).
    An empty source yields nothing.
    """
    cdef LineBuffer lb
    cdef str        ident
    cdef str        definition
    cdef dict       tags
    cdef list       s

    if isinstance(lineiterator,(str,bytes)):
        lineiterator=uopen(lineiterator)

    if isinstance(lineiterator, LineBuffer):
        lb=lineiterator
    else:
        lb=LineBuffer(lineiterator,buffersize)

    i = iter(lb)

    # next() is always called with a None sentinel: letting StopIteration
    # escape from a generator body ends it abruptly (RuntimeError under
    # PEP 479) and used to drop the last record of the file.
    line = next(i, None)

    while line is not None:
        ident,tags,definition = parseHeader(line)

        # Gather the sequence lines up to the next header or the end of input.
        s = []
        line = next(i, None)
        while line is not None and not line.startswith('>'):
            # Remove the end of line only, so that a final line without
            # newline keeps its last character.
            s.append(line.rstrip('\r\n'))
            line = next(i, None)

        sequence = "".join(s)
        quality  = None

        yield { "id"         : ident,
                "definition" : definition,
                "sequence"   : sequence,
                "quality"    : quality,
                "tags"       : tags,
                "annotation" : {}
              }
|
||||
|
||||
|
||||
|
8
python/obitools3/parsers/fastq.pxd
Normal file
8
python/obitools3/parsers/fastq.pxd
Normal file
@ -0,0 +1,8 @@
|
||||
#cython: language_level=3
|
||||
|
||||
from .header cimport parseHeader
|
||||
from ..files.universalopener cimport uopen
|
||||
from ..files.linebuffer cimport LineBuffer
|
||||
|
||||
|
||||
|
41
python/obitools3/parsers/fastq.pyx
Normal file
41
python/obitools3/parsers/fastq.pyx
Normal file
@ -0,0 +1,41 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 30 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
|
||||
|
||||
def fastqIterator(lineiterator, int buffersize=100000000):
    """
    Iterate over the records of a fastq source (4 lines per record).

    `lineiterator` may be a file name (str or bytes, opened with uopen),
    a LineBuffer, or any iterable of lines (wrapped in a LineBuffer using
    `buffersize`).

    Yields one dict per record with the keys "id", "definition", "sequence",
    "quality", "tags" and "annotation" (empty dict). A truncated last record
    is not yielded.
    """
    cdef LineBuffer lb
    cdef str        ident
    cdef str        definition
    cdef dict       tags

    if isinstance(lineiterator,(str,bytes)):
        lineiterator=uopen(lineiterator)

    if isinstance(lineiterator, LineBuffer):
        lb=lineiterator
    else:
        lb=LineBuffer(lineiterator,buffersize)

    i = iter(lb)

    for line in i:
        ident,tags,definition = parseHeader(line)

        # Read the 3 remaining lines of the record with a None sentinel:
        # a bare next() would raise StopIteration inside the generator on a
        # truncated file (a RuntimeError under PEP 479).
        sequence = next(i, None)
        next(i, None)                # separator line, content ignored
        quality  = next(i, None)

        if quality is None:
            # Incomplete record at the end of the input: stop cleanly.
            return

        yield { "id"         : ident,
                "definition" : definition,
                "sequence"   : sequence.rstrip('\r\n'),
                "quality"    : quality.rstrip('\r\n'),
                "tags"       : tags,
                "annotation" : {}
              }
|
||||
|
||||
|
||||
|
5
python/obitools3/parsers/header.pxd
Normal file
5
python/obitools3/parsers/header.pxd
Normal file
@ -0,0 +1,5 @@
|
||||
#cython: language_level=3
|
||||
|
||||
cdef object __etag__(str x)
|
||||
|
||||
cpdef tuple parseHeader(str header)
|
78
python/obitools3/parsers/header.pyx
Normal file
78
python/obitools3/parsers/header.pyx
Normal file
@ -0,0 +1,78 @@
|
||||
#cython: language_level=3
|
||||
|
||||
'''
|
||||
Created on 25 mars 2016
|
||||
|
||||
@author: coissac
|
||||
'''
|
||||
|
||||
import re
|
||||
|
||||
# One "key=value; " tag of a sequence header; the value is a single quoted
# string, a double quoted string, or anything up to the next ';'.
__ret__ = re.compile('''(([^ ]+)=('[^']*'|"[^"]*"|[^;]+); *)+?''')

# Patterns used to guess the Python type of a tag value.
# Raw strings keep the backslashes for the regex engine instead of relying
# on invalid string escapes being passed through.
__re_int__   = re.compile(r"^[+-]?[0-9]+$")
__re_float__ = re.compile(r"^[+-]?[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?$")

# The alternation is grouped so that both anchors apply to both quoting
# styles; ungrouped, it accepted values such as "abc"def as quoted strings.
__re_str__   = re.compile(r"""^("[^"]*"|'[^']*')$""")

# Flat dict literal: { 'key' : value , "key" : value ... } with quoted keys.
__re_dict__  = re.compile(r"""^\{\ *
                              (
                                ("[^"]*"|'[^']*')
                                \ *:\ *
                                ([^,}]+|
                                 "[^"]*"|
                                 '[^']*'
                                )
                              )?
                              (\ *,\ *
                                ("[^"]*"|'[^']*')
                                \ *:\ *
                                ([^,}]+|
                                 "[^"]*"|
                                 '[^']*'
                                )
                              )*\ *\}$""", re.VERBOSE)
|
||||
|
||||
|
||||
cdef object __etag__(str x):
    """
    Convert the text of a header tag value to a Python object.

    The first matching rule wins: integer, float, quoted string (returned
    without its first and last characters), the literals 'False' and 'True',
    dict literal; anything else is returned unchanged as a str.
    """
    if __re_int__.match(x):
        return int(x)

    if __re_float__.match(x):
        return float(x)

    if __re_str__.match(x):
        return x[1:-1]

    if x=='False':
        return False

    if x=='True':
        return True

    if __re_dict__.match(x):
        # NOTE(review): eval() runs text taken from sequence headers.
        # __re_dict__ lets any expression without ',' or '}' through as a
        # value, so this can execute arbitrary code on untrusted files;
        # consider ast.literal_eval().
        return eval(x)

    return x
|
||||
|
||||
cpdef tuple parseHeader(str header):
    """
    Split a sequence header line into (ident, tags, definition).

    The first character of `header` (the record marker) and its end of line
    are dropped. `ident` is the first whitespace-separated word; the rest is
    scanned for "key=value; " tags, whose values are converted by __etag__;
    `definition` is the stripped text that follows the last tag (or the whole
    rest when there is no tag).

    Raises IndexError when the header holds no identifier.
    """
    cdef list m
    cdef list found
    cdef dict tags
    cdef str  definition
    cdef str  ident
    cdef str  second

    # rstrip rather than [1:-1]: a header that is the last line of a file
    # without trailing newline must not lose its final character.
    m=header[1:].rstrip('\r\n').split(maxsplit=1)

    ident=m[0]

    if len(m)==1:
        tags={}
        definition=''
    else:
        second=m[1]
        found = list(__ret__.finditer(second))

        if found:
            # group(2) is the key and group(3) the raw value of each tag.
            tags = dict([(a.group(2),__etag__(a.group(3))) for a in found])
            # Cut at the end position of the last tag; splitting on the tag
            # text gave a wrong definition when that text occurred earlier.
            definition = second[found[-1].end():].strip()
        else:
            tags = {}
            definition = second.strip()

    return ident,tags,definition
|
||||
|
||||
|
@ -10,9 +10,8 @@ from obitools3.obidms._obidms import OBIDMS
|
||||
|
||||
LINE_COUNT_FOR_TEST_COLUMN = 10000 # TODO randomize?
|
||||
SMALLER_LINE_COUNT_FOR_TEST_COLUMN = 1000 # TODO randomize?
|
||||
NB_ELEMENTS_PER_LINE = 20 # TODO randomize?
|
||||
NB_ELEMENTS_PER_LINE = 10 # TODO randomize?
|
||||
DMS_NAME = "unit_test_dms"
|
||||
DATA_TYPES = ['OBI_INT', 'OBI_FLOAT', 'OBI_BOOL', 'OBI_CHAR', 'OBI_IDX']
|
||||
|
||||
|
||||
def create_test_obidms():
|
||||
@ -22,26 +21,24 @@ def create_test_obidms():
|
||||
return (dms, dms_name, dms_dir_name)
|
||||
|
||||
|
||||
def create_test_column(dms, data_type_code, multiple_elements_per_line=False):
|
||||
data_types = DATA_TYPES
|
||||
data_type_code = data_type_code
|
||||
data_type_str = data_types[data_type_code-1]
|
||||
col_name = "unit_test_"+data_type_str
|
||||
|
||||
def create_test_column(dms, data_type, multiple_elements_per_line=False):
|
||||
|
||||
col_name = "unit_test_"+data_type
|
||||
|
||||
if multiple_elements_per_line :
|
||||
elts_names = elements_names()
|
||||
col = dms.open_column(col_name,
|
||||
create=True,
|
||||
data_type=data_type_code,
|
||||
type=data_type,
|
||||
nb_elements_per_line=NB_ELEMENTS_PER_LINE,
|
||||
elements_names=elts_names)
|
||||
return (col, col_name, elts_names, data_type_str)
|
||||
return (col, col_name, elts_names)
|
||||
|
||||
else :
|
||||
col = dms.open_column(col_name,
|
||||
create=True,
|
||||
data_type=data_type_code)
|
||||
return (col, col_name, data_type_str)
|
||||
type=data_type)
|
||||
return (col, col_name)
|
||||
|
||||
|
||||
def elements_names():
|
||||
@ -58,12 +55,15 @@ def random_obivalue(data_type):
|
||||
elif data_type == "OBI_BOOL" :
|
||||
return randint(0,1)
|
||||
elif data_type == "OBI_CHAR" :
|
||||
nucs = 'atgc'
|
||||
return bytes(nucs[randint(0,3)], 'utf-8')
|
||||
elif data_type == "OBI_IDX" :
|
||||
length = randint(1,500)
|
||||
return choice(string.ascii_lowercase)
|
||||
elif data_type == "OBI_STR" :
|
||||
length = randint(1,200)
|
||||
randoms = ''.join(choice(string.ascii_lowercase) for i in range(length))
|
||||
return randoms
|
||||
elif data_type == "OBI_SEQ" :
|
||||
length = randint(1,200)
|
||||
randoms = ''.join(choice("atgcryswkmdbhvn") for i in range(length))
|
||||
return randoms
|
||||
|
||||
class OBIDMS_Column_TestCase(unittest.TestCase):
|
||||
def tearDown(self):
|
||||
@ -71,10 +71,10 @@ class OBIDMS_Column_TestCase(unittest.TestCase):
|
||||
self.dms.close()
|
||||
shutil.rmtree(self.dms_dir_name, ignore_errors=True)
|
||||
def test_OBIDMS_column_type(self):
|
||||
assert self.col.get_data_type() == self.data_type_str, 'Wrong data type associated with column'
|
||||
assert self.col.get_data_type() == self.data_type, 'Wrong data type associated with column'
|
||||
def test_OBIDMS_column_cloning(self):
|
||||
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
|
||||
self.col[i]= random_obivalue(self.data_type_str)
|
||||
self.col[i]= random_obivalue(self.data_type)
|
||||
self.col.close()
|
||||
clone = self.dms.open_column(self.col_name, clone=True)
|
||||
self.col = self.dms.open_column(self.col_name)
|
||||
@ -86,10 +86,21 @@ class OBIDMS_Column_TestCase(unittest.TestCase):
|
||||
clone.close()
|
||||
def test_OBIDMS_column_set_and_get(self):
|
||||
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
|
||||
v = random_obivalue(self.data_type_str)
|
||||
v = random_obivalue(self.data_type)
|
||||
self.col[i] = v
|
||||
assert self.col[i] == v, "Different value than the set value"
|
||||
assert self.col[i] is not None, "None value"
|
||||
def test_OBIDMS_referring_column(self):
|
||||
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
|
||||
self.col[i] = random_obivalue(self.data_type)
|
||||
ref_col = self.dms.open_column(self.col_name, referring=True)
|
||||
j = 0
|
||||
for i in range(LINE_COUNT_FOR_TEST_COLUMN):
|
||||
if i%2 : # TODO randomize
|
||||
ref_col.grep_line(i)
|
||||
assert ref_col[j] == self.col[i], "Different value in original column and returned by referring column"
|
||||
assert ref_col[j] is not None, "None value"
|
||||
j+=1
|
||||
|
||||
|
||||
class OBIDMS_Column_multiple_elements_TestCase(OBIDMS_Column_TestCase):
|
||||
@ -98,7 +109,7 @@ class OBIDMS_Column_multiple_elements_TestCase(OBIDMS_Column_TestCase):
|
||||
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
|
||||
v = {}
|
||||
for e in self.elts_names :
|
||||
v[e] = random_obivalue(self.data_type_str)
|
||||
v[e] = random_obivalue(self.data_type)
|
||||
self.col[i] = v
|
||||
self.col.close()
|
||||
clone = self.dms.open_column(self.col_name, clone=True)
|
||||
@ -112,7 +123,7 @@ class OBIDMS_Column_multiple_elements_TestCase(OBIDMS_Column_TestCase):
|
||||
def test_OBIDMS_column_set_and_get_with_elements_names(self):
|
||||
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
|
||||
for e in range(NB_ELEMENTS_PER_LINE) :
|
||||
v = random_obivalue(self.data_type_str)
|
||||
v = random_obivalue(self.data_type)
|
||||
self.col.set_item(i, self.elts_names[e], v)
|
||||
assert self.col.get_item(i, self.elts_names[e]) == v, "Different value than the set value"
|
||||
assert self.col.get_item(i, self.elts_names[e]) is not None, "None value"
|
||||
@ -120,141 +131,168 @@ class OBIDMS_Column_multiple_elements_TestCase(OBIDMS_Column_TestCase):
|
||||
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
|
||||
v = {}
|
||||
for e in self.elts_names :
|
||||
v[e] = random_obivalue(self.data_type_str)
|
||||
v[e] = random_obivalue(self.data_type)
|
||||
self.col[i] = v
|
||||
assert self.col[i] == v, "Different value than the set value"
|
||||
assert self.col[i] is not None, "None value"
|
||||
def test_OBIDMS_referring_column(self):
|
||||
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
|
||||
v = {}
|
||||
for e in self.elts_names :
|
||||
v[e] = random_obivalue(self.data_type)
|
||||
self.col[i] = v
|
||||
ref_col = self.dms.open_column(self.col_name, referring=True)
|
||||
j = 0
|
||||
for i in range(SMALLER_LINE_COUNT_FOR_TEST_COLUMN):
|
||||
if i%2 : # TODO randomize
|
||||
ref_col.grep_line(i)
|
||||
assert ref_col[j] == self.col[i], "Different value in original column and returned by referring column"
|
||||
assert ref_col[j] is not None, "None value"
|
||||
j+=1
|
||||
ref_col.close()
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_INT_TestCase(OBIDMS_Column_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 1
|
||||
self.data_type = 'OBI_INT'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code)
|
||||
self.col_name = create_test_column(self.dms,
|
||||
self.data_type)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_INT_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 1
|
||||
self.data_type = 'OBI_INT'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.elts_names, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code,
|
||||
multiple_elements_per_line=True)
|
||||
self.elts_names = create_test_column(self.dms,
|
||||
self.data_type,
|
||||
multiple_elements_per_line=True)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_FLOAT_TestCase(OBIDMS_Column_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 2
|
||||
self.data_type = 'OBI_FLOAT'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code)
|
||||
self.col_name = create_test_column(self.dms,
|
||||
self.data_type)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_FLOAT_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 2
|
||||
self.data_type = 'OBI_FLOAT'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.elts_names, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code,
|
||||
multiple_elements_per_line=True)
|
||||
self.elts_names = create_test_column(self.dms,
|
||||
self.data_type,
|
||||
multiple_elements_per_line=True)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_BOOL_TestCase(OBIDMS_Column_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 3
|
||||
self.data_type = 'OBI_BOOL'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code)
|
||||
self.col_name = create_test_column(self.dms,
|
||||
self.data_type)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_BOOL_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 3
|
||||
self.data_type = 'OBI_BOOL'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.elts_names, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code,
|
||||
multiple_elements_per_line=True)
|
||||
self.elts_names = create_test_column(self.dms,
|
||||
self.data_type,
|
||||
multiple_elements_per_line=True)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_CHAR_TestCase(OBIDMS_Column_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 4
|
||||
self.data_type = 'OBI_CHAR'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code)
|
||||
self.col_name = create_test_column(self.dms,
|
||||
self.data_type)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 4
|
||||
self.data_type = 'OBI_CHAR'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.elts_names, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code,
|
||||
multiple_elements_per_line=True)
|
||||
self.elts_names = create_test_column(self.dms,
|
||||
self.data_type,
|
||||
multiple_elements_per_line=True)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_STR_TestCase(OBIDMS_Column_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 5
|
||||
self.data_type = 'OBI_STR'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code)
|
||||
self.col_name = create_test_column(self.dms,
|
||||
self.data_type)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_STR_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type_code = 5
|
||||
self.data_type = 'OBI_STR'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.elts_names, \
|
||||
self.data_type_str = create_test_column(self.dms,
|
||||
self.data_type_code,
|
||||
multiple_elements_per_line=True)
|
||||
self.elts_names = create_test_column(self.dms,
|
||||
self.data_type,
|
||||
multiple_elements_per_line=True)
|
||||
|
||||
class OBIDMS_Column_OBI_SEQ_TestCase(OBIDMS_Column_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type = 'OBI_SEQ'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name = create_test_column(self.dms,
|
||||
self.data_type)
|
||||
|
||||
|
||||
class OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase(OBIDMS_Column_multiple_elements_TestCase):
|
||||
def setUp(self):
|
||||
self.data_type = 'OBI_SEQ'
|
||||
self.dms, \
|
||||
self.dms_name, \
|
||||
self.dms_dir_name = create_test_obidms()
|
||||
self.col, \
|
||||
self.col_name, \
|
||||
self.elts_names = create_test_column(self.dms,
|
||||
self.data_type,
|
||||
multiple_elements_per_line=True)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(verbosity=2, defaultTest=["OBIDMS_Column_OBI_INT_TestCase",
|
||||
@ -266,6 +304,8 @@ if __name__ == '__main__':
|
||||
"OBIDMS_Column_OBI_CHAR_TestCase",
|
||||
"OBIDMS_Column_OBI_CHAR_multiple_elements_TestCase",
|
||||
"OBIDMS_Column_OBI_STR_TestCase",
|
||||
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase"])
|
||||
"OBIDMS_Column_OBI_STR_multiple_elements_TestCase",
|
||||
"OBIDMS_Column_OBI_SEQ_TestCase",
|
||||
"OBIDMS_Column_OBI_SEQ_multiple_elements_TestCase"])
|
||||
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#cython: language_level=3
|
||||
|
||||
|
||||
cdef bytes str2bytes(str string)
|
||||
|
||||
cdef str bytes2str(bytes string)
|
@ -1,7 +1,28 @@
|
||||
#cython: language_level=3
|
||||
|
||||
import sys
|
||||
import io
|
||||
|
||||
cdef bytes str2bytes(str string):
|
||||
"""
|
||||
Short cut to convert ascii encoded python string (str) to bytes
|
||||
which can be easily converted to C-strings.
|
||||
|
||||
@param string: the python string to be converted.
|
||||
@type string: str
|
||||
@return a transcoded string
|
||||
@rtype: bytes
|
||||
"""
|
||||
return string.encode('ascii')
|
||||
|
||||
cdef str bytes2str(bytes string):
|
||||
return string.decode('ascii')
|
||||
"""
|
||||
Short cut to convert bytes (C-strings) to ascii encoded python string (str).
|
||||
|
||||
@param string: the binary (C-string) string to be converted.
|
||||
@type string: bytes
|
||||
@return an ascii transcoded string
|
||||
@rtype: str
|
||||
"""
|
||||
return string.decode('ascii')
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
major = 1
|
||||
minor = 1
|
||||
serial= '16'
|
||||
major = 0
|
||||
minor = 0
|
||||
serial= '0'
|
||||
|
||||
version ="%d.%02d.%s" % (major,minor,serial)
|
||||
|
Binary file not shown.
@ -1,5 +1,5 @@
|
||||
--extra-index-url https://pypi.python.org/simple/
|
||||
Cython>=0.21
|
||||
Cython==0.23.5
|
||||
Sphinx>=1.2.0
|
||||
ipython>=3.0.0
|
||||
breathe>=4.0.0
|
||||
|
248
src/bloom.c
Executable file
248
src/bloom.c
Executable file
@ -0,0 +1,248 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015, Jyri J. Virkki
|
||||
* All rights reserved.
|
||||
*
|
||||
* This file is under BSD license. See LICENSE file.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Refer to bloom.h for documentation on the public interfaces.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <fcntl.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "bloom.h"
|
||||
#include "murmurhash2.h"
|
||||
|
||||
#define MAKESTRING(n) STRING(n)
|
||||
#define STRING(n) #n
|
||||
|
||||
//#ifdef __linux__ // TODO commented because triggers error on luke21
|
||||
//unsigned detect_bucket_size(unsigned fallback_size);
|
||||
//#endif
|
||||
|
||||
|
||||
/*
 * Tests bit number x of the bit array starting at buf, optionally setting it.
 *
 * The array is addressed as 32-bit words: word index x / 32, bit x % 32
 * within that word.
 *
 * buf      - start of the bit array (accessed through a uint32_t pointer)
 * x        - index of the bit to test
 * set_bit  - when non-zero and the bit is clear, the bit is set
 *
 * Returns 1 if the bit was already set, 0 if it was clear.
 */
static int test_bit_set_bit(unsigned char * buf, unsigned int x, int set_bit)
{
    uint32_t * word_buf = (uint32_t *)buf;
    unsigned int offset = x >> 5;
    uint32_t word = word_buf[offset];
    // Unsigned constant: shifting a signed 1 left by 31 is undefined behavior
    uint32_t mask = UINT32_C(1) << (x % 32);

    if (word & mask) {
        return 1;
    }

    if (set_bit) {
        word_buf[offset] = word | mask;
    }
    return 0;
}
|
||||
|
||||
|
||||
static int bloom_check_add(struct bloom * bloom,
|
||||
const void * buffer, int len, int add)
|
||||
{
|
||||
if (bloom->ready == 0) {
|
||||
(void)printf("bloom at %p not initialized!\n", (void *)bloom);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int hits = 0;
|
||||
register unsigned int a = murmurhash2(buffer, len, 0x9747b28c);
|
||||
register unsigned int b = murmurhash2(buffer, len, a);
|
||||
register unsigned int x;
|
||||
register int i; // TODO why was it unsigned?
|
||||
|
||||
unsigned bucket_index = (a % bloom->buckets);
|
||||
|
||||
unsigned char * bucket_ptr =
|
||||
(bloom->bf + (bucket_index << bloom->bucket_bytes_exponent));
|
||||
|
||||
for (i = 0; i < bloom->hashes; i++) {
|
||||
x = (a + i*b) & bloom->bucket_bits_fast_mod_operand;
|
||||
if (test_bit_set_bit(bucket_ptr, x, add)) {
|
||||
hits++;
|
||||
}
|
||||
}
|
||||
|
||||
if (hits == bloom->hashes) {
|
||||
return 1; // 1 == element already in (or collision)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void setup_buckets(struct bloom * bloom, unsigned int cache_size)
|
||||
{
|
||||
// If caller passed a non-zero cache_size, use it as given, otherwise
|
||||
// either compute it or use built-in default
|
||||
|
||||
if (cache_size == 0) {
|
||||
//#ifdef __linux__ // TODO commented because triggers error on luke21
|
||||
// cache_size = detect_bucket_size(BLOOM_BUCKET_SIZE_FALLBACK);
|
||||
//#else
|
||||
cache_size = BLOOM_BUCKET_SIZE_FALLBACK;
|
||||
//#endif
|
||||
}
|
||||
|
||||
bloom->buckets = (bloom->bytes / cache_size);
|
||||
bloom->bucket_bytes = cache_size;
|
||||
|
||||
// make sure bloom buffer bytes and bucket_bytes are even
|
||||
int not_even_by = (bloom->bytes % bloom->bucket_bytes);
|
||||
|
||||
if (not_even_by) {
|
||||
// adjust bytes
|
||||
bloom->bytes += (bloom->bucket_bytes - not_even_by);
|
||||
assert((bloom->bytes % bloom->bucket_bytes) == 0); // Should get even
|
||||
|
||||
// adjust bits
|
||||
bloom->bits = bloom->bytes * 8;
|
||||
|
||||
// adjust bits per element
|
||||
bloom->bpe = bloom->bits*1. / bloom->entries;
|
||||
|
||||
// adjust buckets
|
||||
bloom->buckets++;
|
||||
}
|
||||
|
||||
bloom->bucket_bytes_exponent = __builtin_ctz(cache_size);
|
||||
bloom->bucket_bits_fast_mod_operand = (cache_size * 8 - 1);
|
||||
}
|
||||
|
||||
|
||||
int bloom_filter_size(int entries, double error)
|
||||
{
|
||||
int bytes;
|
||||
double num;
|
||||
double denom;
|
||||
double bpe;
|
||||
int bits;
|
||||
unsigned bucket_bytes;
|
||||
int not_even_by;
|
||||
|
||||
num = log(error);
|
||||
denom = 0.480453013918201; // ln(2)^2
|
||||
bpe = -(num / denom);
|
||||
bits = (int)(((double)entries) * bpe);
|
||||
|
||||
if (bits % 8) {
|
||||
bytes = (bits / 8) + 1;
|
||||
}
|
||||
else {
|
||||
bytes = bits / 8;
|
||||
}
|
||||
|
||||
bucket_bytes = BLOOM_BUCKET_SIZE_FALLBACK;
|
||||
not_even_by = bytes % bucket_bytes;
|
||||
if (not_even_by) {
|
||||
// adjust bytes
|
||||
bytes += (bucket_bytes - not_even_by);
|
||||
}
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
|
||||
int bloom_init_size(struct bloom * bloom, int entries, double error,
|
||||
unsigned int cache_size)
|
||||
{
|
||||
bloom->ready = 0;
|
||||
|
||||
if (entries < 1 || error == 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
bloom->entries = entries;
|
||||
bloom->error = error;
|
||||
|
||||
double num = log(bloom->error);
|
||||
double denom = 0.480453013918201; // ln(2)^2
|
||||
bloom->bpe = -(num / denom);
|
||||
|
||||
double dentries = (double)entries;
|
||||
bloom->bits = (int)(dentries * bloom->bpe);
|
||||
|
||||
if (bloom->bits % 8) {
|
||||
bloom->bytes = (bloom->bits / 8) + 1;
|
||||
} else {
|
||||
bloom->bytes = bloom->bits / 8;
|
||||
}
|
||||
|
||||
bloom->hashes = (int)ceil(0.693147180559945 * bloom->bpe); // ln(2)
|
||||
|
||||
setup_buckets(bloom, cache_size);
|
||||
|
||||
// celine.mercier@metabarcoding.org :
|
||||
// Replaced the calloc with a memset, as the memory for the bloom filter is mapped in our data structure
|
||||
memset(bloom->bf, 0, bloom->bytes);
|
||||
//bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
|
||||
//if (bloom->bf == NULL) {
|
||||
// return 1;
|
||||
//}
|
||||
|
||||
bloom->ready = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Initializes a bloom filter for `entries` elements with the fixed error
 * rate BLOOM_FILTER_ERROR_RATE and the default bucket size.
 *
 * Returns whatever bloom_init_size() returns.
 */
int bloom_init(struct bloom * bloom, int entries) //, double error)
{
    return bloom_init_size(bloom, entries, BLOOM_FILTER_ERROR_RATE, 0);
}
|
||||
|
||||
|
||||
/*
 * Tests whether the `len` bytes at `buffer` are in the filter, without
 * modifying it (bloom_check_add() called with add = 0).
 */
int bloom_check(struct bloom * bloom, const void * buffer, int len)
{
    return bloom_check_add(bloom, buffer, len, 0);
}
|
||||
|
||||
|
||||
/*
 * Adds the `len` bytes at `buffer` to the filter (bloom_check_add() called
 * with add = 1). The return value tells whether all of its bits were
 * already set before the call.
 */
int bloom_add(struct bloom * bloom, const void * buffer, int len)
{
    return bloom_check_add(bloom, buffer, len, 1);
}
|
||||
|
||||
|
||||
/*
 * Prints the address and every scalar field of the filter to stdout, one
 * per line. The fast-modulo mask is printed in octal. The bit array itself
 * is not printed.
 */
void bloom_print(struct bloom * bloom)
{
    (void)printf("bloom at %p\n", (void *)bloom);
    (void)printf(" ->entries = %d\n", bloom->entries);
    (void)printf(" ->error = %f\n", bloom->error);
    (void)printf(" ->bits = %d\n", bloom->bits);
    (void)printf(" ->bits per elem = %f\n", bloom->bpe);
    (void)printf(" ->bytes = %d\n", bloom->bytes);
    (void)printf(" ->buckets = %u\n", bloom->buckets);
    (void)printf(" ->bucket_bytes = %u\n", bloom->bucket_bytes);
    (void)printf(" ->bucket_bytes_exponent = %u\n",
                 bloom->bucket_bytes_exponent);
    (void)printf(" ->bucket_bits_fast_mod_operand = 0%o\n",
                 bloom->bucket_bits_fast_mod_operand);
    (void)printf(" ->hash functions = %d\n", bloom->hashes);
}
|
||||
|
||||
|
||||
void bloom_free(struct bloom * bloom)
|
||||
{
|
||||
if (bloom->ready) {
|
||||
free(bloom->bf);
|
||||
}
|
||||
bloom->ready = 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Returns the BLOOM_VERSION macro expanded and turned into a string literal.
 * NOTE(review): BLOOM_VERSION is not defined in this file or in bloom.h;
 * presumably it comes from the build flags. If it is undefined the result is
 * the literal text "BLOOM_VERSION" - confirm.
 */
const char * bloom_version()
{
    return MAKESTRING(BLOOM_VERSION);
}
|
210
src/bloom.h
Executable file
210
src/bloom.h
Executable file
@ -0,0 +1,210 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2015, Jyri J. Virkki
|
||||
* All rights reserved.
|
||||
*
|
||||
* This file is under BSD license. See LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef _BLOOM_H
|
||||
#define _BLOOM_H
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Added by celine.mercier@metabarcoding.org
|
||||
*
|
||||
* Bloom filter error rate wanted.
|
||||
*
|
||||
*/
|
||||
#define BLOOM_FILTER_ERROR_RATE (0.001)
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* On Linux, the code attempts to compute a bucket size based on CPU cache
|
||||
* size info, if available. If that fails for any reason, this fallback size
|
||||
* is used instead.
|
||||
*
|
||||
* On non-Linux systems, this is the bucket size always used unless the
|
||||
* caller overrides it (see bloom_init_size()).
|
||||
*
|
||||
*/
|
||||
#define BLOOM_BUCKET_SIZE_FALLBACK (32 * 1024)
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* It was found that using multiplier x0.5 for CPU L1 cache size is
|
||||
* more effective in terms of CPU usage and, surprisingly, collisions
|
||||
* number.
|
||||
*
|
||||
* Feel free to tune this constant the way it will work for you.
|
||||
*
|
||||
*/
|
||||
#define BLOOM_L1_CACHE_SIZE_DIV 1
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Structure to keep track of one bloom filter. Caller needs to
|
||||
* allocate this and pass it to the functions below. First call for
|
||||
* every struct must be to bloom_init().
|
||||
*
|
||||
*/
|
||||
struct bloom
{
    // These fields are part of the public interface of this structure.
    // Client code may read these values if desired. Client code MUST NOT
    // modify any of these.
    int entries;    // expected number of entries, as given at initialization
    double error;   // requested false positive rate
    int bits;       // size of the bit array, in bits
    int bytes;      // size of the bit array, in bytes
    int hashes;     // number of bit positions tested/set per element

    // Fields below are private to the implementation. These may go away or
    // change incompatibly at any moment. Client code MUST NOT access or rely
    // on these.
    unsigned buckets;        // number of buckets the bit array is split into
    unsigned bucket_bytes;   // size of one bucket, in bytes

    // x86 CPU divide by/multiply by operation optimization helpers
    unsigned bucket_bytes_exponent;          // shift turning a bucket index into a byte offset
    unsigned bucket_bits_fast_mod_operand;   // mask reducing a hash to a bit index inside a bucket

    double bpe;   // bits per element
    int ready;    // non-zero once the filter has been initialized

    // Bit array, stored directly after the fields above (flexible array
    // member). The memory for it must be reserved together with the struct.
    unsigned char bf[];
};
|
||||
|
||||
typedef struct bloom bloom_t;
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Added by celine.mercier@metabarcoding.org
|
||||
*
|
||||
* This function computes the size needed by the bloom filter
|
||||
* depending on the number of entries and the error rate.
|
||||
*
|
||||
*/
|
||||
int bloom_filter_size(int entries, double error);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Initialize the bloom filter for use.
|
||||
*
|
||||
* The filter is initialized with a bit field and number of hash functions
|
||||
* according to the computations from the wikipedia entry:
|
||||
* http://en.wikipedia.org/wiki/Bloom_filter
|
||||
*
|
||||
* Optimal number of bits is:
|
||||
 *     bits = -(entries * ln(error)) / ln(2)^2
|
||||
*
|
||||
* Optimal number of hash functions is:
|
||||
* hashes = bpe * ln(2)
|
||||
*
|
||||
* Parameters:
|
||||
* -----------
|
||||
* bloom - Pointer to an allocated struct bloom (see above).
|
||||
* entries - The expected number of entries which will be inserted.
|
||||
* error - Probability of collision (as long as entries are not
|
||||
* exceeded).
|
||||
*
|
||||
* Return:
|
||||
* -------
|
||||
* 0 - on success
|
||||
* 1 - on failure
|
||||
*
|
||||
*/
|
||||
int bloom_init(struct bloom * bloom, int entries); //, double error);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Initialize the bloom filter for use.
|
||||
*
|
||||
* See comments above for general information.
|
||||
*
|
||||
* This is the same as bloom_init() but allows the caller to pass in a
|
||||
* cache_size to override the internal value (which is either computed
|
||||
* or the default of BLOOM_BUCKET_SIZE_FALLBACK). Mostly useful for
|
||||
* experimenting.
|
||||
*
|
||||
* See misc/bucketsize for a script which can help identify a good value
|
||||
* for cache_size.
|
||||
*
|
||||
*/
|
||||
int bloom_init_size(struct bloom * bloom, int entries, double error,
|
||||
unsigned int cache_size);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Check if the given element is in the bloom filter. Remember this may
|
||||
 * return false positive if a collision occurred.
|
||||
*
|
||||
* Parameters:
|
||||
* -----------
|
||||
* bloom - Pointer to an allocated struct bloom (see above).
|
||||
* buffer - Pointer to buffer containing element to check.
|
||||
* len - Size of 'buffer'.
|
||||
*
|
||||
* Return:
|
||||
* -------
|
||||
* 0 - element is not present
|
||||
* 1 - element is present (or false positive due to collision)
|
||||
* -1 - bloom not initialized
|
||||
*
|
||||
*/
|
||||
int bloom_check(struct bloom * bloom, const void * buffer, int len);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Add the given element to the bloom filter.
|
||||
* The return code indicates if the element (or a collision) was already in,
|
||||
* so for the common check+add use case, no need to call check separately.
|
||||
*
|
||||
* Parameters:
|
||||
* -----------
|
||||
* bloom - Pointer to an allocated struct bloom (see above).
|
||||
* buffer - Pointer to buffer containing element to add.
|
||||
* len - Size of 'buffer'.
|
||||
*
|
||||
* Return:
|
||||
* -------
|
||||
* 0 - element was not present and was added
|
||||
* 1 - element (or a collision) had already been added previously
|
||||
* -1 - bloom not initialized
|
||||
*
|
||||
*/
|
||||
int bloom_add(struct bloom * bloom, const void * buffer, int len);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Print (to stdout) info about this bloom filter. Debugging aid.
|
||||
*
|
||||
*/
|
||||
void bloom_print(struct bloom * bloom);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Deallocate internal storage.
|
||||
*
|
||||
* Upon return, the bloom struct is no longer usable. You may call bloom_init
|
||||
* again on the same struct to reinitialize it again.
|
||||
*
|
||||
* Parameters:
|
||||
* -----------
|
||||
* bloom - Pointer to an allocated struct bloom (see above).
|
||||
*
|
||||
* Return: none
|
||||
*
|
||||
*/
|
||||
void bloom_free(struct bloom * bloom);
|
||||
|
||||
|
||||
/** ***************************************************************************
|
||||
* Returns version string compiled into library.
|
||||
*
|
||||
* Return: version string
|
||||
*
|
||||
*/
|
||||
const char * bloom_version();
|
||||
|
||||
|
||||
#endif
|
80
src/char_str_indexer.c
Normal file
80
src/char_str_indexer.c
Normal file
@ -0,0 +1,80 @@
|
||||
/****************************************************************************
|
||||
* Character string indexing functions *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file char_str_indexer.c
|
||||
* @author Celine Mercier
|
||||
* @date April 12th 2016
|
||||
* @brief Functions handling the indexing and retrieval of character strings.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "obiblob.h"
#include "obiblob_indexer.h"
#include "obidebug.h"
#include "obitypes.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
Obi_blob_p obi_str_to_blob(const char* value)
|
||||
{
|
||||
Obi_blob_p value_b;
|
||||
int32_t length;
|
||||
|
||||
// Compute the number of bytes on which the value will be encoded
|
||||
length = strlen(value) + 1; // +1 to store \0 at the end (makes retrieving faster)
|
||||
|
||||
value_b = obi_blob((byte_t*)value, ELEMENT_SIZE_STR, length, length);
|
||||
if (value_b == NULL)
|
||||
{
|
||||
obidebug(1, "\nError encoding a character string in a blob");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return value_b;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Returns the character string stored in a blob.
 *
 * No copy is made: the pointer returned is the blob's own value field.
 * value_b is dereferenced without being checked.
 *
 * NOTE(review): char_str_indexer.h declares this function as returning
 * const char* - the two should be made to agree.
 */
char* obi_blob_to_str(Obi_blob_p value_b)
{
    return value_b->value;
}
|
||||
|
||||
|
||||
/*
 * Stores a character string in an indexer.
 *
 * The string is first encoded in a temporary blob, which is freed once it
 * has been handed to obi_indexer_add().
 *
 * Returns the value returned by obi_indexer_add(), or -1 if the string
 * could not be encoded.
 */
index_t obi_index_char_str(Obi_indexer_p indexer, const char* value)
{
    index_t    position;
    Obi_blob_p encoded = obi_str_to_blob(value);

    if (encoded == NULL)
    {
        return -1;
    }

    position = obi_indexer_add(indexer, encoded);

    // The temporary blob is not needed once the indexer call has returned
    free(encoded);

    return position;
}
|
||||
|
||||
|
||||
/*
 * Retrieves the character string stored at index idx in an indexer.
 *
 * The pointer returned is the value field of the blob handed back by
 * obi_indexer_get(); no copy is made.
 *
 * Returns the character string, or NULL if the indexer returned no blob.
 */
char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx)
{
    Obi_blob_p value_b;

    // Get encoded value
    value_b = obi_indexer_get(indexer, idx);

    // Do not dereference a missing blob (assumes obi_indexer_get() reports
    // a failed lookup with NULL - TODO confirm)
    if (value_b == NULL)
        return NULL;

    // Return decoded character string
    return obi_blob_to_str(value_b);
}
|
||||
|
87
src/char_str_indexer.h
Normal file
87
src/char_str_indexer.h
Normal file
@ -0,0 +1,87 @@
|
||||
/****************************************************************************
|
||||
 * Character string indexer header file                                     *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
 * @file char_str_indexer.h
|
||||
* @author Celine Mercier
|
||||
* @date April 12th 2016
|
||||
 * @brief Header file for the functions handling the indexing of character strings.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef CHAR_STR_INDEXER_H_
|
||||
#define CHAR_STR_INDEXER_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "obitypes.h"
|
||||
#include "obiblob.h"
|
||||
#include "obiblob_indexer.h"
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a character string to a blob.
|
||||
*
|
||||
* @warning The blob must be freed by the caller.
|
||||
*
|
||||
* @param value The character string to convert.
|
||||
*
|
||||
* @returns A pointer to the blob created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_blob_p obi_str_to_blob(char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a blob to a character string.
|
||||
*
|
||||
* @warning The character string returned is mapped.
|
||||
*
|
||||
* @param value_b The blob to convert.
|
||||
*
|
||||
* @returns A pointer on the character string contained in the blob.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_blob_to_str(Obi_blob_p value_b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Stores a character string in an indexer and returns the index.
|
||||
*
|
||||
* @param indexer The indexer structure.
|
||||
* @param value The character string to index.
|
||||
*
|
||||
* @returns The index referring to the stored character string in the indexer.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_index_char_str(Obi_indexer_p indexer, const char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Retrieves a character string from an indexer.
|
||||
*
|
||||
* @warning The character string returned is mapped.
|
||||
*
|
||||
* @param indexer The indexer structure.
|
||||
* @param idx The index referring to the character string to retrieve in the indexer.
|
||||
*
|
||||
* @returns A pointer on the character string.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_retrieve_char_str(Obi_indexer_p indexer, index_t idx);
|
||||
|
||||
|
||||
#endif /* CHAR_STR_INDEXER_H_ */
|
||||
|
198
src/crc64.c
Normal file
198
src/crc64.c
Normal file
@ -0,0 +1,198 @@
|
||||
/* Redis uses the CRC64 variant with "Jones" coefficients and init value of 0.
|
||||
*
|
||||
* Specification of this CRC64 variant follows:
|
||||
* Name: crc-64-jones
|
||||
* Width: 64 bits
|
||||
* Poly: 0xad93d23594c935a9
|
||||
* Reflected In: True
|
||||
* Xor_In: 0xffffffffffffffff
|
||||
* Reflected_Out: True
|
||||
* Xor_Out: 0x0
|
||||
* Check("123456789"): 0xe9c6d914c4b8d9ca
|
||||
*
|
||||
* Copyright (c) 2012, Salvatore Sanfilippo <antirez at gmail dot com>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of Redis nor the names of its contributors may be used
|
||||
* to endorse or promote products derived from this software without
|
||||
* specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
static const uint64_t crc64_tab[256] = {
|
||||
UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979),
|
||||
UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b),
|
||||
UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6),
|
||||
UINT64_C(0x358804e3f82aa47d), UINT64_C(0x4f50742bc81f2d04),
|
||||
UINT64_C(0xab28ecb46814fe75), UINT64_C(0xd1f09c7c5821770c),
|
||||
UINT64_C(0x5e980d24087fec87), UINT64_C(0x24407dec384a65fe),
|
||||
UINT64_C(0x6b1009c7f05548fa), UINT64_C(0x11c8790fc060c183),
|
||||
UINT64_C(0x9ea0e857903e5a08), UINT64_C(0xe478989fa00bd371),
|
||||
UINT64_C(0x7d08ff3b88be6f81), UINT64_C(0x07d08ff3b88be6f8),
|
||||
UINT64_C(0x88b81eabe8d57d73), UINT64_C(0xf2606e63d8e0f40a),
|
||||
UINT64_C(0xbd301a4810ffd90e), UINT64_C(0xc7e86a8020ca5077),
|
||||
UINT64_C(0x4880fbd87094cbfc), UINT64_C(0x32588b1040a14285),
|
||||
UINT64_C(0xd620138fe0aa91f4), UINT64_C(0xacf86347d09f188d),
|
||||
UINT64_C(0x2390f21f80c18306), UINT64_C(0x594882d7b0f40a7f),
|
||||
UINT64_C(0x1618f6fc78eb277b), UINT64_C(0x6cc0863448deae02),
|
||||
UINT64_C(0xe3a8176c18803589), UINT64_C(0x997067a428b5bcf0),
|
||||
UINT64_C(0xfa11fe77117cdf02), UINT64_C(0x80c98ebf2149567b),
|
||||
UINT64_C(0x0fa11fe77117cdf0), UINT64_C(0x75796f2f41224489),
|
||||
UINT64_C(0x3a291b04893d698d), UINT64_C(0x40f16bccb908e0f4),
|
||||
UINT64_C(0xcf99fa94e9567b7f), UINT64_C(0xb5418a5cd963f206),
|
||||
UINT64_C(0x513912c379682177), UINT64_C(0x2be1620b495da80e),
|
||||
UINT64_C(0xa489f35319033385), UINT64_C(0xde51839b2936bafc),
|
||||
UINT64_C(0x9101f7b0e12997f8), UINT64_C(0xebd98778d11c1e81),
|
||||
UINT64_C(0x64b116208142850a), UINT64_C(0x1e6966e8b1770c73),
|
||||
UINT64_C(0x8719014c99c2b083), UINT64_C(0xfdc17184a9f739fa),
|
||||
UINT64_C(0x72a9e0dcf9a9a271), UINT64_C(0x08719014c99c2b08),
|
||||
UINT64_C(0x4721e43f0183060c), UINT64_C(0x3df994f731b68f75),
|
||||
UINT64_C(0xb29105af61e814fe), UINT64_C(0xc849756751dd9d87),
|
||||
UINT64_C(0x2c31edf8f1d64ef6), UINT64_C(0x56e99d30c1e3c78f),
|
||||
UINT64_C(0xd9810c6891bd5c04), UINT64_C(0xa3597ca0a188d57d),
|
||||
UINT64_C(0xec09088b6997f879), UINT64_C(0x96d1784359a27100),
|
||||
UINT64_C(0x19b9e91b09fcea8b), UINT64_C(0x636199d339c963f2),
|
||||
UINT64_C(0xdf7adabd7a6e2d6f), UINT64_C(0xa5a2aa754a5ba416),
|
||||
UINT64_C(0x2aca3b2d1a053f9d), UINT64_C(0x50124be52a30b6e4),
|
||||
UINT64_C(0x1f423fcee22f9be0), UINT64_C(0x659a4f06d21a1299),
|
||||
UINT64_C(0xeaf2de5e82448912), UINT64_C(0x902aae96b271006b),
|
||||
UINT64_C(0x74523609127ad31a), UINT64_C(0x0e8a46c1224f5a63),
|
||||
UINT64_C(0x81e2d7997211c1e8), UINT64_C(0xfb3aa75142244891),
|
||||
UINT64_C(0xb46ad37a8a3b6595), UINT64_C(0xceb2a3b2ba0eecec),
|
||||
UINT64_C(0x41da32eaea507767), UINT64_C(0x3b024222da65fe1e),
|
||||
UINT64_C(0xa2722586f2d042ee), UINT64_C(0xd8aa554ec2e5cb97),
|
||||
UINT64_C(0x57c2c41692bb501c), UINT64_C(0x2d1ab4dea28ed965),
|
||||
UINT64_C(0x624ac0f56a91f461), UINT64_C(0x1892b03d5aa47d18),
|
||||
UINT64_C(0x97fa21650afae693), UINT64_C(0xed2251ad3acf6fea),
|
||||
UINT64_C(0x095ac9329ac4bc9b), UINT64_C(0x7382b9faaaf135e2),
|
||||
UINT64_C(0xfcea28a2faafae69), UINT64_C(0x8632586aca9a2710),
|
||||
UINT64_C(0xc9622c4102850a14), UINT64_C(0xb3ba5c8932b0836d),
|
||||
UINT64_C(0x3cd2cdd162ee18e6), UINT64_C(0x460abd1952db919f),
|
||||
UINT64_C(0x256b24ca6b12f26d), UINT64_C(0x5fb354025b277b14),
|
||||
UINT64_C(0xd0dbc55a0b79e09f), UINT64_C(0xaa03b5923b4c69e6),
|
||||
UINT64_C(0xe553c1b9f35344e2), UINT64_C(0x9f8bb171c366cd9b),
|
||||
UINT64_C(0x10e3202993385610), UINT64_C(0x6a3b50e1a30ddf69),
|
||||
UINT64_C(0x8e43c87e03060c18), UINT64_C(0xf49bb8b633338561),
|
||||
UINT64_C(0x7bf329ee636d1eea), UINT64_C(0x012b592653589793),
|
||||
UINT64_C(0x4e7b2d0d9b47ba97), UINT64_C(0x34a35dc5ab7233ee),
|
||||
UINT64_C(0xbbcbcc9dfb2ca865), UINT64_C(0xc113bc55cb19211c),
|
||||
UINT64_C(0x5863dbf1e3ac9dec), UINT64_C(0x22bbab39d3991495),
|
||||
UINT64_C(0xadd33a6183c78f1e), UINT64_C(0xd70b4aa9b3f20667),
|
||||
UINT64_C(0x985b3e827bed2b63), UINT64_C(0xe2834e4a4bd8a21a),
|
||||
UINT64_C(0x6debdf121b863991), UINT64_C(0x1733afda2bb3b0e8),
|
||||
UINT64_C(0xf34b37458bb86399), UINT64_C(0x8993478dbb8deae0),
|
||||
UINT64_C(0x06fbd6d5ebd3716b), UINT64_C(0x7c23a61ddbe6f812),
|
||||
UINT64_C(0x3373d23613f9d516), UINT64_C(0x49aba2fe23cc5c6f),
|
||||
UINT64_C(0xc6c333a67392c7e4), UINT64_C(0xbc1b436e43a74e9d),
|
||||
UINT64_C(0x95ac9329ac4bc9b5), UINT64_C(0xef74e3e19c7e40cc),
|
||||
UINT64_C(0x601c72b9cc20db47), UINT64_C(0x1ac40271fc15523e),
|
||||
UINT64_C(0x5594765a340a7f3a), UINT64_C(0x2f4c0692043ff643),
|
||||
UINT64_C(0xa02497ca54616dc8), UINT64_C(0xdafce7026454e4b1),
|
||||
UINT64_C(0x3e847f9dc45f37c0), UINT64_C(0x445c0f55f46abeb9),
|
||||
UINT64_C(0xcb349e0da4342532), UINT64_C(0xb1eceec59401ac4b),
|
||||
UINT64_C(0xfebc9aee5c1e814f), UINT64_C(0x8464ea266c2b0836),
|
||||
UINT64_C(0x0b0c7b7e3c7593bd), UINT64_C(0x71d40bb60c401ac4),
|
||||
UINT64_C(0xe8a46c1224f5a634), UINT64_C(0x927c1cda14c02f4d),
|
||||
UINT64_C(0x1d148d82449eb4c6), UINT64_C(0x67ccfd4a74ab3dbf),
|
||||
UINT64_C(0x289c8961bcb410bb), UINT64_C(0x5244f9a98c8199c2),
|
||||
UINT64_C(0xdd2c68f1dcdf0249), UINT64_C(0xa7f41839ecea8b30),
|
||||
UINT64_C(0x438c80a64ce15841), UINT64_C(0x3954f06e7cd4d138),
|
||||
UINT64_C(0xb63c61362c8a4ab3), UINT64_C(0xcce411fe1cbfc3ca),
|
||||
UINT64_C(0x83b465d5d4a0eece), UINT64_C(0xf96c151de49567b7),
|
||||
UINT64_C(0x76048445b4cbfc3c), UINT64_C(0x0cdcf48d84fe7545),
|
||||
UINT64_C(0x6fbd6d5ebd3716b7), UINT64_C(0x15651d968d029fce),
|
||||
UINT64_C(0x9a0d8ccedd5c0445), UINT64_C(0xe0d5fc06ed698d3c),
|
||||
UINT64_C(0xaf85882d2576a038), UINT64_C(0xd55df8e515432941),
|
||||
UINT64_C(0x5a3569bd451db2ca), UINT64_C(0x20ed197575283bb3),
|
||||
UINT64_C(0xc49581ead523e8c2), UINT64_C(0xbe4df122e51661bb),
|
||||
UINT64_C(0x3125607ab548fa30), UINT64_C(0x4bfd10b2857d7349),
|
||||
UINT64_C(0x04ad64994d625e4d), UINT64_C(0x7e7514517d57d734),
|
||||
UINT64_C(0xf11d85092d094cbf), UINT64_C(0x8bc5f5c11d3cc5c6),
|
||||
UINT64_C(0x12b5926535897936), UINT64_C(0x686de2ad05bcf04f),
|
||||
UINT64_C(0xe70573f555e26bc4), UINT64_C(0x9ddd033d65d7e2bd),
|
||||
UINT64_C(0xd28d7716adc8cfb9), UINT64_C(0xa85507de9dfd46c0),
|
||||
UINT64_C(0x273d9686cda3dd4b), UINT64_C(0x5de5e64efd965432),
|
||||
UINT64_C(0xb99d7ed15d9d8743), UINT64_C(0xc3450e196da80e3a),
|
||||
UINT64_C(0x4c2d9f413df695b1), UINT64_C(0x36f5ef890dc31cc8),
|
||||
UINT64_C(0x79a59ba2c5dc31cc), UINT64_C(0x037deb6af5e9b8b5),
|
||||
UINT64_C(0x8c157a32a5b7233e), UINT64_C(0xf6cd0afa9582aa47),
|
||||
UINT64_C(0x4ad64994d625e4da), UINT64_C(0x300e395ce6106da3),
|
||||
UINT64_C(0xbf66a804b64ef628), UINT64_C(0xc5bed8cc867b7f51),
|
||||
UINT64_C(0x8aeeace74e645255), UINT64_C(0xf036dc2f7e51db2c),
|
||||
UINT64_C(0x7f5e4d772e0f40a7), UINT64_C(0x05863dbf1e3ac9de),
|
||||
UINT64_C(0xe1fea520be311aaf), UINT64_C(0x9b26d5e88e0493d6),
|
||||
UINT64_C(0x144e44b0de5a085d), UINT64_C(0x6e963478ee6f8124),
|
||||
UINT64_C(0x21c640532670ac20), UINT64_C(0x5b1e309b16452559),
|
||||
UINT64_C(0xd476a1c3461bbed2), UINT64_C(0xaeaed10b762e37ab),
|
||||
UINT64_C(0x37deb6af5e9b8b5b), UINT64_C(0x4d06c6676eae0222),
|
||||
UINT64_C(0xc26e573f3ef099a9), UINT64_C(0xb8b627f70ec510d0),
|
||||
UINT64_C(0xf7e653dcc6da3dd4), UINT64_C(0x8d3e2314f6efb4ad),
|
||||
UINT64_C(0x0256b24ca6b12f26), UINT64_C(0x788ec2849684a65f),
|
||||
UINT64_C(0x9cf65a1b368f752e), UINT64_C(0xe62e2ad306bafc57),
|
||||
UINT64_C(0x6946bb8b56e467dc), UINT64_C(0x139ecb4366d1eea5),
|
||||
UINT64_C(0x5ccebf68aecec3a1), UINT64_C(0x2616cfa09efb4ad8),
|
||||
UINT64_C(0xa97e5ef8cea5d153), UINT64_C(0xd3a62e30fe90582a),
|
||||
UINT64_C(0xb0c7b7e3c7593bd8), UINT64_C(0xca1fc72bf76cb2a1),
|
||||
UINT64_C(0x45775673a732292a), UINT64_C(0x3faf26bb9707a053),
|
||||
UINT64_C(0x70ff52905f188d57), UINT64_C(0x0a2722586f2d042e),
|
||||
UINT64_C(0x854fb3003f739fa5), UINT64_C(0xff97c3c80f4616dc),
|
||||
UINT64_C(0x1bef5b57af4dc5ad), UINT64_C(0x61372b9f9f784cd4),
|
||||
UINT64_C(0xee5fbac7cf26d75f), UINT64_C(0x9487ca0fff135e26),
|
||||
UINT64_C(0xdbd7be24370c7322), UINT64_C(0xa10fceec0739fa5b),
|
||||
UINT64_C(0x2e675fb4576761d0), UINT64_C(0x54bf2f7c6752e8a9),
|
||||
UINT64_C(0xcdcf48d84fe75459), UINT64_C(0xb71738107fd2dd20),
|
||||
UINT64_C(0x387fa9482f8c46ab), UINT64_C(0x42a7d9801fb9cfd2),
|
||||
UINT64_C(0x0df7adabd7a6e2d6), UINT64_C(0x772fdd63e7936baf),
|
||||
UINT64_C(0xf8474c3bb7cdf024), UINT64_C(0x829f3cf387f8795d),
|
||||
UINT64_C(0x66e7a46c27f3aa2c), UINT64_C(0x1c3fd4a417c62355),
|
||||
UINT64_C(0x935745fc4798b8de), UINT64_C(0xe98f353477ad31a7),
|
||||
UINT64_C(0xa6df411fbfb21ca3), UINT64_C(0xdc0731d78f8795da),
|
||||
UINT64_C(0x536fa08fdfd90e51), UINT64_C(0x29b7d047efec8728),
|
||||
};
|
||||
|
||||
|
||||
uint64_t crc64(const char* s, uint64_t l)
|
||||
{
|
||||
uint64_t j;
|
||||
uint64_t crc = 0;
|
||||
|
||||
for (j = 0; j < l; j++)
|
||||
{
|
||||
uint8_t byte = s[j];
|
||||
crc = crc64_tab[(uint8_t)crc ^ byte] ^ (crc >> 8);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
|
||||
|
||||
/* Test main */
|
||||
//#ifdef TEST_MAIN
|
||||
//#include <stdio.h>
|
||||
//int main(void) {
|
||||
// printf("e9c6d914c4b8d9ca == %016llx\n",
|
||||
// (unsigned long long) crc64("123456789", 9));
|
||||
// return 0;
|
||||
//}
|
||||
//#endif
|
||||
|
||||
|
14
src/crc64.h
Normal file
14
src/crc64.h
Normal file
@ -0,0 +1,14 @@
|
||||
/**
|
||||
* @file crc64.h
|
||||
* @date March 24th 2016
|
||||
* @brief Header file for CRC64 function.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generates and returns a 64-bit Cyclic Redundancy Check from a
|
||||
* character string s of length l.
|
||||
*/
|
||||
uint64_t crc64(const char* s, uint64_t l);
|
102
src/dna_seq_indexer.c
Normal file
102
src/dna_seq_indexer.c
Normal file
@ -0,0 +1,102 @@
|
||||
/****************************************************************************
|
||||
* DNA sequence indexing functions *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file dna_seq_indexer.c
|
||||
* @author Celine Mercier
|
||||
* @date April 12th 2016
|
||||
* @brief Functions handling the indexing and retrieval of DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "obiblob.h"
|
||||
#include "obiblob_indexer.h"
|
||||
#include "obidebug.h"
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
Obi_blob_p obi_seq_to_blob(const char* seq)
|
||||
{
|
||||
Obi_blob_p value_b;
|
||||
int32_t length_encoded_seq; // length of the encoded sequence in bytes
|
||||
int32_t seq_length;
|
||||
byte_t* encoded_seq;
|
||||
|
||||
seq_length = strlen(seq);
|
||||
|
||||
// Check if just ATGC and encode accordingly
|
||||
if (only_ATGC(seq))
|
||||
{
|
||||
// Compute the length (in bytes) of the encoded sequence
|
||||
length_encoded_seq = ceil((double) seq_length / (double) 4.0);
|
||||
// Encode
|
||||
encoded_seq = encode_seq_on_2_bits(seq, seq_length);
|
||||
if (encoded_seq == NULL)
|
||||
return NULL;
|
||||
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_2, length_encoded_seq, seq_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compute the length (in bytes) of the encoded sequence
|
||||
length_encoded_seq = ceil((double) seq_length / (double) 2.0);
|
||||
// Encode
|
||||
encoded_seq = encode_seq_on_4_bits(seq, seq_length);
|
||||
if (encoded_seq == NULL)
|
||||
return NULL;
|
||||
value_b = obi_blob(encoded_seq, ELEMENT_SIZE_SEQ_4, length_encoded_seq, seq_length);
|
||||
}
|
||||
|
||||
free(encoded_seq);
|
||||
|
||||
return value_b;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Decodes the DNA sequence stored in a blob.
 * An element size of 2 selects the 2-bit decoder; any other value selects
 * the 4-bit decoder. The result is whatever the chosen decoder returns.
 */
char* obi_blob_to_seq(Obi_blob_p value_b)
{
    if (value_b->element_size != 2)
        return decode_seq_on_4_bits(value_b->value, value_b->length_decoded_value);

    return decode_seq_on_2_bits(value_b->value, value_b->length_decoded_value);
}
|
||||
|
||||
|
||||
/*
 * Indexes a DNA sequence: the sequence is encoded in a blob, the blob is
 * handed to obi_indexer_add(), and the temporary blob is released.
 *
 * Returns -1 when the sequence could not be encoded, otherwise the value
 * returned by obi_indexer_add().
 */
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value)
{
    index_t    position;
    Obi_blob_p blob = obi_seq_to_blob(value);

    // Encoding failed: nothing to add, nothing to free
    if (blob == NULL)
        return -1;

    position = obi_indexer_add(indexer, blob);

    // The temporary blob belongs to this function and is released once it has been added
    free(blob);

    return position;
}
|
||||
|
||||
|
||||
/*
 * Fetches the blob stored at position idx in the indexer and returns the
 * DNA sequence decoded from it by obi_blob_to_seq().
 */
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx)
{
    // Look up the encoded value, then decode it
    Obi_blob_p stored = obi_indexer_get(indexer, idx);

    return obi_blob_to_seq(stored);
}
|
||||
|
87
src/dna_seq_indexer.h
Normal file
87
src/dna_seq_indexer.h
Normal file
@ -0,0 +1,87 @@
|
||||
/****************************************************************************
|
||||
* DNA sequence indexer header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file dna_seq_indexer.h
|
||||
* @author Celine Mercier
|
||||
* @date April 12th 2016
|
||||
* @brief Header file for the functions handling the indexing of DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef DNA_SEQ_INDEXER_H_
|
||||
#define DNA_SEQ_INDEXER_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "obidms.h"
|
||||
#include "obitypes.h"
|
||||
#include "obiblob.h"
|
||||
#include "obiblob_indexer.h"
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a DNA sequence to a blob.
|
||||
*
|
||||
* @warning The blob must be freed by the caller.
|
||||
*
|
||||
* @param seq The DNA sequence to convert.
|
||||
*
|
||||
* @returns A pointer to the blob created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_blob_p obi_seq_to_blob(const char* seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a blob to a DNA sequence.
|
||||
*
|
||||
* @param value_b The blob to convert.
|
||||
*
|
||||
* @returns A pointer to the DNA sequence contained in the blob.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_blob_to_seq(Obi_blob_p value_b);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Stores a DNA sequence in an indexer and returns the index.
|
||||
*
|
||||
* @param indexer The indexer structure.
|
||||
* @param value The DNA sequence to index.
|
||||
*
|
||||
* @returns The index referring to the stored DNA sequence in the indexer.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_index_dna_seq(Obi_indexer_p indexer, const char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Retrieves a DNA sequence from an indexer.
|
||||
*
|
||||
* @warning The DNA sequence returned must be freed by the caller.
|
||||
*
|
||||
* @param indexer The indexer structure.
|
||||
* @param idx The index referring to the DNA sequence to retrieve in the indexer.
|
||||
*
|
||||
* @returns A pointer on the DNA sequence.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_retrieve_dna_seq(Obi_indexer_p indexer, index_t idx);
|
||||
|
||||
|
||||
#endif /* DNA_SEQ_INDEXER_H_ */
|
||||
|
367
src/encode.c
Normal file
367
src/encode.c
Normal file
@ -0,0 +1,367 @@
|
||||
/****************************************************************************
|
||||
* Encoding functions *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file encode.c
|
||||
* @author Celine Mercier
|
||||
* @date November 18th 2015
|
||||
* @brief Functions encoding DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "encode.h"
|
||||
#include "obierrno.h"
|
||||
#include "obitypes.h" // For byte_t type
|
||||
#include "obidebug.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
// TODO: endianness problem?
|
||||
|
||||
|
||||
|
||||
/*
 * Tells whether a NUL-terminated string contains nothing but the characters
 * 'a', 't', 'g', 'c' in either case. An empty string qualifies.
 */
bool only_ATGC(const char* seq)
{
    const char* p;

    for (p = seq; *p != '\0'; p++)
    {
        switch (*p)
        {
            case 'A': case 'T': case 'G': case 'C':
            case 'a': case 't': case 'g': case 'c':
                break;
            default:
                // First character outside [atgcATGC] settles the answer
                return false;
        }
    }

    return true;
}
|
||||
|
||||
|
||||
byte_t* encode_seq_on_2_bits(const char* seq, int32_t length)
|
||||
{
|
||||
byte_t* seq_b;
|
||||
uint8_t modulo;
|
||||
int32_t length_b;
|
||||
int32_t i;
|
||||
|
||||
length_b = ceil((double) length / (double) 4.0);
|
||||
|
||||
seq_b = (byte_t*) calloc(length_b, sizeof(byte_t));
|
||||
if (seq_b == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
obidebug(1, "\nError allocating memory for an encoded DNA sequence");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i=0; i<length; i++)
|
||||
{
|
||||
// Shift of 2 to make place for new nucleotide
|
||||
seq_b[i/4] <<= 2;
|
||||
|
||||
// Add new nucleotide
|
||||
switch (seq[i])
|
||||
{
|
||||
case 'a':
|
||||
case 'A':
|
||||
seq_b[i/4] |= NUC_A_2b;
|
||||
break;
|
||||
case 'c':
|
||||
case 'C':
|
||||
seq_b[i/4] |= NUC_C_2b;
|
||||
break;
|
||||
case 'g':
|
||||
case 'G':
|
||||
seq_b[i/4] |= NUC_G_2b;
|
||||
break;
|
||||
case 't':
|
||||
case 'T':
|
||||
seq_b[i/4] |= NUC_T_2b;
|
||||
break;
|
||||
default:
|
||||
obi_set_errno(OBI_ENCODE_ERROR);
|
||||
obidebug(1, "\nInvalid nucleotide base when encoding (not [atgcATGC])");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Final shift for the last byte if needed
|
||||
modulo = (length % 4);
|
||||
if (modulo)
|
||||
seq_b[(i-1)/4] <<= (2*(4 - modulo));
|
||||
|
||||
return seq_b;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Unpacks a DNA sequence stored on 2 bits per nucleotide (4 per byte, first
 * nucleotide in the most significant bits) into a lowercase string.
 *
 * seq_b      : the packed sequence.
 * length_seq : number of nucleotides to decode.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free, or NULL after setting the OBI errno. Nothing is leaked on failure.
 */
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq)
{
    char*   seq;
    int32_t i;
    uint8_t shift;
    uint8_t mask;
    uint8_t nuc;

    seq = (char*) malloc((length_seq+1) * sizeof(char));
    if (seq == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for a decoded DNA sequence");
        return NULL;
    }

    for (i=0; i<length_seq; i++)
    {
        // Position of nucleotide i inside its byte: 6, 4, 2, 0, 6, ...
        shift = 6 - 2*(i % 4);
        mask = NUC_MASK_2B << shift;
        nuc = (seq_b[i/4] & mask) >> shift;

        switch (nuc)
        {
            case NUC_A_2b:
                seq[i] = 'a';
                break;
            case NUC_C_2b:
                seq[i] = 'c';
                break;
            case NUC_G_2b:
                seq[i] = 'g';
                break;
            case NUC_T_2b:
                seq[i] = 't';
                break;
            default:
                obi_set_errno(OBI_DECODE_ERROR);
                obidebug(1, "\nInvalid nucleotide base when decoding");
                // The partially decoded string is not returned: release it
                free(seq);
                return NULL;
        }
    }

    seq[length_seq] = '\0';

    return seq;
}
|
||||
|
||||
|
||||
byte_t* encode_seq_on_4_bits(const char* seq, int32_t length)
|
||||
{
|
||||
byte_t* seq_b;
|
||||
uint8_t modulo;
|
||||
int32_t length_b;
|
||||
int32_t i;
|
||||
|
||||
length_b = ceil((double) length / (double) 2.0);
|
||||
|
||||
seq_b = (byte_t*) calloc(length_b, sizeof(byte_t));
|
||||
if (seq_b == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
obidebug(1, "\nError allocating memory for an encoded DNA sequence");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i=0; i<length; i++)
|
||||
{
|
||||
// Shift of 4 to make place for new nucleotide
|
||||
seq_b[i/2] <<= 4;
|
||||
|
||||
// Add new nucleotide
|
||||
switch (seq[i])
|
||||
{
|
||||
case 'a':
|
||||
case 'A':
|
||||
seq_b[i/2] |= NUC_A_4b;
|
||||
break;
|
||||
case 'c':
|
||||
case 'C':
|
||||
seq_b[i/2] |= NUC_C_4b;
|
||||
break;
|
||||
case 'g':
|
||||
case 'G':
|
||||
seq_b[i/2] |= NUC_G_4b;
|
||||
break;
|
||||
case 't':
|
||||
case 'T':
|
||||
seq_b[i/2] |= NUC_T_4b;
|
||||
break;
|
||||
case 'r':
|
||||
case 'R':
|
||||
seq_b[i/2] |= NUC_R_4b;
|
||||
break;
|
||||
case 'y':
|
||||
case 'Y':
|
||||
seq_b[i/2] |= NUC_Y_4b;
|
||||
break;
|
||||
case 's':
|
||||
case 'S':
|
||||
seq_b[i/2] |= NUC_S_4b;
|
||||
break;
|
||||
case 'w':
|
||||
case 'W':
|
||||
seq_b[i/2] |= NUC_W_4b;
|
||||
break;
|
||||
case 'k':
|
||||
case 'K':
|
||||
seq_b[i/2] |= NUC_K_4b;
|
||||
break;
|
||||
case 'm':
|
||||
case 'M':
|
||||
seq_b[i/2] |= NUC_M_4b;
|
||||
break;
|
||||
case 'b':
|
||||
case 'B':
|
||||
seq_b[i/2] |= NUC_B_4b;
|
||||
break;
|
||||
case 'd':
|
||||
case 'D':
|
||||
seq_b[i/2] |= NUC_D_4b;
|
||||
break;
|
||||
case 'h':
|
||||
case 'H':
|
||||
seq_b[i/2] |= NUC_H_4b;
|
||||
break;
|
||||
case 'v':
|
||||
case 'V':
|
||||
seq_b[i/2] |= NUC_V_4b;
|
||||
break;
|
||||
case 'n':
|
||||
case 'N':
|
||||
seq_b[i/2] |= NUC_N_4b;
|
||||
break;
|
||||
default:
|
||||
obi_set_errno(OBI_ENCODE_ERROR);
|
||||
obidebug(1, "\nInvalid nucleotide base when encoding (not IUPAC)");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Final shift for the last byte if needed
|
||||
modulo = (length % 2);
|
||||
if (modulo)
|
||||
seq_b[(i-1)/2] <<= (4*modulo);
|
||||
|
||||
return seq_b;
|
||||
}
|
||||
|
||||
|
||||
char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq)
|
||||
{
|
||||
char* seq;
|
||||
int32_t i;
|
||||
uint8_t shift;
|
||||
uint8_t mask;
|
||||
uint8_t nuc;
|
||||
|
||||
seq = (char*) malloc((length_seq+1) * sizeof(char));
|
||||
if (seq == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_MALLOC_ERROR);
|
||||
obidebug(1, "\nError allocating memory for a decoded DNA sequence");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i=0; i<length_seq; i++)
|
||||
{
|
||||
shift = 4 - 4*(i % 2);
|
||||
mask = NUC_MASK_4B << shift;
|
||||
nuc = (seq_b[i/2] & mask) >> shift;
|
||||
|
||||
switch (nuc)
|
||||
{
|
||||
case NUC_A_4b:
|
||||
seq[i] = 'a';
|
||||
break;
|
||||
case NUC_C_4b:
|
||||
seq[i] = 'c';
|
||||
break;
|
||||
case NUC_G_4b:
|
||||
seq[i] = 'g';
|
||||
break;
|
||||
case NUC_T_4b:
|
||||
seq[i] = 't';
|
||||
break;
|
||||
case NUC_R_4b:
|
||||
seq[i] = 'r';
|
||||
break;
|
||||
case NUC_Y_4b:
|
||||
seq[i] = 'y';
|
||||
break;
|
||||
case NUC_S_4b:
|
||||
seq[i] = 's';
|
||||
break;
|
||||
case NUC_W_4b:
|
||||
seq[i] = 'w';
|
||||
break;
|
||||
case NUC_K_4b:
|
||||
seq[i] = 'k';
|
||||
break;
|
||||
case NUC_M_4b:
|
||||
seq[i] = 'm';
|
||||
break;
|
||||
case NUC_B_4b:
|
||||
seq[i] = 'b';
|
||||
break;
|
||||
case NUC_D_4b:
|
||||
seq[i] = 'd';
|
||||
break;
|
||||
case NUC_H_4b:
|
||||
seq[i] = 'h';
|
||||
break;
|
||||
case NUC_V_4b:
|
||||
seq[i] = 'v';
|
||||
break;
|
||||
case NUC_N_4b:
|
||||
seq[i] = 'n';
|
||||
break;
|
||||
default:
|
||||
obi_set_errno(OBI_DECODE_ERROR);
|
||||
obidebug(1, "\nInvalid nucleotide base when decoding");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
seq[length_seq] = '\0';
|
||||
|
||||
return seq;
|
||||
}
|
||||
|
||||
|
||||
///////////////////// FOR DEBUGGING ///////////////////////////
|
||||
//NOTE: The first byte is printed the first (at the left-most).
|
||||
|
||||
/*
 * Debugging helper: writes the `size` bytes starting at ptr to stderr as
 * groups of 8 binary digits, most significant bit first, each group followed
 * by a space. The output is preceded and followed by a newline.
 */
void print_bits(void* ptr, int32_t size)
{
    const uint8_t* octets = (const uint8_t*) ptr;
    int32_t        pos;
    int32_t        bit;

    fprintf(stderr, "\n");

    for (pos = 0; pos < size; pos++)
    {
        // Walk the byte from bit 7 down to bit 0
        bit = 8;
        while (bit-- > 0)
        {
            uint8_t value = (uint8_t) ((octets[pos] >> bit) & 1);
            fprintf(stderr, "%u", value);
        }
        fprintf(stderr, " ");
    }

    fprintf(stderr, "\n");
}
|
190
src/encode.h
Normal file
190
src/encode.h
Normal file
@ -0,0 +1,190 @@
|
||||
/****************************************************************************
|
||||
* Encoding header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file encode.h
|
||||
* @author Celine Mercier
|
||||
* @date November 18th 2015
|
||||
* @brief Header file for encoding DNA sequences.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef ENCODE_H_
|
||||
#define ENCODE_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
#define NUC_MASK_2B 0x3 /**< Binary: 11 to use when decoding 2 bits sequences
|
||||
*/
|
||||
#define NUC_MASK_4B 0xF /**< Binary: 1111 to use when decoding 4 bits sequences
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief enum for the 2-bits codes for each of the 4 nucleotides.
|
||||
*/
|
||||
enum
|
||||
{
|
||||
NUC_A_2b = 0x0, /* binary: 00 */
|
||||
NUC_C_2b = 0x1, /* binary: 01 */
|
||||
NUC_G_2b = 0x2, /* binary: 10 */
|
||||
NUC_T_2b = 0x3, /* binary: 11 */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief enum for the 4-bits codes for each of the 15 IUPAC nucleotides.
|
||||
*/
|
||||
enum
|
||||
{
|
||||
NUC_A_4b = 0x1, /* binary: 0001 */
|
||||
NUC_C_4b = 0x2, /* binary: 0010 */
|
||||
NUC_G_4b = 0x3, /* binary: 0011 */
|
||||
NUC_T_4b = 0x4, /* binary: 0100 */
|
||||
NUC_R_4b = 0x5, /* binary: 0101 */
|
||||
NUC_Y_4b = 0x6, /* binary: 0110 */
|
||||
NUC_S_4b = 0x7, /* binary: 0111 */
|
||||
NUC_W_4b = 0x8, /* binary: 1000 */
|
||||
NUC_K_4b = 0x9, /* binary: 1001 */
|
||||
NUC_M_4b = 0xA, /* binary: 1010 */
|
||||
NUC_B_4b = 0xB, /* binary: 1011 */
|
||||
NUC_D_4b = 0xC, /* binary: 1100 */
|
||||
NUC_H_4b = 0xD, /* binary: 1101 */
|
||||
NUC_V_4b = 0xE, /* binary: 1110 */
|
||||
NUC_N_4b = 0xF, /* binary: 1111 */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if there are only 'atgcATGC' characters in a
|
||||
* character string.
|
||||
*
|
||||
* @param seq The sequence to check.
|
||||
*
|
||||
* @returns A boolean value indicating if there are only
|
||||
* 'atgcATGC' characters in a character string.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
bool only_ATGC(const char* seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Encodes a DNA sequence with each nucleotide coded on 2 bits.
|
||||
*
|
||||
* A or a : 00
|
||||
* C or c : 01
|
||||
* G or g : 10
|
||||
* T or t : 11
|
||||
*
|
||||
* @warning The DNA sequence must contain only 'atgcATGC' characters.
|
||||
*
|
||||
* @param seq The sequence to encode.
|
||||
* @param length The length of the sequence to encode.
|
||||
*
|
||||
* @returns The encoded sequence.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t* encode_seq_on_2_bits(const char* seq, int32_t length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Decodes a DNA sequence that is coded with each nucleotide on 2 bits.
|
||||
*
|
||||
* 00 -> a
|
||||
* 01 -> c
|
||||
* 10 -> g
|
||||
* 11 -> t
|
||||
*
|
||||
* @param seq_b The sequence to decode.
|
||||
* @param length_seq The initial length of the sequence before it was encoded.
|
||||
*
|
||||
* @returns The decoded sequence ended with '\0'.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* decode_seq_on_2_bits(byte_t* seq_b, int32_t length_seq);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Encodes a DNA sequence with each nucleotide coded on 4 bits.
|
||||
*
|
||||
* A or a : 0001
|
||||
* C or c : 0010
|
||||
* G or g : 0011
|
||||
* T or t : 0100
|
||||
* R or r : 0101
|
||||
* Y or y : 0110
|
||||
* S or s : 0111
|
||||
* W or w : 1000
|
||||
* K or k : 1001
|
||||
* M or m : 1010
|
||||
* B or b : 1011
|
||||
* D or d : 1100
|
||||
* H or h : 1101
|
||||
* V or v : 1110
|
||||
* N or n : 1111
|
||||
*
|
||||
* @warning The DNA sequence must contain only IUPAC characters.
|
||||
*
|
||||
* @param seq The sequence to encode.
|
||||
* @param length The length of the sequence to encode.
|
||||
*
|
||||
* @returns The encoded sequence.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t* encode_seq_on_4_bits(const char* seq, int32_t length);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Decodes a DNA sequence that is coded with each nucleotide on 4 bits.
|
||||
*
|
||||
* A or a : 0001
|
||||
* C or c : 0010
|
||||
* G or g : 0011
|
||||
* T or t : 0100
|
||||
* R or r : 0101
|
||||
* Y or y : 0110
|
||||
* S or s : 0111
|
||||
* W or w : 1000
|
||||
* K or k : 1001
|
||||
* M or m : 1010
|
||||
* B or b : 1011
|
||||
* D or d : 1100
|
||||
* H or h : 1101
|
||||
* V or v : 1110
|
||||
* N or n : 1111
|
||||
*
|
||||
* @param seq_b The sequence to decode.
|
||||
* @param length_seq The initial length of the sequence before it was encoded.
|
||||
*
|
||||
* @returns The decoded sequence ended with '\0'.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* decode_seq_on_4_bits(byte_t* seq_b, int32_t length_seq);
|
||||
|
||||
|
||||
////////// FOR DEBUGGING ///////////
|
||||
|
||||
// little endian
|
||||
void print_bits(void* ptr, int32_t length);
|
||||
|
||||
|
||||
#endif /* ENCODE_H_ */
|
||||
|
64
src/murmurhash2.c
Executable file
64
src/murmurhash2.c
Executable file
@ -0,0 +1,64 @@
|
||||
//-----------------------------------------------------------------------------
// MurmurHash2, by Austin Appleby

// Note - This code makes one assumption about how your machine behaves -

// 1. sizeof(int) == 4

// And it has a few limitations -

// 1. It will not work incrementally.
// 2. It will not produce the same results on little-endian and big-endian
//    machines. ->TODO

/*
 * Computes the 32-bit MurmurHash2 of a byte array.
 *
 * key  : pointer to the bytes to hash (may be unaligned).
 * len  : number of bytes to hash; values <= 0 hash no data bytes.
 * seed : value mixed with len to form the initial hash state.
 *
 * Returns the hash code. Input bytes are loaded in the machine's native byte
 * order, so the result depends on endianness.
 */
unsigned int murmurhash2(const void * key, int len, const unsigned int seed)
{
	// 'm' and 'r' are mixing constants generated offline.
	// They're not really 'magic', they just happen to work well.

	const unsigned int m = 0x5bd1e995;
	const int r = 24;

	// Initialize the hash to a 'random' value

	unsigned int h = seed ^ len;

	// Mix 4 bytes at a time into the hash

	const unsigned char * data = (const unsigned char *)key;

	while(len >= 4)
	{
		// Load the next 4 bytes one by one through an unsigned char view of k.
		// Dereferencing data as an (unsigned int *) would be undefined behaviour
		// when data is not suitably aligned, and would break strict aliasing;
		// the byte-wise copy keeps the native byte order of the original read.
		unsigned int k = 0;
		unsigned char * k_bytes = (unsigned char *)&k;

		k_bytes[0] = data[0];
		k_bytes[1] = data[1];
		k_bytes[2] = data[2];
		k_bytes[3] = data[3];

		k *= m;
		k ^= k >> r;
		k *= m;

		h *= m;
		h ^= k;

		data += 4;
		len -= 4;
	}

	// Handle the last few bytes of the input array.
	// Each case intentionally falls through to the next one.

	switch(len)
	{
	case 3: h ^= (unsigned int)data[2] << 16;
		/* fallthrough */
	case 2: h ^= (unsigned int)data[1] << 8;
		/* fallthrough */
	case 1: h ^= data[0];
		h *= m;
		break;
	default:
		break;
	}

	// Do a few final mixes of the hash to ensure the last few
	// bytes are well-incorporated.

	h ^= h >> 13;
	h *= m;
	h ^= h >> 15;

	return h;
}
|
11
src/murmurhash2.h
Executable file
11
src/murmurhash2.h
Executable file
@ -0,0 +1,11 @@
|
||||
|
||||
#ifndef BLOOM_MURMURHASH2_H_
#define BLOOM_MURMURHASH2_H_


/**
 * @brief Generates and returns a hash code from a byte array.
 *
 * @param key A pointer to the bytes to hash.
 * @param len The number of bytes to hash.
 * @param seed The value used to initialize the hash.
 *
 * @returns The hash code.
 */
unsigned int murmurhash2(const void * key, int len, const unsigned int seed);


#endif /* BLOOM_MURMURHASH2_H_ */
|
1176
src/obiarray.c
1176
src/obiarray.c
File diff suppressed because it is too large
Load Diff
285
src/obiarray.h
285
src/obiarray.h
@ -1,285 +0,0 @@
|
||||
/****************************************************************************
|
||||
* OBIDMS array header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obiarray.h
|
||||
* @author Celine Mercier
|
||||
* @date October 19th 2015
|
||||
* @brief Header file for handling arrays for storing and retrieving byte arrays (i.e. coding for character strings).
|
||||
*/
|
||||
|
||||
|
||||
#ifndef OBIARRAY_H_
|
||||
#define OBIARRAY_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include "obidms.h"
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
#define ARRAY_MAX_NAME (2048) /**< The maximum length of an array name.
|
||||
*/
|
||||
#define ARRAY_GROWTH_FACTOR (2) /**< The growth factor when an array is enlarged.
|
||||
*/
|
||||
#define BYTE_ARRAY_HEADER_SIZE (5) /**< The size of the header of a byte array.
|
||||
*/
|
||||
|
||||
|
||||
typedef char byte_t; /**< Defining byte type since data is stored in bits
|
||||
* and char (stored on one byte) is the smallest addressable unit.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS array data header structure.
|
||||
*/
|
||||
typedef struct OBIDMS_array_data_header {
|
||||
int header_size; /**< Size of the header in bytes.
|
||||
*/
|
||||
index_t data_size_used; /**< Size of the data used in bytes.
|
||||
*/
|
||||
index_t data_size_max; /**< Max size of the data in bytes.
|
||||
*/
|
||||
index_t nb_items; /**< Number of items.
|
||||
*/
|
||||
char array_name[ARRAY_MAX_NAME+1]; /**< The array name as a NULL terminated string.
|
||||
*/
|
||||
time_t creation_date; /**< Date of creation of the file.
|
||||
*/
|
||||
} OBIDMS_array_data_header_t, *OBIDMS_array_data_header_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS array data structure.
|
||||
*/
|
||||
typedef struct OBIDMS_array_data {
|
||||
OBIDMS_array_data_header_p header; /**< A pointer to the header of the array data.
|
||||
*/
|
||||
byte_t* data; /**< A pointer to the beginning of the data.
|
||||
*/
|
||||
} OBIDMS_array_data_t, *OBIDMS_array_data_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS array header structure.
|
||||
*/
|
||||
typedef struct OBIDMS_array_header {
|
||||
int header_size; /**< Size of the header in bytes.
|
||||
*/
|
||||
size_t array_size; /**< Size of the array in bytes.
|
||||
*/
|
||||
index_t nb_items; /**< Number of items in the array.
|
||||
*/
|
||||
index_t nb_items_max; /**< Maximum number of items in the array before it has to be enlarged.
|
||||
*/
|
||||
char array_name[ARRAY_MAX_NAME+1]; /**< The array name as a NULL terminated string.
|
||||
*/
|
||||
time_t creation_date; /**< Date of creation of the file.
|
||||
*/
|
||||
} OBIDMS_array_header_t, *OBIDMS_array_header_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS array structure.
|
||||
*/
|
||||
typedef struct OBIDMS_array {
|
||||
OBIDMS_array_header_p header; /**< A pointer to the header of the array.
|
||||
*/
|
||||
index_t* first; /**< A pointer to the beginning of the array itself.
|
||||
*/
|
||||
OBIDMS_array_data_p data; /**< A pointer to the structure containing the data
|
||||
* that the array references.
|
||||
*/
|
||||
DIR* directory; /**< A directory entry usable to
|
||||
* refer and scan the array directory.
|
||||
*/
|
||||
} OBIDMS_array_t, *OBIDMS_array_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if an obiarray already exists or not.
|
||||
*
|
||||
* @param dms The OBIDMS to which the obiarray belongs.
|
||||
* @param array_name The name of the obiarray.
|
||||
*
|
||||
* @returns A value indicating whether the obiarray exists or not.
|
||||
* @retval 1 if the obiarray exists.
|
||||
* @retval 0 if the obiarray does not exist.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_array_exists(OBIDMS_p dms, const char* array_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an obiarray and creates it if it does not already exist.
|
||||
*
|
||||
* Note: An obiarray is made of two files (referred to by two structures).
|
||||
* One file contains the indices referring to the data, and the other
|
||||
* file contains the data itself. The obiarray as a whole is referred
|
||||
* to via the OBIDMS_array structure.
|
||||
*
|
||||
* @param dms The OBIDMS to which the obiarray belongs.
|
||||
* @param array_name The name of the obiarray.
|
||||
*
|
||||
* @returns A pointer to the obiarray structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_array_p obi_array(OBIDMS_p dms, const char* array_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Creates an obiarray. Fails if it already exists.
|
||||
*
|
||||
* Note: An obiarray is made of two files (referred to by two structures).
|
||||
* One file contains the indices referring to the data, and the other
|
||||
* file contains the data itself. The obiarray as a whole is referred
|
||||
* to via the OBIDMS_array structure.
|
||||
*
|
||||
* @param dms The OBIDMS to which the obiarray belongs.
|
||||
* @param array_name The name of the obiarray.
|
||||
*
|
||||
* @returns A pointer to the newly created obiarray structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_array_p obi_create_array(OBIDMS_p dms, const char* array_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an obiarray. Fails if it does not already exist.
|
||||
*
|
||||
* Note: An obiarray is made of two files (referred to by two structures).
|
||||
* One file contains the indices referring to the data, and the other
|
||||
* file contains the data itself. The obiarray as a whole is referred
|
||||
* to via the OBIDMS_array structure.
|
||||
*
|
||||
* @param dms The OBIDMS to which the obiarray belongs.
|
||||
* @param array_name The name of the obiarray.
|
||||
*
|
||||
* @returns A pointer to the obiarray structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_array_p obi_open_array(OBIDMS_p dms, const char* array_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Closes an obiarray.
|
||||
*
|
||||
* Note: An obiarray is made of two files (referred to by two structures).
|
||||
* One file contains the indices referring to the data, and the other
|
||||
* file contains the data itself. The obiarray as a whole is referred
|
||||
* to via the OBIDMS_array structure.
|
||||
*
|
||||
* @param array A pointer to the obiarray structure to close and free.
|
||||
*
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_close_array(OBIDMS_array_p array);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Adds a value (byte array) in an obiarray, checking first if it is already in it.
|
||||
*
|
||||
* @warning The byte array to add must already be encoded and contain its header.
|
||||
*
|
||||
* @param array A pointer to the obiarray.
|
||||
* @param value The byte array to add in the obiarray.
|
||||
*
|
||||
* @returns The index of the value, whether it was added or already in the obiarray.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_array_add(OBIDMS_array_p array, byte_t* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value (byte array) in an obiarray.
|
||||
*
|
||||
* @warning The byte array recovered is encoded and contains its header.
|
||||
*
|
||||
* @param array A pointer to the obiarray.
|
||||
* @param index The index of the value in the data array.
|
||||
*
|
||||
* @returns A pointer to the byte array recovered.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t* obi_array_get(OBIDMS_array_p array, index_t index);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Searches a value (byte array) in an obiarray performing a binary search.
|
||||
*
|
||||
* @warning The byte array to search must already be encoded and contain its header.
|
||||
*
|
||||
* @param array A pointer to the obiarray.
|
||||
* @param value The byte array to search for in the obiarray.
|
||||
*
|
||||
* @returns If the value is found, its data index is returned.
|
||||
* If the value is not found, the array index indicating where the value's data index
|
||||
* should be in the array is returned in the form (- (index + 1)), as data indices in an
|
||||
* obiarray are sorted according to the ascending order of the values (byte arrays) themselves.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_array_search(OBIDMS_array_p array, byte_t* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a character string to a byte array with a header.
|
||||
*
|
||||
* @warning The byte array must be freed by the caller.
|
||||
*
|
||||
* @param value The character string to convert.
|
||||
*
|
||||
* @returns A pointer to the byte array created.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
byte_t* obi_str_to_obibytes(char* value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts a byte array to a character string.
|
||||
*
|
||||
* @param value_b The byte array to convert.
|
||||
*
|
||||
* @returns A pointer to the character string contained in the byte array.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
const char* obi_obibytes_to_str(byte_t* value_b);
|
||||
|
||||
|
||||
#endif /* OBIARRAY_H_ */
|
||||
|
2219
src/obiavl.c
Normal file
2219
src/obiavl.c
Normal file
File diff suppressed because it is too large
Load Diff
414
src/obiavl.h
Normal file
414
src/obiavl.h
Normal file
@ -0,0 +1,414 @@
|
||||
/****************************************************************************
|
||||
* OBIDMS AVL tree header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obiavl.h
|
||||
* @author Celine Mercier
|
||||
* @date December 3rd 2015
|
||||
* @brief Header file for handling AVL trees for storing and retrieving blobs.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef OBIAVL_H_
|
||||
#define OBIAVL_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "obidms.h"
|
||||
#include "obiblob.h"
|
||||
#include "obitypes.h"
|
||||
#include "bloom.h"
|
||||
#include "utils.h"
|
||||
#include "encode.h"
|
||||
|
||||
|
||||
#define MAX_NB_OF_AVLS_IN_GROUP (100) /**< The maximum number of AVL trees in a group. // TODO discuss
|
||||
*/
|
||||
#define MAX_NODE_COUNT_PER_AVL (10000000) /**< The maximum number of nodes in an AVL tree.
|
||||
* Only used to decide when to create a new AVL in a group, and to initialize the bloom filter // TODO discuss.
|
||||
*/
|
||||
#define MAX_DATA_SIZE_PER_AVL (1073741824) /**< The maximum size of the data referred to by an AVL tree in a group.
|
||||
* Only used to decide when to create a new AVL in a group.
|
||||
* Should not be greater than int32_t max (2,147,483,647), as indexes will have to be stored on 32 bits.
|
||||
* Here 1073741824 B = 1 GB
|
||||
*/
|
||||
#define AVL_MAX_DEPTH (1024) /**< The maximum depth of an AVL tree. Used to save paths through the tree.
|
||||
*/
|
||||
#define AVL_MAX_NAME (1024) /**< The maximum length of an AVL tree name.
|
||||
*/
|
||||
#define AVL_GROWTH_FACTOR (2) /**< The growth factor when an AVL tree is enlarged.
|
||||
*/
|
||||
/*
 * Both macros expand to an expression that uses a variable named `avl`, which
 * must be in scope wherever they are used. The `node` argument and the whole
 * expansion are parenthesized so that the result can be safely combined with
 * other operators (e.g. LEFT_CHILD(node)->value) and so that `node` can be
 * any pointer expression.
 */
#define LEFT_CHILD(node) ((avl->tree)+((node)->left_child))			/**< Pointer to the left child of a node in an AVL tree.
 																 	 */
#define RIGHT_CHILD(node) ((avl->tree)+((node)->right_child))		/**< Pointer to the right child of a node in an AVL tree.
 																 	 */
|
||||
|
||||
|
||||
/**
|
||||
* @brief AVL tree node structure.
|
||||
*/
|
||||
typedef struct AVL_node {
|
||||
index_t left_child; /**< Index of left less child node.
|
||||
*/
|
||||
index_t right_child; /**< Index of right greater child node.
|
||||
*/
|
||||
int8_t balance_factor; /**< Balance factor of the node.
|
||||
*/
|
||||
index_t value; /**< Index of the value associated with the node in the data array.
|
||||
*/
|
||||
uint64_t crc64; /**< Cyclic Redundancy Check code on 64 bits associated with the value.
|
||||
*/
|
||||
} AVL_node_t, *AVL_node_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS AVL tree data header structure.
|
||||
*/
|
||||
typedef struct OBIDMS_avl_data_header {
|
||||
int header_size; /**< Size of the header in bytes.
|
||||
*/
|
||||
index_t data_size_used; /**< Size of the data used in bytes.
|
||||
*/
|
||||
index_t data_size_max; /**< Max size of the data in bytes.
|
||||
*/
|
||||
index_t nb_items; /**< Number of items.
|
||||
*/
|
||||
char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a NULL terminated string.
|
||||
*/
|
||||
time_t creation_date; /**< Date of creation of the file.
|
||||
*/
|
||||
} OBIDMS_avl_data_header_t, *OBIDMS_avl_data_header_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS AVL tree data structure.
|
||||
*/
|
||||
typedef struct OBIDMS_avl_data {
|
||||
OBIDMS_avl_data_header_p header; /**< A pointer to the header of the AVL tree data.
|
||||
*/
|
||||
byte_t* data; /**< A pointer to the beginning of the data.
|
||||
*/
|
||||
int data_fd; /**< File descriptor of the file containing the data.
|
||||
*/
|
||||
} OBIDMS_avl_data_t, *OBIDMS_avl_data_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS AVL tree header structure.
|
||||
*/
|
||||
typedef struct OBIDMS_avl_header {
|
||||
int header_size; /**< Size of the header in bytes.
|
||||
*/
|
||||
size_t avl_size; /**< Size of the AVL tree in bytes.
|
||||
*/
|
||||
index_t nb_items; /**< Number of items in the AVL tree.
|
||||
*/
|
||||
index_t nb_items_max; /**< Maximum number of items in the AVL tree before it has to be enlarged.
|
||||
*/
|
||||
index_t root_idx; /**< Index of the root of the AVL tree.
|
||||
*/
|
||||
char avl_name[AVL_MAX_NAME+1]; /**< The AVL tree name as a NULL terminated string.
|
||||
*/
|
||||
time_t creation_date; /**< Date of creation of the file.
|
||||
*/
|
||||
bloom_t bloom_filter; /**< Bloom filter associated with the AVL tree, enabling to know if a value
|
||||
* might already be stored in the data referred to by the tree.
|
||||
*/
|
||||
} OBIDMS_avl_header_t, *OBIDMS_avl_header_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS AVL tree structure.
|
||||
*/
|
||||
typedef struct OBIDMS_avl {
|
||||
OBIDMS_p dms; /**< A pointer to the OBIDMS structure to which the AVL tree belongs.
|
||||
*/
|
||||
OBIDMS_avl_header_p header; /**< A pointer to the header of the AVL tree.
|
||||
*/
|
||||
struct AVL_node* tree; /**< A pointer to the root of the AVL tree.
|
||||
*/
|
||||
index_t path_idx[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of node indices.
|
||||
*/
|
||||
int8_t path_dir[AVL_MAX_DEPTH]; /**< The path taken to a node from the root as an array of directions
|
||||
* (0 for left, -1 for right).
|
||||
*/
|
||||
OBIDMS_avl_data_p data; /**< A pointer to the structure containing the data
|
||||
* that the AVL tree references.
|
||||
*/
|
||||
DIR* directory; /**< A directory entry usable to
|
||||
* refer and scan the AVL tree directory.
|
||||
*/
|
||||
int dir_fd; /**< The file descriptor of the directory entry
|
||||
* usable to refer and scan the AVL tree directory.
|
||||
*/
|
||||
int avl_fd; /**< The file descriptor of the file containing the AVL tree.
|
||||
*/
|
||||
} OBIDMS_avl_t, *OBIDMS_avl_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief OBIDMS AVL tree group structure.
|
||||
*/
|
||||
typedef struct OBIDMS_avl_group {
|
||||
OBIDMS_avl_p sub_avls[MAX_NB_OF_AVLS_IN_GROUP]; /**< Array containing the pointers to the AVL trees of the group.
|
||||
*/
|
||||
int current_avl_idx; /**< Index in the sub_avls array of the AVL tree currently being filled.
|
||||
*/
|
||||
char name[AVL_MAX_NAME+1]; /**< Base name of the AVL group. The AVL trees in it have names of the form basename_idx.
|
||||
*/
|
||||
OBIDMS_p dms; /**< Pointer to the OBIDMS structure to which the AVL group belongs.
|
||||
*/
|
||||
bool writable; /**< Indicates whether the AVL group is read-only or not.
|
||||
*/
|
||||
size_t counter; /**< Indicates by how many threads/programs (TODO) the AVL group is used.
|
||||
*/
|
||||
} OBIDMS_avl_group_t, *OBIDMS_avl_group_p;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if an AVL tree or AVL tree group already exists or not.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree or AVL tree group belongs.
|
||||
* @param avl_name The name of the AVL tree or the base name of the AVL tree group.
|
||||
*
|
||||
* @returns A value indicating whether the AVL tree or AVL tree group exists or not.
|
||||
* @retval 1 if the AVL tree or AVL tree group exists.
|
||||
* @retval 0 if the AVL tree or AVL tree group does not exist.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_avl_exists(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Creates an AVL tree. Fails if it already exists.
|
||||
*
|
||||
* Note: An AVL tree is made of two files (referred to by two structures).
|
||||
* One file contains the tree referring to the data, and the other
|
||||
* file contains the data itself. The AVL tree as a whole is referred
|
||||
* to via the OBIDMS_avl structure. An AVL tree is stored in a directory
|
||||
* with the same name, or with the base name of the AVL group if it is
|
||||
* part of an AVL group.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The name of the AVL tree.
|
||||
* @param avl_idx The index of the AVL tree if it is part of an AVL group,
|
||||
* or -1 if it is not part of an AVL group.
|
||||
*
|
||||
* @returns A pointer to the newly created AVL tree structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_p obi_create_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an AVL tree in read-only mode. Fails if it does not already exist.
|
||||
*
|
||||
* Note: An AVL tree is made of two files (referred to by two structures).
|
||||
* One file contains the tree referring to the data, and the other
|
||||
* file contains the data itself. The AVL tree as a whole is referred
|
||||
* to via the OBIDMS_avl structure.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The name of the AVL tree.
|
||||
* @param avl_idx The index of the AVL tree if it is part of an AVL group,
|
||||
* or -1 if it is not part of an AVL group.
|
||||
*
|
||||
* @returns A pointer to the AVL tree structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_p obi_open_avl(OBIDMS_p dms, const char* avl_name, int avl_idx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an AVL tree group and creates it if it does not already exist.
|
||||
*
|
||||
* Note: An AVL tree group is composed of multiple AVL trees that all have the
|
||||
* same base name, and an index differentiating them.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The base name of the AVL tree group.
|
||||
*
|
||||
* @returns A pointer to the AVL tree group structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_group_p obi_avl_group(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Creates an AVL tree group.
|
||||
*
|
||||
* Note: An AVL tree group is composed of multiple AVL trees that all have the
|
||||
* same base name, and an index differentiating them.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The base name of the AVL tree group.
|
||||
*
|
||||
* @returns A pointer to the AVL tree group structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_group_p obi_create_avl_group(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an AVL tree group in read-only mode.
|
||||
*
|
||||
* Note: An AVL tree group is composed of multiple AVL trees that all have the
|
||||
* same base name, and an index differentiating them.
|
||||
*
|
||||
* @param dms The OBIDMS to which the AVL tree belongs.
|
||||
* @param avl_name The base name of the AVL tree group.
|
||||
*
|
||||
* @returns A pointer to the AVL tree group structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_avl_group_p obi_open_avl_group(OBIDMS_p dms, const char* avl_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Closes an AVL tree.
|
||||
*
|
||||
* @param avl A pointer to the AVL tree structure to close and free.
|
||||
*
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_close_avl(OBIDMS_avl_p avl);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Closes an AVL tree group.
|
||||
*
|
||||
* @param avl_group A pointer to the AVL tree group structure to close and free.
|
||||
*
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_close_avl_group(OBIDMS_avl_group_p avl_group);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value (blob) in an AVL tree.
|
||||
*
|
||||
* @warning The blob recovered must be decoded to get the original value.
|
||||
* @warning The blob recovered is mapped in memory.
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param index The index of the value in the data array.
|
||||
*
|
||||
* @returns A pointer to the blob recovered.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_blob_p obi_avl_get(OBIDMS_avl_p avl, index_t index);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Adds a value (blob) in an AVL tree.
|
||||
*
|
||||
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
|
||||
* @warning If the value is already in the AVL tree, an error will be triggered. // TODO to discuss
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param value The blob to add in the AVL tree.
|
||||
*
|
||||
* @returns The index of the value newly added in the AVL tree.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_avl_add(OBIDMS_avl_p avl, Obi_blob_p value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Finds a value (blob) in an AVL tree.
|
||||
*
|
||||
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
|
||||
*
|
||||
* @param avl A pointer to the AVL tree.
|
||||
* @param value The blob to find in the AVL tree.
|
||||
*
|
||||
* @returns The data index of the value.
|
||||
* @retval -1 if the value is not in the tree.
|
||||
*
|
||||
* @since December 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_avl_find(OBIDMS_avl_p avl, Obi_blob_p value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a value (blob) in an AVL tree group.
|
||||
*
|
||||
* @warning The blob recovered must be decoded to get the original value.
|
||||
* @warning The blob recovered is mapped in memory.
|
||||
*
|
||||
* @param avl_group A pointer to the AVL tree group.
|
||||
* @param idx The index of the value in the form of a 64-bit integer
|
||||
* with the 32 left-most bits coding for the index of the tree of
|
||||
* the group in which the value is stored, and the 32 right-most bits
|
||||
* coding for the index at which the value is stored in that AVL tree.
|
||||
*
|
||||
* @returns A pointer to the blob recovered.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_blob_p obi_avl_group_get(OBIDMS_avl_group_p avl_group, index_t idx);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Adds a value (blob) in an AVL tree group, checking if it is already in it.
|
||||
*
|
||||
* @warning The value given must already be encoded into a blob structure (Obi_blob_t).
|
||||
*
|
||||
* @param avl_group A pointer to the AVL tree group.
|
||||
* @param value The blob to add in the AVL tree group.
|
||||
*
|
||||
* @returns The index of the value newly added in the AVL tree group, in the form of a
|
||||
* 64-bit integer with the 32 left-most bits coding for the index of the tree
|
||||
* of the group in which the value is stored, and the 32 right-most bits
|
||||
* coding for the index at which the value is stored in that AVL tree.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
index_t obi_avl_group_add(OBIDMS_avl_group_p avl_group, Obi_blob_p value);
|
||||
|
||||
|
||||
#endif /* OBIAVL_H_ */
|
||||
|
91
src/obiblob.c
Normal file
91
src/obiblob.c
Normal file
@ -0,0 +1,91 @@
|
||||
/****************************************************************************
|
||||
* Obiblob functions *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obiblob.c
|
||||
* @author Celine Mercier
|
||||
* @date April 11th 2016
|
||||
* @brief Functions handling Obiblob structures.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "obiblob.h"
|
||||
#include "obierrno.h"
|
||||
#include "obitypes.h" // For byte_t type
|
||||
#include "obidebug.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
// TODO: endianness problem?
|
||||
|
||||
|
||||
/*
 * Builds a newly allocated blob holding a copy of an encoded value.
 *
 * The blob is allocated in one piece: the fixed-size Obi_blob_t fields
 * followed by length_encoded_value bytes for the encoded value.
 *
 * Returns a pointer to the new blob, or NULL if the allocation failed
 * (in which case the OBI errno is set to OBI_MALLOC_ERROR).
 */
Obi_blob_p obi_blob(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value)
{
	// Allocate the structure and the room for the encoded value at once
	Obi_blob_p new_blob = malloc(sizeof(Obi_blob_t) + length_encoded_value);

	if (new_blob != NULL)
	{
		// Copy the encoded value right after the fixed-size fields
		memcpy(new_blob->value, encoded_value, length_encoded_value);

		// Initial length (in bytes) of the decoded value
		new_blob->length_decoded_value = length_decoded_value;

		// Length (in bytes) of the encoded value
		new_blob->length_encoded_value = length_encoded_value;

		// Number of bits on which each element is encoded
		new_blob->element_size = element_size;

		return new_blob;
	}

	// Allocation failure: report it and give up
	obi_set_errno(OBI_MALLOC_ERROR);
	obidebug(1, "\nError allocating memory for a blob");
	return NULL;
}
|
||||
|
||||
|
||||
/*
 * Compares two blobs.
 *
 * The comparison keys are, in order: the element size, the length of the
 * encoded value, the length of the decoded value (skipped for blobs whose
 * element size is ELEMENT_SIZE_STR) and finally the encoded bytes themselves.
 *
 * Returns 0 if the blobs are equal, otherwise the (non-zero) difference
 * between the first key that differs in value_1 and in value_2.
 */
int obi_blob_compare(Obi_blob_p value_1, Obi_blob_p value_2)
{
	int32_t i;
	int     diff;

	// Different element sizes
	if (value_1->element_size != value_2->element_size)
		return (value_1->element_size - value_2->element_size);

	// Different encoded lengths
	if (value_1->length_encoded_value != value_2->length_encoded_value)
		return (value_1->length_encoded_value - value_2->length_encoded_value);

	// Different decoded lengths (not checked for character strings,
	// because then length_decoded_value == length_encoded_value)
	if ((value_1->element_size != ELEMENT_SIZE_STR) &&
	    (value_1->length_decoded_value != value_2->length_decoded_value))
		return (value_1->length_decoded_value - value_2->length_decoded_value);

	// Same sizes: compare byte by byte and stop at the first difference
	for (i = 0; i < value_1->length_encoded_value; i++)
	{
		diff = value_1->value[i] - value_2->value[i];
		if (diff != 0)
			return diff;
	}

	return 0;
}
|
||||
|
||||
|
||||
/*
 * Returns the size in bytes of a blob: the fixed-size header plus the
 * encoded value that trails it.
 */
int obi_blob_sizeof(Obi_blob_p value)
{
    size_t header_size = sizeof(Obi_blob_t);

    return (header_size + (value->length_encoded_value));
}
|
||||
|
98
src/obiblob.h
Normal file
98
src/obiblob.h
Normal file
@ -0,0 +1,98 @@
|
||||
/****************************************************************************
|
||||
* Obiblob header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obiblob.h
|
||||
* @author Celine Mercier
|
||||
* @date November 18th 2015
|
||||
* @brief Header file for handling Obi_blob structures.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef OBIBLOB_H_
|
||||
#define OBIBLOB_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
#define ELEMENT_SIZE_STR (8) /**< The size of an element from a value of type character string.
|
||||
*/
|
||||
#define ELEMENT_SIZE_SEQ_2 (2) /**< The size of an element from a value of type DNA sequence encoded on 2 bits.
|
||||
*/
|
||||
#define ELEMENT_SIZE_SEQ_4 (4) /**< The size of an element from a value of type DNA sequence encoded on 4 bits.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Blob structure, for handling encoded values.
|
||||
*/
|
||||
typedef struct Obi_blob {
|
||||
uint8_t element_size; /**< Size in bits of one element from the encoded value.
|
||||
*/
|
||||
int32_t length_encoded_value; /**< Length in bytes of the encoded value.
|
||||
*/
|
||||
int32_t length_decoded_value; /**< Length in bytes of the decoded value.
|
||||
*/
|
||||
byte_t value[]; /**< Encoded value.
|
||||
*/
|
||||
} Obi_blob_t, *Obi_blob_p;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function building a blob structure.
|
||||
*
|
||||
* @param encoded_value A pointer to the encoded value that will be stored in the blob.
|
||||
* @param element_size The size in bits of one element from the encoded value.
|
||||
* @param length_encoded_value The length in bytes of the encoded value.
|
||||
* @param length_decoded_value The length in bytes of the decoded value.
|
||||
*
|
||||
* @returns A pointer to the created blob structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_blob_p obi_blob(byte_t* encoded_value, uint8_t element_size, int32_t length_encoded_value, int32_t length_decoded_value);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function comparing two blobs.
|
||||
*
|
||||
* The encoding is compared first, then the length of the
|
||||
* values, then the values themselves.
|
||||
*
|
||||
* @param value_1 A pointer to the first blob structure.
|
||||
* @param value_2 A pointer to the second blob structure.
|
||||
*
|
||||
* @returns A value < 0 if value_1 < value_2,
|
||||
* a value > 0 if value_1 > value_2,
|
||||
* and 0 if value_1 == value_2.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_blob_compare(Obi_blob_p value_1, Obi_blob_p value_2);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Function calculating the size in bytes of a blob.
|
||||
*
|
||||
* @param value A pointer to the blob structure.
|
||||
*
|
||||
* @returns The size of the blob in bytes.
|
||||
*
|
||||
* @since October 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_blob_sizeof(Obi_blob_p value);
|
||||
|
||||
|
||||
#endif /* OBIBLOB_H_ */
|
||||
|
57
src/obiblob_indexer.c
Normal file
57
src/obiblob_indexer.c
Normal file
@ -0,0 +1,57 @@
|
||||
/****************************************************************************
|
||||
* Obiblob functions *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obiblob_indexer.c
|
||||
* @author Celine Mercier
|
||||
* @date April 12th 2016
|
||||
* @brief Functions handling the indexing and retrieval of blob structures.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "obiblob_indexer.h"
|
||||
#include "obidms.h"
|
||||
#include "obierrno.h"
|
||||
#include "obidebug.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
|
||||
//inline int obi_indexer_exists(OBIDMS_p dms, const char* name);
|
||||
|
||||
//inline Obi_indexer_p obi_indexer(OBIDMS_p dms, const char* name);
|
||||
|
||||
//inline Obi_indexer_p obi_create_indexer(OBIDMS_p dms, const char* name);
|
||||
|
||||
//inline Obi_indexer_p obi_open_indexer(OBIDMS_p dms, const char* name);
|
||||
|
||||
//inline int obi_close_indexer(Obi_indexer_p indexer);
|
||||
|
||||
//inline index_t obi_indexer_add(Obi_indexer_p indexer, Obi_blob_p value);
|
||||
|
||||
//inline Obi_blob_p obi_indexer_get(Obi_indexer_p indexer, index_t idx);
|
||||
|
||||
|
||||
/*
 * Builds an indexer name in the form columnname_columnversion_indexer.
 *
 * The name is written in a buffer of INDEXER_MAX_NAME bytes that the caller
 * has to free. Returns NULL (after calling obi_set_errno) if the allocation
 * fails or if the name does not fit in the buffer.
 */
char* obi_build_indexer_name(const char* column_name, obiversion_t column_version)
{
    char* indexer_name;
    int   written;

    indexer_name = (char*) malloc(INDEXER_MAX_NAME * sizeof(char));
    if (indexer_name == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating the memory for an indexer name");
        return NULL;
    }

    // Bounded formatting: a long column name must not write past the end of the buffer
    written = snprintf(indexer_name, INDEXER_MAX_NAME, "%s_%d_indexer", column_name, column_version);
    if ((written < 0) || (((size_t) written) >= ((size_t) INDEXER_MAX_NAME)))
    {
        obi_set_errno(OBI_INDEXER_ERROR);
        obidebug(1, "\nError building an indexer name: the name is too long");
        free(indexer_name);
        return NULL;
    }

    return indexer_name;
}
|
||||
|
159
src/obiblob_indexer.h
Normal file
159
src/obiblob_indexer.h
Normal file
@ -0,0 +1,159 @@
|
||||
/****************************************************************************
|
||||
* Blob indexer header file *
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* @file obiblob_indexer.h
|
||||
* @author Celine Mercier
|
||||
* @date April 12th 2016
|
||||
* @brief Header file for the functions handling the indexing of values.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef OBIBLOB_INDEXER_H_
|
||||
#define OBIBLOB_INDEXER_H_
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "obidms.h"
|
||||
#include "obiavl.h"
|
||||
#include "obitypes.h"
|
||||
#include "obiblob.h"
|
||||
|
||||
|
||||
#define INDEXER_MAX_NAME AVL_MAX_NAME /**< Macro to refer to the maximum size of the name of an indexer structure.
|
||||
*/
|
||||
|
||||
|
||||
typedef struct OBIDMS_avl_group Obi_indexer; /**< Typedef to refer to the used indexer structure.
|
||||
*/
|
||||
typedef OBIDMS_avl_group_p Obi_indexer_p; /**< Typedef to refer to the pointer of the used indexer structure.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if an indexer already exists or not.
|
||||
*
|
||||
* @param dms The OBIDMS to which the indexer belongs.
|
||||
* @param name The name of the indexer.
|
||||
*
|
||||
* @returns A value indicating whether the indexer exists or not.
|
||||
* @retval 1 if the indexer exists.
|
||||
* @retval 0 if the indexer does not exist.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
#define obi_indexer_exists obi_avl_exists
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an indexer and creates it if it does not already exist.
|
||||
*
|
||||
* @param dms The OBIDMS to which the indexer belongs.
|
||||
* @param name The name of the indexer.
|
||||
*
|
||||
* @returns A pointer to the indexer structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
#define obi_indexer obi_avl_group
|
||||
|
||||
|
||||
/**
|
||||
* @brief Creates an indexer.
|
||||
*
|
||||
* @param dms The OBIDMS to which the indexer belongs.
|
||||
* @param name The name of the indexer.
|
||||
*
|
||||
* @returns A pointer to the AVL tree group structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
#define obi_create_indexer obi_create_avl_group
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens an indexer.
|
||||
*
|
||||
* @param dms The OBIDMS to which the indexer belongs.
|
||||
* @param name The name of the indexer.
|
||||
*
|
||||
* @returns A pointer to the indexer structure.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
#define obi_open_indexer obi_open_avl_group
|
||||
|
||||
|
||||
/**
|
||||
* @brief Closes an indexer.
|
||||
*
|
||||
* @param indexer A pointer to the indexer structure to close and free.
|
||||
*
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
#define obi_close_indexer obi_close_avl_group
|
||||
|
||||
|
||||
/**
|
||||
* @brief Indexes a blob in an indexer and returns the index referring to the blob.
|
||||
*
|
||||
* @param indexer A pointer to the indexer.
|
||||
* @param value The blob to index.
|
||||
*
|
||||
* @returns The index of the blob newly added in the indexer.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
#define obi_indexer_add obi_avl_group_add
|
||||
|
||||
|
||||
/**
|
||||
* @brief Recovers a blob from an indexer.
|
||||
*
|
||||
* @param indexer A pointer to the indexer.
|
||||
* @param index The index of the blob in the indexer.
|
||||
*
|
||||
* @returns A pointer to the blob recovered.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
#define obi_indexer_get obi_avl_group_get
|
||||
|
||||
|
||||
/**
|
||||
* @brief Builds an indexer name in the form columnname_columnversion_indexer.
|
||||
*
|
||||
* @warning The returned pointer has to be freed by the caller.
|
||||
*
|
||||
* @param column_name The name of the column associated with the indexer.
|
||||
* @param column_version The version of the column associated with the indexer.
|
||||
*
|
||||
* @returns A pointer on the indexer name built.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_build_indexer_name(const char* column_name, obiversion_t column_version);
|
||||
|
||||
|
||||
#endif /* OBIBLOB_INDEXER_H_ */
|
||||
|
448
src/obidms.c
448
src/obidms.c
@ -13,6 +13,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/types.h>
|
||||
@ -22,7 +23,9 @@
|
||||
#include "obierrno.h"
|
||||
#include "obidebug.h"
|
||||
#include "obidmscolumn.h"
|
||||
#include "private_at_functions.h"
|
||||
#include "obiblob_indexer.h"
|
||||
#include "utils.h"
|
||||
#include "obilittlebigman.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
@ -54,6 +57,44 @@
|
||||
static char* build_directory_name(const char* dms_name);
|
||||
|
||||
|
||||
/**
|
||||
* Internal function building the informations file name from an OBIDMS name.
|
||||
*
|
||||
* The function builds the file name for the informations file of an OBIDMS.
|
||||
*
|
||||
* @warning The returned pointer has to be freed by the caller.
|
||||
*
|
||||
* @param dms_name The name of the OBIDMS.
|
||||
*
|
||||
* @returns A pointer to the file name.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
static char* build_infos_file_name(const char* dms_name);
|
||||
|
||||
|
||||
/**
|
||||
* Internal function creating the file containing basic informations on the OBIDMS.
|
||||
*
|
||||
* This file contains:
|
||||
* - The endianness of the platform
|
||||
*
|
||||
 * @note The informations file is created inside the OBIDMS directory and is closed before the function returns.
|
||||
*
|
||||
* @param dms_file_descriptor The file descriptor for the OBIDMS directory.
|
||||
* @param dms_name The name of the OBIDMS.
|
||||
*
|
||||
* @retval 0 if the operation was successfully completed.
|
||||
* @retval -1 if an error occurred.
|
||||
*
|
||||
* @since November 2015
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int create_dms_infos_file(int dms_file_descriptor, const char* dms_name);
|
||||
|
||||
|
||||
/************************************************************************
|
||||
*
|
||||
* D E F I N I T I O N O F T H E P R I V A T E F U N C T I O N S
|
||||
@ -65,7 +106,8 @@ static char* build_directory_name(const char* dms_name)
|
||||
char* directory_name;
|
||||
|
||||
// Build the database directory name
|
||||
if (asprintf(&directory_name, "%s.obidms", dms_name) < 0)
|
||||
directory_name = (char*) malloc((strlen(dms_name) + 8)*sizeof(char));
|
||||
if (sprintf(directory_name, "%s.obidms", dms_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_MEMORY_ERROR);
|
||||
obidebug(1, "\nProblem building an OBIDMS directory name");
|
||||
@ -85,6 +127,75 @@ static char* build_directory_name(const char* dms_name)
|
||||
}
|
||||
|
||||
|
||||
static char* build_infos_file_name(const char* dms_name)
|
||||
{
|
||||
char* file_name;
|
||||
|
||||
// Build file name
|
||||
file_name = (char*) malloc((strlen(dms_name) + 7)*sizeof(char));
|
||||
if (sprintf(file_name, "%s_infos", dms_name) < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_MEMORY_ERROR);
|
||||
obidebug(1, "\nProblem building an informations file name");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return file_name;
|
||||
}
|
||||
|
||||
|
||||
int create_dms_infos_file(int dms_file_descriptor, const char* dms_name)
|
||||
{
|
||||
char* file_name;
|
||||
int infos_file_descriptor;
|
||||
off_t file_size;
|
||||
bool little_endian;
|
||||
|
||||
file_size = sizeof(bool);
|
||||
|
||||
// Create file name
|
||||
file_name = build_infos_file_name(dms_name);
|
||||
if (file_name == NULL)
|
||||
return -1;
|
||||
|
||||
// Create file
|
||||
infos_file_descriptor = openat(dms_file_descriptor, file_name, O_RDWR | O_CREAT | O_EXCL, 0777);
|
||||
if (infos_file_descriptor < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError creating an informations file");
|
||||
free(file_name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(file_name);
|
||||
|
||||
// Truncate the infos file to the right size
|
||||
if (ftruncate(infos_file_descriptor, file_size) < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError truncating an informations file");
|
||||
close(infos_file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Write endianness
|
||||
little_endian = obi_is_little_endian();
|
||||
if (write(infos_file_descriptor, &little_endian, sizeof(bool)) < ((ssize_t) sizeof(bool)))
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError writing the endianness in an informations file");
|
||||
close(infos_file_descriptor);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Close file
|
||||
close(infos_file_descriptor);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
*
|
||||
* D E F I N I T I O N O F T H E P U B L I C F U N C T I O N S
|
||||
@ -94,8 +205,8 @@ static char* build_directory_name(const char* dms_name)
|
||||
int obi_dms_exists(const char* dms_name)
|
||||
{
|
||||
struct stat buffer;
|
||||
char *directory_name;
|
||||
int check_dir;
|
||||
char* directory_name;
|
||||
int check_dir;
|
||||
|
||||
// Build and check the directory name
|
||||
directory_name = build_directory_name(dms_name);
|
||||
@ -116,8 +227,8 @@ int obi_dms_exists(const char* dms_name)
|
||||
OBIDMS_p obi_create_dms(const char* dms_name)
|
||||
{
|
||||
char* directory_name;
|
||||
DIR* dms_dir;
|
||||
int dms_file_descriptor;
|
||||
DIR* dms_dir;
|
||||
int dms_file_descriptor;
|
||||
|
||||
// Build and check the directory name
|
||||
directory_name = build_directory_name(dms_name);
|
||||
@ -128,7 +239,10 @@ OBIDMS_p obi_create_dms(const char* dms_name)
|
||||
if (mkdir(directory_name, 00777) < 0)
|
||||
{
|
||||
if (errno == EEXIST)
|
||||
{
|
||||
obi_set_errno(OBIDMS_EXIST_ERROR);
|
||||
obidebug(1, "\nAn OBIDMS directory with the same name already exists in this directory.");
|
||||
}
|
||||
else
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nProblem creating an OBIDMS directory");
|
||||
@ -136,7 +250,7 @@ OBIDMS_p obi_create_dms(const char* dms_name)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Get file descriptor of DMS directory to create the arrays directory
|
||||
// Get file descriptor of DMS directory to create the indexer directory
|
||||
dms_dir = opendir(directory_name);
|
||||
if (dms_dir == NULL)
|
||||
{
|
||||
@ -145,25 +259,28 @@ OBIDMS_p obi_create_dms(const char* dms_name)
|
||||
free(directory_name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
free(directory_name);
|
||||
|
||||
dms_file_descriptor = dirfd(dms_dir);
|
||||
if (dms_file_descriptor < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nProblem getting the file descriptor of a newly created OBIDMS directory");
|
||||
free(directory_name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Create the arrays directory
|
||||
if (mkdirat(dms_file_descriptor, ARRAY_DIR_NAME, 00777) < 0)
|
||||
// Create the indexer directory
|
||||
if (mkdirat(dms_file_descriptor, INDEXER_DIR_NAME, 00777) < 0)
|
||||
{
|
||||
obi_set_errno(OBI_ARRAY_ERROR);
|
||||
obidebug(1, "\nProblem creating an arrays directory");
|
||||
free(directory_name);
|
||||
obi_set_errno(OBI_INDEXER_ERROR);
|
||||
obidebug(1, "\nProblem creating an indexer directory");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
free(directory_name);
|
||||
// Create the informations file
|
||||
if (create_dms_infos_file(dms_file_descriptor, dms_name) < 0)
|
||||
return NULL;
|
||||
|
||||
return obi_open_dms(dms_name);
|
||||
}
|
||||
@ -173,19 +290,36 @@ OBIDMS_p obi_open_dms(const char* dms_name)
|
||||
{
|
||||
OBIDMS_p dms;
|
||||
char* directory_name;
|
||||
DIR* directory;
|
||||
int dms_file_descriptor;
|
||||
char* infos_file_name;
|
||||
int infos_file_descriptor;
|
||||
bool little_endian_dms;
|
||||
bool little_endian_platform;
|
||||
|
||||
dms = NULL;
|
||||
|
||||
// Allocate the data structure
|
||||
dms = (OBIDMS_p) malloc(sizeof(OBIDMS_t));
|
||||
if (dms == NULL)
|
||||
{
|
||||
obi_set_errno(OBIDMS_MEMORY_ERROR);
|
||||
obidebug(1, "\nError allocating the memory for the OBIDMS structure");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Build and check the directory name
|
||||
directory_name = build_directory_name(dms_name);
|
||||
if (directory_name == NULL)
|
||||
{
|
||||
free(dms);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
strncpy(dms->directory_name, directory_name, OBIDMS_MAX_NAME);
|
||||
free(directory_name);
|
||||
|
||||
// Try to open the directory
|
||||
directory = opendir(directory_name);
|
||||
if (directory == NULL)
|
||||
dms->directory = opendir(dms->directory_name);
|
||||
if (dms->directory == NULL)
|
||||
{
|
||||
switch (errno)
|
||||
{
|
||||
@ -205,45 +339,90 @@ OBIDMS_p obi_open_dms(const char* dms_name)
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
}
|
||||
obidebug(1, "\nCan't open OBIDMS directory");
|
||||
free(directory_name);
|
||||
free(dms);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Allocate the data structure
|
||||
dms = (OBIDMS_p) malloc(sizeof(OBIDMS_t));
|
||||
if (dms == NULL)
|
||||
{
|
||||
obi_set_errno(OBIDMS_MEMORY_ERROR);
|
||||
obidebug(1, "\nError allocating the memory for the OBIDMS structure");
|
||||
free(directory_name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Initialize the data structure
|
||||
strcpy(dms->directory_name, directory_name);
|
||||
dms->directory = directory;
|
||||
|
||||
// Get file descriptor of DMS directory to open the arrays directory
|
||||
dms_file_descriptor = dirfd(directory);
|
||||
if (dms_file_descriptor < 0)
|
||||
// Get and store file descriptor of DMS directory to open the informations file
|
||||
dms->dir_fd = dirfd(dms->directory);
|
||||
if (dms->dir_fd < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError getting the file descriptor for a newly created OBIDMS directory");
|
||||
free(directory_name);
|
||||
closedir(dms->directory);
|
||||
free(dms);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Open the arrays directory
|
||||
dms->array_directory = private_opendirat(dms_file_descriptor, ARRAY_DIR_NAME);
|
||||
if (dms->array_directory == NULL)
|
||||
// Open informations file to check endianness
|
||||
infos_file_name = build_infos_file_name(dms_name);
|
||||
infos_file_descriptor = openat(dms->dir_fd, infos_file_name, O_RDONLY, 0777);
|
||||
if (infos_file_descriptor < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError opening the arrays directory");
|
||||
free(directory_name);
|
||||
obidebug(1, "\nError opening an informations file");
|
||||
closedir(dms->directory);
|
||||
free(dms);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
free(directory_name);
|
||||
free(infos_file_name);
|
||||
|
||||
// Check endianness of the platform and DMS
|
||||
little_endian_platform = obi_is_little_endian();
|
||||
if (read(infos_file_descriptor, &little_endian_dms, sizeof(bool)) < ((ssize_t) sizeof(bool)))
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError reading the endianness in an informations file");
|
||||
close(infos_file_descriptor);
|
||||
closedir(dms->directory);
|
||||
free(dms);
|
||||
return NULL;
|
||||
}
|
||||
if (little_endian_platform != little_endian_dms)
|
||||
{
|
||||
obi_set_errno(OBIDMS_BAD_ENDIAN_ERROR);
|
||||
obidebug(1, "\nError: The DMS and the platform have different endianness");
|
||||
close(infos_file_descriptor);
|
||||
closedir(dms->directory);
|
||||
free(dms);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
close(infos_file_descriptor);
|
||||
|
||||
dms->little_endian = little_endian_dms;
|
||||
|
||||
// Open the indexer directory
|
||||
dms->indexer_directory = opendir_in_dms(dms, INDEXER_DIR_NAME);
|
||||
if (dms->indexer_directory == NULL)
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError opening the indexer directory");
|
||||
closedir(dms->directory);
|
||||
free(dms);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Store the indexer directory's file descriptor
|
||||
dms->indexer_dir_fd = dirfd(dms->indexer_directory);
|
||||
if (dms->indexer_dir_fd < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_UNKNOWN_ERROR);
|
||||
obidebug(1, "\nError getting the file descriptor of the indexer directory");
|
||||
closedir(dms->indexer_directory);
|
||||
closedir(dms->directory);
|
||||
free(dms);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Initialize the list of opened columns
|
||||
dms->opened_columns = (Opened_columns_list_p) malloc(sizeof(Opened_columns_list_t));
|
||||
(dms->opened_columns)->nb_opened_columns = 0;
|
||||
|
||||
// Initialize the list of opened indexers
|
||||
dms->opened_indexers = (Opened_indexers_list_p) malloc(sizeof(Opened_indexers_list_t));
|
||||
(dms->opened_indexers)->nb_opened_indexers = 0;
|
||||
|
||||
return dms;
|
||||
}
|
||||
@ -272,6 +451,11 @@ int obi_close_dms(OBIDMS_p dms)
|
||||
{
|
||||
if (dms != NULL)
|
||||
{
|
||||
// Close all columns
|
||||
while ((dms->opened_columns)->nb_opened_columns > 0)
|
||||
obi_close_column(*((dms->opened_columns)->columns));
|
||||
|
||||
// Close dms and indexer directories
|
||||
if (closedir(dms->directory) < 0)
|
||||
{
|
||||
obi_set_errno(OBIDMS_MEMORY_ERROR);
|
||||
@ -279,10 +463,10 @@ int obi_close_dms(OBIDMS_p dms)
|
||||
free(dms);
|
||||
return -1;
|
||||
}
|
||||
if (closedir(dms->array_directory) < 0)
|
||||
if (closedir(dms->indexer_directory) < 0)
|
||||
{
|
||||
obi_set_errno(OBI_ARRAY_ERROR);
|
||||
obidebug(1, "\nError closing an array directory");
|
||||
obi_set_errno(OBI_INDEXER_ERROR);
|
||||
obidebug(1, "\nError closing an indexer directory");
|
||||
free(dms);
|
||||
return -1;
|
||||
}
|
||||
@ -292,3 +476,175 @@ int obi_close_dms(OBIDMS_p dms)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Tells whether a column with the given name (whatever its version) is in
 * the list of opened columns of the DMS.
 *
 * Returns 1 if such a column is listed, 0 otherwise.
 */
int obi_dms_is_column_name_in_list(OBIDMS_p dms, const char* column_name)
{
    Opened_columns_list_p opened = dms->opened_columns;
    int                   idx = 0;

    while (idx < opened->nb_opened_columns)
    {
        if (strcmp(opened->columns[idx]->header->name, column_name) == 0)
            return 1;    // Found it
        idx++;
    }

    return 0;
}
|
||||
|
||||
|
||||
/*
 * Looks for an already opened column with the given name and version in the
 * list of opened columns of the DMS.
 *
 * Returns the column if it is listed, NULL otherwise.
 */
OBIDMS_column_p obi_dms_get_column_from_list(OBIDMS_p dms, const char* column_name, obiversion_t version)
{
    Opened_columns_list_p opened = dms->opened_columns;
    OBIDMS_column_p       candidate;
    int                   idx;

    for (idx = 0; idx < opened->nb_opened_columns; idx++)
    {
        candidate = opened->columns[idx];

        if (strcmp(candidate->header->name, column_name) != 0)
            continue;

        // Same name: the version has to match too
        if (candidate->header->version == version)
            return candidate;
    }

    // Didn't find the column
    return NULL;
}
|
||||
|
||||
|
||||
/*
 * Appends a column to the list of opened columns of the DMS.
 *
 * The list is a fixed-size array of MAX_NB_OPENED_COLUMNS pointers: when it is
 * already full the column is not listed, obi_set_errno is called and a debug
 * message is emitted, instead of writing past the end of the array.
 */
void obi_dms_list_column(OBIDMS_p dms, OBIDMS_column_p column)  // TODO add check if column already in list?
{
    Opened_columns_list_p columns_list;

    columns_list = dms->opened_columns;

    if (columns_list->nb_opened_columns >= MAX_NB_OPENED_COLUMNS)
    {
        obi_set_errno(OBIDMS_UNKNOWN_ERROR);
        obidebug(1, "\nError listing a column: the maximum number of opened columns is reached");
        return;
    }

    (columns_list->columns)[columns_list->nb_opened_columns] = column;
    (columns_list->nb_opened_columns)++;
}
|
||||
|
||||
|
||||
/*
 * Removes a column, identified by its name and version, from the list of
 * opened columns of the DMS.
 *
 * The freed slot is filled with the last element of the list, so the order of
 * the list is not preserved.
 *
 * Returns 0 if the column was found and removed, -1 otherwise.
 */
int obi_dms_unlist_column(OBIDMS_p dms, OBIDMS_column_p column)
{
    Opened_columns_list_p opened = dms->opened_columns;
    OBIDMS_column_p       current;
    int                   last;
    int                   idx;

    for (idx = 0; idx < opened->nb_opened_columns; idx++)
    {
        current = opened->columns[idx];

        if ((strcmp(current->header->name, column->header->name) == 0)
            && (current->header->version == column->header->version))
        {
            // Found the column. Rearrange list
            last = opened->nb_opened_columns - 1;
            opened->nb_opened_columns = last;
            opened->columns[idx] = opened->columns[last];
            return 0;
        }
    }

    obidebug(1, "\nCould not find the column to delete from list of open columns");
    return -1;
}
|
||||
|
||||
|
||||
/*
 * Looks for an already opened indexer with the given name in the list of
 * opened indexers of the DMS.
 *
 * Returns the indexer if it is listed, NULL otherwise.
 */
Obi_indexer_p obi_dms_get_indexer_from_list(OBIDMS_p dms, const char* indexer_name)
{
    Opened_indexers_list_p opened = dms->opened_indexers;
    Obi_indexer_p          candidate;
    int                    idx;

    for (idx = 0; idx < opened->nb_opened_indexers; idx++)
    {
        candidate = opened->indexers[idx];

        if (strcmp(candidate->name, indexer_name) == 0)     // TODO get_name function indexer
            return candidate;    // Found the indexer already opened, return it
    }

    // Didn't find the indexer
    return NULL;
}
|
||||
|
||||
|
||||
/*
 * Appends an indexer to the list of opened indexers of the DMS.
 *
 * The list is a fixed-size array of MAX_NB_OPENED_INDEXERS pointers: when it is
 * already full the indexer is not listed, obi_set_errno is called and a debug
 * message is emitted, instead of writing past the end of the array.
 */
void obi_dms_list_indexer(OBIDMS_p dms, Obi_indexer_p indexer)
{
    Opened_indexers_list_p indexers_list;

    indexers_list = dms->opened_indexers;

    if (indexers_list->nb_opened_indexers >= MAX_NB_OPENED_INDEXERS)
    {
        obi_set_errno(OBI_INDEXER_ERROR);
        obidebug(1, "\nError listing an indexer: the maximum number of opened indexers is reached");
        return;
    }

    (indexers_list->indexers)[indexers_list->nb_opened_indexers] = indexer;
    (indexers_list->nb_opened_indexers)++;
}
|
||||
|
||||
|
||||
/*
 * Removes an indexer, identified by its name, from the list of opened
 * indexers of the DMS.
 *
 * The freed slot is filled with the last element of the list, so the order of
 * the list is not preserved.
 *
 * Returns 0 if the indexer was found and removed, -1 otherwise.
 */
int obi_dms_unlist_indexer(OBIDMS_p dms, Obi_indexer_p indexer)
{
    Opened_indexers_list_p opened = dms->opened_indexers;
    int                    last;
    int                    idx;

    for (idx = 0; idx < opened->nb_opened_indexers; idx++)
    {
        if (strcmp(opened->indexers[idx]->name, indexer->name) == 0)    // TODO get_name function indexer
        {
            // Found the indexer. Rearrange list
            last = opened->nb_opened_indexers - 1;
            opened->nb_opened_indexers = last;
            opened->indexers[idx] = opened->indexers[last];
            return 0;
        }
    }

    obidebug(1, "\nCould not find the indexer to delete from list of open indexers");
    return -1;
}
|
||||
|
||||
|
||||
/*
 * Builds the path to the DMS directory: the current working directory,
 * a '/' and the directory name stored in the DMS structure.
 *
 * The path is written in a buffer of MAX_PATH_LEN bytes that the caller has to
 * free. Returns NULL (after calling obi_set_errno) if the allocation fails,
 * if the current working directory can not be read, or if the path does not
 * fit in the buffer.
 */
char* obi_dms_get_dms_path(OBIDMS_p dms)
{
    char*  full_path;
    size_t needed_size;

    full_path = (char*) malloc((MAX_PATH_LEN)*sizeof(char));
    if (full_path == NULL)
    {
        obi_set_errno(OBI_MALLOC_ERROR);
        obidebug(1, "\nError allocating memory for the char* path to a file or directory");
        return NULL;
    }

    if (getcwd(full_path, MAX_PATH_LEN) == NULL)    // TODO store when opening
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "\nError getting the path to a file or directory");
        free(full_path);    // Do not leak the buffer on the error path
        return NULL;
    }

    // Current directory + '/' + DMS directory name + terminating null character
    needed_size = strlen(full_path) + 1 + strlen(dms->directory_name) + 1;
    if (needed_size > ((size_t) MAX_PATH_LEN))
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "\nError getting the path to a file or directory: the path is too long");
        free(full_path);
        return NULL;
    }

    strcat(full_path, "/");
    strcat(full_path, dms->directory_name);

    return full_path;
}
|
||||
|
||||
|
||||
/*
 * Builds the path to a file or directory located in the DMS directory:
 * the path to the DMS, a '/' and path_name.
 *
 * The path is built in the MAX_PATH_LEN bytes buffer returned by
 * obi_dms_get_dms_path(), which the caller has to free. Returns NULL if
 * the DMS path can not be built or if the full path does not fit in the buffer.
 */
char* obi_dms_get_full_path(OBIDMS_p dms, const char* path_name)
{
    char*  full_path;
    size_t needed_size;

    full_path = obi_dms_get_dms_path(dms);
    if (full_path == NULL)      // The error was already reported by obi_dms_get_dms_path
        return NULL;

    // DMS path + '/' + path name + terminating null character
    needed_size = strlen(full_path) + 1 + strlen(path_name) + 1;
    if (needed_size > ((size_t) MAX_PATH_LEN))
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "\nError getting the path to a file or directory: the path is too long");
        free(full_path);
        return NULL;
    }

    strcat(full_path, "/");
    strcat(full_path, path_name);

    return full_path;
}
|
||||
|
||||
|
||||
/*
 * Opens a directory located in the DMS directory, using the path built by
 * obi_dms_get_full_path().
 *
 * Returns the opened directory, or NULL if the path could not be built or
 * the directory could not be opened.
 */
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name)
{
    DIR*  opened_dir;
    char* absolute_path = obi_dms_get_full_path(dms, path_name);

    if (absolute_path == NULL)
        return NULL;

    opened_dir = opendir(absolute_path);
    if (opened_dir == NULL)
    {
        obi_set_errno(OBI_UTILS_ERROR);
        obidebug(1, "\nError opening a directory");
    }

    // The path is only needed to open the directory
    free(absolute_path);

    return opened_dir;
}
|
||||
|
220
src/obidms.h
220
src/obidms.h
@ -20,14 +20,55 @@
|
||||
#include <dirent.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "obierrno.h"
|
||||
#include "obitypes.h"
|
||||
|
||||
|
||||
#define OBIDMS_MAX_NAME (2048) /**< The maximum length of an OBIDMS name.
|
||||
*/
|
||||
#define ARRAY_DIR_NAME "arrays" /**< The name of the arrays directory.
|
||||
*/
|
||||
#define OBIDMS_MAX_NAME (2048) /**< The maximum length of an OBIDMS name.
|
||||
*/
|
||||
#define INDEXER_DIR_NAME "OBIBLOB_INDEXERS" /**< The name of the Obiblob indexer directory.
|
||||
*/
|
||||
#define TAXONOMY_DIR_NAME "TAXONOMY" /**< The name of the taxonomy directory.
|
||||
*/
|
||||
#define MAX_NB_OPENED_COLUMNS (100) /**< The maximum number of columns open at the same time.
|
||||
*/
|
||||
#define MAX_NB_OPENED_INDEXERS (1000) /**< The maximum number of indexers open at the same time.
|
||||
*/
|
||||
#define MAX_PATH_LEN 4096 /**< Maximum length for the character string defining a
|
||||
* file or directory path.
|
||||
*/
|
||||
|
||||
|
||||
struct OBIDMS_column; /**< Declarations to avoid circular dependencies. */
|
||||
typedef struct OBIDMS_column* OBIDMS_column_p; /**< Declarations to avoid circular dependencies. */
|
||||
|
||||
/**
|
||||
* @brief Structure listing the columns opened in a DMS, identified by their name and version number.
|
||||
*/
|
||||
typedef struct Opened_columns_list {
|
||||
int nb_opened_columns; /**< Number of opened columns.
|
||||
*/
|
||||
OBIDMS_column_p columns[MAX_NB_OPENED_COLUMNS]; /**< Array of pointers on the opened columns.
|
||||
*/
|
||||
} Opened_columns_list_t, *Opened_columns_list_p;
|
||||
|
||||
|
||||
// TODO Need to find a way to not refer to AVLs specifically
|
||||
struct OBIDMS_avl_group; /**< Declarations to avoid circular dependencies. */
|
||||
typedef struct OBIDMS_avl_group* OBIDMS_avl_group_p; /**< Declarations to avoid circular dependencies. */
|
||||
typedef OBIDMS_avl_group_p Obi_indexer_p; /**< Declarations to avoid circular dependencies. */
|
||||
|
||||
/**
|
||||
* @brief Structure listing the indexers opened in a DMS, identified by their name.
|
||||
*/
|
||||
typedef struct Opened_indexers_list {
|
||||
int nb_opened_indexers; /**< Number of opened indexers.
|
||||
*/
|
||||
Obi_indexer_p indexers[MAX_NB_OPENED_INDEXERS]; /**< Array of pointers on the opened indexers.
|
||||
*/
|
||||
} Opened_indexers_list_t, *Opened_indexers_list_p;
|
||||
|
||||
|
||||
/**
|
||||
@ -37,15 +78,27 @@
|
||||
* and opening of an OBITools Data Management System (DMS)
|
||||
*/
|
||||
typedef struct OBIDMS {
|
||||
char directory_name[OBIDMS_MAX_NAME+1]; /**< The name of the directory
|
||||
* containing the DMS.
|
||||
*/
|
||||
DIR* directory; /**< A directory entry usable to
|
||||
* refer and scan the database directory.
|
||||
*/
|
||||
DIR* array_directory; /**< A directory entry usable to
|
||||
* refer and scan the array directory.
|
||||
*/
|
||||
char directory_name[OBIDMS_MAX_NAME+1]; /**< The name of the directory
|
||||
* containing the DMS.
|
||||
*/
|
||||
DIR* directory; /**< A directory entry usable to
|
||||
* refer and scan the database directory.
|
||||
*/
|
||||
int dir_fd; /**< The file descriptor of the directory entry
|
||||
* usable to refer and scan the database directory.
|
||||
*/
|
||||
DIR* indexer_directory; /**< A directory entry usable to
|
||||
* refer and scan the indexer directory.
|
||||
*/
|
||||
int indexer_dir_fd; /**< The file descriptor of the directory entry
|
||||
* usable to refer and scan the indexer directory.
|
||||
*/
|
||||
bool little_endian; /**< Endianness of the database.
|
||||
*/
|
||||
Opened_columns_list_p opened_columns; /**< List of opened columns.
|
||||
*/
|
||||
Opened_indexers_list_p opened_indexers; /**< List of opened indexers.
|
||||
*/
|
||||
} OBIDMS_t, *OBIDMS_p;
|
||||
|
||||
|
||||
@ -54,7 +107,7 @@ typedef struct OBIDMS {
|
||||
*
|
||||
* @param dms_name A pointer to a C string containing the name of the database.
|
||||
*
|
||||
* @returns An integer value indicating the status of the database
|
||||
* @returns An integer value indicating the status of the database.
|
||||
* @retval 1 if the database exists.
|
||||
* @retval 0 if the database does not exist.
|
||||
* @retval -1 if an error occurred.
|
||||
@ -72,7 +125,7 @@ int obi_dms_exists(const char* dms_name);
|
||||
* if a directory with this name does not already exist
|
||||
* before creating the new database.
|
||||
*
|
||||
* A directory to store obiarrays is also created.
|
||||
* A directory to store Obiblob indexers is also created.
|
||||
*
|
||||
* @param dms_name A pointer to a C string containing the name of the database.
|
||||
* The actual directory name used to store the DMS will be
|
||||
@ -139,4 +192,141 @@ OBIDMS_p obi_dms(const char* dms_name);
|
||||
int obi_close_dms(OBIDMS_p dms);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Checks if a column with a given name is in the list of opened columns.
|
||||
*
|
||||
* @warning: Checking only the name means that the column can be of any version.
|
||||
*
|
||||
* @param dms The OBIDMS.
|
||||
* @param column_name The column name that should be looked for.
|
||||
*
|
||||
* @returns An integer value indicating whether there is at least one column with that name
|
||||
* in the list of opened columns.
|
||||
* @retval 0 if there is at least one column with that name in the list of opened columns.
|
||||
* @retval 1 if there is no column with that name in the list of opened columns.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_dms_is_column_name_in_list(OBIDMS_p dms, const char* column_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Returns a column identified by its name and its version number from the list of opened columns.
|
||||
*
|
||||
* @param dms The OBIDMS.
|
||||
* @param column_name The column name that should be looked for.
|
||||
* @param version The version number of the column that should be looked for.
|
||||
*
|
||||
* @returns A pointer on the column if it was found in the list of opened columns.
|
||||
* @retval NULL if the column was not found in the list of opened columns.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
OBIDMS_column_p obi_dms_get_column_from_list(OBIDMS_p dms, const char* column_name, obiversion_t version);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Adds a column identified by its name and its version number in the list of opened columns.
|
||||
*
|
||||
* @param dms The OBIDMS.
|
||||
* @param column A pointer on the column that should be added in the list of opened columns.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void obi_dms_list_column(OBIDMS_p dms, OBIDMS_column_p column);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Removes a column identified by its name and its version number from the list of opened columns.
|
||||
*
|
||||
* @param dms The OBIDMS.
|
||||
* @param column A pointer on the column that should be removed from the list of opened columns.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_dms_unlist_column(OBIDMS_p dms, OBIDMS_column_p column);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Returns an indexer identified by its name from the list of opened indexers.
|
||||
*
|
||||
* @param dms The OBIDMS.
|
||||
* @param indexer_name The indexer name that should be looked for.
|
||||
*
|
||||
* @returns A pointer on the indexer if it was found in the list of opened indexers.
|
||||
* @retval NULL if the indexer was not found in the list of opened indexers.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
Obi_indexer_p obi_dms_get_indexer_from_list(OBIDMS_p dms, const char* indexer_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Adds an indexer identified by its name in the list of opened indexers.
|
||||
*
|
||||
* @param dms The OBIDMS.
|
||||
* @param indexer A pointer on the indexer that should be added in the list of opened indexers.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
void obi_dms_list_indexer(OBIDMS_p dms, Obi_indexer_p indexer);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Removes an indexer identified by its name from the list of opened indexers.
|
||||
*
|
||||
* @param dms The OBIDMS.
|
||||
* @param indexer A pointer on the indexer that should be removed from the list of opened indexers.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
int obi_dms_unlist_indexer(OBIDMS_p dms, Obi_indexer_p indexer);
|
||||
|
||||
|
||||
/**
|
||||
* Function meant to disappear soon
|
||||
*/
|
||||
char* obi_dms_get_path(OBIDMS_p dms);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Gets the full path of a file or a directory from its
|
||||
* path relative to the DMS.
|
||||
*
|
||||
* @warning The returned pointer has to be freed by the caller.
|
||||
*
|
||||
* @param dms The DMS to which path_name is relative.
|
||||
* @param path_name The path name for the file or directory, relative to the DMS.
|
||||
*
|
||||
* @returns A pointer to the full path.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
char* obi_dms_get_full_path(OBIDMS_p dms, const char* path_name);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Opens a directory relative to the DMS.
|
||||
*
|
||||
* @param dms The DMS to which path_name is relative.
|
||||
* @param path_name The path name for the directory to be opened, relative to the DMS.
|
||||
*
|
||||
* @returns The directory stream of the opened directory.
|
||||
* @retval NULL if an error occurred.
|
||||
*
|
||||
* @since April 2016
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
*/
|
||||
DIR* opendir_in_dms(OBIDMS_p dms, const char* path_name);
|
||||
|
||||
|
||||
#endif /* OBIDMS_H_ */
|
||||
|
682
src/obidms_taxonomy.c
Normal file
682
src/obidms_taxonomy.c
Normal file
@ -0,0 +1,682 @@
|
||||
/********************************************************************
|
||||
* OBIDMS taxonomy functions *
|
||||
********************************************************************/
|
||||
|
||||
/**
|
||||
* @file obidms_taxonomy.c
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date March 2nd 2016
|
||||
* @brief Functions for reading binary taxonomy files.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#include "obidms_taxonomy.h"
|
||||
#include "obidms.h"
|
||||
#include "obidebug.h"
|
||||
#include "obierrno.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
#define DEBUG_LEVEL 0 // TODO has to be defined somewhere else (cython compil flag?)
|
||||
|
||||
// TODO : the malloc aren't checked but won't exist for long because mapping instead
|
||||
|
||||
|
||||
/**
 * @brief bsearch() comparison function matching a rank label against
 *        an element of an array of rank labels.
 *
 * @param label1 The searched key: a NUL-terminated C string.
 * @param label2 A pointer on an element of an array of C strings (a char**).
 *
 * @returns The value returned by strcmp() when comparing the key with the pointed label.
 */
int compareRankLabel(const void *label1, const void *label2)
{
    const char*        searched_label = label1;
    const char* const* array_element  = label2;

    return strcmp(searched_label, *array_element);
}
|
||||
|
||||
|
||||
/**
 * @brief Looks for a rank label in a rank index and returns its position.
 *
 * The lookup is a binary search done with bsearch() and compareRankLabel().
 * NOTE(review): this presumably requires ranks->label to be sorted in strcmp() order.
 *
 * @param label The rank label that should be looked for.
 * @param ranks The rank index in which the label is searched.
 *
 * @returns The index of the label in ranks->label.
 * @retval -1 if the label was not found.
 */
int32_t rank_index(const char* label, ecorankidx_t* ranks)
{
    char** found;

    found = bsearch(label, ranks->label, ranks->count, sizeof(char*), compareRankLabel);

    if (found == NULL)
        return -1;

    // bsearch() returns a pointer inside ranks->label, so the
    // pointer difference is the index of the matching label
    return found - ranks->label;
}
|
||||
|
||||
|
||||
void* read_ecorecord(FILE* f, int32_t* record_size)
|
||||
{
|
||||
static void* buffer = NULL;
|
||||
int32_t buffer_size = 0;
|
||||
int32_t read;
|
||||
|
||||
if (!record_size)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError reading a taxonomy file: record_size can not be NULL");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
read = fread(record_size,
|
||||
1,
|
||||
sizeof(int32_t),
|
||||
f);
|
||||
|
||||
if (feof(f))
|
||||
return NULL;
|
||||
|
||||
if (read != sizeof(int32_t))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError reading a taxonomy file: error reading record size");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// if (is_big_endian()) // TODO
|
||||
// *recordSize=swap_int32_t(*recordSize);
|
||||
|
||||
if (buffer_size < *record_size)
|
||||
{
|
||||
if (buffer)
|
||||
buffer = realloc(buffer, *record_size);
|
||||
else
|
||||
buffer = malloc(*record_size);
|
||||
if (buffer == NULL)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError reading a taxonomy file: error allocating memory");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
read = fread(buffer,
|
||||
1,
|
||||
*record_size,
|
||||
f);
|
||||
|
||||
if (read != *record_size)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError reading a taxonomy file: error reading a record %d, %d", read, *record_size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return buffer;
|
||||
};
|
||||
|
||||
|
||||
/**
 * @brief Reads the next taxon record of a binary taxonomy file into a taxon structure.
 *
 * The parent field of the filled structure does not yet contain a valid pointer:
 * it holds the parent index read from the file, cast to a pointer. The caller
 * is expected to convert it (see read_taxonomyidx()).
 *
 * @warning The name of the taxon is allocated with malloc() and is owned by the taxon.
 *
 * @param f The taxa file, positioned at the beginning of a taxon record.
 * @param taxon A pointer on the structure that should be filled.
 *
 * @returns The taxon pointer given as argument.
 * @retval NULL if no record could be read or if an error occurred.
 */
ecotx_t* readnext_ecotaxon(FILE* f, ecotx_t* taxon)
{
    ecotxformat_t* raw;
    int32_t        record_length;

    raw = read_ecorecord(f, &record_length);

    if (!raw)
        return NULL;

    // TODO swap the bytes of the int32_t fields of the record on big endian platforms

    // The name can not be longer than the record that contains it
    if ((raw->name_length < 0) || (raw->name_length > record_length))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: invalid taxon name length");
        return NULL;
    }

    // Parent index temporarily stored in the pointer field
    taxon->parent = (ecotx_t*) ((size_t) raw->parent);
    taxon->taxid  = raw->taxid;
    taxon->rank   = raw->rank;
    taxon->farest = -1;

    taxon->name = malloc((raw->name_length+1) * sizeof(char));
    if (taxon->name == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy file: error allocating memory for a taxon name");
        return NULL;
    }

    strncpy(taxon->name, raw->name, raw->name_length);
    // strncpy() does not terminate the string when the source fills the whole length
    taxon->name[raw->name_length] = '\0';

    return taxon;
}
|
||||
|
||||
|
||||
FILE* open_ecorecorddb(const char* file_name,
|
||||
int32_t* count,
|
||||
int32_t abort_on_open_error)
|
||||
{
|
||||
FILE* f;
|
||||
int32_t read;
|
||||
|
||||
fprintf(stderr, "\n%s\n", file_name);
|
||||
|
||||
f = fopen(file_name, "rb");
|
||||
|
||||
if (!f)
|
||||
{
|
||||
if (abort_on_open_error)
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nCouldn't open a taxonomy file");
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
*count = 0;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
read = fread(count,
|
||||
1,
|
||||
sizeof(int32_t),
|
||||
f);
|
||||
|
||||
if (read != sizeof(int32_t))
|
||||
{
|
||||
obi_set_errno(OBI_TAXONOMY_ERROR);
|
||||
obidebug(1, "\nError reading taxonomy record size");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// if (!obi_is_little_endian()) // TODO
|
||||
// *count = swap_int32_t(*count);
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
|
||||
ecorankidx_t* read_rankidx(const char* ranks_file_name)
|
||||
{
|
||||
int32_t count;
|
||||
FILE* ranks_file;
|
||||
ecorankidx_t* ranks_index;
|
||||
int32_t i;
|
||||
int32_t rank_length;
|
||||
char* buffer;
|
||||
|
||||
ranks_file = open_ecorecorddb(ranks_file_name, &count, 0);
|
||||
|
||||
if (ranks_file==NULL)
|
||||
return NULL;
|
||||
|
||||
ranks_index = (ecorankidx_t*) malloc(sizeof(ecorankidx_t) + sizeof(char*) * (count-1));
|
||||
|
||||
ranks_index->count = count;
|
||||
|
||||
for (i=0; i < count; i++)
|
||||
{
|
||||
buffer = read_ecorecord(ranks_file, &rank_length);
|
||||
ranks_index->label[i] = (char*) malloc(rank_length+1);
|
||||
strncpy(ranks_index->label[i], buffer, rank_length);
|
||||
}
|
||||
|
||||
return ranks_index;
|
||||
}
|
||||
|
||||
|
||||
ecotxidx_t* read_taxonomyidx(const char* taxa_file_name, const char* local_taxa_file_name)
|
||||
{
|
||||
int32_t count_taxa;
|
||||
int32_t count_local_taxa;
|
||||
FILE* f_taxa;
|
||||
FILE* f_local_taxa;
|
||||
ecotxidx_t* taxa_index;
|
||||
struct ecotxnode* t;
|
||||
int32_t i;
|
||||
int32_t j;
|
||||
|
||||
f_taxa = open_ecorecorddb(taxa_file_name, &count_taxa,0);
|
||||
|
||||
if (f_taxa == NULL)
|
||||
{
|
||||
obidebug(1, "\nError reading taxonomy taxa file");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
f_local_taxa = open_ecorecorddb(local_taxa_file_name, &count_local_taxa, 0);
|
||||
|
||||
taxa_index = (ecotxidx_t*) malloc(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count_taxa + count_local_taxa - 1));
|
||||
|
||||
taxa_index->count = count_taxa + count_local_taxa;
|
||||
taxa_index->buffer_size = taxa_index->count;
|
||||
|
||||
taxa_index->max_taxid = 0;
|
||||
printf("Reading %d taxa...\n", count_taxa);
|
||||
for (i=0; i<count_taxa; i++)
|
||||
{
|
||||
readnext_ecotaxon(f_taxa, &(taxa_index->taxon[i]));
|
||||
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
|
||||
taxa_index->taxon[i].parent->farest = 0;
|
||||
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
|
||||
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
|
||||
}
|
||||
|
||||
if (count_local_taxa > 0)
|
||||
printf("Reading %d local taxa...\n", count_local_taxa);
|
||||
else
|
||||
printf("No local taxa\n");
|
||||
|
||||
count_taxa = taxa_index->count;
|
||||
|
||||
for (; i < count_taxa; i++){
|
||||
readnext_ecotaxon(f_local_taxa, &(taxa_index->taxon[i]));
|
||||
taxa_index->taxon[i].parent = taxa_index->taxon + (size_t) taxa_index->taxon[i].parent;
|
||||
taxa_index->taxon[i].parent->farest=0;
|
||||
if (taxa_index->taxon[i].taxid > taxa_index->max_taxid)
|
||||
taxa_index->max_taxid = taxa_index->taxon[i].taxid;
|
||||
}
|
||||
|
||||
printf("Computing longest branches...\n");
|
||||
|
||||
for (i=0; i < count_taxa; i++)
|
||||
{
|
||||
t = taxa_index->taxon+i;
|
||||
if (t->farest == -1)
|
||||
{
|
||||
t->farest=0;
|
||||
while (t->parent != t)
|
||||
{
|
||||
j = t->farest + 1;
|
||||
if (j > t->parent->farest)
|
||||
{
|
||||
t->parent->farest = j;
|
||||
t=t->parent;
|
||||
}
|
||||
else
|
||||
t = taxa_index->taxon;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return taxa_index;
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Reads the next name record of a binary names file into a name structure.
 *
 * @warning The name and the class name are allocated with malloc() and are owned by the name structure.
 *
 * @param f The names file, positioned at the beginning of a name record.
 * @param name A pointer on the structure that should be filled.
 * @param taxonomy The taxonomy, with its taxa already read, used to link the name to its taxon.
 *
 * @returns The name pointer given as argument.
 * @retval NULL if no record could be read or if an error occurred.
 */
econame_t* readnext_econame(FILE* f, econame_t* name, OBIDMS_taxonomy_p taxonomy)
{
    econameformat_t* raw;
    int32_t          record_length;

    raw = read_ecorecord(f, &record_length);

    if (!raw)
        return NULL;

    // TODO swap the bytes of the int32_t fields of the record on big endian platforms

    if ((raw->name_length < 0) || (raw->class_length < 0) ||
        (raw->name_length > record_length) || (raw->class_length > (record_length - raw->name_length)))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: invalid name length");
        return NULL;
    }

    // The taxid field of a name record is used as an index in the array of taxa
    if ((raw->taxid < 0) || (raw->taxid >= taxonomy->taxa->count))
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: taxon index out of range");
        return NULL;
    }

    name->is_scientific_name = raw->is_scientific_name;

    name->name = malloc((raw->name_length + 1) * sizeof(char));
    if (name->name == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: error allocating memory for a name");
        return NULL;
    }
    strncpy(name->name, raw->names, raw->name_length);
    name->name[raw->name_length] = 0;

    name->class_name = malloc((raw->class_length+1) * sizeof(char));
    if (name->class_name == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: error allocating memory for a class name");
        free(name->name);
        name->name = NULL;
        return NULL;
    }
    // The class name is stored right after the name in the record
    strncpy(name->class_name,(raw->names + raw->name_length), raw->class_length);
    name->class_name[raw->class_length] = 0;

    name->taxon = taxonomy->taxa->taxon + raw->taxid;

    return name;
}
|
||||
|
||||
|
||||
/**
 * @brief Reads a binary names file and builds the index of the names of a taxonomy.
 *
 * @warning The returned index and the strings of each name are allocated with malloc().
 *
 * @param file_name The path of the names file.
 * @param taxonomy The taxonomy, with its taxa already read, to which the names are linked.
 *
 * @returns A pointer on the names index.
 * @retval NULL if the file could not be opened or if an error occurred.
 */
econameidx_t* read_nameidx(const char *file_name, OBIDMS_taxonomy_p taxonomy)
{
    int32_t       count;
    FILE*         f;
    econameidx_t* index_names;
    int32_t       i;
    int32_t       nb_read;

    f = open_ecorecorddb(file_name, &count, 0);

    if (f == NULL)
        return NULL;

    if (count < 0)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: negative number of names");
        fclose(f);
        return NULL;
    }

    // econameidx_t already contains room for one name
    index_names = malloc(sizeof(econameidx_t) + sizeof(econame_t) * ((count > 0) ? (size_t) (count-1) : 0));
    if (index_names == NULL)
    {
        obi_set_errno(OBI_TAXONOMY_ERROR);
        obidebug(1, "\nError reading a taxonomy names file: error allocating memory");
        fclose(f);
        return NULL;
    }

    index_names->count = count;

    nb_read = 0;
    for (i=0; i < count; i++)
    {
        if (readnext_econame(f, (index_names->names)+i, taxonomy) == NULL)
        {
            obi_set_errno(OBI_TAXONOMY_ERROR);
            obidebug(1, "\nError reading a taxonomy names file: error reading a name");
            break;
        }
        nb_read++;
    }

    fclose(f);

    if (nb_read < count)
    {
        // Something went wrong: release what was already built
        for (i=0; i < nb_read; i++)
        {
            free(index_names->names[i].name);
            free(index_names->names[i].class_name);
        }
        free(index_names);
        return NULL;
    }

    return index_names;
}
|
||||
|
||||
|
||||
static int bcomptaxon (const void* ptaxid, const void* ptaxon)
|
||||
{
|
||||
ecotx_t* current_taxon = (ecotx_t*) ptaxon;
|
||||
int32_t taxid = (int32_t) ((size_t) ptaxid);
|
||||
return taxid - current_taxon->taxid;
|
||||
}
|
||||
|
||||
|
||||
/////// PUBLIC /////////
|
||||
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names)
|
||||
{
|
||||
OBIDMS_taxonomy_p tax;
|
||||
char* main_taxonomy_dir_path;
|
||||
char* taxonomy_path;
|
||||
char* ranks_file_name;
|
||||
char* taxa_file_name;
|
||||
char* local_taxa_file_name;
|
||||
char* alter_names_file_name;
|
||||
int buffer_size;
|
||||
|
||||
tax = (OBIDMS_taxonomy_p) malloc(sizeof(OBIDMS_taxonomy_t));
|
||||
|
||||
tax->ranks = NULL;
|
||||
tax->taxa = NULL;
|
||||
tax->names = NULL;
|
||||
|
||||
buffer_size = 2048; // TODO
|
||||
|
||||
main_taxonomy_dir_path = obi_dms_get_full_path(dms, TAXONOMY_DIR_NAME);
|
||||
taxonomy_path = (char*) malloc((strlen(main_taxonomy_dir_path) + strlen(taxonomy_name) + strlen(taxonomy_name) + 3)*sizeof(char));
|
||||
if (sprintf(taxonomy_path, "%s/%s/%s", main_taxonomy_dir_path, taxonomy_name, taxonomy_name) < 0)
|
||||
{
|
||||
free(main_taxonomy_dir_path);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
free(main_taxonomy_dir_path);
|
||||
|
||||
// Read ranks
|
||||
ranks_file_name = (char*) malloc(buffer_size*sizeof(char));
|
||||
if (ranks_file_name == NULL)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
if (snprintf(ranks_file_name, buffer_size, "%s.rdx", taxonomy_path) < 0)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
free(ranks_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
tax->ranks = read_rankidx(ranks_file_name);
|
||||
if (tax->ranks == NULL)
|
||||
{
|
||||
free(ranks_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
free(ranks_file_name);
|
||||
|
||||
// Read taxa
|
||||
taxa_file_name = (char*) malloc(buffer_size*sizeof(char));
|
||||
if (taxa_file_name == NULL)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
if (snprintf(taxa_file_name, buffer_size,"%s.tdx", taxonomy_path) < 0)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
free(taxa_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
local_taxa_file_name = (char*) malloc(buffer_size*sizeof(char));
|
||||
if (local_taxa_file_name == NULL)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
free(taxa_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
if (snprintf(local_taxa_file_name, buffer_size,"%s.ldx", taxonomy_path) < 0)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
free(taxa_file_name);
|
||||
free(local_taxa_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
tax->taxa = read_taxonomyidx(taxa_file_name, local_taxa_file_name);
|
||||
if (tax->taxa == NULL)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
free(taxa_file_name);
|
||||
free(local_taxa_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
free(taxa_file_name);
|
||||
free(local_taxa_file_name);
|
||||
|
||||
// Read alternative names
|
||||
if (read_alternative_names)
|
||||
{
|
||||
alter_names_file_name = (char*) malloc(buffer_size*sizeof(char));
|
||||
if (alter_names_file_name == NULL)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
if (snprintf(alter_names_file_name, buffer_size,"%s.ndx", taxonomy_path) < 0)
|
||||
{
|
||||
free(taxonomy_path);
|
||||
free(alter_names_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
tax->names = read_nameidx(alter_names_file_name, tax);
|
||||
if (tax->names == NULL)
|
||||
{
|
||||
free(alter_names_file_name);
|
||||
obi_close_taxonomy(tax);
|
||||
return NULL;
|
||||
}
|
||||
free(alter_names_file_name);
|
||||
}
|
||||
|
||||
free(taxonomy_path);
|
||||
|
||||
return tax;
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Frees the memory used by a taxonomy structure.
 *
 * Only the rank, name and taxa indices and the structure itself are freed.
 *
 * @param taxonomy A pointer on the taxonomy to close.
 *
 * @retval 0 if the taxonomy was freed.
 * @retval 1 if the taxonomy pointer was NULL.
 */
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy)
{
    // TODO no closing files?

    if (taxonomy == NULL)
        return 1;

    // TODO those don't free everything but mapping will replace anyway
    // free() does nothing when given a NULL pointer
    free(taxonomy->ranks);
    free(taxonomy->names);
    free(taxonomy->taxa);

    free(taxonomy);

    return 0;
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
/**
 * @brief Returns the first taxon with a given rank found when going from
 *        a taxon (included) up to the root of the taxonomy.
 *
 * The root is recognized as the taxon that is its own parent.
 *
 * @param taxon The taxon from which the search starts.
 * @param rankidx The index of the rank that should be looked for.
 *
 * @returns A pointer on the taxon with that rank.
 * @retval NULL if no taxon with that rank was found up to the root.
 */
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx)
{
    ecotx_t* node = taxon;

    for (;;)
    {
        if (node->rank == rankidx)
            return node;

        if (node->parent == node)    // root node
            return NULL;

        node = node->parent;
    }
}
|
||||
|
||||
|
||||
/**
 * @brief Looks for the taxon with a given taxid in a taxonomy.
 *
 * The lookup is a binary search done with bsearch() and bcomptaxon().
 * NOTE(review): this presumably requires the array of taxa to be sorted by taxid.
 *
 * @param taxonomy The taxonomy in which the taxon is searched.
 * @param taxid The taxid that should be looked for.
 *
 * @returns A pointer on the taxon with that taxid.
 * @retval NULL if no taxon with that taxid was found.
 */
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid)
{
    // The taxid is passed to bcomptaxon() in the key pointer itself
    const void* key = (const void *) ((size_t) taxid);

    return (ecotx_t*) bsearch(key,
                              (const void *) taxonomy->taxa->taxon,
                              taxonomy->taxa->count,
                              sizeof(ecotx_t),
                              bcomptaxon);
}
|
||||
|
||||
|
||||
/**
 * @brief Checks whether a taxon is under the taxon with a given taxid.
 *
 * The ancestors of the taxon are visited from its parent up to the root. The root is
 * recognized either by its name ("root") or by being its own parent, so the walk
 * always ends even if the root of the taxonomy has another name.
 *
 * @param taxon The taxon whose ancestors are checked.
 * @param other_taxid The taxid looked for among the ancestors of the taxon.
 *
 * @retval true if an ancestor of the taxon has that taxid.
 * @retval false otherwise.
 */
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid)
{
    ecotx_t* next_parent;

    next_parent = taxon->parent;

    while ((other_taxid != next_parent->taxid) &&
           (next_parent->parent != next_parent) &&        // root node
           (strcmp(next_parent->name, "root") != 0))
        next_parent = next_parent->parent;

    return (other_taxid == next_parent->taxid);
}
|
||||
|
||||
|
||||
/**
 * @brief Returns the taxon of rank "species" found when going from a taxon up to the root.
 *
 * The index of the "species" rank is cached in static variables and looked up again only
 * when a taxonomy different from the one of the previous call is given. If taxonomy is
 * NULL, the taxonomy of the previous call is used.
 *
 * @param taxon The taxon from which the search starts.
 * @param taxonomy The taxonomy to which the taxon belongs, or NULL.
 *
 * @returns A pointer on the species taxon.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the taxonomy,
 *         or if no taxon with that rank was found.
 */
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy = NULL;
    static int32_t           cached_rank     = -1;

    // Refresh the cache when a new taxonomy is given
    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rank     = rank_index("species", taxonomy->ranks);
        cached_taxonomy = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the species associated with a taxon: No taxonomy defined");
    return NULL;
}
|
||||
|
||||
|
||||
/**
 * @brief Returns the taxon of rank "genus" found when going from a taxon up to the root.
 *
 * The index of the "genus" rank is cached in static variables and looked up again only
 * when a taxonomy different from the one of the previous call is given. If taxonomy is
 * NULL, the taxonomy of the previous call is used.
 *
 * @param taxon The taxon from which the search starts.
 * @param taxonomy The taxonomy to which the taxon belongs, or NULL.
 *
 * @returns A pointer on the genus taxon.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the taxonomy,
 *         or if no taxon with that rank was found.
 */
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy = NULL;
    static int32_t           cached_rank     = -1;

    // Refresh the cache when a new taxonomy is given
    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rank     = rank_index("genus", taxonomy->ranks);
        cached_taxonomy = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the genus associated with a taxon: No taxonomy defined");
    return NULL;
}
|
||||
|
||||
|
||||
/**
 * @brief Returns the taxon of rank "family" found when going from a taxon up to the root.
 *
 * The index of the "family" rank is cached in static variables and looked up again only
 * when a taxonomy different from the one of the previous call is given. If taxonomy is
 * NULL, the taxonomy of the previous call is used.
 *
 * @param taxon The taxon from which the search starts.
 * @param taxonomy The taxonomy to which the taxon belongs, or NULL.
 *
 * @returns A pointer on the family taxon.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the taxonomy,
 *         or if no taxon with that rank was found.
 */
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy = NULL;
    static int32_t           cached_rank     = -1;

    // Refresh the cache when a new taxonomy is given
    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rank     = rank_index("family", taxonomy->ranks);
        cached_taxonomy = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the family associated with a taxon: No taxonomy defined");
    return NULL;
}
|
||||
|
||||
|
||||
/**
 * @brief Returns the taxon of rank "kingdom" found when going from a taxon up to the root.
 *
 * The index of the "kingdom" rank is cached in static variables and looked up again only
 * when a taxonomy different from the one of the previous call is given. If taxonomy is
 * NULL, the taxonomy of the previous call is used.
 *
 * @param taxon The taxon from which the search starts.
 * @param taxonomy The taxonomy to which the taxon belongs, or NULL.
 *
 * @returns A pointer on the kingdom taxon.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the taxonomy,
 *         or if no taxon with that rank was found.
 */
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy = NULL;
    static int32_t           cached_rank     = -1;

    // Refresh the cache when a new taxonomy is given
    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rank     = rank_index("kingdom", taxonomy->ranks);
        cached_taxonomy = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the kingdom associated with a taxon: No taxonomy defined");
    return NULL;
}
|
||||
|
||||
|
||||
/**
 * @brief Returns the taxon of rank "superkingdom" found when going from a taxon up to the root.
 *
 * The index of the "superkingdom" rank is cached in static variables and looked up again only
 * when a taxonomy different from the one of the previous call is given. If taxonomy is
 * NULL, the taxonomy of the previous call is used.
 *
 * @param taxon The taxon from which the search starts.
 * @param taxonomy The taxonomy to which the taxon belongs, or NULL.
 *
 * @returns A pointer on the superkingdom taxon.
 * @retval NULL if no taxonomy is defined, if the rank does not exist in the taxonomy,
 *         or if no taxon with that rank was found.
 */
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy)
{
    static OBIDMS_taxonomy_p cached_taxonomy = NULL;
    static int32_t           cached_rank     = -1;

    // Refresh the cache when a new taxonomy is given
    if ((taxonomy != NULL) && (taxonomy != cached_taxonomy))
    {
        cached_rank     = rank_index("superkingdom", taxonomy->ranks);
        cached_taxonomy = taxonomy;
    }

    if ((cached_taxonomy != NULL) && (cached_rank >= 0))
        return obi_taxo_get_parent_at_rank(taxon, cached_rank);

    obi_set_errno(OBI_TAXONOMY_ERROR);
    obidebug(1, "\nError trying to get the superkingdom associated with a taxon: No taxonomy defined");
    return NULL;
}
|
||||
|
101
src/obidms_taxonomy.h
Normal file
101
src/obidms_taxonomy.h
Normal file
@ -0,0 +1,101 @@
|
||||
/********************************************************************
|
||||
* OBIDMS taxonomy header file *
|
||||
********************************************************************/
|
||||
|
||||
/**
|
||||
* @file obidms_taxonomy.h
|
||||
* @author Celine Mercier (celine.mercier@metabarcoding.org)
|
||||
* @date March 2nd 2016
|
||||
* @brief Header file for the functions handling the reading of binary taxonomy files.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "obidms.h"
|
||||
|
||||
|
||||
/**
 * @brief Layout of a taxon record as read from a binary taxa file
 *        (see readnext_ecotaxon()).
 */
typedef struct {
|
||||
int32_t taxid; /**< Taxid of the taxon. */
|
||||
int32_t rank; /**< Rank of the taxon (compared with the indices returned by rank_index()). */
|
||||
int32_t parent; /**< Index of the parent taxon in the array of taxa. */
|
||||
int32_t name_length; /**< Number of characters of the name. */
|
||||
char name[1]; /**< First character of the name; name_length characters are read from here. */
|
||||
} ecotxformat_t;
|
||||
|
||||
|
||||
/**
 * @brief A taxon of a taxonomy once loaded in memory.
 */
typedef struct ecotxnode {
|
||||
int32_t taxid; /**< Taxid of the taxon. */
|
||||
int32_t rank; /**< Rank of the taxon (compared with the indices returned by rank_index()). */
|
||||
int32_t farest; /**< Length of the longest branch under the taxon, computed by read_taxonomyidx() (-1 right after reading the taxon). */
|
||||
struct ecotxnode* parent; /**< Parent taxon; the root is its own parent. */
|
||||
char* name; /**< Name of the taxon, allocated when the taxon is read. */
|
||||
} ecotx_t;
|
||||
|
||||
|
||||
/**
 * @brief Index of the taxa of a taxonomy, built by read_taxonomyidx().
 */
typedef struct {
|
||||
int32_t count; /**< Number of taxa (taxa plus local taxa). */
|
||||
int32_t max_taxid; /**< Greatest taxid among the taxa read. */
|
||||
int32_t buffer_size; /**< Set to count by read_taxonomyidx(). */
|
||||
ecotx_t taxon[1]; /**< Array of the taxa; the structure is allocated with room for count elements. */
|
||||
} ecotxidx_t;
|
||||
|
||||
|
||||
/**
 * @brief Index of the rank labels of a taxonomy, built by read_rankidx().
 */
typedef struct {
|
||||
int32_t count; /**< Number of rank labels. */
|
||||
char* label[1]; /**< Array of the rank labels; the structure is allocated with room for count elements. */
|
||||
} ecorankidx_t;
|
||||
|
||||
|
||||
/**
 * @brief Layout of a name record as read from a binary names file
 *        (see readnext_econame()).
 */
typedef struct {
|
||||
int32_t is_scientific_name; /**< Copied as is in econame_t; presumably not 0 if the name is a scientific name. */
|
||||
int32_t name_length; /**< Number of characters of the name. */
|
||||
int32_t class_length; /**< Number of characters of the class name. */
|
||||
int32_t taxid; /**< Used by readnext_econame() as the index of the taxon in the array of taxa. */
|
||||
char names[1]; /**< First character of the name, itself directly followed by the class name. */
|
||||
} econameformat_t;
|
||||
|
||||
|
||||
/**
 * @brief A name of a taxon once loaded in memory.
 */
typedef struct {
|
||||
char* name; /**< The name, allocated when the name is read. */
|
||||
char* class_name; /**< The class name of the name, allocated when the name is read. */
|
||||
int32_t is_scientific_name; /**< Value read from the name record. */
|
||||
struct ecotxnode* taxon; /**< The taxon to which the name refers. */
|
||||
} econame_t;
|
||||
|
||||
|
||||
/**
 * @brief Index of the names of a taxonomy, built by read_nameidx().
 */
typedef struct {
|
||||
int32_t count; /**< Number of names. */
|
||||
econame_t names[1]; /**< Array of the names; the structure is allocated with room for count elements. */
|
||||
} econameidx_t;
|
||||
|
||||
|
||||
/**
 * @brief Structure gathering the indices of a taxonomy read by obi_read_taxonomy().
 */
typedef struct OBIDMS_taxonomy_t {
|
||||
ecorankidx_t* ranks; /**< Index of the rank labels. */
|
||||
econameidx_t* names; /**< Index of the names; NULL if the names were not read. */
|
||||
ecotxidx_t* taxa; /**< Index of the taxa. */
|
||||
} OBIDMS_taxonomy_t, *OBIDMS_taxonomy_p;
|
||||
|
||||
|
||||
OBIDMS_taxonomy_p obi_read_taxonomy(OBIDMS_p dms, const char* taxonomy_name, bool read_alternative_names);
|
||||
|
||||
int obi_close_taxonomy(OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_parent_at_rank(ecotx_t* taxon, int32_t rankidx);
|
||||
|
||||
ecotx_t* obi_taxo_get_taxon_with_taxid(OBIDMS_taxonomy_p taxonomy, int32_t taxid);
|
||||
|
||||
bool obi_taxo_is_taxon_under_taxid(ecotx_t* taxon, int32_t other_taxid);
|
||||
|
||||
ecotx_t* obi_taxo_get_species(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_genus(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_family(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_kingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
||||
ecotx_t* obi_taxo_get_superkingdom(ecotx_t* taxon, OBIDMS_taxonomy_p taxonomy);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user