diff --git a/.cproject b/.cproject
new file mode 100644
index 0000000..9f894fd
--- /dev/null
+++ b/.cproject
@@ -0,0 +1,151 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/.project b/.project
new file mode 100644
index 0000000..ffa37e5
--- /dev/null
+++ b/.project
@@ -0,0 +1,77 @@
+
+
+ ecoPrimers
+
+
+
+
+
+ org.eclipse.cdt.managedbuilder.core.genmakebuilder
+ clean,full,incremental,
+
+
+ org.eclipse.cdt.make.core.fullBuildTarget
+ all
+
+
+ ?name?
+
+
+
+ org.eclipse.cdt.make.core.enableAutoBuild
+ false
+
+
+ org.eclipse.cdt.make.core.enableFullBuild
+ true
+
+
+ org.eclipse.cdt.make.core.enableCleanBuild
+ true
+
+
+ org.eclipse.cdt.make.core.cleanBuildTarget
+ clean
+
+
+ org.eclipse.cdt.make.core.append_environment
+ true
+
+
+ org.eclipse.cdt.make.core.contents
+ org.eclipse.cdt.make.core.activeConfigSettings
+
+
+ org.eclipse.cdt.make.core.useDefaultBuildCmd
+ true
+
+
+ org.eclipse.cdt.make.core.buildArguments
+
+
+
+ org.eclipse.cdt.make.core.buildCommand
+ make
+
+
+ org.eclipse.cdt.make.core.autoBuildTarget
+ all
+
+
+ org.eclipse.cdt.make.core.stopOnError
+ true
+
+
+
+
+ org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder
+
+
+
+
+
+ org.eclipse.cdt.core.cnature
+ org.eclipse.cdt.managedbuilder.core.ScannerConfigNature
+ org.eclipse.cdt.managedbuilder.core.managedBuildNature
+
+
diff --git a/Documents/CalculTM.xls b/Documents/CalculTM.xls
new file mode 100644
index 0000000..094eb31
Binary files /dev/null and b/Documents/CalculTM.xls differ
diff --git a/Licence_CeCILL_V2-en.txt b/Licence_CeCILL_V2-en.txt
new file mode 100644
index 0000000..fcc8df2
--- /dev/null
+++ b/Licence_CeCILL_V2-en.txt
@@ -0,0 +1,506 @@
+
+CeCILL FREE SOFTWARE LICENSE AGREEMENT
+
+
+ Notice
+
+This Agreement is a Free Software license agreement that is the result
+of discussions between its authors in order to ensure compliance with
+the two main principles guiding its drafting:
+
+ * firstly, compliance with the principles governing the distribution
+ of Free Software: access to source code, broad rights granted to
+ users,
+ * secondly, the election of a governing law, French law, with which
+ it is conformant, both as regards the law of torts and
+ intellectual property law, and the protection that it offers to
+ both authors and holders of the economic rights over software.
+
+The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
+license are:
+
+Commissariat à l'Energie Atomique - CEA, a public scientific, technical
+and industrial research establishment, having its principal place of
+business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
+
+Centre National de la Recherche Scientifique - CNRS, a public scientific
+and technological establishment, having its principal place of business
+at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, a public scientific and technological establishment, having its
+principal place of business at Domaine de Voluceau, Rocquencourt, BP
+105, 78153 Le Chesnay cedex, France.
+
+
+ Preamble
+
+The purpose of this Free Software license agreement is to grant users
+the right to modify and redistribute the software governed by this
+license within the framework of an open source distribution model.
+
+The exercising of these rights is conditional upon certain obligations
+for users so as to preserve this status for all subsequent redistributions.
+
+In consideration of access to the source code and the rights to copy,
+modify and redistribute granted by the license, users are provided only
+with a limited warranty and the software's author, the holder of the
+economic rights, and the successive licensors only have limited liability.
+
+In this respect, the risks associated with loading, using, modifying
+and/or developing or reproducing the software by the user are brought to
+the user's attention, given its Free Software status, which may make it
+complicated to use, with the result that its use is reserved for
+developers and experienced professionals having in-depth computer
+knowledge. Users are therefore encouraged to load and test the
+suitability of the software as regards their requirements in conditions
+enabling the security of their systems and/or data to be ensured and,
+more generally, to use and operate it in the same conditions of
+security. This Agreement may be freely reproduced and published,
+provided it is not altered, and that no provisions are either added or
+removed herefrom.
+
+This Agreement may apply to any or all software for which the holder of
+the economic rights decides to submit the use thereof to its provisions.
+
+
+ Article 1 - DEFINITIONS
+
+For the purpose of this Agreement, when the following expressions
+commence with a capital letter, they shall have the following meaning:
+
+Agreement: means this license agreement, and its possible subsequent
+versions and annexes.
+
+Software: means the software in its Object Code and/or Source Code form
+and, where applicable, its documentation, "as is" when the Licensee
+accepts the Agreement.
+
+Initial Software: means the Software in its Source Code and possibly its
+Object Code form and, where applicable, its documentation, "as is" when
+it is first distributed under the terms and conditions of the Agreement.
+
+Modified Software: means the Software modified by at least one
+Contribution.
+
+Source Code: means all the Software's instructions and program lines to
+which access is required so as to modify the Software.
+
+Object Code: means the binary files originating from the compilation of
+the Source Code.
+
+Holder: means the holder(s) of the economic rights over the Initial
+Software.
+
+Licensee: means the Software user(s) having accepted the Agreement.
+
+Contributor: means a Licensee having made at least one Contribution.
+
+Licensor: means the Holder, or any other individual or legal entity, who
+distributes the Software under the Agreement.
+
+Contribution: means any or all modifications, corrections, translations,
+adaptations and/or new functions integrated into the Software by any or
+all Contributors, as well as any or all Internal Modules.
+
+Module: means a set of sources files including their documentation that
+enables supplementary functions or services in addition to those offered
+by the Software.
+
+External Module: means any or all Modules, not derived from the
+Software, so that this Module and the Software run in separate address
+spaces, with one calling the other when they are run.
+
+Internal Module: means any or all Module, connected to the Software so
+that they both execute in the same address space.
+
+GNU GPL: means the GNU General Public License version 2 or any
+subsequent version, as published by the Free Software Foundation Inc.
+
+Parties: mean both the Licensee and the Licensor.
+
+These expressions may be used both in singular and plural form.
+
+
+ Article 2 - PURPOSE
+
+The purpose of the Agreement is the grant by the Licensor to the
+Licensee of a non-exclusive, transferable and worldwide license for the
+Software as set forth in Article 5 hereinafter for the whole term of the
+protection granted by the rights over said Software.
+
+
+ Article 3 - ACCEPTANCE
+
+3.1 The Licensee shall be deemed as having accepted the terms and
+conditions of this Agreement upon the occurrence of the first of the
+following events:
+
+ * (i) loading the Software by any or all means, notably, by
+ downloading from a remote server, or by loading from a physical
+ medium;
+ * (ii) the first time the Licensee exercises any of the rights
+ granted hereunder.
+
+3.2 One copy of the Agreement, containing a notice relating to the
+characteristics of the Software, to the limited warranty, and to the
+fact that its use is restricted to experienced users has been provided
+to the Licensee prior to its acceptance as set forth in Article 3.1
+hereinabove, and the Licensee hereby acknowledges that it has read and
+understood it.
+
+
+ Article 4 - EFFECTIVE DATE AND TERM
+
+
+ 4.1 EFFECTIVE DATE
+
+The Agreement shall become effective on the date when it is accepted by
+the Licensee as set forth in Article 3.1.
+
+
+ 4.2 TERM
+
+The Agreement shall remain in force for the entire legal term of
+protection of the economic rights over the Software.
+
+
+ Article 5 - SCOPE OF RIGHTS GRANTED
+
+The Licensor hereby grants to the Licensee, who accepts, the following
+rights over the Software for any or all use, and for the term of the
+Agreement, on the basis of the terms and conditions set forth hereinafter.
+
+Besides, if the Licensor owns or comes to own one or more patents
+protecting all or part of the functions of the Software or of its
+components, the Licensor undertakes not to enforce the rights granted by
+these patents against successive Licensees using, exploiting or
+modifying the Software. If these patents are transferred, the Licensor
+undertakes to have the transferees subscribe to the obligations set
+forth in this paragraph.
+
+
+ 5.1 RIGHT OF USE
+
+The Licensee is authorized to use the Software, without any limitation
+as to its fields of application, with it being hereinafter specified
+that this comprises:
+
+ 1. permanent or temporary reproduction of all or part of the Software
+ by any or all means and in any or all form.
+
+ 2. loading, displaying, running, or storing the Software on any or
+ all medium.
+
+ 3. entitlement to observe, study or test its operation so as to
+ determine the ideas and principles behind any or all constituent
+ elements of said Software. This shall apply when the Licensee
+ carries out any or all loading, displaying, running, transmission
+ or storage operation as regards the Software, that it is entitled
+ to carry out hereunder.
+
+
+ 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
+
+The right to make Contributions includes the right to translate, adapt,
+arrange, or make any or all modifications to the Software, and the right
+to reproduce the resulting software.
+
+The Licensee is authorized to make any or all Contributions to the
+Software provided that it includes an explicit notice that it is the
+author of said Contribution and indicates the date of the creation thereof.
+
+
+ 5.3 RIGHT OF DISTRIBUTION
+
+In particular, the right of distribution includes the right to publish,
+transmit and communicate the Software to the general public on any or
+all medium, and by any or all means, and the right to market, either in
+consideration of a fee, or free of charge, one or more copies of the
+Software by any means.
+
+The Licensee is further authorized to distribute copies of the modified
+or unmodified Software to third parties according to the terms and
+conditions set forth hereinafter.
+
+
+ 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
+
+The Licensee is authorized to distribute true copies of the Software in
+Source Code or Object Code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the Object Code of the Software is
+redistributed, the Licensee allows future Licensees unhindered access to
+the full Source Code of the Software by indicating how to access it, it
+being understood that the additional cost of acquiring the Source Code
+shall not exceed the cost of transferring the data.
+
+
+ 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
+
+When the Licensee makes a Contribution to the Software, the terms and
+conditions for the distribution of the resulting Modified Software
+become subject to all the provisions of this Agreement.
+
+The Licensee is authorized to distribute the Modified Software, in
+source code or object code form, provided that said distribution
+complies with all the provisions of the Agreement and is accompanied by:
+
+ 1. a copy of the Agreement,
+
+ 2. a notice relating to the limitation of both the Licensor's
+ warranty and liability as set forth in Articles 8 and 9,
+
+and that, in the event that only the object code of the Modified
+Software is redistributed, the Licensee allows future Licensees
+unhindered access to the full source code of the Modified Software by
+indicating how to access it, it being understood that the additional
+cost of acquiring the source code shall not exceed the cost of
+transferring the data.
+
+
+ 5.3.3 DISTRIBUTION OF EXTERNAL MODULES
+
+When the Licensee has developed an External Module, the terms and
+conditions of this Agreement do not apply to said External Module, that
+may be distributed under a separate license agreement.
+
+
+ 5.3.4 COMPATIBILITY WITH THE GNU GPL
+
+The Licensee can include a code that is subject to the provisions of one
+of the versions of the GNU GPL in the Modified or unmodified Software,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+The Licensee can include the Modified or unmodified Software in a code
+that is subject to the provisions of one of the versions of the GNU GPL,
+and distribute that entire code under the terms of the same version of
+the GNU GPL.
+
+
+ Article 6 - INTELLECTUAL PROPERTY
+
+
+ 6.1 OVER THE INITIAL SOFTWARE
+
+The Holder owns the economic rights over the Initial Software. Any or
+all use of the Initial Software is subject to compliance with the terms
+and conditions under which the Holder has elected to distribute its work
+and no one shall be entitled to modify the terms and conditions for the
+distribution of said Initial Software.
+
+The Holder undertakes that the Initial Software will remain ruled at
+least by this Agreement, for the duration set forth in Article 4.2.
+
+
+ 6.2 OVER THE CONTRIBUTIONS
+
+The Licensee who develops a Contribution is the owner of the
+intellectual property rights over this Contribution as defined by
+applicable law.
+
+
+ 6.3 OVER THE EXTERNAL MODULES
+
+The Licensee who develops an External Module is the owner of the
+intellectual property rights over this External Module as defined by
+applicable law and is free to choose the type of agreement that shall
+govern its distribution.
+
+
+ 6.4 JOINT PROVISIONS
+
+The Licensee expressly undertakes:
+
+ 1. not to remove, or modify, in any manner, the intellectual property
+ notices attached to the Software;
+
+ 2. to reproduce said notices, in an identical manner, in the copies
+ of the Software modified or not.
+
+The Licensee undertakes not to directly or indirectly infringe the
+intellectual property rights of the Holder and/or Contributors on the
+Software and to take, where applicable, vis-à-vis its staff, any and all
+measures required to ensure respect of said intellectual property rights
+of the Holder and/or Contributors.
+
+
+ Article 7 - RELATED SERVICES
+
+7.1 Under no circumstances shall the Agreement oblige the Licensor to
+provide technical assistance or maintenance services for the Software.
+
+However, the Licensor is entitled to offer this type of services. The
+terms and conditions of such technical assistance, and/or such
+maintenance, shall be set forth in a separate instrument. Only the
+Licensor offering said maintenance and/or technical assistance services
+shall incur liability therefor.
+
+7.2 Similarly, any Licensor is entitled to offer to its licensees, under
+its sole responsibility, a warranty, that shall only be binding upon
+itself, for the redistribution of the Software and/or the Modified
+Software, under terms and conditions that it is free to decide. Said
+warranty, and the financial terms and conditions of its application,
+shall be subject of a separate instrument executed between the Licensor
+and the Licensee.
+
+
+ Article 8 - LIABILITY
+
+8.1 Subject to the provisions of Article 8.2, the Licensee shall be
+entitled to claim compensation for any direct loss it may have suffered
+from the Software as a result of a fault on the part of the relevant
+Licensor, subject to providing evidence thereof.
+
+8.2 The Licensor's liability is limited to the commitments made under
+this Agreement and shall not be incurred as a result of in particular:
+(i) loss due the Licensee's total or partial failure to fulfill its
+obligations, (ii) direct or consequential loss that is suffered by the
+Licensee due to the use or performance of the Software, and (iii) more
+generally, any consequential loss. In particular the Parties expressly
+agree that any or all pecuniary or business loss (i.e. loss of data,
+loss of profits, operating loss, loss of customers or orders,
+opportunity cost, any disturbance to business activities) or any or all
+legal proceedings instituted against the Licensee by a third party,
+shall constitute consequential loss and shall not provide entitlement to
+any or all compensation from the Licensor.
+
+
+ Article 9 - WARRANTY
+
+9.1 The Licensee acknowledges that the scientific and technical
+state-of-the-art when the Software was distributed did not enable all
+possible uses to be tested and verified, nor for the presence of
+possible defects to be detected. In this respect, the Licensee's
+attention has been drawn to the risks associated with loading, using,
+modifying and/or developing and reproducing the Software which are
+reserved for experienced users.
+
+The Licensee shall be responsible for verifying, by any or all means,
+the suitability of the product for its requirements, its good working
+order, and for ensuring that it shall not cause damage to either persons
+or properties.
+
+9.2 The Licensor hereby represents, in good faith, that it is entitled
+to grant all the rights over the Software (including in particular the
+rights set forth in Article 5).
+
+9.3 The Licensee acknowledges that the Software is supplied "as is" by
+the Licensor without any other express or tacit warranty, other than
+that provided for in Article 9.2 and, in particular, without any warranty
+as to its commercial value, its secured, safe, innovative or relevant
+nature.
+
+Specifically, the Licensor does not warrant that the Software is free
+from any error, that it will operate without interruption, that it will
+be compatible with the Licensee's own equipment and software
+configuration, nor that it will meet the Licensee's requirements.
+
+9.4 The Licensor does not either expressly or tacitly warrant that the
+Software does not infringe any third party intellectual property right
+relating to a patent, software or any other property right. Therefore,
+the Licensor disclaims any and all liability towards the Licensee
+arising out of any or all proceedings for infringement that may be
+instituted in respect of the use, modification and redistribution of the
+Software. Nevertheless, should such proceedings be instituted against
+the Licensee, the Licensor shall provide it with technical and legal
+assistance for its defense. Such technical and legal assistance shall be
+decided on a case-by-case basis between the relevant Licensor and the
+Licensee pursuant to a memorandum of understanding. The Licensor
+disclaims any and all liability as regards the Licensee's use of the
+name of the Software. No warranty is given as regards the existence of
+prior rights over the name of the Software or as regards the existence
+of a trademark.
+
+
+ Article 10 - TERMINATION
+
+10.1 In the event of a breach by the Licensee of its obligations
+hereunder, the Licensor may automatically terminate this Agreement
+thirty (30) days after notice has been sent to the Licensee and has
+remained ineffective.
+
+10.2 A Licensee whose Agreement is terminated shall no longer be
+authorized to use, modify or distribute the Software. However, any
+licenses that it may have granted prior to termination of the Agreement
+shall remain valid subject to their having been granted in compliance
+with the terms and conditions hereof.
+
+
+ Article 11 - MISCELLANEOUS
+
+
+ 11.1 EXCUSABLE EVENTS
+
+Neither Party shall be liable for any or all delay, or failure to
+perform the Agreement, that may be attributable to an event of force
+majeure, an act of God or an outside cause, such as defective
+functioning or interruptions of the electricity or telecommunications
+networks, network paralysis following a virus attack, intervention by
+government authorities, natural disasters, water damage, earthquakes,
+fire, explosions, strikes and labor unrest, war, etc.
+
+11.2 Any failure by either Party, on one or more occasions, to invoke
+one or more of the provisions hereof, shall under no circumstances be
+interpreted as being a waiver by the interested Party of its right to
+invoke said provision(s) subsequently.
+
+11.3 The Agreement cancels and replaces any or all previous agreements,
+whether written or oral, between the Parties and having the same
+purpose, and constitutes the entirety of the agreement between said
+Parties concerning said purpose. No supplement or modification to the
+terms and conditions hereof shall be effective as between the Parties
+unless it is made in writing and signed by their duly authorized
+representatives.
+
+11.4 In the event that one or more of the provisions hereof were to
+conflict with a current or future applicable act or legislative text,
+said act or legislative text shall prevail, and the Parties shall make
+the necessary amendments so as to comply with said act or legislative
+text. All other provisions shall remain effective. Similarly, invalidity
+of a provision of the Agreement, for any reason whatsoever, shall not
+cause the Agreement as a whole to be invalid.
+
+
+ 11.5 LANGUAGE
+
+The Agreement is drafted in both French and English and both versions
+are deemed authentic.
+
+
+ Article 12 - NEW VERSIONS OF THE AGREEMENT
+
+12.1 Any person is authorized to duplicate and distribute copies of this
+Agreement.
+
+12.2 So as to ensure coherence, the wording of this Agreement is
+protected and may only be modified by the authors of the License, who
+reserve the right to periodically publish updates or new versions of the
+Agreement, each with a separate number. These subsequent versions may
+address new issues encountered by Free Software.
+
+12.3 Any Software distributed under a given version of the Agreement may
+only be subsequently distributed under the same version of the Agreement
+or a subsequent version, subject to the provisions of Article 5.3.4.
+
+
+ Article 13 - GOVERNING LAW AND JURISDICTION
+
+13.1 The Agreement is governed by French law. The Parties agree to
+endeavor to seek an amicable solution to any disagreements or disputes
+that may arise during the performance of the Agreement.
+
+13.2 Failing an amicable solution within two (2) months as from their
+occurrence, and unless emergency proceedings are necessary, the
+disagreements or disputes shall be referred to the Paris Courts having
+jurisdiction, by the more diligent Party.
+
+
+Version 2.0 dated 2006-09-05.
diff --git a/Licence_CeCILL_V2-fr.txt b/Licence_CeCILL_V2-fr.txt
new file mode 100644
index 0000000..1613fca
--- /dev/null
+++ b/Licence_CeCILL_V2-fr.txt
@@ -0,0 +1,512 @@
+
+CONTRAT DE LICENCE DE LOGICIEL LIBRE CeCILL
+
+
+ Avertissement
+
+Ce contrat est une licence de logiciel libre issue d'une concertation
+entre ses auteurs afin que le respect de deux grands principes préside à
+sa rédaction:
+
+ * d'une part, le respect des principes de diffusion des logiciels
+ libres: accès au code source, droits étendus conférés aux
+ utilisateurs,
+ * d'autre part, la désignation d'un droit applicable, le droit
+ français, auquel elle est conforme, tant au regard du droit de la
+ responsabilité civile que du droit de la propriété intellectuelle
+ et de la protection qu'il offre aux auteurs et titulaires des
+ droits patrimoniaux sur un logiciel.
+
+Les auteurs de la licence CeCILL (pour Ce[a] C[nrs] I[nria] L[ogiciel]
+L[ibre]) sont:
+
+Commissariat à l'Energie Atomique - CEA, établissement public de
+recherche à caractère scientifique, technique et industriel, dont le
+siège est situé 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris.
+
+Centre National de la Recherche Scientifique - CNRS, établissement
+public à caractère scientifique et technologique, dont le siège est
+situé 3 rue Michel-Ange, 75794 Paris cedex 16.
+
+Institut National de Recherche en Informatique et en Automatique -
+INRIA, établissement public à caractère scientifique et technologique,
+dont le siège est situé Domaine de Voluceau, Rocquencourt, BP 105, 78153
+Le Chesnay cedex.
+
+
+ Préambule
+
+Ce contrat est une licence de logiciel libre dont l'objectif est de
+conférer aux utilisateurs la liberté de modification et de
+redistribution du logiciel régi par cette licence dans le cadre d'un
+modèle de diffusion en logiciel libre.
+
+L'exercice de ces libertés est assorti de certains devoirs à la charge
+des utilisateurs afin de préserver ce statut au cours des
+redistributions ultérieures.
+
+L'accessibilité au code source et les droits de copie, de modification
+et de redistribution qui en découlent ont pour contrepartie de n'offrir
+aux utilisateurs qu'une garantie limitée et de ne faire peser sur
+l'auteur du logiciel, le titulaire des droits patrimoniaux et les
+concédants successifs qu'une responsabilité restreinte.
+
+A cet égard l'attention de l'utilisateur est attirée sur les risques
+associés au chargement, à l'utilisation, à la modification et/ou au
+développement et à la reproduction du logiciel par l'utilisateur étant
+donné sa spécificité de logiciel libre, qui peut le rendre complexe à
+manipuler et qui le réserve donc à des développeurs ou des
+professionnels avertis possédant des connaissances informatiques
+approfondies. Les utilisateurs sont donc invités à charger et tester
+l'adéquation du logiciel à leurs besoins dans des conditions permettant
+d'assurer la sécurité de leurs systèmes et/ou de leurs données et, plus
+généralement, à l'utiliser et l'exploiter dans les mêmes conditions de
+sécurité. Ce contrat peut être reproduit et diffusé librement, sous
+réserve de le conserver en l'état, sans ajout ni suppression de clauses.
+
+Ce contrat est susceptible de s'appliquer à tout logiciel dont le
+titulaire des droits patrimoniaux décide de soumettre l'exploitation aux
+dispositions qu'il contient.
+
+
+ Article 1 - DEFINITIONS
+
+Dans ce contrat, les termes suivants, lorsqu'ils seront écrits avec une
+lettre capitale, auront la signification suivante:
+
+Contrat: désigne le présent contrat de licence, ses éventuelles versions
+postérieures et annexes.
+
+Logiciel: désigne le logiciel sous sa forme de Code Objet et/ou de Code
+Source et le cas échéant sa documentation, dans leur état au moment de
+l'acceptation du Contrat par le Licencié.
+
+Logiciel Initial: désigne le Logiciel sous sa forme de Code Source et
+éventuellement de Code Objet et le cas échéant sa documentation, dans
+leur état au moment de leur première diffusion sous les termes du Contrat.
+
+Logiciel Modifié: désigne le Logiciel modifié par au moins une
+Contribution.
+
+Code Source: désigne l'ensemble des instructions et des lignes de
+programme du Logiciel et auquel l'accès est nécessaire en vue de
+modifier le Logiciel.
+
+Code Objet: désigne les fichiers binaires issus de la compilation du
+Code Source.
+
+Titulaire: désigne le ou les détenteurs des droits patrimoniaux d'auteur
+sur le Logiciel Initial.
+
+Licencié: désigne le ou les utilisateurs du Logiciel ayant accepté le
+Contrat.
+
+Contributeur: désigne le Licencié auteur d'au moins une Contribution.
+
+Concédant: désigne le Titulaire ou toute personne physique ou morale
+distribuant le Logiciel sous le Contrat.
+
+Contribution: désigne l'ensemble des modifications, corrections,
+traductions, adaptations et/ou nouvelles fonctionnalités intégrées dans
+le Logiciel par tout Contributeur, ainsi que tout Module Interne.
+
+Module: désigne un ensemble de fichiers sources y compris leur
+documentation qui permet de réaliser des fonctionnalités ou services
+supplémentaires à ceux fournis par le Logiciel.
+
+Module Externe: désigne tout Module, non dérivé du Logiciel, tel que ce
+Module et le Logiciel s'exécutent dans des espaces d'adressage
+différents, l'un appelant l'autre au moment de leur exécution.
+
+Module Interne: désigne tout Module lié au Logiciel de telle sorte
+qu'ils s'exécutent dans le même espace d'adressage.
+
+GNU GPL: désigne la GNU General Public License dans sa version 2 ou
+toute version ultérieure, telle que publiée par Free Software Foundation
+Inc.
+
+Parties: désigne collectivement le Licencié et le Concédant.
+
+Ces termes s'entendent au singulier comme au pluriel.
+
+
+ Article 2 - OBJET
+
+Le Contrat a pour objet la concession par le Concédant au Licencié d'une
+licence non exclusive, cessible et mondiale du Logiciel telle que
+définie ci-après à l'article 5 pour toute la durée de protection des droits
+portant sur ce Logiciel.
+
+
+ Article 3 - ACCEPTATION
+
+3.1 L'acceptation par le Licencié des termes du Contrat est réputée
+acquise du fait du premier des faits suivants:
+
+ * (i) le chargement du Logiciel par tout moyen notamment par
+ téléchargement à partir d'un serveur distant ou par chargement à
+ partir d'un support physique;
+ * (ii) le premier exercice par le Licencié de l'un quelconque des
+ droits concédés par le Contrat.
+
+3.2 Un exemplaire du Contrat, contenant notamment un avertissement
+relatif aux spécificités du Logiciel, à la restriction de garantie et à
+la limitation à un usage par des utilisateurs expérimentés a été mis à
+disposition du Licencié préalablement à son acceptation telle que
+définie à l'article 3.1 ci dessus et le Licencié reconnaît en avoir pris
+connaissance.
+
+
+ Article 4 - ENTREE EN VIGUEUR ET DUREE
+
+
+ 4.1 ENTREE EN VIGUEUR
+
+Le Contrat entre en vigueur à la date de son acceptation par le Licencié
+telle que définie en 3.1.
+
+
+ 4.2 DUREE
+
+Le Contrat produira ses effets pendant toute la durée légale de
+protection des droits patrimoniaux portant sur le Logiciel.
+
+
+ Article 5 - ETENDUE DES DROITS CONCEDES
+
+Le Concédant concède au Licencié, qui accepte, les droits suivants sur
+le Logiciel pour toutes destinations et pour la durée du Contrat dans
+les conditions ci-après détaillées.
+
+Par ailleurs, si le Concédant détient ou venait à détenir un ou
+plusieurs brevets d'invention protégeant tout ou partie des
+fonctionnalités du Logiciel ou de ses composants, il s'engage à ne pas
+opposer les éventuels droits conférés par ces brevets aux Licenciés
+successifs qui utiliseraient, exploiteraient ou modifieraient le
+Logiciel. En cas de cession de ces brevets, le Concédant s'engage à
+faire reprendre les obligations du présent alinéa aux cessionnaires.
+
+
+ 5.1 DROIT D'UTILISATION
+
+Le Licencié est autorisé à utiliser le Logiciel, sans restriction quant
+aux domaines d'application, étant ci-après précisé que cela comporte:
+
+ 1. la reproduction permanente ou provisoire du Logiciel en tout ou
+ partie par tout moyen et sous toute forme.
+
+ 2. le chargement, l'affichage, l'exécution, ou le stockage du
+ Logiciel sur tout support.
+
+ 3. la possibilité d'en observer, d'en étudier, ou d'en tester le
+ fonctionnement afin de déterminer les idées et principes qui sont
+ à la base de n'importe quel élément de ce Logiciel; et ceci,
+ lorsque le Licencié effectue toute opération de chargement,
+ d'affichage, d'exécution, de transmission ou de stockage du
+ Logiciel qu'il est en droit d'effectuer en vertu du Contrat.
+
+
+ 5.2 DROIT D'APPORTER DES CONTRIBUTIONS
+
+Le droit d'apporter des Contributions comporte le droit de traduire,
+d'adapter, d'arranger ou d'apporter toute autre modification au Logiciel
+et le droit de reproduire le logiciel en résultant.
+
+Le Licencié est autorisé à apporter toute Contribution au Logiciel sous
+réserve de mentionner, de façon explicite, son nom en tant qu'auteur de
+cette Contribution et la date de création de celle-ci.
+
+
+ 5.3 DROIT DE DISTRIBUTION
+
+Le droit de distribution comporte notamment le droit de diffuser, de
+transmettre et de communiquer le Logiciel au public sur tout support et
+par tout moyen ainsi que le droit de mettre sur le marché à titre
+onéreux ou gratuit, un ou des exemplaires du Logiciel par tout procédé.
+
+Le Licencié est autorisé à distribuer des copies du Logiciel, modifié ou
+non, à des tiers dans les conditions ci-après détaillées.
+
+
+ 5.3.1 DISTRIBUTION DU LOGICIEL SANS MODIFICATION
+
+Le Licencié est autorisé à distribuer des copies conformes du Logiciel,
+sous forme de Code Source ou de Code Objet, à condition que cette
+distribution respecte les dispositions du Contrat dans leur totalité et
+soit accompagnée:
+
+ 1. d'un exemplaire du Contrat,
+
+ 2. d'un avertissement relatif à la restriction de garantie et de
+ responsabilité du Concédant telle que prévue aux articles 8
+ et 9,
+
+et que, dans le cas où seul le Code Objet du Logiciel est redistribué,
+le Licencié permette aux futurs Licenciés d'accéder facilement au Code
+Source complet du Logiciel en indiquant les modalités d'accès, étant
+entendu que le coût additionnel d'acquisition du Code Source ne devra
+pas excéder le simple coût de transfert des données.
+
+
+ 5.3.2 DISTRIBUTION DU LOGICIEL MODIFIE
+
+Lorsque le Licencié apporte une Contribution au Logiciel, les conditions
+de distribution du Logiciel Modifié en résultant sont alors soumises à
+l'intégralité des dispositions du Contrat.
+
+Le Licencié est autorisé à distribuer le Logiciel Modifié, sous forme de
+code source ou de code objet, à condition que cette distribution
+respecte les dispositions du Contrat dans leur totalité et soit
+accompagnée:
+
+ 1. d'un exemplaire du Contrat,
+
+ 2. d'un avertissement relatif à la restriction de garantie et de
+ responsabilité du Concédant telle que prévue aux articles 8
+ et 9,
+
+et que, dans le cas où seul le code objet du Logiciel Modifié est
+redistribué, le Licencié permette aux futurs Licenciés d'accéder
+facilement au code source complet du Logiciel Modifié en indiquant les
+modalités d'accès, étant entendu que le coût additionnel d'acquisition
+du code source ne devra pas excéder le simple coût de transfert des données.
+
+
+ 5.3.3 DISTRIBUTION DES MODULES EXTERNES
+
+Lorsque le Licencié a développé un Module Externe les conditions du
+Contrat ne s'appliquent pas à ce Module Externe, qui peut être distribué
+sous un contrat de licence différent.
+
+
+ 5.3.4 COMPATIBILITE AVEC LA LICENCE GNU GPL
+
+Le Licencié peut inclure un code soumis aux dispositions d'une des
+versions de la licence GNU GPL dans le Logiciel modifié ou non et
+distribuer l'ensemble sous les conditions de la même version de la
+licence GNU GPL.
+
+Le Licencié peut inclure le Logiciel modifié ou non dans un code soumis
+aux dispositions d'une des versions de la licence GNU GPL et distribuer
+l'ensemble sous les conditions de la même version de la licence GNU GPL.
+
+
+ Article 6 - PROPRIETE INTELLECTUELLE
+
+
+ 6.1 SUR LE LOGICIEL INITIAL
+
+Le Titulaire est détenteur des droits patrimoniaux sur le Logiciel
+Initial. Toute utilisation du Logiciel Initial est soumise au respect
+des conditions dans lesquelles le Titulaire a choisi de diffuser son
+oeuvre et nul autre n'a la faculté de modifier les conditions de
+diffusion de ce Logiciel Initial.
+
+Le Titulaire s'engage à ce que le Logiciel Initial reste au moins régi
+par le Contrat et ce, pour la durée visée à l'article 4.2.
+
+
+ 6.2 SUR LES CONTRIBUTIONS
+
+Le Licencié qui a développé une Contribution est titulaire sur celle-ci
+des droits de propriété intellectuelle dans les conditions définies par
+la législation applicable.
+
+
+ 6.3 SUR LES MODULES EXTERNES
+
+Le Licencié qui a développé un Module Externe est titulaire sur celui-ci
+des droits de propriété intellectuelle dans les conditions définies par
+la législation applicable et reste libre du choix du contrat régissant
+sa diffusion.
+
+
+ 6.4 DISPOSITIONS COMMUNES
+
+Le Licencié s'engage expressément:
+
+ 1. à ne pas supprimer ou modifier de quelque manière que ce soit les
+ mentions de propriété intellectuelle apposées sur le Logiciel;
+
+ 2. à reproduire à l'identique lesdites mentions de propriété
+ intellectuelle sur les copies du Logiciel modifié ou non.
+
+Le Licencié s'engage à ne pas porter atteinte, directement ou
+indirectement, aux droits de propriété intellectuelle du Titulaire et/ou
+des Contributeurs sur le Logiciel et à prendre, le cas échéant, à
+l'égard de son personnel toutes les mesures nécessaires pour assurer le
+respect des dits droits de propriété intellectuelle du Titulaire et/ou
+des Contributeurs.
+
+
+ Article 7 - SERVICES ASSOCIES
+
+7.1 Le Contrat n'oblige en aucun cas le Concédant à la réalisation de
+prestations d'assistance technique ou de maintenance du Logiciel.
+
+Cependant le Concédant reste libre de proposer ce type de services. Les
+termes et conditions d'une telle assistance technique et/ou d'une telle
+maintenance seront alors déterminés dans un acte séparé. Ces actes de
+maintenance et/ou assistance technique n'engageront que la seule
+responsabilité du Concédant qui les propose.
+
+7.2 De même, tout Concédant est libre de proposer, sous sa seule
+responsabilité, à ses licenciés une garantie, qui n'engagera que lui,
+lors de la redistribution du Logiciel et/ou du Logiciel Modifié et ce,
+dans les conditions qu'il souhaite. Cette garantie et les modalités
+financières de son application feront l'objet d'un acte séparé entre le
+Concédant et le Licencié.
+
+
+ Article 8 - RESPONSABILITE
+
+8.1 Sous réserve des dispositions de l'article 8.2, le Licencié a la
+faculté, sous réserve de prouver la faute du Concédant concerné, de
+solliciter la réparation du préjudice direct qu'il subirait du fait du
+Logiciel et dont il apportera la preuve.
+
+8.2 La responsabilité du Concédant est limitée aux engagements pris en
+application du Contrat et ne saurait être engagée en raison notamment:
+(i) des dommages dus à l'inexécution, totale ou partielle, de ses
+obligations par le Licencié, (ii) des dommages directs ou indirects
+découlant de l'utilisation ou des performances du Logiciel subis par le
+Licencié et (iii) plus généralement d'un quelconque dommage indirect. En
+particulier, les Parties conviennent expressément que tout préjudice
+financier ou commercial (par exemple perte de données, perte de
+bénéfices, perte d'exploitation, perte de clientèle ou de commandes,
+manque à gagner, trouble commercial quelconque) ou toute action dirigée
+contre le Licencié par un tiers, constitue un dommage indirect et
+n'ouvre pas droit à réparation par le Concédant.
+
+
+ Article 9 - GARANTIE
+
+9.1 Le Licencié reconnaît que l'état actuel des connaissances
+scientifiques et techniques au moment de la mise en circulation du
+Logiciel ne permet pas d'en tester et d'en vérifier toutes les
+utilisations ni de détecter l'existence d'éventuels défauts. L'attention
+du Licencié a été attirée sur ce point sur les risques associés au
+chargement, à l'utilisation, la modification et/ou au développement et à
+la reproduction du Logiciel qui sont réservés à des utilisateurs avertis.
+
+Il relève de la responsabilité du Licencié de contrôler, par tous
+moyens, l'adéquation du produit à ses besoins, son bon fonctionnement et
+de s'assurer qu'il ne causera pas de dommages aux personnes et aux biens.
+
+9.2 Le Concédant déclare de bonne foi être en droit de concéder
+l'ensemble des droits attachés au Logiciel (comprenant notamment les
+droits visés à l'article 5).
+
+9.3 Le Licencié reconnaît que le Logiciel est fourni "en l'état" par le
+Concédant sans autre garantie, expresse ou tacite, que celle prévue à
+l'article 9.2 et notamment sans aucune garantie sur sa valeur commerciale,
+son caractère sécurisé, innovant ou pertinent.
+
+En particulier, le Concédant ne garantit pas que le Logiciel est exempt
+d'erreur, qu'il fonctionnera sans interruption, qu'il sera compatible
+avec l'équipement du Licencié et sa configuration logicielle ni qu'il
+remplira les besoins du Licencié.
+
+9.4 Le Concédant ne garantit pas, de manière expresse ou tacite, que le
+Logiciel ne porte pas atteinte à un quelconque droit de propriété
+intellectuelle d'un tiers portant sur un brevet, un logiciel ou sur tout
+autre droit de propriété. Ainsi, le Concédant exclut toute garantie au
+profit du Licencié contre les actions en contrefaçon qui pourraient être
+diligentées au titre de l'utilisation, de la modification, et de la
+redistribution du Logiciel. Néanmoins, si de telles actions sont
+exercées contre le Licencié, le Concédant lui apportera son aide
+technique et juridique pour sa défense. Cette aide technique et
+juridique est déterminée au cas par cas entre le Concédant concerné et
+le Licencié dans le cadre d'un protocole d'accord. Le Concédant dégage
+toute responsabilité quant à l'utilisation de la dénomination du
+Logiciel par le Licencié. Aucune garantie n'est apportée quant à
+l'existence de droits antérieurs sur le nom du Logiciel et sur
+l'existence d'une marque.
+
+
+ Article 10 - RESILIATION
+
+10.1 En cas de manquement par le Licencié aux obligations mises à sa
+charge par le Contrat, le Concédant pourra résilier de plein droit le
+Contrat trente (30) jours après notification adressée au Licencié et
+restée sans effet.
+
+10.2 Le Licencié dont le Contrat est résilié n'est plus autorisé à
+utiliser, modifier ou distribuer le Logiciel. Cependant, toutes les
+licences qu'il aura concédées antérieurement à la résiliation du Contrat
+resteront valides sous réserve qu'elles aient été effectuées en
+conformité avec le Contrat.
+
+
+ Article 11 - DISPOSITIONS DIVERSES
+
+
+ 11.1 CAUSE EXTERIEURE
+
+Aucune des Parties ne sera responsable d'un retard ou d'une défaillance
+d'exécution du Contrat qui serait dû à un cas de force majeure, un cas
+fortuit ou une cause extérieure, telle que, notamment, le mauvais
+fonctionnement ou les interruptions du réseau électrique ou de
+télécommunication, la paralysie du réseau liée à une attaque
+informatique, l'intervention des autorités gouvernementales, les
+catastrophes naturelles, les dégâts des eaux, les tremblements de terre,
+le feu, les explosions, les grèves et les conflits sociaux, l'état de
+guerre...
+
+11.2 Le fait, par l'une ou l'autre des Parties, d'omettre en une ou
+plusieurs occasions de se prévaloir d'une ou plusieurs dispositions du
+Contrat, ne pourra en aucun cas impliquer renonciation par la Partie
+intéressée à s'en prévaloir ultérieurement.
+
+11.3 Le Contrat annule et remplace toute convention antérieure, écrite
+ou orale, entre les Parties sur le même objet et constitue l'accord
+entier entre les Parties sur cet objet. Aucune addition ou modification
+aux termes du Contrat n'aura d'effet à l'égard des Parties à moins
+d'être faite par écrit et signée par leurs représentants dûment habilités.
+
+11.4 Dans l'hypothèse où une ou plusieurs des dispositions du Contrat
+s'avèrerait contraire à une loi ou à un texte applicable, existants ou
+futurs, cette loi ou ce texte prévaudrait, et les Parties feraient les
+amendements nécessaires pour se conformer à cette loi ou à ce texte.
+Toutes les autres dispositions resteront en vigueur. De même, la
+nullité, pour quelque raison que ce soit, d'une des dispositions du
+Contrat ne saurait entraîner la nullité de l'ensemble du Contrat.
+
+
+ 11.5 LANGUE
+
+Le Contrat est rédigé en langue française et en langue anglaise, ces
+deux versions faisant également foi.
+
+
+ Article 12 - NOUVELLES VERSIONS DU CONTRAT
+
+12.1 Toute personne est autorisée à copier et distribuer des copies de
+ce Contrat.
+
+12.2 Afin d'en préserver la cohérence, le texte du Contrat est protégé
+et ne peut être modifié que par les auteurs de la licence, lesquels se
+réservent le droit de publier périodiquement des mises à jour ou de
+nouvelles versions du Contrat, qui posséderont chacune un numéro
+distinct. Ces versions ultérieures seront susceptibles de prendre en
+compte de nouvelles problématiques rencontrées par les logiciels libres.
+
+12.3 Tout Logiciel diffusé sous une version donnée du Contrat ne pourra
+faire l'objet d'une diffusion ultérieure que sous la même version du
+Contrat ou une version postérieure, sous réserve des dispositions de
+l'article 5.3.4.
+
+
+ Article 13 - LOI APPLICABLE ET COMPETENCE TERRITORIALE
+
+13.1 Le Contrat est régi par la loi française. Les Parties conviennent
+de tenter de régler à l'amiable les différends ou litiges qui
+viendraient à se produire par suite ou à l'occasion du Contrat.
+
+13.2 A défaut d'accord amiable dans un délai de deux (2) mois à compter
+de leur survenance et sauf situation relevant d'une procédure d'urgence,
+les différends ou litiges seront portés par la Partie la plus diligente
+devant les Tribunaux compétents de Paris.
+
+
+Version 2.0 du 2006-09-05.
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..078bc78
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,63 @@
+EXEC=ecoPrimer
+
+PRIMER_SRC= ecoprimer.c
+PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC))
+
+
+SRCS= $(PRIMER_SRC)
+
+LIB= -lecoPCR -lapat -lKMRK -lz -lm
+
+LIBFILE= libapat/libapat.a \
+ libecoPCR/libecoPCR.a \
+ libKMRK/libKMRK.a
+
+
+include global.mk
+
+all: $(EXEC)
+
+
+########
+#
+# ecoPrimer compilation
+#
+########
+
+# executable compilation and link
+
+ecoPrimer: $(PRIMER_OBJ) $(LIBFILE)
+ $(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
+
+
+########
+#
+# library compilation
+#
+########
+
+libapat/libapat.a:
+ $(MAKE) -C libapat
+
+libecoPCR/libecoPCR.a:
+ $(MAKE) -C libecoPCR
+
+libKMRK/libKMRK.a:
+ $(MAKE) -C libKMRK
+
+
+########
+#
+# project management
+#
+########
+
+clean:
+ rm -f *.o
+ rm -f $(EXEC)
+ $(MAKE) -C libapat clean
+ $(MAKE) -C libecoPCR clean
+ $(MAKE) -C libKMRK clean
+
+
+
\ No newline at end of file
diff --git a/src/ecoprimer.c b/src/ecoprimer.c
new file mode 100644
index 0000000..e69de29
diff --git a/src/global.mk b/src/global.mk
new file mode 100644
index 0000000..4f32e2e
--- /dev/null
+++ b/src/global.mk
@@ -0,0 +1,18 @@
+MACHINE=MAC_OS_X
+LIBPATH= -Llibapat -LlibecoPCR -LlibKMRK
+MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
+
+CC=gcc
+CFLAGS= -W -Wall -O2 -g
+
+default: all
+
+%.o: %.c
+ $(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $<
+
+%.P : %.c
+ $(MAKEDEPEND)
+ @sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' < $*.d > $@; \
+ rm -f $*.d; [ -s $@ ] || rm -f $@
+
+include $(SRCS:.c=.P)
\ No newline at end of file
diff --git a/src/libKMRK/KMRK.c b/src/libKMRK/KMRK.c
new file mode 100644
index 0000000..e768f75
--- /dev/null
+++ b/src/libKMRK/KMRK.c
@@ -0,0 +1,1009 @@
+#include "KMRK.h"
+
+#include "repseek_types.h"
+#include "memory.h"
+
+#include
+#include
+#include
+#include
+
+
+/********************************************
+ ********************************************
+ **
+ ** Declaration of static functions
+ **
+ ********************************************
+ ********************************************/
+
+
+/**
+ * @brief Allocate a couple of vector v and n of same size.
+ * Never return if memory space is not available.
+ *
+ * @param size size of the new allocated vector couple
+ * @param count number of sequence encodedable in the VN
+ * structure allocated
+ *
+ * @return a pointer to a vn_type structure
+ */
+
+static vn_type* new_vn(int32_t size,int32_t count);
+
+
+
+
+
+/**
+ * Set to 0 all elements of the n vector of a
+ * vn structure.
+ *
+ * @param x a pointer to a vn_type structure
+ */
+
+static void clearNVector(vn_type* x);
+
+
+/**
+ * Set to 0 all elements of the v vector of a
+ * vn structure.
+ *
+ * @param x a pointer to a vn_type structure
+ */
+
+static void clearVVector(vn_type* x);
+
+
+/* static int32_t *copyNVector(vn_type *x, int32_t *dest); */
+
+
+/**
+ * @brief Encode a nucleotide in a numeric format.
+ *
+ * @param nucleic a char value containing A,C,G or T.
+ * All others values induce 0 as result.
+ *
+ * @return an interger code corresponding to the nucleotide
+ */
+
+static int32_t code(char nucleic);
+
+
+/**
+ * @brief Construct linked list of similar symboles present in
+ * v part of x.
+ *
+ * Equivalent to filling of P stack in standard implementation
+ *
+ * Result is stored in n part of x.
+ *
+ * @param x a pointer to a vn_type structure
+ */
+
+static void phase1(vn_type* x);
+
+
+/* static void unroll1(vn_type* x); */
+
+
+static void phase2(vn_type* x, int32_t k);
+
+
+static void unroll2(vn_type* x);
+
+
+static void unstack(vn_type* x, int32_t k);
+
+static vn_type *hashSequence(char *sequence, int32_t lmax, int32_t *k,
+ masked_area_table_t *mask);
+
+static void clearNMark(vn_type *x);
+
+static vn_type* encodeSequence(char *sequence,
+ masked_area_table_t *mask);
+
+static void applyQuorum(vn_type *x,
+ int32_t count,
+ int32_t countSeq,
+ KMRK_QUORUM_FUNC_PTR(quorum));
+
+/********************************************
+ ********************************************
+ **
+ ** Defintion of static functions
+ **
+ ********************************************
+ ********************************************/
+
+
+
+static int32_t code(char nucleic)
+{
+
+ if(nucleic == 'A')
+ return 1;
+ if(nucleic == 'C')
+ return 2;
+ if(nucleic == 'G')
+ return 3;
+ if(nucleic == 'T')
+ return 4;
+
+ return 0;
+}
+
+
+/*static void displayVN(vn_type * x)
+{
+ int32_t i;
+
+ for (i=0; i < x->size; i++)
+ {
+ printf("%-4i => %-4i ; %-4i\n",i+1,x->v[i],x->n[i]);
+ }
+
+ printf("\n");
+}*/
+
+static void clearNVector(vn_type* x)
+{
+ memset(x->n,0,sizeof(int32_t)*x->size);
+};
+
+static void clearVVector(vn_type* x)
+{
+ memset(x->v,0,sizeof(int32_t)*x->size);
+};
+
+static void clearNMark(vn_type *x)
+{
+ int32_t i;
+ int32_t *n;
+ int32_t size;
+
+ n = x->n;
+ size = x->size;
+
+ for (i=0; i < size; i++)
+ if (n[i] < 0) n[i] = -n[i];
+
+}
+
+/*
+static int32_t *copyNVector(vn_type *x, int32_t *dest)
+{
+ int32_t* reponse;
+
+ if (dest)
+ reponse=dest;
+ else
+ KMRK_MALLOC(reponse,int32_t,x->size * sizeof(int32_t),
+ "copyNVector: I cannot not allocate a copy "
+ "of n vector");
+
+ memcpy(reponse,x->n,x->size*sizeof(int32_t));
+
+ return reponse;
+}
+*/
+
+static vn_type* new_vn(int32_t size,int32_t count)
+{
+ vn_type* reponse;
+
+ reponse = MyCalloc( 1, sizeof(vn_type)+sizeof(int32_t) * (count -1) , "vn_type: first calloc" );
+
+ reponse -> v = (int32_t *)MyCalloc(size, sizeof(int32_t),
+ "new_vn: I cannot not allocate a new VN structure");
+ reponse->n = (int32_t *)MyCalloc(size, sizeof(int32_t),
+ "new_vn: I cannot not allocate a new VN structure");
+
+
+ reponse->size = size;
+ reponse->seqCount= count;
+
+ return reponse;
+}
+
+
+
+static void phase1(vn_type* x)
+{
+
+ int32_t *v;
+ int32_t *n;
+ int32_t size;
+ int32_t i;
+ int32_t symbole;
+ int32_t oldlast;
+
+ v = GETVECTOR(x,v);
+ n = GETVECTOR(x,n);
+ size = x->size;
+
+ clearNVector(x);
+
+ for(i = 1; i <= size; i++)
+ {
+ symbole = GET(v,i);
+ if (symbole == i)
+
+ SET(n,i,i);
+
+ else if (symbole)
+ {
+ oldlast = GET(n,symbole);
+ SET(n,symbole,i);
+ SET(n,i,GET(n,oldlast));
+ SET(n,oldlast,i);
+ }
+ }
+
+}
+
+
+
+static void phase2(vn_type* x, int32_t k)
+{
+
+ int32_t *v; /* vector v of the stack 'x' */
+ int32_t *n; /* vector n of the stack 'x' */
+ int32_t size; /* size of the vector v of stack 'x' */
+ int32_t i;
+ int32_t j;
+ int32_t next;
+ int32_t jump;
+ int32_t pi;
+ int32_t nipi;
+ int32_t symbmax;
+
+ v = GETVECTOR(x,v);
+ n = GETVECTOR(x,n);
+ size = x->size - k;
+ symbmax = x->symbmax;
+
+ for (j=1; j <= symbmax; j++)
+ if (SYMBOLE(v,j) == j)
+ { /* debut de chaine rouge */
+ next = j;
+ do
+ {
+ i = next;
+ next = GET(n,i);
+
+ jump = SYMBOLE(v,i+k);
+
+ if (jump)
+ {
+ pi = GET(v,jump);
+ nipi = GET(n,pi);
+ if ((! IS_MARKED(n,pi)) ||
+ ( SYMBOLE(v,i) != SYMBOLE(v,nipi)) ||
+ ( jump != SYMBOLE(v,nipi+k)))
+ {
+ /* beginning of a green list */
+ SETMARKED(v,jump,i);
+ //MARK(v,jump);
+
+ SETMARKED(n,i,i);
+ //MARK(n,i);
+ }
+ else
+ {
+ SET(n,pi,i); /* le nouveau dernier */
+ SET(n,i,GET(n,nipi)); /* */
+ SET(n,nipi,i);
+ MARK(n,pi);
+ }
+ }
+
+ } while (( i != next ) &&
+ (next <= size));
+
+ }
+}
+
+
+static void unroll2(vn_type* x)
+{
+ int32_t *v;
+ int32_t *n;
+ int32_t size;
+ int32_t i;
+ int32_t second;
+ int32_t last;
+
+ v = GETVECTOR(x,v);
+ n = GETVECTOR(x,n);
+ size = x->size;
+
+ for(i = 1; i <= size; i++)
+ if (IS_MARKED(n,i))
+ {
+ last = GET(n,i);
+ second = GET(n,last);
+ SET(n,last,last);
+ SETMARKED(n,i,second);
+ }
+}
+
+
+
+static void unstack(vn_type* x, int32_t k)
+{
+ int32_t *v;
+ int32_t *n;
+ int32_t size;
+ int32_t i;
+ int32_t j;
+ int32_t classe=0;
+ int32_t next;
+
+
+ v = GETVECTOR(x,v);
+ n = GETVECTOR(x,n);
+ size = x->size;
+
+ clearVVector(x);
+
+ for(j = 1; j <= size; j++)
+ if (IS_MARKED(n,j))
+ {
+
+ classe=j;
+ next =j;
+
+ do
+ {
+
+ i = next;
+ next = GET(n,i);
+ SET(v,i,classe);
+
+ } while (i != next);
+ }
+
+ x->symbmax=classe;
+ x->size-=k;
+}
+
+
+static vn_type *hashSequence(char *sequence,
+ int32_t lmax,
+ int32_t *k,
+ masked_area_table_t *mask)
+{
+ vn_type *reponse;
+ int32_t ws;
+ int32_t clef;
+ int32_t *n;
+ int32_t *v;
+ int32_t size;
+ int32_t symbole;
+ int32_t word;
+ int32_t i;
+ int32_t maskZero;
+ int32_t maskClef;
+ int32_t zero;
+ int32_t count;
+ char* index;
+ int32_t symbmax;
+ int32_t posinseq;
+
+ posinseq=0;
+
+ size=0;
+ count=1;
+ symbmax = 0;
+ for (index=sequence; *index; index++)
+ {
+ size++;
+ if (*index == '@')
+ count++;
+ }
+
+ reponse = new_vn(size,count);
+
+ n = GETVECTOR(reponse,n);
+ v = reponse->v;
+
+ /* estimate hash word size */
+
+ ws = (int32_t)floor(log((double)size+1)/log(4.0));
+ if (ws > lmax)
+ ws=lmax;
+
+ if (ws <= 1)
+ {
+ *k=1;
+ return NULL;
+ }
+
+ maskClef=0;
+ maskZero=0;
+ clef =0;
+ zero =0;
+ count =0;
+
+ maskZero= (1 << ws) - 1;
+ maskClef= (1 << (ws*2)) - 1;
+
+
+ for (i=0; i < ws-1; i++)
+ {
+ if (sequence[i]=='@')
+ {
+ reponse->limits[count]=i;
+ count++;
+ posinseq=0;
+ }
+ symbole = code(sequence[i]);
+
+ if (KMRK_isMasked(mask,count,posinseq))
+ symbole=0;
+
+ clef <<= 2;
+ clef |= symbole-1;
+
+ zero <<= 1;
+ zero |= (symbole) ? 0:1;
+ posinseq++;
+ }
+
+ clef &= maskClef;
+ zero &= maskZero;
+
+ for (i=ws-1; i < size; i++)
+ {
+ if (sequence[i]=='@')
+ {
+ reponse->limits[count]=i;
+ count++;
+ posinseq=0;
+ }
+ symbole = code(sequence[i]);
+
+ if (KMRK_isMasked(mask,count,posinseq))
+ symbole=0;
+
+ clef <<= 2;
+ clef |= symbole-1;
+ clef &= maskClef;
+
+
+ zero <<= 1;
+ zero |= (symbole) ? 0:1;
+ zero &= maskZero;
+
+ word = (zero) ? 0:(clef+1);
+
+ if (word)
+ {
+ symbole = GET(n,word);
+ if (!symbole)
+ {
+ symbole = i+2-ws;
+ SET(n,word,symbole);
+ }
+ }
+ else
+ symbole=0;
+
+ if (symbole > symbmax)
+ symbmax = symbole;
+
+ v[i-ws+1]=symbole;
+ posinseq++;
+ }
+
+ reponse->limits[count]= size;
+ reponse->symbmax = symbmax;
+
+ for (i=reponse->size; i < size; i++)
+ v[i]=0;
+
+ *k=ws;
+
+ return reponse;
+}
+
+static vn_type* encodeSequence(char *sequence,masked_area_table_t *mask)
+{
+ int32_t size;
+ vn_type *reponse;
+ int32_t i;
+ int32_t nucleotide;
+ int32_t symbole;
+ int32_t *n;
+ int32_t *v;
+ int32_t count;
+ char* index;
+ int32_t symbmax;
+ int32_t posinseq;
+
+ size=0;
+ count=1;
+ posinseq=0;
+
+ for (index=sequence; *index; index++)
+ {
+ size++;
+ if (*index == '@')
+ count++;
+ }
+
+ reponse = new_vn(size,count);
+
+ n = GETVECTOR(reponse,n);
+ v = reponse->v;
+ count = 0;
+ symbmax = 0;
+
+ for (i=0; i < size; i++)
+ {
+ if (sequence[i]=='@')
+ {
+ reponse->limits[count]=i;
+ count++;
+ posinseq=0;
+ }
+ nucleotide = code(sequence[i]);
+
+ if (KMRK_isMasked(mask,count,posinseq))
+ nucleotide=0;
+
+
+ if (nucleotide)
+ {
+ symbole = GET(n,nucleotide);
+ if (!symbole)
+ {
+ symbole = i + 1;
+ SET(n,nucleotide,symbole);
+ }
+ }
+ else
+ symbole=0;
+
+ if (symbole > symbmax)
+ symbmax = symbole;
+
+ v[i]=symbole;
+ posinseq++;
+ }
+
+ reponse->limits[count]= size;
+ reponse->symbmax = symbmax;
+ return reponse;
+}
+
+
+static void applyQuorum(vn_type *x,
+ int32_t count,
+ int32_t countSeq,
+ KMRK_QUORUM_FUNC_PTR(quorum))
+{
+ int32_t *n;
+ int32_t *v;
+ int32_t size;
+ int32_t i;
+
+ n = GETVECTOR(x,n);
+ v = GETVECTOR(x,v);
+ size = x->size;
+
+ for (i=1; i < size; i++)
+ if ((IS_MARKED(n,i)) &&
+ (! quorum(x,i,count,countSeq)))
+ UNMARK(n,i);
+}
+
+
+
+
+/********************************************
+ ********************************************
+ **
+ ** Defintion of public functions
+ **
+ ********************************************
+ ********************************************/
+
+/*
+ Count the number of direct pairs in the KMR-k result
+ input 'x' is the input stack -- see KMRK.h
+ vector n is a chained list where each n[i] is the next occurence of i
+*/
+int32_t KMRK_upperCoupleCount(vn_type *x)
+{
+ int32_t i; /* i and j, dummy counters */
+ int32_t j;
+ int32_t *n; /* the n vector of stack 'x' */
+ int32_t size; /* its size */
+ int32_t reponse; /* number of pairs */
+ int32_t lllength; /* number of occurence */
+ int32_t next; /* the next occurence in vector 'n' of stack 'x' */
+ int32_t xmax; /* limit of the first sequence: from 1 to xmax */
+
+ xmax = x->limits[0];
+
+ reponse = 0;
+
+ n = GETVECTOR(x,n); /* set n as the vector x->n */
+ size = x->size; /* set its size */
+
+ for (j=1; j < xmax; j++) /* for all pos of the first sequence */
+ if (IS_MARKED(n,j)) /* check if it is marked */
+ {
+ next = j;
+ lllength = 0;
+ do
+ {
+ i = next;
+ next = GET(n,i); /* get the next occurence position */
+ lllength++;
+
+ } while ((next <= xmax) && (i != next)); /* while the next occurence are in direct seq *
+ * and not themselves */
+
+
+ reponse += lllength * (lllength - 1) / 2; /* lllength choose 2 */
+
+ }
+
+ return reponse;
+}
+
+
+/*
+ Count the number of inverted pairs in the KMR-k result
+ input 'x' is the input stack -- see KMRK.h
+ vector n is a chained list where each n[i] is the next occurence of i
+*/
+int32_t KMRK_upperInvertedCount(vn_type* x,int32_t wordsize)
+{
+ int32_t i; /* i and j, dummy counters */
+ int32_t j;
+ int32_t *n; /* the n vector of stack 'x' */
+ int32_t size; /* its size */
+ int32_t reponse; /* number of pairs with at least one inverted occurence */
+ int32_t direct; /* occurence in the direct sequence */
+ int32_t inverted; /* ccurence in the inverted sequence */
+ int32_t next; /* the next occurence in vector 'n' of stack 'x' */
+ int32_t posinv;
+ int32_t posdeb;
+
+ /*
+ if there was no inverted sequence, there is no inverted seeds
+ */
+ if ((x->seqCount == 1) ||
+ (! x->complement))
+ return 0;
+
+ reponse = 0;
+
+ n = GETVECTOR(x,n); /* set n as the vector n of stack 'x' */
+ size = x->size; /* set its size */
+
+ for (j=1; j <= x->limits[0]; j++) /* for all position of the first (direct) sequence */
+ if( IS_MARKED(n,j) ) /* if it is tagged (negative) */
+ {
+ posdeb = j;
+ next = j;
+ direct = 0;
+ inverted = 0;
+
+ do
+ {
+ i = next; /* set i as the position to check */
+ next = GET(n,i); /* next is the symbol n[i] -- pos of the next occurence */
+
+ if ((i > x->limits[0]) &&
+ (i < x->limits[1]) ) /* if it position is in seq inverted */
+ inverted++;
+ else
+ direct++;
+
+ } while (i != next); /* when the next occurence is the same pos, stop */
+
+ posinv = 2 * x->limits[0] - i -wordsize + 3;
+ if (inverted && (posinv == posdeb))
+ reponse += direct;
+ reponse += inverted * direct; /* the number of occurence for this seed. *
+ * NB: if inv is 0, there is no inv occurence */
+ }
+
+/* return (reponse+1)/2; .... weird ?? */
+
+ return reponse / 2;
+}
+
+int32_t KMRK_upperInterCount(vn_type* x,int32_t seq1,int32_t seq2,int32_t wordsize)
+{
+ int32_t i;
+ int32_t j;
+ int32_t *n;
+ int32_t size;
+ int32_t reponse;
+ int32_t seqcount1;
+ int32_t seqcount2;
+ int32_t next;
+
+ int min1,min2,max1,max2;
+
+ if (seq1 > seq2)
+ {
+ i=seq1;
+ seq1=seq2;
+ seq2=i;
+ }
+
+ if (seq1==seq2 && seq1 == 0)
+ return KMRK_upperCoupleCount(x);
+
+ if (seq1==0 && seq2==1 && x->complement)
+ return KMRK_upperInvertedCount(x,wordsize);
+
+ reponse = 0;
+
+ n = GETVECTOR(x,n);
+ size = x->size;
+
+ if (seq1==0)
+ min1=0;
+ else
+ min1 = x->limits[seq1-1];
+
+ max1 = x->limits[seq1];
+
+ min2 =x->limits[seq2-1];
+ max2 =x->limits[seq2];
+
+ for (j=1; j <= max1; j++)
+ if (IS_MARKED(n,j))
+ {
+ next = j;
+ seqcount1 = 0;
+ seqcount2 = 0;
+ do
+ {
+ i = next;
+ next = GET(n,i);
+ if ((i > min1) &&
+ (i <=max1))
+ seqcount1++;
+ else if ((i > min2) &&
+ (i <=max2))
+ seqcount2++;
+ } while ((i != next) && (next <= max2));
+
+ reponse += seqcount1 * seqcount2;
+ }
+
+ /*fprintf(stderr,"\ncouple count : %i\n",reponse);*/
+ return reponse;
+}
+
+
+void KMRK_markStart(vn_type* x)
+{
+ int32_t *v;
+ int32_t *n;
+ int32_t size;
+ int32_t i;
+
+ v = GETVECTOR(x,v);
+ n = GETVECTOR(x,n);
+ size = x->size;
+
+ for(i = 1; i <= size; i++)
+ if (SYMBOLE(v,i) == i)
+ MARK(n,i);
+ else
+ UNMARK(n,i);
+
+
+}
+
+
+int32_t KMRK_CoupleQuorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq)
+{
+ return (GET(GETVECTOR(x,n),pos) != pos) ? 1:0;
+}
+
+int32_t KMRK_DirInvQuorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq)
+{
+ return ((pos >= x->limits[0]) ||
+ (GET(GETVECTOR(x,n),pos) == pos)) ? 0:1;
+}
+
+int32_t KMRK_InvQuorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq)
+{
+ return ((pos < x->limits[0]) &&
+ (GET(GETVECTOR(x,n),pos) >= x->limits[0])) ? 1:0;
+}
+
+int32_t KMRK_Int12Quorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq)
+{
+ return ((pos < x->limits[0]) &&
+ (GET(GETVECTOR(x,n),pos) >= x->limits[0])) ? 1:0;
+}
+
+
+int32_t KMRK_IntInv12Quorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq)
+{
+ return ((pos < x->limits[1]) &&
+ (pos >= x->limits[0]) &&
+ (GET(GETVECTOR(x,n),pos) >= x->limits[1])) ? 1:0;
+}
+
+int32_t KMRK_IntDirInv12Quorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq)
+{
+ return ((pos < x->limits[1]) &&
+ (GET(GETVECTOR(x,n),pos) >= x->limits[1])) ? 1:0;
+}
+
+
+
+
+void KMRK_RunKMRK(vn_type *x,
+ int32_t k,
+ int32_t count,
+ int32_t countSeq,
+ KMRK_QUORUM_FUNC_PTR(quorum))
+{
+ clearNMark(x);
+ phase2(x,k);
+ applyQuorum(x,count,countSeq,quorum);
+ unroll2(x);
+ unstack(x,k);
+}
+
+vn_type *KMRK_HashOneSequence(char *sequence,
+ int32_t complement,
+ int32_t count,
+ int32_t countSeq,
+ int32_t *k,
+ KMRK_QUORUM_FUNC_PTR(quorum),
+ masked_area_table_t *mask)
+{
+
+ int32_t i;
+ int32_t *n;
+ int32_t *v;
+ int32_t size;
+ vn_type *reponse;
+ int32_t lmax;
+
+ lmax = *k;
+
+ reponse = hashSequence(sequence,lmax,k,mask);
+
+ if (!reponse)
+ {
+ reponse = encodeSequence(sequence,mask);
+ *k=1;
+ }
+
+ reponse->complement = complement;
+
+ phase1(reponse);
+
+ n = GETVECTOR(reponse,n);
+ v = GETVECTOR(reponse,v);
+ size = reponse->size;
+
+ for (i=1; i <= size; i++)
+ if (SYMBOLE(v,i) == i)
+ MARK(n,i);
+
+ applyQuorum(reponse,count,countSeq,quorum);
+
+ unroll2(reponse);
+ unstack(reponse,(*k)-1);
+
+ /* fprintf(stderr, "Hash up to size %d\n",*k); */
+
+ return reponse;
+}
+
+vn_type* KMRK_InitOneSequence(char *sequence,
+ int32_t complement,
+ int32_t count,
+ int32_t countSeq,
+ int32_t *k,
+ KMRK_QUORUM_FUNC_PTR(quorum),
+ masked_area_table_t *mask)
+{
+
+ vn_type *reponse;
+ int32_t i;
+ int32_t *n;
+ int32_t *v;
+ int32_t size;
+
+ reponse = encodeSequence(sequence,mask);
+ reponse->complement = complement;
+
+ phase1(reponse);
+
+ n = GETVECTOR(reponse,n);
+ v = GETVECTOR(reponse,v);
+ size = reponse->size;
+
+ for (i=1; i <= size; i++)
+ if (SYMBOLE(v,i) == i)
+ MARK(n,i);
+
+ applyQuorum(reponse,count,countSeq,quorum);
+
+ unroll2(reponse);
+ unstack(reponse,0);
+ *k=1;
+ return reponse;
+
+}
+
+
+
+vn_type *KMRK_RunTo(char *sequence,
+ int32_t size,
+ int32_t complement,
+ int32_t count,
+ int32_t countSeq,
+ KMRK_QUORUM_FUNC_PTR(quorum),
+ KMRK_INIT_FUNC_PTR(init,quorum),
+ masked_area_table_t *mask)
+{
+
+ int32_t k;
+ vn_type *reponse;
+
+ k=size;
+
+ reponse = init(sequence,complement,count,countSeq,&k,quorum,mask);
+
+ while(k <= size/2)
+ {
+ /*fprintf(stderr,"KMRK step %d\n",k);*/
+ KMRK_RunKMRK(reponse,k,count,countSeq,quorum);
+ k*=2;
+ }
+
+ if (k < size)
+ KMRK_RunKMRK(reponse,size - k,count,countSeq,quorum);
+
+ return reponse;
+}
+
+
+void KMRK_FreeVN(vn_type *x)
+{
+ if (x)
+ {
+ if (x->v) MyFree(x->v, x->size*sizeof(int32_t));
+ if (x->n) MyFree(x->n, x->size*sizeof(int32_t));
+ MyFree(x, 1*sizeof(vn_type));
+ }
+}
+
+
diff --git a/src/libKMRK/KMRK.h b/src/libKMRK/KMRK.h
new file mode 100644
index 0000000..fba733e
--- /dev/null
+++ b/src/libKMRK/KMRK.h
@@ -0,0 +1,309 @@
+#ifndef _KMRK_H_
+#define _KMRK_H_
+
+/********************************************
+ ********************************************
+ **
+ ** Declaration of struct
+ **
+ ********************************************
+ ********************************************/
+
+#include
+#include "repseek_types.h"
+#include "KMRK_mask.h"
+
+/**
+ * Structure used to manipulate simultanously
+ * the v and n vector
+ *
+ */
+
+typedef struct {
+
+ int32_t size; /**< size of vectors */
+ int32_t seqCount; /**< count of concatenated sequences */
+ int32_t complement; /**< if seqCount > 1 and complement !=0
+ * then second sequence is the inverted complement
+ * strand of first one */
+ int32_t symbmax;
+ int32_t* v; /**< sequence vector */
+ int32_t* n; /**< linked list vector */
+ int32_t limits[1]; /**< array of end limits of concatenated
+ * sequences in v (array size is seqCount) */
+} vn_type;
+
+
+
+/********************************************
+ ********************************************
+ **
+ ** Declaration of public macro
+ **
+ ********************************************
+ ********************************************/
+
+// Return a pointer to a vector from a vn_type structure
+#define GETVECTOR(x,vector) (((x)->vector) - 1)
+
+#define IS_MARKED(x,i) ((x)[i] < 0)
+#define MARK(x,i) ((x)[i]) = -ABS((x)[i])
+#define UNMARK(x,i) ((x)[i]) = ABS((x)[i])
+
+#define SET(x,i,v) ((x)[i]) = (v)
+
+// set and mark in one operation
+#define SETMARKED(x,i,v) ((x)[i]) = -(v)
+
+//internal macro
+#define GET(x,i) ABS((x)[i])
+
+// get symbole at position i in vector x
+#define SYMBOLE(x,i) ((IS_MARKED((x),(i))) ? (i): (GET(x,i)))
+
+
+
+/**
+ * Macro used to declare a pointer to a quorum function.
+ *
+ * @param name name of the pointer
+ *
+ */
+
+#define KMRK_QUORUM_FUNC_PTR(name) int32_t (*name)(vn_type* x, \
+ int32_t pos, \
+ int32_t count, \
+ int32_t countSeq)
+
+
+
+
+/**
+ * Macro used to declare a pointer to an initialisation function.
+ *
+ * @param name name of the pointer
+ * @param quorum name used for the quorum assiciated function
+ *
+ * @see KMRK_QUORUM_FUNC_PTR
+ *
+ */
+
+#define KMRK_INIT_FUNC_PTR(name,quorum) vn_type* (*name)(char *sequence, \
+ int32_t complement, \
+ int32_t count, \
+ int32_t countSeq, \
+ int32_t *k, \
+ KMRK_QUORUM_FUNC_PTR(quorum),\
+ masked_area_table_t *mask)
+
+
+/********************************************
+ ********************************************
+ **
+ ** Declaration of public functions
+ **
+ ********************************************
+ ********************************************/
+
+
+
+
+
+/**
+ * Initialise a vn_type record from one sequence to run KMRK algorithm
+ *
+ * @param sequence pointer to a C string containing the sequence
+ * @param complement != 0 means that seq one and two are the two strands of
+ * the same sequence.
+ * @param count parameter count passed to the quorun function
+ * @param countSeq parametter countSeq passed to the quorun function
+ * @param k length of the word represented by each symbole of v.
+ * k is an output parametter
+ * @param quorum pointer to a quorum function
+ *
+ * @return a pointer to vn_type structure correctly initialized
+ * to be used by KMRK_RunKMRK
+ *
+ * @see KMRK_HashOneSequence
+ */
+
+vn_type* KMRK_InitOneSequence(char *sequence,
+ int32_t complement,
+ int32_t count,
+ int32_t countSeq,
+ int32_t *k,
+ KMRK_QUORUM_FUNC_PTR(quorum),
+ masked_area_table_t *mask);
+
+
+
+
+
+/**
+ * Initialise a vn_type record from one sequence to run KMRK algorithme.
+ * In more than KMRK_InitOneSequence, KMRK_HashOneSequence construct
+ * word of len k with an hash algorithm. k used is a function of
+ * sequence size and alphabet size. If calculed k is superior to lmax
+ * then k = lmax.
+ *
+ * @param sequence pointer to a C string containing the sequence
+ * @param complement != 0 means that seq one and two are the two strands of
+ * the same sequence.
+ * @param count parametter count passed to the quorun function
+ * @param countSeq parametter countSeq passed to the quorun function
+ * @param k maximum length of the created word (input)
+ * length of the word represented by each symbole
+ * of v (output).
+ * @param quorum pointer to a quorum function
+ *
+ * @return a pointer to vn_type structure correctly initialized
+ * to be used by KMRK_RunKMRK
+ *
+ * @see KMRK_InitOneSequence
+ */
+
+vn_type* KMRK_HashOneSequence(char *sequence,
+ int32_t complement,
+ int32_t count,
+ int32_t countSeq,
+ int32_t *k,
+ KMRK_QUORUM_FUNC_PTR(quorum),
+ masked_area_table_t *mask);
+
+
+/**
+ * An example of quorum function testing than a factor is
+ * present a least two times. Because of definition of this
+ * quorum function count and countSeq parametter have no meanning
+ * in this instance of quorum function
+ *
+ * @param x a pointer to vn_type structure to check
+ * @param pos position in n of the beginning of the linked list to test
+ * @param count minimal number of occurence of factor
+ * @param countSeq minimal number of sequences concerned
+ *
+ * @return 1 if quorum is ok 0 otherwise.
+ */
+
+int32_t KMRK_CoupleQuorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq);
+
+
+/**
+ * An example of quorum function testing than a factor is
+ * present a least two times in the direct strand of a sequence or
+ * at least one time in the direct strand and one time in the reverse
+ * strand. Because of definition of this
+ * quorum function count and countSeq parametter have no meanning
+ * in this instance of quorum function
+ *
+ * @param x a pointer to vn_type structure to check
+ * @param pos position in n of the beginning of the linked list to test
+ * @param count minimal number of occurence of factor
+ * @param countSeq minimal number of sequences concerned
+ *
+ * @return 1 if quorum is ok 0 otherwise.
+ */
+
+int32_t KMRK_DirInvQuorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq);
+
+/**
+ * An example of quorum function testing than a factor is
+ * present a least one time in the direct strand and one time in the reverse
+ * strand. Because of definition of this
+ * quorum function count and countSeq parametter have no meanning
+ * in this instance of quorum function
+ *
+ * @param x a pointer to vn_type structure to check
+ * @param pos position in n of the beginning of the linked list to test
+ * @param count minimal number of occurence of factor
+ * @param countSeq minimal number of sequences concerned
+ *
+ * @return 1 if quorum is ok 0 otherwise.
+ */
+
+int32_t KMRK_InvQuorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq);
+
+int32_t KMRK_Int12Quorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq);
+
+int32_t KMRK_IntInv12Quorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq);
+
+int32_t KMRK_IntDirInv12Quorum(vn_type* x,
+ int32_t pos,
+ int32_t count,
+ int32_t countSeq);
+/**
+ * realize one cycle of KMR.
+ *
+ * @param x a pointer to vn_type created by an intialisation
+ * function or returned by this function.
+ * @param k step used to join two words
+ * @param count parametter count passed to the quorun function
+ * @param countSeq parametter countSeq passed to the quorun function
+ * @param KMRK_QUORUM_FUNC_PTR quorum pointer to a quorum function
+ */
+
+void KMRK_RunKMRK(vn_type *x,
+ int32_t k,
+ int32_t count,
+ int32_t countSeq,
+ KMRK_QUORUM_FUNC_PTR(quorum));
+
+
+
+
+/**
+ * realises serveral run of KMR cycle to make from a sequence
+ * a vn_type structure describing sequences of factors of a precise size.
+ *
+ * @param sequence pointer to a C string containing the sequence
+ * @param size word size to construct
+ * @param count parametter count passed to the quorun function
+ * @param countSeq parametter countSeq passed to the quorun function
+ * @param quorum pointer to a quorum function
+ * @param init pointer to a initialisation function
+ *
+ * @return a vn_type pointer to a structure containing sequences of factors
+ */
+
+vn_type *KMRK_RunTo(char *sequence,
+ int32_t size,
+ int32_t complement,
+ int32_t count,
+ int32_t countSeq,
+ KMRK_QUORUM_FUNC_PTR(quorum),
+ KMRK_INIT_FUNC_PTR(init,quorum),
+ masked_area_table_t *mask);
+
+
+
+/**
+ * free memory associated to a vn_type pointer
+ *
+ * @param x a pointer to vn_type structure
+ */
+
+void KMRK_FreeVN(vn_type *x);
+
+
+int32_t KMRK_upperCoupleCount(vn_type *x);
+int32_t KMRK_upperInvertedCount(vn_type* x,int32_t wordsize);
+int32_t KMRK_upperInterCount(vn_type* x,int32_t seq1,int32_t seq2,int32_t wordsize);
+
+void KMRK_markStart(vn_type* x);
+
+#endif //_KMRK_H_
diff --git a/src/libKMRK/KMRK_Seeds.c b/src/libKMRK/KMRK_Seeds.c
new file mode 100644
index 0000000..8083d5c
--- /dev/null
+++ b/src/libKMRK/KMRK_Seeds.c
@@ -0,0 +1,908 @@
+#include "KMRK_Seeds.h"
+#include "memory.h"
+#include
+#include
+#include "sequence.h"
+
+
+static void SetMultipleLenInvSeeds(SmallSeed_type* seeds,
+ int32_t nseeds,
+ int32_t wordSize,
+ int8_t same,
+ AllSeeds_type *PtrAllSeeds);
+
+
+/*
+ Concatenate
+ DirSeq\0InvSeq\0
+*/
+static char* makeDirInvSeq(char* seq, int32_t size)
+{
+ char *SeqInv;
+
+ seq = (char *)MyRealloc( (void *)seq, (size*2+2)*sizeof(char),
+ (size+1)* sizeof(char), "Cannot allocate space for reverse sequence");
+
+ SeqInv= seq + size + 1;
+ seq[size]=0;
+ invseq(seq, SeqInv);
+ seq[size]='@';
+ SeqInv[size]=0;
+
+ return seq;
+}
+
+/*
+ Merge the seq1 with seq2
+*/
+static char *merge2seq(char* seq1, char* seq2,
+ int32_t size1, int32_t size2)
+{
+ char * dest;
+
+ seq1 = (char *)MyRealloc((void *)seq1, (size1+size2+2) *sizeof(char),
+ (size1+1)*sizeof(char), "Cannot allocate space for reverse sequence");
+
+ dest = seq1 + size1 + 1;
+ seq1[size1]='@';
+ memcpy(dest,seq2,size2);
+ dest[size2]=0;
+
+ return seq1;
+}
+
+static int32_t dirDelta(SmallSeed_type* seed)
+{
+ return seed->pos2 - seed->pos1;
+}
+
+void KMRK_SetMultipleLenDirSeeds(SmallSeed_type* seeds,
+ int32_t nseeds,
+ int32_t wordSize,
+ AllSeeds_type *PtrAllSeeds)
+{
+
+ int32_t i,j; /* dummy counters j=kept seeds ; i = current seed */
+ int32_t curLen=wordSize; /* Length of the current seed */
+ int32_t delta;
+ SmallSeed_type *mainSeed;
+ SmallSeed_type *curSeed;
+ int32_t add;
+
+/* fprintf(stderr,"New Version\n");*/
+
+ KMRK_sortSeeds(seeds,nseeds,KMRK_cmpDeltaSeedsPos);
+
+ for(j=0,mainSeed=seeds ;
+ j < nseeds;
+ j=i,mainSeed=curSeed)
+ {
+ /* foreach seed */
+ delta = dirDelta(mainSeed);
+ curLen=wordSize;
+
+ for (i=j+1,curSeed=mainSeed+1;
+ i < nseeds &&
+ dirDelta(curSeed)==delta &&
+ (curSeed->pos1 - mainSeed->pos1) <= curLen;
+ i++,curSeed++)
+ {
+ add=wordSize - mainSeed->pos1 - curLen + curSeed->pos1;
+ if (add < 0) add = 0;
+ curLen+=add;
+ }
+
+ KMRK_pushSeed(PtrAllSeeds,
+ seeds[j].pos1,seeds[j].pos2,
+ curLen,
+ 1);
+ }
+}
+
+static int32_t invDelta(SmallSeed_type* seed)
+{
+ return seed->pos2 + seed->pos1;
+}
+
+static void SetMultipleLenInvSeeds(SmallSeed_type* seeds,
+ int32_t nseeds,
+ int32_t wordSize,
+ int8_t same,
+ AllSeeds_type *PtrAllSeeds)
+{
+
+ int32_t i,j; /* dummy counters j=kept seeds ; i = current seed */
+ int32_t curLen=wordSize; /* Length of the current seed */
+ int32_t delta;
+ int32_t pos2;
+ SmallSeed_type *mainSeed;
+ SmallSeed_type *curSeed;
+ int32_t add;
+
+/* fprintf(stderr,"New Version\n");*/
+
+ KMRK_sortSeeds(seeds,nseeds,KMRK_cmpDeltaInvSeedsPos);
+
+ for(j=0,mainSeed=seeds ;
+ j < nseeds;
+ j=i,mainSeed=curSeed)
+ {
+ /* foreach seed */
+ delta = invDelta(mainSeed);
+ curLen=wordSize;
+
+ for (i=j+1,curSeed=mainSeed+1;
+ i < nseeds &&
+ invDelta(curSeed)==delta &&
+ (curSeed->pos1 - mainSeed->pos1) <= curLen;
+ i++,curSeed++)
+ {
+ add=wordSize - mainSeed->pos1 - curLen + curSeed->pos1;
+ if (add < 0) add = 0;
+ curLen+=add;
+ }
+
+ if ( same && (seeds[j].pos1+curLen>= seeds[i-1].pos2))
+ {
+ curLen = (seeds[i-1].pos2 - seeds[j].pos1 + 1) * 2;
+ pos2 = seeds[j].pos1;
+ }
+ else
+ pos2 = seeds[i-1].pos2;
+
+ KMRK_pushSeed(PtrAllSeeds,
+ seeds[j].pos1,pos2,
+ curLen,
+ 0);
+ }
+}
+
+AllSeeds_type *KMRK_allocSeeds(AllSeeds_type *AllSeeds,
+ int32_t size,
+ int8_t opt_dir,
+ int8_t opt_inv)
+{
+
+ AllSeeds_type *reponse;
+
+ if(opt_inv != 1 && opt_dir != 1)
+ {
+ fprintf(stderr,"AllocSeeds: requiere at least "
+ "one of opt_dir and opt_inv to be 1\n");
+ exit(4);
+ }
+
+ reponse = AllSeeds;
+
+ if (!reponse)
+ reponse = MyCalloc( 1, sizeof(AllSeeds_type),"KMRK_allocSeeds: cannot allocate new data structure");
+
+ if(opt_dir)
+ {
+ if (reponse->dirSeeds==NULL)
+ reponse->dirSeeds = (Seed_type *)MyCalloc( size,sizeof(Seed_type),"KMRK_allocSeeds: cannot allocate new data structure");
+ else
+ {
+ if(size)
+ reponse->dirSeeds = (Seed_type *)MyRealloc( (void *)reponse->dirSeeds,
+ size*sizeof(Seed_type), reponse->cDirSeeds*sizeof(Seed_type),
+ "allocSeeds: cannot reallocate data structure" );
+ else
+ {
+ MyFree(reponse->dirSeeds, reponse->cDirSeeds*sizeof(Seed_type));
+ reponse->dirSeeds=NULL;
+ reponse->cDirSeeds=0;
+ }
+ }
+
+ reponse->cDirSeeds=size;
+ }
+
+
+ if(opt_inv)
+ {
+ if (reponse->invSeeds==NULL)
+ reponse->invSeeds = (Seed_type *)MyCalloc( size, sizeof(Seed_type),"KMRK_allocSeeds: cannot allocate new data structure");
+ else
+ {
+ if(size)
+ reponse->invSeeds = (Seed_type *)MyRealloc( (void *)reponse->invSeeds,
+ size*sizeof(Seed_type), reponse->cInvSeeds*sizeof(Seed_type),
+ "allocSeeds: cannot reallocate data structure" );
+ else
+ {
+ MyFree(reponse->invSeeds, reponse->cInvSeeds*sizeof(Seed_type));
+ reponse->invSeeds=NULL;
+ reponse->cInvSeeds=0;
+ }
+ }
+
+ reponse->cInvSeeds=size;
+ }
+
+
+ return reponse;
+}
+
+void KMRK_freeSeeds(AllSeeds_type *AllSeeds)
+{
+ if (!AllSeeds)
+ return;
+
+ if (AllSeeds->dirSeeds)
+ MyFree(AllSeeds->dirSeeds, AllSeeds->cDirSeeds*sizeof(Seed_type) );
+ AllSeeds->dirSeeds=NULL;
+
+ if (AllSeeds->invSeeds)
+ MyFree(AllSeeds->invSeeds, AllSeeds->cInvSeeds*sizeof(Seed_type) );
+ AllSeeds->dirSeeds=NULL;
+
+ MyFree(AllSeeds, 1*sizeof( AllSeeds_type ) );
+}
+
+void KMRK_compactSeeds(AllSeeds_type *AllSeeds)
+{
+ if (AllSeeds)
+ {
+ if (AllSeeds->dirSeeds)
+ KMRK_allocSeeds(AllSeeds,
+ AllSeeds->nDirSeeds,
+ 1,
+ 0);
+
+ if (AllSeeds->invSeeds)
+ KMRK_allocSeeds(AllSeeds,
+ AllSeeds->nInvSeeds,
+ 0,
+ 1);
+
+ }
+}
+
+
+void KMRK_pushSeed(AllSeeds_type *AllSeeds,
+ int32_t pos1,
+ int32_t pos2,
+ int32_t length,
+ int8_t dir)
+{
+ Seed_type* stack;
+ int32_t maxcount;
+ int32_t index;
+
+ if (dir)
+ {
+ dir = 1;
+ stack = AllSeeds->dirSeeds;
+ maxcount = AllSeeds->cDirSeeds;
+ index = AllSeeds->nDirSeeds;
+ }
+ else
+ {
+ dir = 0;
+ stack = AllSeeds->invSeeds;
+ maxcount = AllSeeds->cInvSeeds;
+ index = AllSeeds->nInvSeeds;
+ }
+
+ if (index == maxcount)
+ {
+ (void) KMRK_allocSeeds(AllSeeds,
+ maxcount * 2,
+ dir,
+ !dir);
+
+ if (dir)
+ stack = AllSeeds->dirSeeds;
+ else
+ stack = AllSeeds->invSeeds;
+ }
+
+ stack+=index;
+
+ stack->pos1 = pos1;
+ stack->pos2 = pos2;
+ stack->length = length;
+
+ if (dir)
+ AllSeeds->nDirSeeds++;
+ else
+ AllSeeds->nInvSeeds++;
+
+}
+
+
+
+
+AllSeeds_type* KMRK_enumerateDirectCouple(AllSeeds_type* Seeds,
+ int32_t expected,
+ int32_t wordSize,
+ vn_type* stack,
+ int32_t seq)
+{
+
+ int32_t xmin;
+ int32_t xmax;
+ int32_t i;
+ int32_t j;
+ int32_t k;
+ int32_t next;
+ int32_t* n;
+ int32_t nseeds;
+
+ SmallSeed_type *list;
+
+ list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) ,
+ "KMRK_enumerateDirectCouple: cannot allocate DirectCouple memory");
+
+ nseeds = 0;
+
+ n = GETVECTOR(stack,n);
+
+ if (seq)
+ xmin = stack->limits[seq-1];
+ else
+ xmin = 0;
+
+ xmax = stack->limits[seq];
+
+ for (i=1; i <= xmax; i++)
+ if (IS_MARKED(n,i)) /* Check begining of chained list */
+ {
+ /* Look for begining of sequence of interest */
+ for( ;(i <= xmin) && (i != GET(n,i));
+ i=GET(n,i));
+
+ /* for each factor in sequence of interest */
+ for (j=i;
+ (j != GET(n,j)) && (j <= xmax);
+ j = GET(n,j))
+ {
+ next = GET(n,j);
+ if (next <= xmax)
+ do
+ {
+ k = next;
+ next = GET(n,k);
+ list[nseeds].pos1 = j-1;
+ list[nseeds].pos2 = k-1;
+ nseeds++;
+ } while ((k!=next) && (next <= xmax));
+ }
+ } ;
+
+ Seeds = KMRK_allocSeeds(Seeds,
+ expected/20+1,
+ 1,0);
+
+/* fprintf(stderr,"Expected direct couple : %d\n",expected);*/
+
+ KMRK_SetMultipleLenDirSeeds(list,nseeds,wordSize,Seeds);
+ MyFree(list, expected*sizeof(SmallSeed_type) );
+ KMRK_compactSeeds(Seeds);
+
+ return Seeds;
+}
+
+
+/*
+ From KMR-K Stacks to SmallSeeds
+*/
+AllSeeds_type* KMRK_enumerateInvertedCouple(AllSeeds_type* Seeds,
+ int32_t expected,
+ int32_t wordSize,
+ vn_type* stack)
+{
+ int32_t xmax;
+ int32_t invmax;
+ int32_t posinv;
+ int32_t i;
+ int32_t j;
+ int32_t k;
+ int32_t memk;
+ int32_t* n;
+ int32_t next;
+
+ int32_t nseeds; /* number of seeds */
+
+ SmallSeed_type *list; /* seed list with only pos1 and pos2 --simple output from kmrk */
+
+
+ list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) ,
+ "KMRK_enumerateInvertedCouple: cannot allocate InvertedCouple memory");
+
+ nseeds = 0;
+
+ n = GETVECTOR(stack,n);
+
+ xmax = stack->limits[0];
+ invmax = stack->limits[1];
+
+ for (i=1; i <= xmax; i++)
+ if (IS_MARKED(n,i))
+ {
+ for(memk=i ;
+ (memk < xmax) &&
+ memk != GET(n,memk) &&
+ (memk <= invmax);
+ memk=GET(n,memk));
+
+ if ((memk > xmax) && (memk <= invmax))
+ for (j=i;
+ (j <= xmax) && (j != GET(n,j));
+ j=GET(n,j))
+ {
+ next = memk;
+ do
+ {
+ k = next;
+ next = GET(n,k);
+ posinv = 2 * xmax - k -wordSize + 3;
+ if (j <= posinv)
+ {
+ list[nseeds].pos1=j-1;
+ list[nseeds].pos2=posinv-1;
+ nseeds++;
+ }
+ } while ((next <= invmax) && (k != next));
+ }
+ }
+
+ Seeds = KMRK_allocSeeds(Seeds,
+ expected/20+1,
+ 0,1);
+
+/* fprintf(stderr,"Expected inverted couple : %d\n",expected);*/
+
+ SetMultipleLenInvSeeds(list,nseeds,wordSize,1,Seeds); /* turn the Small seeds into merged seeds */
+ MyFree(list, expected*sizeof(SmallSeed_type) );
+ KMRK_compactSeeds(Seeds);
+
+
+ return Seeds;
+}
+
+AllSeeds_type* KMRK_enumerateInterCouple(AllSeeds_type* Seeds,
+ int32_t seq1,
+ int32_t seq2,
+ int32_t expected,
+ int32_t wordSize,
+ vn_type* stack)
+{
+ int32_t xmin;
+ int32_t xmax;
+ int32_t ymax;
+ int32_t ymin;
+ int32_t pos1;
+ int32_t pos2;
+ int32_t i;
+ int32_t j;
+ int32_t k;
+ int32_t memj;
+ int32_t memk;
+ int32_t* n;
+ int32_t next;
+
+ int32_t nseeds;
+
+
+ SmallSeed_type *list;
+ nseeds=0;
+
+ list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) ,
+ "KMRK_enumerateInterCouple: cannot allocate InterCouple memory");
+
+ n = GETVECTOR(stack,n);
+
+ if (seq1==0)
+ xmin=0;
+ else
+ xmin = stack->limits[seq1-1];
+
+ xmax = stack->limits[seq1];
+ ymin = stack->limits[seq2-1];
+ ymax = stack->limits[seq2];
+
+ for (i=1; i <= xmax; i++)
+ if (IS_MARKED(n,i))
+ {
+ for(memj=i ;
+ (memj < xmin) &&
+ memj != GET(n,memj);
+ memj=GET(n,memj));
+
+ if ((memj > xmin) && (memj <= xmax))
+ {
+ for(memk=memj ;
+ (memk < ymin) &&
+ memk != GET(n,memk);
+ memk=GET(n,memk));
+
+ if ((memk > ymin) && (memk <= ymax))
+ for (j=memj;
+ (j <= xmax) && (j != GET(n,j));
+ j=GET(n,j))
+ {
+ next = memk;
+ do
+ {
+ k = next;
+ next = GET(n,k);
+ if (seq1 > 0)
+ pos1 = j - xmin - 1;
+ else
+ pos1 = j;
+ pos2 = k - ymin - 1;
+ list[nseeds].pos1 = pos1 - 1;
+ list[nseeds].pos2 = pos2 - 1;
+ nseeds++;
+ } while ((next <= ymax) && (k != next));
+ }
+ }
+ }
+
+
+ Seeds = KMRK_allocSeeds(Seeds,
+ expected/20+1,
+ 1,0);
+
+ /* fprintf(stderr,"Expected inter-direct couple : %d\n",expected);*/
+ KMRK_SetMultipleLenDirSeeds(list,nseeds,wordSize,Seeds);
+
+ MyFree(list, expected*sizeof(SmallSeed_type) );
+
+ KMRK_compactSeeds(Seeds);
+
+ return Seeds;
+}
+
+AllSeeds_type* KMRK_enumerateInterInvertedCouple(AllSeeds_type* Seeds,
+ int32_t seq2,
+ int32_t expected,
+ int32_t wordSize,
+ vn_type* stack)
+
+{
+ int32_t xmin;
+ int32_t xmax;
+ int32_t ymax;
+ int32_t ymin;
+ int32_t posinv;
+ int32_t pos2;
+ int32_t i;
+ int32_t j;
+ int32_t k;
+ int32_t memj;
+ int32_t memk;
+ int32_t* n;
+ int32_t next;
+
+ int32_t nseeds;
+
+ SmallSeed_type *list;
+
+ list = (SmallSeed_type *)MyCalloc( expected, sizeof(SmallSeed_type) ,
+ "KMRK_enumerateInterCouple: cannot allocate InterCouple memory");
+
+
+ nseeds = 0;
+ n = GETVECTOR(stack,n);
+
+ if (seq2 < 2)
+ {
+ fprintf(stderr,"enumerateInterInvertedCouple: seq2 must be differente to 0");
+ exit(4);
+ }
+
+
+ xmin = stack->limits[0];
+ xmax = stack->limits[1];
+
+ ymin = stack->limits[seq2-1];
+ ymax = stack->limits[seq2];
+
+ Seeds = KMRK_allocSeeds(Seeds,
+ expected,
+ 0,1);
+
+ for (i=1; i <= xmax; i++)
+ if (IS_MARKED(n,i))
+ {
+ for(memj=i ;
+ (memj < xmin) &&
+ memj != GET(n,memj);
+ memj=GET(n,memj));
+
+ if ((memj > xmin) && (memj <= xmax))
+ {
+ for(memk=memj ;
+ (memk < ymin) &&
+ memk != GET(n,memk);
+ memk=GET(n,memk));
+
+ if ((memk > ymin) && (memk <= ymax))
+ for (j=memj;
+ (j <= xmax) && (j != GET(n,j));
+ j=GET(n,j))
+ {
+ next = memk;
+ do
+ {
+ k = next;
+ next = GET(n,k);
+ posinv = 2 * xmin - j -wordSize + 3;
+ pos2 = k - ymin - 1;
+ list[nseeds].pos1=posinv-1;
+ list[nseeds].pos2=pos2-1;
+ nseeds++;
+ } while ((next <= ymax) && (k != next));
+ }
+ }
+ }
+
+ Seeds = KMRK_allocSeeds(Seeds,
+ expected/20+1,
+ 0,1);
+
+/* fprintf(stderr,"Expected inter-inverted couple : %d\n",expected);*/
+ SetMultipleLenInvSeeds(list,nseeds,wordSize,0,Seeds);
+ MyFree(list, expected* sizeof(SmallSeed_type) );
+ KMRK_compactSeeds(Seeds);
+
+
+ return Seeds;
+}
+
+
+int32_t KMRK_cmpSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2)
+{
+ if (s1->pos1==s2->pos1)
+ return s1->pos2 - s2->pos2;
+ else
+ return s1->pos1 - s2->pos1;
+}
+
+int32_t KMRK_cmpDeltaSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2)
+{
+ int32_t delta1 = s1->pos2-s1->pos1;
+ int32_t delta2 = s2->pos2-s2->pos1;
+
+ if (delta1==delta2)
+ return s1->pos1 - s2->pos1;
+ else
+ return delta1 - delta2;
+}
+
+int32_t KMRK_cmpDeltaInvSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2)
+{
+ int32_t delta1 = s1->pos2+s1->pos1;
+ int32_t delta2 = s2->pos2+s2->pos1;
+
+ if (delta1==delta2)
+
+ return s1->pos1 - s2->pos1;
+ else
+ return delta1 - delta2;
+}
+
+
+void KMRK_sortSeeds(SmallSeed_type* seeds,
+ int32_t nseeds,
+ KMRK_SORT_SEEDS_FUNC_PTR(compare))
+{
+
+ qsort(seeds,
+ nseeds,
+ sizeof(SmallSeed_type),
+ (int (*)(const void *, const void *))compare);
+}
+
+AllSeeds_type* KMRK_get_seeds(char **seq,
+ int32_t SimpleSeqLen,
+ int16_t Lmin,
+ int8_t opt_dir,
+ int8_t opt_inv,
+ int8_t opt_verbose,
+ masked_area_table_t *mask)
+{
+ AllSeeds_type* AllSeeds;
+ char *SeqDir = *seq;
+ vn_type * stacks;
+ int32_t dirExpect=0;
+ int32_t invExpect=0;
+ KMRK_QUORUM_FUNC_PTR(quorum);
+
+ if(opt_inv != 1 &&
+ opt_dir != 1)
+ {
+ fprintf(stderr,
+ "get_seeds: requiered at least "
+ "opt_dir or opt_inv to be 1\n");
+ exit(4);
+ }
+
+ if(opt_inv)
+ SeqDir = makeDirInvSeq(SeqDir,SimpleSeqLen); /* create a sequence with "DirSeq\0InvSeq\0" */
+
+ if (opt_inv){ /* Are we interested in dir, inv or both ? */
+ if (opt_dir)
+ quorum = KMRK_DirInvQuorum;
+ else
+ quorum = KMRK_InvQuorum;
+ }
+ else
+ quorum = KMRK_CoupleQuorum;
+
+ stacks = KMRK_RunTo(SeqDir,
+ Lmin,
+ opt_inv,
+ 2,
+ 1,
+ quorum,
+ KMRK_HashOneSequence,
+ mask);
+
+ invExpect =0;
+
+ KMRK_markStart(stacks);
+
+ if (opt_inv)
+ {
+ SeqDir = (char *)MyRealloc( (void *)SeqDir, (SimpleSeqLen+1)*sizeof(char),
+ (2*SimpleSeqLen+2)*sizeof(char) , "KRMK_get_seeds: Cannot shrink memory"); /* reset mem to a sigle sequence */
+ SeqDir[SimpleSeqLen]=0;
+ }
+
+ if(opt_inv)
+ invExpect = KMRK_upperInvertedCount(stacks,Lmin);
+
+ if(opt_dir)
+ dirExpect = KMRK_upperCoupleCount(stacks);
+
+
+ AllSeeds = NULL;
+
+
+ MyFree(stacks->v, stacks->size * sizeof(int32_t));
+ stacks->v=NULL;
+
+
+ if (opt_dir)
+ AllSeeds = KMRK_enumerateDirectCouple(AllSeeds,dirExpect,Lmin ,stacks,0);
+
+ if (opt_inv)
+ AllSeeds = KMRK_enumerateInvertedCouple(AllSeeds,invExpect,Lmin,stacks);
+
+ KMRK_FreeVN(stacks);
+
+ *seq = SeqDir;
+
+ return AllSeeds;
+}
+
+AllSeeds_type* KMRK_get_seeds_2seqs(char **seq1,
+ char **seq2,
+ int32_t size1,
+ int32_t size2,
+ int16_t Lmin,
+ int8_t opt_dir,
+ int8_t opt_inv,
+ int8_t opt_verbose,
+ masked_area_table_t *mask)
+{
+
+ AllSeeds_type* AllSeeds;
+ char *sequence1 = *seq1;
+ char *sequence2 = *seq2;
+ vn_type * stacks;
+ int32_t dirExpect=0;
+ int32_t invExpect=0;
+ KMRK_QUORUM_FUNC_PTR(quorum);
+ int32_t sizef;
+
+ if(opt_inv != 1 &&
+ opt_dir != 1)
+ {
+ fprintf(stderr,
+ "get_seeds_2seqs: requiered at least "
+ "opt_dir or opt_inv to be 1\n");
+ exit(4);
+ }
+
+ sizef = size1;
+
+ if(opt_inv)
+ {
+ sequence1 = makeDirInvSeq(sequence1,size1);
+ sizef+=(1+size1);
+ }
+
+ sequence1 = merge2seq(sequence1,sequence2,sizef,size2);
+
+ if (opt_inv)
+ if (opt_dir)
+ quorum = KMRK_IntDirInv12Quorum;
+ else
+ quorum = KMRK_IntInv12Quorum;
+ else
+ quorum = KMRK_Int12Quorum;
+
+ stacks = KMRK_RunTo(sequence1,
+ Lmin,
+ opt_inv,
+ 2,
+ 2,
+ quorum,
+ KMRK_HashOneSequence,
+ mask);
+
+ KMRK_markStart(stacks);
+
+ sequence1= (char *)MyRealloc(
+ (void *)sequence1,
+ (size1+1)*sizeof(char),
+ (sizef+size2+2)*sizeof(char),
+ "KMRK_get_seeds_2seqs: shrink memory from 3N to 1N... ???");
+
+ sequence1[size1]=0;
+
+ if (opt_dir){
+ if (opt_inv)
+ dirExpect = KMRK_upperInterCount(stacks,0,2,Lmin);
+ else
+ dirExpect = KMRK_upperInterCount(stacks,0,1,Lmin);
+ }
+ if (opt_inv)
+ invExpect = KMRK_upperInterCount(stacks,1,2,Lmin);
+
+ AllSeeds = NULL;
+ MyFree(stacks->v, stacks->size*sizeof(int32_t));
+ stacks->v=NULL;
+
+ if (opt_dir){
+ if (opt_inv)
+ AllSeeds = KMRK_enumerateInterCouple(AllSeeds,
+ 0,2,
+ dirExpect,
+ Lmin ,
+ stacks);
+ else
+ AllSeeds = KMRK_enumerateInterCouple(AllSeeds,
+ 0,1,
+ dirExpect,
+ Lmin ,
+ stacks);
+
+ }
+ if (opt_inv)
+ AllSeeds = KMRK_enumerateInterInvertedCouple(AllSeeds,
+ 2,
+ invExpect,
+ Lmin ,
+ stacks);
+
+
+ KMRK_FreeVN(stacks);
+
+ *seq1 = sequence1;
+
+ return AllSeeds;
+}
+
+#define SIGN(x) (((x)<0) ? -1:(((x)>0) ? 1:0))
+
+static int32_t compareSeedsByPos(Seed_type* s1,Seed_type* s2)
+{
+ if (s1->pos1 == s2->pos1)
+ return SIGN(s1->pos2 - s2->pos2);
+ else
+ return SIGN(s1->pos1 - s2->pos1);
+}
+
+void KMRK_sortSeedsByPos(Seed_type* seeds, int32_t count)
+{
+ qsort(seeds,
+ count,
+ sizeof(Seed_type),
+ (int (*)(const void *, const void *))compareSeedsByPos);
+};
diff --git a/src/libKMRK/KMRK_Seeds.h b/src/libKMRK/KMRK_Seeds.h
new file mode 100644
index 0000000..a97fcab
--- /dev/null
+++ b/src/libKMRK/KMRK_Seeds.h
@@ -0,0 +1,126 @@
+#ifndef KMRK_Seeds_h
+#define KMRK_Seeds_h
+
+/********************************************
+ ********************************************
+ **
+ ** Declaration of struct
+ **
+ ********************************************
+ ********************************************/
+
+#include
+#include
+#include "KMRK.h"
+
+
+
+
+#define KMRK_SORT_SEEDS_FUNC_PTR(name) int32_t (*name)(SmallSeed_type*, \
+ SmallSeed_type*)
+
+#define KMRK_DELTA_SEEDS_FUNC_PTR(name) int32_t (*name)(SmallSeed_type*)
+
+/********************************************
+ ********************************************
+ **
+ ** Declaration of public functions
+ **
+ ********************************************
+ ********************************************/
+
+
+AllSeeds_type *KMRK_allocSeeds(AllSeeds_type *AllSeeds,
+ int32_t size,
+ int8_t opt_dir,
+ int8_t opt_inv);
+
+void KMRK_SetMultipleLenDirSeeds(SmallSeed_type* seeds,
+ int32_t nseeds,
+ int32_t wordSize,
+ AllSeeds_type *PtrAllSeeds);
+
+
+void KMRK_freeSeeds(AllSeeds_type *AllSeeds);
+
+void KMRK_compactSeeds(AllSeeds_type *AllSeeds);
+
+void KMRK_pushSeed(AllSeeds_type *AllSeeds,
+ int32_t pos1,
+ int32_t pos2,
+ int32_t length,
+ int8_t dir);
+
+AllSeeds_type* KMRK_enumerateDirectCouple(AllSeeds_type* Seeds,
+ int32_t expected,
+ int32_t wordSize,
+ vn_type* stack,
+ int32_t seq);
+
+AllSeeds_type* KMRK_enumerateInvertedCouple(AllSeeds_type* Seeds,
+ int32_t expected,
+ int32_t wordSize,
+ vn_type* stack);
+
+AllSeeds_type* KMRK_enumerateInterCouple(AllSeeds_type* Seeds,
+ int32_t seq1,
+ int32_t seq2,
+ int32_t expected,
+ int32_t wordSize,
+ vn_type* stack);
+
+AllSeeds_type* KMRK_enumerateInterInvertedCouple(AllSeeds_type* Seeds,
+ int32_t seq2,
+ int32_t expected,
+ int32_t wordSize,
+ vn_type* stack);
+
+
+/**
+ * Compare two seeds and return an integer less than, equal to or greater
+ * than zero considering the relative order of the two seeds. This
+ * version take into account only pos1 and pos2 of seeds without taking
+ * account of the sequences or of the relative direction
+ *
+ * @param s1 pointer to seed one
+ * @param s2 pointer to seed two
+ *
+ * @return a integer less than, equal to or greater than zero
+ */
+
+int32_t KMRK_cmpSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2);
+int32_t KMRK_cmpDeltaSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2);
+int32_t KMRK_cmpDeltaInvSeedsPos(SmallSeed_type *s1, SmallSeed_type *s2);
+
+void KMRK_sortSeeds(SmallSeed_type* seeds,
+ int32_t nseeds,
+ KMRK_SORT_SEEDS_FUNC_PTR(compare));
+
+
+AllSeeds_type* KMRK_get_seeds(char **seq,
+ int32_t SimpleSeqLen,
+ int16_t Lmin,
+ int8_t opt_dir,
+ int8_t opt_inv,
+ int8_t opt_verbose,
+ masked_area_table_t *mask);
+
+AllSeeds_type* KMRK_get_seeds_2seqs(char **seq1,
+ char **seq2,
+ int32_t size1,
+ int32_t size2,
+ int16_t Lmin,
+ int8_t opt_dir,
+ int8_t opt_inv,
+ int8_t opt_verbose,
+ masked_area_table_t *mask);
+
+/**
+ * Order an array of seeds by pos1,pos2
+ *
+ * @param seeds pointer to an array of Seed_type object to sort
+ * @param count count of element in the array
+ */
+void KMRK_sortSeedsByPos(Seed_type* seeds, int32_t count);
+
+#endif /* KMRK_Seeds_h */
diff --git a/src/libKMRK/KMRK_mask.c b/src/libKMRK/KMRK_mask.c
new file mode 100644
index 0000000..2f92540
--- /dev/null
+++ b/src/libKMRK/KMRK_mask.c
@@ -0,0 +1,259 @@
+/*
+ * KMRK_mask.c
+ * repseek
+ *
+ * Created by Eric Coissac on 04/12/04.
+ * Copyright 2004 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#include "KMRK_mask.h"
+#include
+#include
+#include "memory.h"
+
+#define MASKED_AREA_TABLE_SIZE(seqcount) (sizeof(masked_area_table_t) + (sizeof(masked_area_list_t*) * ((seqcount)-1)))
+#define MASKED_AREA_LIST_SIZE(areacount) (sizeof(masked_area_list_t) + (sizeof(masked_area_t) * ((areacount)-1)))
+
+#define AREA_COUNT_INIT (1000)
+
+static masked_area_table_t *new_masked_area_table(int32_t seqcount, int32_t areacount);
+static masked_area_list_t *new_masked_area_list(int32_t areacount);
+static masked_area_list_t *realloc_masked_area_list(masked_area_list_t *list,int32_t areacount);
+static int32_t push_area(masked_area_table_t* table,int32_t sequence,int32_t begin,int32_t end);
+static void sort_area_table(masked_area_table_t* table);
+static int32_t compare_area(const masked_area_t* v1,const masked_area_t* v2);
+static int32_t search_area(const masked_area_t* v1,const masked_area_t* v2);
+
+static masked_area_list_t *strip_area_list(masked_area_list_t* list);
+static void strip_area_table(masked_area_table_t* table);
+
+
+static masked_area_list_t *new_masked_area_list(int32_t areacount)
+{
+ masked_area_list_t *list;
+
+ list = MyCalloc(1,MASKED_AREA_LIST_SIZE(areacount),"Not enougth memory for mask table");
+ list->reserved=areacount;
+
+ return list;
+}
+
+static masked_area_list_t *realloc_masked_area_list(masked_area_list_t *list,int32_t areacount)
+{
+ list = MyRealloc(list,
+ MASKED_AREA_LIST_SIZE(areacount),
+ MASKED_AREA_LIST_SIZE(list->reserved),
+ "Not enougth memory for mask table");
+
+ list->reserved=areacount;
+
+ return list;
+}
+
+static masked_area_table_t *new_masked_area_table(int32_t seqcount, int32_t areacount)
+{
+ masked_area_table_t *table;
+ int32_t i;
+
+ table = MyCalloc(1,MASKED_AREA_TABLE_SIZE(seqcount),"Not enougth memory for mask table");
+ table->seqcount=seqcount;
+
+ for (i=0;isequence[i]=new_masked_area_list(areacount);
+
+ return table;
+}
+
+static int32_t push_area(masked_area_table_t* table,int32_t sequence,int32_t begin,int32_t end)
+{
+ masked_area_list_t * list;
+
+ if (sequence >= table->seqcount)
+ return -1;
+
+ list = table->sequence[sequence];
+
+ if (list->reserved == list->count)
+ {
+ list = realloc_masked_area_list(list,list->reserved*2);
+ table->sequence[sequence]=list;
+ }
+
+ list->area[list->count].begin=begin;
+ list->area[list->count].end=end;
+
+ list->count++;
+ table->total++;
+
+ return table->total;
+}
+
+static int32_t compare_area(const masked_area_t* v1,const masked_area_t* v2)
+{
+ return v1->begin - v2->begin;
+}
+
+static void sort_area_table(masked_area_table_t* table)
+{
+ int32_t i;
+
+ for (i=0; iseqcount;i++)
+ {
+ qsort(table->sequence[i]->area,
+ table->sequence[i]->count,
+ sizeof(masked_area_t),
+ (int (*)(const void *, const void *))compare_area);
+ }
+}
+
+static masked_area_list_t *strip_area_list(masked_area_list_t* list)
+{
+ int32_t i;
+ int32_t j;
+ int32_t count;
+ int32_t newcount;
+
+ count = list->count;
+ newcount=count;
+
+ for (i=1;i%d %d->%d ==>",list->area[i-1].begin,list->area[i-1].end,list->area[i].begin,list->area[i].end); */
+ if ((list->area[i].begin-1) <= list->area[i-1].end)
+ {
+ /* fprintf(stderr," joined"); */
+ list->area[i].begin=list->area[i-1].begin;
+ list->area[i-1].begin=-1;
+ newcount--;
+ }
+ }
+ list->count=newcount;
+
+ for (i=0,j=0;iarea[i].begin>=0)
+ {
+ if (i!=j)
+ list->area[j]=list->area[i];
+ j++;
+ }
+ }
+
+ return realloc_masked_area_list(list,newcount);
+
+}
+
+static void strip_area_table(masked_area_table_t* table)
+{
+ int32_t seq;
+ int32_t oldcount;
+ masked_area_list_t* list;
+
+ sort_area_table(table);
+
+ for (seq=0; seq < table->seqcount;seq++)
+ {
+ list = table->sequence[seq];
+ oldcount = list->count;
+ table->sequence[seq]=strip_area_list(list);
+ table->total-=oldcount - table->sequence[seq]->count;
+ }
+}
+
+
+static int32_t search_area(const masked_area_t* v1,const masked_area_t* v2)
+{
+ int32_t pos;
+
+ pos = v1->begin;
+
+ if (pos < v2->begin)
+ return -1;
+
+ if (pos > v2->end)
+ return 1;
+
+ return 0;
+}
+
+
+masked_area_table_t *KMRK_ReadMaskArea(char *areafile,int32_t seqcount,int32_t complement)
+{
+ FILE* area;
+ char buffer[1000];
+ char *ok;
+ int32_t begin;
+ int32_t end;
+ int32_t sequence;
+ int32_t column;
+ int32_t linecount;
+ masked_area_table_t *table;
+
+ if (complement > 0)
+ seqcount++;
+ else
+ complement=0;
+
+ area = fopen(areafile,"r");
+ linecount=0;
+ table=new_masked_area_table(seqcount,AREA_COUNT_INIT);
+
+ do {
+ linecount++;
+ ok = fgets(buffer,999,area);
+ if (ok)
+ {
+ column = sscanf(buffer,"%d %d %d",&begin,&end,&sequence);
+ if (column > 1 && begin <= end)
+ {
+ begin--;
+ end--;
+ if (column==3)
+ sequence--;
+ else
+ sequence=0;
+
+ if (sequence && complement)
+ sequence++;
+
+ push_area(table,sequence,begin,end);
+
+ if (!sequence && complement)
+ push_area(table,1,complement -1 - end,complement -1 -begin);
+
+ }
+
+ if (column==1)
+ fprintf(stderr,"WARNING in mask file reading line %d\n",linecount);
+
+ }
+ } while (ok);
+
+ fprintf(stderr,"\nread %d masked areas from file\n",table->total);
+ strip_area_table(table);
+ fprintf(stderr,"strip to %d non overlaping areas\n",table->total);
+
+ return table;
+}
+
+char KMRK_isMasked(masked_area_table_t *mask,int32_t seq, int32_t position)
+{
+ masked_area_t input;
+ int32_t result;
+ masked_area_list_t *list;
+
+ if (! mask || (seq >= mask->seqcount))
+ return 0;
+
+ list = mask->sequence[seq];
+ input.begin=position;
+
+ result = bsearch(&input,
+ list->area,
+ list->count,
+ sizeof(masked_area_t),
+ (int (*)(const void *, const void *))search_area) != NULL;
+
+ return result;
+}
\ No newline at end of file
diff --git a/src/libKMRK/KMRK_mask.h b/src/libKMRK/KMRK_mask.h
new file mode 100644
index 0000000..3f4c8f3
--- /dev/null
+++ b/src/libKMRK/KMRK_mask.h
@@ -0,0 +1,37 @@
+/*
+ * KMRK_mask.h
+ * repseek
+ *
+ * Created by Eric Coissac on 04/12/04.
+ * Copyright 2004 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#include
+
+#ifndef _KMRK_MASK_H_
+#define _KMRK_MASK_H_
+
+typedef struct {
+ int32_t begin;
+ int32_t end;
+} masked_area_t;
+
+typedef struct {
+ int32_t reserved;
+ int32_t count;
+
+ masked_area_t area[1];
+} masked_area_list_t;
+
+typedef struct {
+ int32_t seqcount;
+ int32_t total;
+
+ masked_area_list_t *sequence[1];
+} masked_area_table_t;
+
+masked_area_table_t *KMRK_ReadMaskArea(char *areafile,int32_t seqcount,int32_t complement);
+char KMRK_isMasked(masked_area_table_t *mask,int32_t seq, int32_t position);
+
+#endif
\ No newline at end of file
diff --git a/src/libKMRK/KMRK_merge_seeds.c b/src/libKMRK/KMRK_merge_seeds.c
new file mode 100644
index 0000000..768ccb9
--- /dev/null
+++ b/src/libKMRK/KMRK_merge_seeds.c
@@ -0,0 +1,123 @@
+/**
+ * @file KMRK_merge_seeds.c
+ * @author Eric Coissac
+ * @date Wed Mar 3 11:15:57 2004
+ *
+ * @brief Merge function of overlapping seeds
+ *
+ *
+ */
+
+#include "KMRK_merge_seeds.h"
+
+void KMRK_MergeSeeds(AllSeeds_type *AllSeeds,
+ int8_t opt_dir,
+ int8_t opt_inv)
+{
+
+ int32_t i; /* the current seed */
+ int32_t j; /* the checked seed */
+
+ int32_t N; /* the kept ones */
+ Seed_type* seeds;
+
+
+ if(opt_dir){
+ seeds = AllSeeds->dirSeeds;
+ for(i=0, N=0 ;inDirSeeds; i++){
+
+ if(seeds[i].pos1==-1) /* any seed at -1 is removed */
+ continue;
+
+
+ j=i+1;
+
+ while( (seeds[j].pos1!=-1) &&
+ (seeds[i].pos1!=-1) &&
+ (j < AllSeeds->nDirSeeds) &&
+ (seeds[j].pos1 < seeds[i].pos1+ seeds[i].length))
+ {
+
+ if(
+ ((seeds[j].pos2 >= seeds[i].pos2) &&
+ (seeds[j].pos2 < seeds[i].pos2 + seeds[i].length)) || /* if the seeds are overlapping */
+ ((seeds[j].pos2 + seeds[j].length >= seeds[i].pos2) &&
+ (seeds[j].pos2 + seeds[j].length < seeds[i].pos2 + seeds[i].length)))
+ {
+ if(seeds[j].length <= seeds[i].length)
+ seeds[j].pos1=seeds[j].pos2=seeds[j].length=-1; /* removed the smallest */
+ else
+ seeds[i].pos1=seeds[i].pos2=seeds[i].length=-1;
+ }
+
+ j++;
+ }
+
+ if(seeds[i].pos1 !=-1)
+ { /* if this seed is not out, store it */
+
+ seeds[N].pos1 = seeds[i].pos1;
+ seeds[N].pos2 = seeds[i].pos2;
+ seeds[N].length = seeds[i].length;
+ N++;
+ }
+
+ }
+
+ AllSeeds->nFilteredDirSeeds += AllSeeds->nDirSeeds-N;
+ AllSeeds->nDirSeeds=N;
+
+
+ }
+
+ if(opt_inv){
+ seeds = AllSeeds->invSeeds;
+
+ for(i=0, N=0 ;inInvSeeds; i++){
+
+ if(seeds[i].pos1==-1)
+ continue;
+
+
+ j=i+1;
+
+ while( (seeds[j].pos1!=-1 ) &&
+ (seeds[i].pos1!=-1 ) &&
+ (j < AllSeeds->nInvSeeds) &&
+ (seeds[j].pos1 < seeds[i].pos1+seeds[i].length))
+ {
+
+ if(
+ ((seeds[j].pos2 >= seeds[i].pos2) && /* if the seeds are overlapping */
+ (seeds[j].pos2 < seeds[i].pos2+seeds[i].length)) ||
+ ((seeds[j].pos2 + seeds[j].length >= seeds[i].pos2) &&
+ (seeds[j].pos2 + seeds[j].length < seeds[i].pos2+seeds[i].length)))
+ {
+
+ if(seeds[j].length <= seeds[i].length)
+ seeds[j].pos1=seeds[j].pos2=seeds[j].length=-1; /* removed the smallest */
+ else
+ seeds[i].pos1=seeds[i].pos2=seeds[i].length=-1;
+ }
+
+ j++;
+ }
+
+ if(seeds[i].pos1!=-1)
+ { /* if this seed is not out, store it */
+
+ seeds[N].pos1 = seeds[i].pos1;
+ seeds[N].pos2 = seeds[i].pos2;
+ seeds[N].length = seeds[i].length;
+ N++;
+ }
+
+ }
+
+ AllSeeds->nFilteredInvSeeds += AllSeeds->nInvSeeds-N;
+ AllSeeds->nInvSeeds=N;
+ }
+
+ KMRK_compactSeeds(AllSeeds);
+
+}
diff --git a/src/libKMRK/KMRK_merge_seeds.h b/src/libKMRK/KMRK_merge_seeds.h
new file mode 100644
index 0000000..a9fd6f1
--- /dev/null
+++ b/src/libKMRK/KMRK_merge_seeds.h
@@ -0,0 +1,11 @@
+#ifndef KMRK_merge_seeds_h
+#define KMRK_merge_seeds_h
+
+#include "KMRK_Seeds.h"
+
+void KMRK_MergeSeeds(AllSeeds_type *AllSeeds,
+ int8_t opt_dir,
+ int8_t opt_inv);
+
+
+#endif /* KMRK_MergeSeeds */
diff --git a/src/libKMRK/Makefile b/src/libKMRK/Makefile
new file mode 100644
index 0000000..a3ce2d5
--- /dev/null
+++ b/src/libKMRK/Makefile
@@ -0,0 +1,25 @@
+
+SOURCES = KMRK.c \
+ KMRK_mask.c \
+ KMRK_merge_seeds.c \
+ KMRK_Seeds.c
+
+SRCS=$(SOURCES)
+
+OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
+
+LIBFILE= libKMRK.a
+RANLIB= ranlib
+
+
+include ../global.mk
+
+
+all: $(LIBFILE)
+
+clean:
+ rm -rf $(OBJECTS) $(LIBFILE)
+
+$(LIBFILE): $(OBJECTS)
+ ar -cr $@ $?
+ $(RANLIB) $@
diff --git a/src/libKMRK/memory.c b/src/libKMRK/memory.c
new file mode 100644
index 0000000..c633c76
--- /dev/null
+++ b/src/libKMRK/memory.c
@@ -0,0 +1,224 @@
+/******
+ file : memory.c
+ function : All about memory of the KMR, Seeds and Repeats
+ All MyMalloc() series is about follwoing how mauch memory has been used
+
+ created : 19 Sep 2003
+ modif : Oct 2003, Feb 2004
+ modif : Dec 2004 ; Corrected Memory declaration
+ author : amikezor
+*****/
+
+#include
+#include
+#include "repseek_types.h"
+#include "memory.h"
+
+
+MemUsage Memory;
+
+
+/*
+ Functions to count the memory usage all along
+ dybamic allocation and free
+*/
+void PrintMem(char *Comment){
+
+ extern MemUsage Memory;
+
+ fprintf(stderr,"\n%s\nMemory Usage\n\t* Max is: %d bytes, %.2f Kb, %.2f Mb\n\t* Cur is: %d bytes, %.2f Kb, %.2f Mb\n",
+ Comment,
+ Memory.Max, (float)Memory.Max/1024, (float)Memory.Max/(1024*1024),
+ Memory.Current, (float)Memory.Current/1024, (float)Memory.Current/(1024*1024));
+}
+void PrintMaxMem( void ){
+
+ extern MemUsage Memory;
+
+ if(Memory.Max < 1024)
+ fprintf(stderr,"Max Memory Usage.. %d bytes\n", Memory.Max);
+ else if(Memory.Max < 1024*1024)
+ fprintf(stderr,"Max Memory Usage.. %.2f Kilobytes\n", (float)Memory.Max/1024);
+ else if(Memory.Max < 1024*1024*1024)
+ fprintf(stderr,"Max Memory Usage.. %.2f Megabytes\n", (float)Memory.Max/(1024*1024));
+ else
+ fprintf(stderr,"Max Memory Usage.. %.2f Gigabytes\n", (float)Memory.Max/(1024*1024*1024));
+}
+void Update_Mem(int32_t Value){
+
+ extern MemUsage Memory;
+
+ Memory.Current += Value;
+ Memory.Max = (Memory.Current>Memory.Max)?Memory.Current:Memory.Max;
+}
+void Init_Mem(int32_t Value){
+
+ extern MemUsage Memory;
+
+ Memory.Current = Value;
+ Memory.Max = Value;
+}
+
+
+/*
+ Replace functions of dynamic allocation
+ to allow the tracking of memory usage
+*/
+void *MyMalloc( int32_t size , char *Error ){
+
+ void *pointer;
+
+ pointer = malloc(size);
+ if(!pointer)fprintf(stderr,"%s\n",Error),exit(3);
+
+ Update_Mem(size);
+
+ return pointer;
+}
+void *MyCalloc( int32_t number, int32_t TypeSize , char *Error ){
+
+ void *pointer;
+
+ pointer = calloc(number, TypeSize);
+ if(!pointer)fprintf(stderr,"%s\n",Error),exit(3);
+
+ Update_Mem(number*TypeSize );
+
+ return pointer;
+}
+void MyFree( void *Pointer, int32_t size){
+ free(Pointer);
+ Pointer=NULL;
+ Update_Mem(-size);
+}
+void *MyRealloc( void *Pointer, int32_t newsize, int32_t oldsize, char *Error){
+
+ Pointer = realloc(Pointer,newsize);
+ if(!Pointer)fprintf(stderr,"%s\n",Error),exit(3);
+ Update_Mem( newsize-oldsize );
+ return Pointer;
+}
+
+
+/*
+ Deal with Stacks structure for KMR
+
+void MallocStack(Stacks *s, int32_t number, int32_t *histo, int32_t AllValues){
+
+ int32_t i;
+
+ s->nStacks = number;
+ s->nValues = AllValues;
+
+ s->ppStacks = (int32_t **)MyMalloc( number * sizeof(int32_t *),
+ "MallocStack: ppStacks malloc error, bye\n");
+ s->lenStacks = (int32_t *)MyMalloc( number * sizeof(int32_t),
+ "MallocStack: lenStacks malloc error, bye\n");
+ s->ppStacks[0] = (int32_t *)MyMalloc( AllValues * sizeof(int32_t),
+ "MallocStack: ppStacks[0] malloc error, bye\n");
+ s->lenStacks[0]=0;
+
+ for(i=1;i < number; i++){
+ s->lenStacks[i]=0;
+ s->ppStacks[i] = s->ppStacks[i-1] + histo[i] ;
+ }
+}
+
+void FreeStack( Stacks *p){
+ MyFree(p->ppStacks[0] , p->nValues*sizeof(int32_t) );
+ MyFree(p->ppStacks , p->nStacks*sizeof(int32_t *) );
+ MyFree(p->lenStacks , p->nStacks*sizeof(int32_t));
+}
+*/
+
+
+/*
+ Deal with the Seeds part
+
+
+void free_Seeds(Seeds AllSeeds)
+{
+
+ if( AllSeeds.nDirSeeds ){
+ MyFree(AllSeeds.DirPos1, AllSeeds.nDirSeeds*sizeof(int32_t));
+ MyFree(AllSeeds.DirPos2, AllSeeds.nDirSeeds*sizeof(int32_t));
+ MyFree(AllSeeds.DirLen, AllSeeds.nDirSeeds*sizeof(int32_t));
+ MyFree(AllSeeds.DirMeanR, AllSeeds.nDirSeeds*sizeof(float));
+ }
+
+ if(AllSeeds.nInvSeeds ){
+ MyFree(AllSeeds.InvPos1, AllSeeds.nInvSeeds*sizeof(int32_t));
+ MyFree(AllSeeds.InvPos2, AllSeeds.nInvSeeds*sizeof(int32_t));
+ MyFree(AllSeeds.InvLen, AllSeeds.nInvSeeds*sizeof(int32_t));
+ MyFree(AllSeeds.InvMeanR, AllSeeds.nInvSeeds*sizeof(float));
+ }
+
+}
+*/
+/*
+ Malloc if it is the first time otherwise readjust
+
+void AllocSeeds(Seeds *AllSeeds, int32_t size, int32_t old_size, int8_t opt_dir, int8_t opt_inv){
+
+ if(opt_inv != 1 && opt_dir != 1)
+ fprintf(stderr,"AllocSeeds: requiere at least one of opt_dir and opt_inv to be 1\n"),exit(4);
+
+ if(opt_dir == 1){
+ if( AllSeeds->DirPos1 == NULL){
+ AllSeeds->DirPos1 = (int32_t *)MyCalloc(size , sizeof(int32_t),"AllocSeeds: Alloc for DirPos1 failed, bye");
+ AllSeeds->DirPos2 = (int32_t *)MyCalloc(size , sizeof(int32_t),"AllocSeeds: Alloc for DirPos2 failed, bye");
+ }
+ else{
+ AllSeeds->DirPos1 = (int32_t *)MyRealloc(AllSeeds->DirPos1, size * sizeof(int32_t), old_size* sizeof(int32_t),
+ "AllocSeeds: realloc for DirPos1 failed, bye");
+ AllSeeds->DirPos2 = (int32_t *)MyRealloc(AllSeeds->DirPos2, size * sizeof(int32_t), old_size* sizeof(int32_t),
+ "AllocSeeds: realloc for DirPos2 failed, bye");
+ }
+ }
+
+ if(opt_inv == 1){
+ if( AllSeeds->InvPos1 == NULL){
+ AllSeeds->InvPos1 = (int32_t *)MyCalloc(size , sizeof(int32_t), "AllocSeeds: Alloc for InvPos1 failed, bye");
+ AllSeeds->InvPos2 = (int32_t *)MyCalloc(size , sizeof(int32_t), "AllocSeeds: Alloc for InvPos2 failed, bye");
+ }
+ else{
+ AllSeeds->InvPos1 = (int32_t *)MyRealloc(AllSeeds->InvPos1, size * sizeof(int32_t), old_size* sizeof(int32_t),
+ "AllocSeeds: realloc for InvPos1 failed, bye");
+ AllSeeds->InvPos2 = (int32_t *)MyRealloc(AllSeeds->InvPos2, size * sizeof(int32_t), old_size* sizeof(int32_t),
+ "AllocSeeds: realloc for InvPos2 failed, bye");
+ }
+ }
+}
+*/
+
+/*
+ Deal with the Repeats structure
+*/
+Repeats mem_Repeats(int32_t Ndir, int32_t Ninv){
+
+ Repeats AllRepeats; /* All Repeats structure */
+
+ AllRepeats.nDirRep = Ndir; /* set the number of repeats to the number of seeds */
+ AllRepeats.nInvRep = Ninv;
+ AllRepeats.nDirBadRep = 0; /* set the "bad" repet (included into another rep) as 0 */
+ AllRepeats.nInvBadRep = 0;
+
+ if(AllRepeats.nDirRep)
+ AllRepeats.DirRep = (Rep *)MyMalloc( (AllRepeats.nDirRep)*sizeof(Rep), "init_Repeats: repdir malloc error" );
+ else
+ AllRepeats.DirRep = NULL;
+
+ if(AllRepeats.nInvRep)
+ AllRepeats.InvRep = (Rep *)MyMalloc( (AllRepeats.nInvRep)*sizeof(Rep), "init_Repeats: repinv malloc error" );
+ else
+ AllRepeats.InvRep = NULL;
+
+ return AllRepeats;
+}
+void free_Repeats(Repeats AllRep)
+{
+ if(AllRep.nDirRep)
+ MyFree(AllRep.DirRep, AllRep.nDirRep*sizeof(Rep));
+ if(AllRep.nInvRep)
+ MyFree(AllRep.InvRep, AllRep.nInvRep*sizeof(Rep));
+}
diff --git a/src/libKMRK/memory.h b/src/libKMRK/memory.h
new file mode 100644
index 0000000..abc5853
--- /dev/null
+++ b/src/libKMRK/memory.h
@@ -0,0 +1,105 @@
+/**
+ * @file memory.h
+ * @author Achaz G
+ * @date April 2004
+ *
+ * @brief header for memory alloc/dealloc
+ * modif : Dec 2004 ; Corrected Memory declaration
+ *
+ *
+ */
+
+#ifndef _MEMORY_H_
+#define _MEMORY_H_
+
+#include "repseek_types.h"
+
+typedef struct { /********** Memory Usage structure **************/
+
+ int32_t Max;
+ int32_t Current;
+
+} MemUsage;
+
+#include
+
+/********** **********
+
+ Global Variable(s)
+
+ ********** **********/
+
+extern MemUsage Memory; /* Instance of the global variable for memory tracking */
+
+/*
+ Follow memory usage
+*/
+void PrintMem(char *Comment);
+void PrintMaxMem( void );
+void Update_Mem(int32_t Value);
+void Init_Mem(int32_t Value);
+/*
+ All Alloc/Free to follow of memory usage
+*/
+void *MyMalloc( int32_t size , char *Error );
+void *MyCalloc( int32_t number, int32_t TypeSize , char *Error );
+void MyFree( void *Pointer, int32_t size);
+void *MyRealloc( void *Pointer, int32_t newsize, int32_t oldsize, char *Error);
+/*
+ For Stacks
+
+void MallocStack(Stacks *s, int32_t number, int32_t *histo, int32_t AllValues);
+void FreeStack( Stacks *p);
+
+ For Seeds
+
+void free_Seeds(Seeds AllSeeds);
+void AllocSeeds(Seeds *AllSeeds, int32_t size, int32_t old_size, int8_t opt_dir, int8_t opt_inv);
+*/
+
+/*
+ For Repeats
+*/
+Repeats mem_Repeats(int32_t Ndir, int32_t Ninv);
+void free_Repeats(Repeats AllRep);
+
+
+
+
+/*
+ Not used anymore, but just in case
+*/
+
+
+#include
+#include
+
+#define KMRK_MALLOC(var,type,size,message) { \
+ var = (type*) malloc(size); \
+ if (!var) \
+ { \
+ fprintf(stderr,"%s\n",message); \
+ exit(4); \
+ } \
+ }
+
+#define KMRK_CALLOC(var,type,length,message) { \
+ var = (type*) calloc(length,sizeof(type)); \
+ if (!var) \
+ { \
+ fprintf(stderr,"%s\n",message); \
+ exit(4); \
+ } \
+ }
+
+#define KMRK_REALLOC(var,type,size,message) { \
+ var = (type*) realloc(var,size); \
+ if (!var) \
+ { \
+ fprintf(stderr,"%s\n",message); \
+ exit(4); \
+ } \
+ }
+
+
+#endif /* _MEMORY_H_*/
diff --git a/src/libKMRK/repseek_types.h b/src/libKMRK/repseek_types.h
new file mode 100644
index 0000000..3bd8779
--- /dev/null
+++ b/src/libKMRK/repseek_types.h
@@ -0,0 +1,197 @@
+/**
+ * @file repseek_types.h
+ * @author Guillaume Achaz
+ * @date April 2004
+ * @modif July 2004 turn scores into doubles
+ * @brief definition of general types and macros for repseek
+ *
+ *
+ */
+
+
+
+#ifndef _REPSEEK_TYPES_
+#define _REPSEEK_TYPES_
+
+/*
+ Version of the program
+*/
+#define REPSEEK_VERSION "4.2"
+#define REPSEEK_DATE "Nov 2004"
+
+
+
+/********** **********
+
+ General Macros
+
+ ********** **********/
+
+/*
+ Macros to compare 2 or three values
+*/
+#define MAX2( A, B ) ( ((A)>(B))?(A):(B) )
+#define MAX3( A, B, C ) ( ((MAX2(A,B))>(C))?(MAX2(A,B)):(C) )
+#define MIN2( A, B ) ( ((A)<(B))?(A):(B) )
+
+#define MAX( A, B ) MAX2( A, B )
+#define MIN( A, B ) MIN2( A, B )
+
+/*
+ Absolute values
+*/
+#define ABS(x) (( (x)>=0 )? (x) : -(x))
+
+
+
+/********** **********
+
+ All types used in repseek
+
+ ********** **********/
+
+#include /* The type FILE * is defined here */
+#include /* all, the int??_t are defined in there */
+
+
+/**
+ * Store informations about one STRICT repeat (seeds)
+ *
+ */
+typedef struct { /* the complete seed structure */
+
+ int32_t pos1; /**< position of the first copy */
+ int32_t pos2; /**< position of the second copy */
+
+ int32_t length; /**< length of the strict repeats */
+ float rmean; /**< mean repeat leavel */
+
+} Seed_type;
+
+typedef struct { /* Just after a KMRK length X, only the 2 pos matter */
+
+ int32_t pos1; /**< postion of the first copy */
+ int32_t pos2; /**< postion of the second copy */
+
+} SmallSeed_type;
+
+
+
+
+/**
+ * Store informations about all strict repeat (seeds)
+ *
+ */
+typedef struct {
+
+ int32_t cDirSeeds; /**< currently allocated space in dirSeeds array */
+ int32_t nDirSeeds; /**< count of direct strict repeats */
+ int32_t nFilteredDirSeeds; /**< ??? */
+
+ Seed_type* dirSeeds; /**< array of direct repeats */
+
+ int32_t cInvSeeds; /**< currently allocated space in invSeeds array */
+ int32_t nInvSeeds; /**< count of inverted strict repeats */
+ int32_t nFilteredInvSeeds; /**< ??? */
+
+ Seed_type* invSeeds; /**< array of inverted repeats */
+
+} AllSeeds_type;
+
+
+
+
+
+/**
+ * Store informations about one GENERIC repeat
+ *
+ */
+typedef struct{
+
+ char type[20]; /* its name; i.e. Tandem, Palindrome, etc... */
+
+ int32_t pos1, pos2, /* both copies postions */
+ len1, len2, /* both copies length */
+ seed_pos1,seed_pos2, /* pos1 and pos2 of the originate seed */
+ seed_len, /* len of the seed */
+
+ match, align_len; /* number of match and length of alignment */
+
+ double score; /* the alignment score */
+
+ float seed_meanR; /* the seed meanR */
+
+ float meanR; /* The mean R-level of the repeat */
+ int32_t mainR; /* its Mode R */
+ float fraction_mainR; /* the fraction of length containing the Mode R */
+
+
+} Rep;
+
+
+
+/**
+ * Store informations about All GENERIC repeats
+ *
+ */
+typedef struct {
+
+ int32_t nDirRep; /* Total Number of Direct Repats in Mem */
+ int32_t nDirBadRep; /* Direct repeats set to -1 -- filtered out and co. */
+ Rep *DirRep; /* The array of Dir Rep */
+
+ int32_t nInvRep; /* Total Number of Inverted Repats in Mem */
+ int32_t nInvBadRep; /* Inverted Repeats set to -1 -- filtered out and co. */
+ Rep *InvRep; /* The array of Inverted Rep */
+
+} Repeats;
+
+
+#define MATRIX_SIZE 26
+
+typedef struct { /******* The scoring Matrix ************/
+
+ double matrix[MATRIX_SIZE][MATRIX_SIZE]; /* the matrix of match/missmatch */
+ double gap_open; /* value of gap-open */
+ double gap_ext; /* value of gap_ext */
+ double expect;
+
+} SCORING;
+
+
+
+typedef struct { /******* The Results of Alignement by Dynamik programming ************/
+
+
+ double *scores; /* the score strings (+/- 'matrix') */
+ double *pscore; /* pointer to the current score */
+ char *traces; /* the path matrix - could take values 'l'eft, 'd'iagonal, or 'a'bove or 'u'nknown */
+
+ double *F; /* *Above -- needed for memorizing deletion in seq2 */
+
+ double *pBestScore; /* pointer to it */
+ int32_t BestScore_row; /* its row and col */
+ int32_t BestScore_col;
+
+ char *traceback; /* all you need for bactracking */
+ char *traceback_f; /* memory for forward traceback and then check other seeds */
+ char *traceback_b; /* memory needed for backward traceback - to avoid erasing the forward one */
+
+ int32_t alignment_len; /* guess ?? */
+ int32_t matches; /* number of match (score>0 in scoring matrix) */
+
+ int32_t nSegment; /* number of segment */
+ int32_t *Segment_begin; /* begin and end of each segment */
+ int32_t *Segment_end;
+
+ int32_t max_scores; /* size of the matrices only for memory purposes */
+ int32_t max_col;
+ int32_t max_row;
+ int32_t max_alignlen;
+
+} RESULTS;
+
+
+
+#endif /* _REPSEEK_TYPES_ */
+
diff --git a/src/libKMRK/sequence.h b/src/libKMRK/sequence.h
new file mode 100644
index 0000000..4c8a105
--- /dev/null
+++ b/src/libKMRK/sequence.h
@@ -0,0 +1,25 @@
+/**
+ * @file KMRK_sequence.h
+ * @author Eric Coissac
+ * @date Tue Feb 24 22:22:57 2004
+ *
+ * @brief Header file for sequence utilities
+ *
+ *
+ */
+
+#ifndef KMRK_sequence_h
+#define KMRK_sequence_h
+
+#include
+
+int8_t CheckSeq(char *seq, char *alpha);
+
+void nonACGTXtoN(char *seq);
+
+void UpperSequence(char *seq);
+
+void invseq(char *seqsrc, char *seqdest);
+
+
+#endif /* KMRK_sequence_h */
diff --git a/src/libapat/CODES/dft_code.h b/src/libapat/CODES/dft_code.h
new file mode 100644
index 0000000..b9caf28
--- /dev/null
+++ b/src/libapat/CODES/dft_code.h
@@ -0,0 +1,14 @@
+/* ----------------------------------------------- */
+/* dft_pat_seq_code.h */
+/* default alphabet encoding for alpha */
+/* ----------------------------------------------- */
+
+ 0x00000001 /* A */, 0x00000002 /* B */, 0x00000004 /* C */,
+ 0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
+ 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
+ 0x00000200 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
+ 0x00001000 /* M */, 0x00002000 /* N */, 0x00004000 /* O */,
+ 0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
+ 0x00040000 /* S */, 0x00080000 /* T */, 0x00100000 /* U */,
+ 0x00200000 /* V */, 0x00400000 /* W */, 0x00800000 /* X */,
+ 0x01000000 /* Y */, 0x02000000 /* Z */
diff --git a/src/libapat/CODES/dna_code.h b/src/libapat/CODES/dna_code.h
new file mode 100644
index 0000000..0febf41
--- /dev/null
+++ b/src/libapat/CODES/dna_code.h
@@ -0,0 +1,71 @@
+/* ----------------------------------------------- */
+/* dna_code.h */
+/* alphabet encoding for dna/rna */
+/* ----------------------------------------- */
+/* IUPAC encoding */
+/* ----------------------------------------- */
+/* G/A/T/C */
+/* U=T */
+/* R=AG */
+/* Y=CT */
+/* M=AC */
+/* K=GT */
+/* S=CG */
+/* W=AT */
+/* H=ACT */
+/* B=CGT */
+/* V=ACG */
+/* D=AGT */
+/* N=ACGT */
+/* X=ACGT */
+/* EFIJLOPQZ not recognized */
+/* ----------------------------------------- */
+/* dual encoding */
+/* ----------------------------------------- */
+/* A=ADHMNRVW */
+/* B=BCDGHKMNRSTUVWY */
+/* C=BCHMNSVY */
+/* D=ABDGHKMNRSTUVWY */
+/* G=BDGKNRSV */
+/* H=ABCDHKMNRSTUVWY */
+/* K=BDGHKNRSTUVWY */
+/* M=ABCDHMNRSVWY */
+/* N=ABCDGHKMNRSTUVWY */
+/* R=ABDGHKMNRSVW */
+/* S=BCDGHKMNRSVY */
+/* T=BDHKNTUWY */
+/* U=BDHKNTUWY */
+/* V=ABCDGHKMNRSVWY */
+/* W=ABDHKMNRTUVWY */
+/* X=ABCDGHKMNRSTUVWY */
+/* Y=BCDHKMNSTUVWY */
+/* EFIJLOPQZ not recognized */
+/* ----------------------------------------------- */
+
+#ifndef USE_DUAL
+
+ /* IUPAC */
+
+ 0x00000001 /* A */, 0x00080044 /* B */, 0x00000004 /* C */,
+ 0x00080041 /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
+ 0x00000040 /* G */, 0x00080005 /* H */, 0x00000000 /* I */,
+ 0x00000000 /* J */, 0x00080040 /* K */, 0x00000000 /* L */,
+ 0x00000005 /* M */, 0x00080045 /* N */, 0x00000000 /* O */,
+ 0x00000000 /* P */, 0x00000000 /* Q */, 0x00000041 /* R */,
+ 0x00000044 /* S */, 0x00080000 /* T */, 0x00080000 /* U */,
+ 0x00000045 /* V */, 0x00080001 /* W */, 0x00080045 /* X */,
+ 0x00080004 /* Y */, 0x00000000 /* Z */
+
+#else
+ /* DUAL */
+
+ 0x00623089 /* A */, 0x017e34ce /* B */, 0x01243086 /* C */,
+ 0x017e34cb /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
+ 0x0026244a /* G */, 0x017e348f /* H */, 0x00000000 /* I */,
+ 0x00000000 /* J */, 0x017e24ca /* K */, 0x00000000 /* L */,
+ 0x0166308f /* M */, 0x017e34cf /* N */, 0x00000000 /* O */,
+ 0x00000000 /* P */, 0x00000000 /* Q */, 0x006634cb /* R */,
+ 0x012634ce /* S */, 0x0158248a /* T */, 0x0158248a /* U */,
+ 0x016634cf /* V */, 0x017a348b /* W */, 0x017e34cf /* X */,
+ 0x017c348e /* Y */, 0x00000000 /* Z */
+#endif
diff --git a/src/libapat/CODES/prot_code.h b/src/libapat/CODES/prot_code.h
new file mode 100644
index 0000000..edcdfc1
--- /dev/null
+++ b/src/libapat/CODES/prot_code.h
@@ -0,0 +1,51 @@
+/* ----------------------------------------------- */
+/* prot_code.h */
+/* alphabet encoding for proteins */
+/* ----------------------------------------- */
+/* IUPAC encoding */
+/* ----------------------------------------- */
+/* B=DN */
+/* Z=EQ */
+/* X=any - {X} */
+/* JOU not recognized */
+/* ----------------------------------------- */
+/* dual encoding */
+/* ----------------------------------------- */
+/* B=BDN */
+/* D=BD */
+/* E=EZ */
+/* N=BN */
+/* Q=QZ */
+/* X=any - {X} */
+/* Z=EQZ */
+/* JOU not recognized */
+/* ----------------------------------------------- */
+
+#ifndef USE_DUAL
+
+ /* IUPAC */
+
+ 0x00000001 /* A */, 0x00002008 /* B */, 0x00000004 /* C */,
+ 0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
+ 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
+ 0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
+ 0x00001000 /* M */, 0x00002000 /* N */, 0x00000000 /* O */,
+ 0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
+ 0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */,
+ 0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */,
+ 0x01000000 /* Y */, 0x00010010 /* Z */
+
+#else
+ /* DUAL */
+
+ 0x00000001 /* A */, 0x0000200a /* B */, 0x00000004 /* C */,
+ 0x0000000a /* D */, 0x02000010 /* E */, 0x00000020 /* F */,
+ 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
+ 0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
+ 0x00001000 /* M */, 0x00002002 /* N */, 0x00000000 /* O */,
+ 0x00008000 /* P */, 0x02010000 /* Q */, 0x00020000 /* R */,
+ 0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */,
+ 0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */,
+ 0x01000000 /* Y */, 0x02010010 /* Z */
+
+#endif
diff --git a/src/libapat/Gmach.h b/src/libapat/Gmach.h
new file mode 100644
index 0000000..8fb1c69
--- /dev/null
+++ b/src/libapat/Gmach.h
@@ -0,0 +1,97 @@
+/* ---------------------------------------------------------------- */
+/* Copyright (c) Atelier de BioInformatique */
+/* @file: Gmach.h */
+/* @desc: machine dependant setup */
+/* @+ *should* be included in all ABI softs */
+/* */
+/* @history: */
+/* @+ : Jul 95 : MWC first draft */
+/* @+ : Jan 96 : adapted to Pwg */
+/* @+ : Nov 00 : adapted to Mac_OS_X */
+/* ---------------------------------------------------------------- */
+
+#ifndef _H_Gmach
+
+ /* OS names */
+
+#define _H_Gmach
+
+ /* Macintosh Classic */
+ /* Think C environment */
+#ifdef THINK_C
+#define MACINTOSH
+#define MAC_OS_C
+#endif
+
+
+ /* Macintosh Classic */
+ /* Code-Warrior */
+#ifdef __MWERKS__
+#define MACINTOSH
+#define MAC_OS_C
+#endif
+
+ /* Macintosh OS-X */
+#ifdef MAC_OS_X
+#define MACINTOSH
+#define UNIX
+#define UNIX_BSD
+#endif
+
+ /* LINUX */
+#ifdef LINUX
+#define UNIX
+#define UNIX_BSD
+#endif
+
+ /* other Unix Boxes */
+ /* SunOS / Solaris */
+#ifdef SUN
+#define UNIX
+#ifdef SOLARIS
+#define UNIX_S7
+#else
+#define UNIX_BSD
+#endif
+#endif
+
+ /* SGI Irix */
+#ifdef SGI
+#define UNIX
+#define UNIX_S7
+#endif
+
+/* ansi setup */
+/* for unix machines see makefile */
+
+#ifndef PROTO
+#define PROTO 1
+#endif
+
+#ifndef ANSI_PROTO
+#define ANSI_PROTO PROTO
+#endif
+
+#ifndef ANSI_STR
+#define ANSI_STR 1
+#endif
+
+/* unistd.h header file */
+
+#ifdef UNIX
+#define HAS_UNISTD_H
+#endif
+
+/* getopt.h header file */
+
+#ifdef MAC_OS_C
+#define HAS_GETOPT_H "getopt.h"
+#endif
+
+#ifdef SGI
+#define HAS_GETOPT_H
+#endif
+
+
+
+#endif
diff --git a/src/libapat/Gtypes.h b/src/libapat/Gtypes.h
new file mode 100644
index 0000000..9bf5a93
--- /dev/null
+++ b/src/libapat/Gtypes.h
@@ -0,0 +1,104 @@
+/* ---------------------------------------------------------------- */
+/* Copyright (c) Atelier de BioInformatique */
+/* @file: Gtypes.h */
+/* @desc: general & machine dependant types */
+/* @+ *should* be included in all ABI softs */
+/* */
+/* @history: */
+/* @+ : Jan 91 : MWC first draft */
+/* @+ : Jul 95 : Gmach addition */
+/* ---------------------------------------------------------------- */
+
+#define _H_Gtypes
+
+#ifndef _H_Gmach
+#include "Gmach.h"
+#endif
+
+#ifndef NULL
+#include /* is the official NULL here ? */
+#endif
+
+/* ==================================================== */
+/* constantes */
+/* ==================================================== */
+
+#ifndef PROTO
+#define PROTO 1 /* prototypes flag */
+#endif
+
+#ifdef MAC_OS_C
+#define Vrai true /* TC boolean values */
+#define Faux false /* */
+#else
+#define Vrai 0x1 /* bool values = TRUE */
+#define Faux 0x0 /* = FALSE */
+#endif
+
+#define Nil NULL /* nil pointer */
+
+#define kBigInt16 0x7fff /* plus grand 16 bits signe */
+#define kBigInt32 0x7fffffff /* plus grand 32 bits signe */
+#define kBigUInt16 0xffff /* plus grand 16 bits ~signe */
+#define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */
+
+#ifdef MAC_OS_C
+/* ==================================================== */
+/* Types (for Macintosh ThinK C || MWerks) */
+/* ==================================================== */
+
+ /* --- specific sizes --------- */
+typedef long Int32; /* Int32 = 32 bits signe */
+typedef unsigned long UInt32; /* UInt32 = 32 bits ~signe */
+typedef short Int16; /* Int16 = 16 bits signe */
+typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */
+typedef char Int8; /* Int8 = 8 bits signe */
+typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */
+
+ /* --- default types ---------- */
+
+typedef Boolean Bool; /* booleen */
+
+typedef long Int; /* 'natural' int (>= 32 bits) */
+
+typedef void *Ptr; /* pointeur */
+
+#elif ((defined SUN) || (defined SGI) || (defined UNIX))
+/* ==================================================== */
+/* Types (for Sun & Iris - 32 bits machines) */
+/* ==================================================== */
+
+ /* --- specific sizes --------- */
+typedef int Int32; /* Int32 = 32 bits signe */
+typedef unsigned int UInt32; /* UInt32 = 32 bits ~signe */
+typedef short Int16; /* Int16 = 16 bits signe */
+typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */
+typedef char Int8; /* Int8 = 8 bits signe */
+typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */
+
+ /* --- default types ---------- */
+
+typedef int Bool; /* booleen (int for ANSI) */
+
+typedef int Int; /* 'natural' int (>= 32 bits) */
+
+typedef void *Ptr; /* pointeur */
+
+#else
+/* ==================================================== */
+/* Types (for undefined machines) */
+/* ==================================================== */
+
+#error undefined MACHINE
+
+#endif
+
+/* ==================================================== */
+/* special macro for prototypes */
+/* ==================================================== */
+
+#if PROTO
+#define P(s) s
+#else
+#define P(s) ()
+#endif
diff --git a/src/libapat/Makefile b/src/libapat/Makefile
new file mode 100644
index 0000000..b4dc9be
--- /dev/null
+++ b/src/libapat/Makefile
@@ -0,0 +1,24 @@
+
+SOURCES = apat_parse.c \
+ apat_search.c \
+ libstki.c
+
+SRCS=$(SOURCES)
+
+
+OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
+
+LIBFILE= libapat.a
+RANLIB=ranlib
+
+
+include ../global.mk
+
+all: $(LIBFILE)
+
+clean:
+ rm -rf $(OBJECTS) $(LIBFILE)
+
+$(LIBFILE): $(OBJECTS)
+ ar -cr $@ $?
+ $(RANLIB) $@
diff --git a/src/libapat/apat.h b/src/libapat/apat.h
new file mode 100644
index 0000000..eaa06df
--- /dev/null
+++ b/src/libapat/apat.h
@@ -0,0 +1,173 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Dec. 94 */
+/* File: apat.h */
+/* Purpose: pattern scan */
+/* History: */
+/* 28/12/94 : ascan first version */
+/* 14/05/99 : last revision */
+/* ==================================================== */
+
+#ifndef _H_Gtypes
+#include "Gtypes.h"
+#endif
+
+#ifndef _H_libstki
+#include "libstki.h"
+#endif
+
+#define H_apat
+
+/* ----------------------------------------------- */
+/* constantes */
+/* ----------------------------------------------- */
+
+#ifndef BUFSIZ
+#define BUFSIZ 1024 /* io buffer size */
+#endif
+
+#define MAX_NAME_LEN BUFSIZ /* max length of sequence name */
+
+#define ALPHA_LEN 26 /* alphabet length */
+ /* *DO NOT* modify */
+
+#define MAX_PATTERN 4 /* max # of patterns */
+ /* *DO NOT* modify */
+
+#define MAX_PAT_LEN 32 /* max pattern length */
+ /* *DO NOT* modify */
+
+#define MAX_PAT_ERR 32 /* max # of errors */
+ /* *DO NOT* modify */
+
+#define PATMASK 0x3ffffff /* mask for 26 symbols */
+ /* *DO NOT* modify */
+
+#define OBLIBIT 0x4000000 /* bit 27 to 1 -> oblig. pos */
+ /* *DO NOT* modify */
+
+ /* mask for position */
+#define ONEMASK 0x80000000 /* mask for highest position */
+
+ /* masks for Levenhstein edit */
+#define OPER_IDT 0x00000000 /* identity */
+#define OPER_INS 0x40000000 /* insertion */
+#define OPER_DEL 0x80000000 /* deletion */
+#define OPER_SUB 0xc0000000 /* substitution */
+
+#define OPER_SHFT 30 /* shift */
+
+ /* Levenhstein Opcodes */
+#define SOPER_IDT 0x0 /* identity */
+#define SOPER_INS 0x1 /* insertion */
+#define SOPER_DEL 0x2 /* deletion */
+#define SOPER_SUB 0x3 /* substitution */
+
+ /* Levenhstein Opcodes masks */
+#define OPERMASK 0xc0000000 /* mask for Opcodes */
+#define NOPERMASK 0x3fffffff /* negate of previous */
+
+ /* special chars in pattern */
+#define PATCHARS "[]!#"
+
+ /* 26 letter alphabet */
+ /* in alphabetical order */
+
+#define ORD_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+
+ /* protein alphabet */
+
+#define PROT_ALPHA "ACDEFGHIKLMNPQRSTVWY"
+
+ /* dna/rna alphabet */
+
+#define DNA_ALPHA "ABCDGHKMNRSTUVWXY"
+
+
+/* ----------------------------------------------- */
+/* data structures */
+/* ----------------------------------------------- */
+
+ /* -------------------- */
+typedef enum { /* data encoding */
+ /* -------------------- */
+ alpha = 0, /* [A-Z] */
+ dna, /* IUPAC DNA */
+ protein /* IUPAC proteins */
+} CodType;
+
+ /* -------------------- */
+typedef struct { /* sequence */
+ /* -------------------- */
+ char *name; /* sequence name */
+ Int32 seqlen; /* sequence length */
+ Int32 seqsiz; /* sequence buffer size */
+ Int32 datsiz; /* data buffer size */
+ Int32 circular;
+ UInt8 *data; /* data buffer */
+ char *cseq; /* sequence buffer */
+ StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */
+ StackiPtr hiterr[MAX_PATTERN]; /* stack of errors */
+} Seq, *SeqPtr;
+
+ /* -------------------- */
+typedef struct { /* pattern */
+ /* -------------------- */
+ int patlen; /* pattern length */
+ int maxerr; /* max # of errors */
+ char *cpat; /* pattern string */
+ Int32 *patcode; /* encoded pattern */
+ UInt32 *smat; /* S matrix */
+ UInt32 omask; /* oblig. bits mask */
+ Bool hasIndel; /* are indels allowed */
+ Bool ok; /* is pattern ok */
+} Pattern, *PatternPtr;
+
+/* ----------------------------------------------- */
+/* macros */
+/* ----------------------------------------------- */
+
+#ifndef NEW
+#define NEW(typ) (typ*)malloc(sizeof(typ))
+#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ))
+#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ))
+#define FREE(ptr) free((void *) ptr)
+#endif
+
+/* ----------------------------------------------- */
+/* prototypes */
+/* ----------------------------------------------- */
+
+ /* apat_seq.c */
+
+SeqPtr FreeSequence (SeqPtr pseq);
+SeqPtr NewSequence (void);
+int ReadNextSequence (SeqPtr pseq);
+int WriteSequence (FILE *filou , SeqPtr pseq);
+
+ /* apat_parse.c */
+
+Int32 *GetCode (CodType ctype);
+int CheckPattern (Pattern *ppat);
+int EncodePattern (Pattern *ppat, CodType ctype);
+int ReadPattern (Pattern *ppat);
+void PrintDebugPattern (Pattern *ppat);
+
+ /* apat_search.c */
+
+int CreateS (Pattern *ppat, Int32 lalpha);
+Int32 ManberNoErr (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32 ManberSub (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32 ManberIndel (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32 ManberAll (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32 NwsPatAlign (Seq *pseq , Pattern *ppat, Int32 nerr ,
+ Int32 *reslen , Int32 *reserr);
+
+ /* apat_sys.c */
+
+float UserCpuTime (int reset);
+float SysCpuTime (int reset);
+char *StrCpuTime (int reset);
+void Erreur (char *msg , int stat);
+int AccessFile (char *path, char *mode);
+
diff --git a/src/libapat/apat_parse.c b/src/libapat/apat_parse.c
new file mode 100644
index 0000000..43cda48
--- /dev/null
+++ b/src/libapat/apat_parse.c
@@ -0,0 +1,369 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Mar. 92 */
+/* File: apat_parse.c */
+/* Purpose: Codage du pattern */
+/* History: */
+/* 00/07/94 : first version (stanford) */
+/* 00/11/94 : revised for DNA/PROTEIN */
+/* 30/12/94 : modified EncodePattern */
+/* for manber search */
+/* 14/05/99 : indels added */
+/* ==================================================== */
+
+#include
+#include
+#include
+#include
+
+#include "Gtypes.h"
+#include "apat.h"
+ /* -------------------- */
+ /* default char */
+ /* encodings */
+ /* -------------------- */
+
+static Int32 sDftCode[] = {
+
+#include "CODES/dft_code.h"
+
+};
+ /* -------------------- */
+ /* char encodings */
+ /* IUPAC */
+ /* -------------------- */
+
+ /* IUPAC Proteins */
+static Int32 sProtCode[] = {
+
+#include "CODES/prot_code.h"
+
+};
+ /* IUPAC Dna/Rna */
+static Int32 sDnaCode[] = {
+
+#include "CODES/dna_code.h"
+
+};
+
+
+/* -------------------------------------------- */
+/* internal replacement of gets */
+/* -------------------------------------------- */
+static char *sGets(char *buffer, int size) {
+
+ char *ebuf;
+
+ if (! fgets(buffer, size-1, stdin))
+ return NULL;
+
+ /* remove trailing line feed */
+
+ ebuf = buffer + strlen(buffer);
+
+ while (--ebuf >= buffer) {
+ if ((*ebuf == '\n') || (*ebuf == '\r'))
+ *ebuf = '\000';
+ else
+ break;
+ }
+
+ return buffer;
+}
+
+/* -------------------------------------------- */
+/* returns actual code associated to type */
+/* -------------------------------------------- */
+
+Int32 *GetCode(CodType ctype)
+{
+ Int32 *code = sDftCode;
+
+ switch (ctype) {
+ case dna : code = sDnaCode ; break;
+ case protein : code = sProtCode ; break;
+ default : code = sDftCode ; break;
+ }
+
+ return code;
+}
+
+/* -------------------------------------------- */
+
+#define BAD_IF(tst) if (tst) return 0
+
+int CheckPattern(Pattern *ppat)
+{
+ int lev;
+ char *pat;
+
+ pat = ppat->cpat;
+
+ BAD_IF (*pat == '#');
+
+ for (lev = 0; *pat ; pat++)
+
+ switch (*pat) {
+
+ case '[' :
+ BAD_IF (lev);
+ BAD_IF (*(pat+1) == ']');
+ lev++;
+ break;
+
+ case ']' :
+ lev--;
+ BAD_IF (lev);
+ break;
+
+ case '!' :
+ BAD_IF (lev);
+ BAD_IF (! *(pat+1));
+ BAD_IF (*(pat+1) == ']');
+ break;
+
+ case '#' :
+ BAD_IF (lev);
+ BAD_IF (*(pat-1) == '[');
+ break;
+
+ default :
+ if (! isupper(*pat))
+ return 0;
+ break;
+ }
+
+ return (lev ? 0 : 1);
+}
+
+#undef BAD_IF
+
+
+/* -------------------------------------------- */
+static char *skipOblig(char *pat)
+{
+ return (*(pat+1) == '#' ? pat+1 : pat);
+}
+
+/* -------------------------------------------- */
+static char *splitPattern(char *pat)
+{
+ switch (*pat) {
+
+ case '[' :
+ for (; *pat; pat++)
+ if (*pat == ']')
+ return skipOblig(pat);
+ return NULL;
+ break;
+
+ case '!' :
+ return splitPattern(pat+1);
+ break;
+
+ }
+
+ return skipOblig(pat);
+}
+
+/* -------------------------------------------- */
+static Int32 valPattern(char *pat, Int32 *code)
+{
+ Int32 val;
+
+ switch (*pat) {
+
+ case '[' :
+ return valPattern(pat+1, code);
+ break;
+
+ case '!' :
+ val = valPattern(pat+1, code);
+ return (~val & PATMASK);
+ break;
+
+ default :
+ val = 0x0;
+ while (isupper(*pat)) {
+ val |= code[*pat - 'A'];
+ pat++;
+ }
+ return val;
+ }
+
+ return 0x0;
+}
+
+/* -------------------------------------------- */
+static Int32 obliBitPattern(char *pat)
+{
+ return (*(pat + strlen(pat) - 1) == '#' ? OBLIBIT : 0x0);
+}
+
+
+/* -------------------------------------------- */
+static int lenPattern(char *pat)
+{
+ int lpat;
+
+ lpat = 0;
+
+ while (*pat) {
+
+ if (! (pat = splitPattern(pat)))
+ return 0;
+
+ pat++;
+
+ lpat++;
+ }
+
+ return lpat;
+}
+
+/* -------------------------------------------- */
+/* Interface */
+/* -------------------------------------------- */
+
+/* -------------------------------------------- */
+/* encode un pattern */
+/* -------------------------------------------- */
+int EncodePattern(Pattern *ppat, CodType ctype)
+{
+ int pos, lpat;
+ Int32 *code;
+ char *pp, *pa, c;
+
+ ppat->ok = Faux;
+
+ code = GetCode(ctype);
+
+ ppat->patlen = lpat = lenPattern(ppat->cpat);
+
+ if (lpat <= 0)
+ return 0;
+
+ if (! (ppat->patcode = NEWN(Int32, lpat)))
+ return 0;
+
+ pa = pp = ppat->cpat;
+
+ pos = 0;
+
+ while (*pa) {
+
+ pp = splitPattern(pa);
+
+ c = *++pp;
+
+ *pp = '\000';
+
+ ppat->patcode[pos++] = valPattern(pa, code) | obliBitPattern(pa);
+
+ *pp = c;
+
+ pa = pp;
+ }
+
+ ppat->ok = Vrai;
+
+ return lpat;
+}
+
+/* -------------------------------------------- */
+/* remove blanks */
+/* -------------------------------------------- */
+static char *RemBlanks(char *s)
+{
+ char *sb, *sc;
+
+ for (sb = sc = s ; *sb ; sb++)
+ if (! isspace(*sb))
+ *sc++ = *sb;
+
+ return s;
+}
+
+/* -------------------------------------------- */
+/* count non blanks */
+/* -------------------------------------------- */
+static Int32 CountAlpha(char *s)
+{
+ Int32 n;
+
+ for (n = 0 ; *s ; s++)
+ if (! isspace(*s))
+ n++;
+
+ return n;
+}
+
+
+/* -------------------------------------------- */
+/* lit un pattern */
+/* #mis */
+/* ligne starting with '/' are comments */
+/* -------------------------------------------- */
+int ReadPattern(Pattern *ppat)
+{
+ int val;
+ char *spac;
+ char buffer[BUFSIZ];
+
+ ppat->ok = Vrai;
+
+ if (! sGets(buffer, sizeof(buffer)))
+ return 0;
+
+ if (*buffer == '/')
+ return ReadPattern(ppat);
+
+ if (! CountAlpha(buffer))
+ return ReadPattern(ppat);
+
+ for (spac = buffer ; *spac ; spac++)
+ if ((*spac == ' ') || (*spac == '\t'))
+ break;
+
+ ppat->ok = Faux;
+
+ if (! *spac)
+ return 0;
+
+ if (sscanf(spac, "%d", &val) != 1)
+ return 0;
+
+ ppat->hasIndel = (val < 0);
+
+ ppat->maxerr = ((val >= 0) ? val : -val);
+
+ *spac = '\000';
+
+ (void) RemBlanks(buffer);
+
+ if ((ppat->cpat = NEWN(char, strlen(buffer)+1)))
+ strcpy(ppat->cpat, buffer);
+
+ ppat->ok = (ppat->cpat != NULL);
+
+ return (ppat->ok ? 1 : 0);
+}
+
+/* -------------------------------------------- */
+/* ecrit un pattern - Debug - */
+/* -------------------------------------------- */
+void PrintDebugPattern(Pattern *ppat)
+{
+ int i;
+
+ printf("Pattern : %s\n", ppat->cpat);
+ printf("Encoding : \n\t");
+
+ for (i = 0 ; i < ppat->patlen ; i++) {
+ printf("0x%8.8x ", ppat->patcode[i]);
+ if (i%4 == 3)
+ printf("\n\t");
+ }
+ printf("\n");
+}
+
diff --git a/src/libapat/apat_search.c b/src/libapat/apat_search.c
new file mode 100644
index 0000000..f0dd394
--- /dev/null
+++ b/src/libapat/apat_search.c
@@ -0,0 +1,339 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Dec. 94 */
+/* File: apat_search.c */
+/* Purpose: recherche du pattern */
+/* algorithme de Baeza-Yates/Gonnet */
+/* Manber (agrep) */
+/* History: */
+/* 07/12/94 : first version */
+/* 28/12/94 : revised version */
+/* 14/05/99 : last revision */
+/* ==================================================== */
+
+#if 0
+#ifndef THINK_C
+#include
+#endif
+#endif
+
+#include
+#include
+#include
+
+#include "Gtypes.h"
+#include "libstki.h"
+#include "apat.h"
+
+#define POP PopiOut
+#define PUSH PushiIn
+#define TOPCURS CursiToTop
+#define DOWNREAD ReadiDown
+
+#define KRONECK(x, msk) ((~x & msk) ? 0 : 1)
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+/* -------------------------------------------- */
+/* Construction de la matrice S */
+/* -------------------------------------------- */
+
+int CreateS(Pattern *ppat, Int32 lalpha)
+{
+ Int32 i, j, indx;
+ UInt32 pindx, amask, omask, *smat;
+
+ ppat->ok = Faux;
+
+ omask = 0x0L;
+
+ if (! (smat = NEWN(UInt32, lalpha)))
+ return 0;
+
+ for (i = 0 ; i < lalpha ; i++)
+ smat[i] = 0x0;
+
+ for (i = ppat->patlen - 1, amask = 0x1L ; i >= 0 ; i--, amask <<= 1) {
+
+ indx = ppat->patcode[i];
+
+ if (ppat->patcode[i] & OBLIBIT)
+ omask |= amask;
+
+ for (j = 0, pindx = 0x1L ; j < lalpha ; j++, pindx <<= 1)
+ if (indx & pindx)
+ smat[j] |= amask;
+ }
+
+ ppat->smat = smat;
+
+ ppat->omask = omask;
+
+ ppat->ok = Vrai;
+
+ return 1;
+
+}
+
+/* -------------------------------------------- */
+/* Baeza-Yates/Manber algorithm */
+/* NoError */
+/* -------------------------------------------- */
+Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
+{
+ UInt32 pos;
+ UInt32 smask, r;
+ UInt8 *data;
+ StackiPtr *stkpos, *stkerr;
+ UInt32 end;
+
+ end = begin + length;
+ end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
+
+
+ /* create local masks */
+
+ smask = r = 0x1L << ppat->patlen;
+
+ /* init. scan */
+ data = pseq->data + begin;
+ stkpos = pseq->hitpos + patnum;
+ stkerr = pseq->hiterr + patnum;
+
+ /* loop on text data */
+
+ for (pos = begin ; pos < end ; pos++) {
+
+ r = (r >> 1) & ppat->smat[*data++];
+
+ if (r & 0x1L) {
+ PUSH(stkpos, pos - ppat->patlen + 1);
+ PUSH(stkerr, 0);
+ }
+
+ r |= smask;
+ }
+
+ return (*stkpos)->top; /* aka # of hits */
+}
+
+/* -------------------------------------------- */
+/* Baeza-Yates/Manber algorithm */
+/* Substitution only */
+/* */
+/* Note : r array is stored as : */
+/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */
+/* */
+/* -------------------------------------------- */
+Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
+{
+ int e, emax, found;
+ UInt32 pos;
+ UInt32 smask, cmask, sindx;
+ UInt32 *pr, r[2 * MAX_PAT_ERR + 2];
+ UInt8 *data;
+ StackiPtr *stkpos, *stkerr;
+ UInt32 end;
+
+ end = begin + length;
+ end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
+
+ /* create local masks */
+ emax = ppat->maxerr;
+
+ r[0] = r[1] = 0x0;
+
+ cmask = smask = 0x1L << ppat->patlen;
+
+ for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2)
+ *pr = cmask;
+
+ cmask = ~ ppat->omask;
+
+ /* init. scan */
+ data = pseq->data + begin;
+ stkpos = pseq->hitpos + patnum;
+ stkerr = pseq->hiterr + patnum;
+
+ /* loop on text data */
+
+ for (pos = begin ; pos < end ; pos++) {
+
+ sindx = ppat->smat[*data++];
+
+ for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {
+
+ pr[2] = pr[3] | smask;
+
+ pr[3] = ((pr[0] >> 1) & cmask) /* sub */
+ | ((pr[2] >> 1) & sindx); /* ident */
+
+ if (pr[3] & 0x1L) { /* found */
+ if (! found) {
+ PUSH(stkpos, pos - ppat->patlen + 1);
+ PUSH(stkerr, e);
+ }
+ found++;
+ }
+ }
+ }
+
+ return (*stkpos)->top; /* aka # of hits */
+}
+
+/* -------------------------------------------- */
+/* Baeza-Yates/Manber algorithm */
+/* Substitution + Indels */
+/* */
+/* Note : r array is stored as : */
+/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */
+/* */
+/* Warning: may return shifted pos. */
+/* */
+/* -------------------------------------------- */
+Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
+{
+ int e, emax, found;
+ UInt32 pos;
+ UInt32 smask, cmask, sindx;
+ UInt32 *pr, r[2 * MAX_PAT_ERR + 2];
+ UInt8 *data;
+ StackiPtr *stkpos, *stkerr;
+ UInt32 end;
+
+ end = begin + length;
+ end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
+
+ /* create local masks */
+ emax = ppat->maxerr;
+
+ r[0] = r[1] = 0x0;
+
+ cmask = smask = 0x1L << ppat->patlen;
+
+ for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2) {
+ *pr = cmask;
+ cmask = (cmask >> 1) | smask;
+ }
+
+ cmask = ~ ppat->omask;
+
+ /* init. scan */
+ data = pseq->data + begin;
+ stkpos = pseq->hitpos + patnum;
+ stkerr = pseq->hiterr + patnum;
+
+ /* loop on text data */
+
+ for (pos = begin ; pos < end ; pos++) {
+
+ sindx = ppat->smat[*data++];
+
+ for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {
+
+ pr[2] = pr[3] | smask;
+
+ pr[3] = (( pr[0] /* ins */
+ | (pr[0] >> 1) /* sub */
+ | (pr[1] >> 1)) /* del */
+ & cmask)
+ | ((pr[2] >> 1) & sindx); /* ident */
+
+ if (pr[3] & 0x1L) { /* found */
+ if (! found) {
+ PUSH(stkpos, pos - ppat->patlen + 1);
+ PUSH(stkerr, e);
+ }
+ found++;
+ }
+
+ }
+ }
+
+ return (*stkpos)->top; /* aka # of hits */
+}
+
+/* -------------------------------------------- */
+/* Baeza-Yates/Manber algorithm */
+/* API call to previous functions */
+/* -------------------------------------------- */
+Int32 ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
+{
+ if (ppat->maxerr == 0)
+ return ManberNoErr(pseq, ppat, patnum, begin, length);
+ else if (ppat->hasIndel)
+ return ManberIndel(pseq, ppat, patnum, begin, length);
+ else
+ return ManberSub(pseq, ppat, patnum, begin, length);
+}
+
+
+/* -------------------------------------------- */
+/* Alignement NWS */
+/* pour edition des hits */
+/* (avec substitution obligatoire aux bords) */
+/* -------------------------------------------- */
+
+Int32 NwsPatAlign(pseq, ppat, nerr, reslen, reserr)
+ Seq *pseq;
+ Pattern *ppat;
+ Int32 nerr, *reslen, *reserr;
+{
+ UInt8 *sseq, *px;
+ Int32 i, j, lseq, lpat, npos, dindel, dsub,
+ *pc, *pi, *pd, *ps;
+ UInt32 amask;
+
+ static Int32 sTab[(MAX_PAT_LEN+MAX_PAT_ERR+1) * (MAX_PAT_LEN+1)];
+
+ lseq = pseq->seqlen;
+
+ pc = sTab; /* |----|----| --> i */
+ pi = pc - 1; /* | ps | pd | | */
+ pd = pi - lseq; /* |----|----| | */
+ ps = pd - 1; /* | pi | pc | v j */
+ /* |---------| */
+
+ lseq = pseq->seqlen;
+ lpat = ppat->patlen;
+
+ sseq = pseq->data - 1;
+
+ amask = ONEMASK >> lpat;
+
+ for (j = 0 ; j <= lpat ; j++) {
+
+ for (i = 0 , px = sseq ; i <= lseq ; i++, px++) {
+
+ if (i && j) {
+ dindel = MIN(*pi, *pd) + 1;
+ dsub = *ps + KRONECK(ppat->smat[*px], amask);
+ *pc = MIN(dindel, dsub);
+ }
+ else if (i) /* j == 0 */
+ *pc = *pi + 1;
+ else if (j) /* i == 0 */
+ *pc = *pd + 1;
+ else /* root */
+ *pc = 0;
+
+ pc++;
+ pi++;
+ pd++;
+ ps++;
+ }
+
+ amask <<= 1;
+ }
+
+ pc--;
+
+ for (i = lseq, npos = 0 ; i >= 0 ; i--, pc--) {
+ if (*pc <= nerr) {
+ *reslen++ = i;
+ *reserr++ = *pc;
+ npos++;
+ }
+ }
+
+ return npos;
+}
diff --git a/src/libapat/libstki.c b/src/libapat/libstki.c
new file mode 100644
index 0000000..1ca9868
--- /dev/null
+++ b/src/libapat/libstki.c
@@ -0,0 +1,379 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Mar. 92 */
+/* File: libstki.c */
+/* Purpose: A library to deal with 'stacks' of */
+/* integers */
+/* Note: 'stacks' are dynamic (i.e. size is */
+/* automatically readjusted when needed) */
+/* History: */
+/* 00/03/92 : first draft */
+/* 15/08/93 : revised version */
+/* 14/05/99 : last revision */
+/* ==================================================== */
+
+#include
+#include
+#include
+
+#include "Gtypes.h"
+#include "libstki.h"
+
+
+/* ============================ */
+/* Constantes et Macros locales */
+/* ============================ */
+
+#define ExpandStack(stkh) ResizeStacki((stkh), (*stkh)->size << 1)
+
+#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1)
+
+
+static Int16 sStkiLastError = kStkiNoErr;
+
+/* -------------------------------------------- */
+/* gestion des erreurs */
+/* get/reset erreur flag */
+/* */
+/* @function: StkiError */
+/* -------------------------------------------- */
+
+Int16 StkiError(Bool reset)
+{
+ Int16 err;
+
+ err = sStkiLastError;
+
+ if (reset)
+ sStkiLastError = kStkiNoErr;
+
+ return err;
+
+} /* end of StkiError */
+
+/* -------------------------------------------- */
+/* creation d'un stack */
+/* */
+/* @function: NewStacki */
+/* -------------------------------------------- */
+
+StackiPtr NewStacki(Int32 size)
+{
+ StackiPtr stki;
+
+ if (! (stki = NEW(Stacki)))
+ return NULL;
+
+ stki->size = size;
+ stki->top = 0;
+ stki->cursor = 0;
+
+ if ( ! (stki->val = NEWN(Int32, size))) {
+ sStkiLastError = kStkiMemErr;
+ return FreeStacki(stki);
+ }
+
+ return stki;
+
+} /* end of NewStacki */
+
+
+/* -------------------------------------------- */
+/* liberation d'un stack */
+/* */
+/* @function: FreeStacki */
+/* -------------------------------------------- */
+
+StackiPtr FreeStacki(StackiPtr stki)
+{
+ if (stki) {
+ if (stki->val)
+ FREE(stki->val);
+ FREE(stki);
+ }
+
+ return NULL;
+
+} /* end of FreeStacki */
+
+/* -------------------------------------------- */
+/* creation d'un vecteur de stacks */
+/* */
+/* @function: NewStackiVector */
+/* -------------------------------------------- */
+
+StackiHdle NewStackiVector(Int32 vectSize, Int32 stackSize)
+{
+ Int32 i;
+ StackiHdle stkh;
+
+ if (! (stkh = NEWN(StackiPtr, vectSize))) {
+ sStkiLastError = kStkiMemErr;
+ return NULL;
+ }
+
+ for (i = 0 ; i < vectSize ; i++)
+ if (! (stkh[i] = NewStacki(stackSize)))
+ return FreeStackiVector(stkh, i);
+
+ return stkh;
+
+} /* end of NewStackiVector */
+
+
+/* -------------------------------------------- */
+/* liberation d'un vecteur de stacks */
+/* */
+/* @function: FreeStackiVector */
+/* -------------------------------------------- */
+
+StackiHdle FreeStackiVector(StackiHdle stkh, Int32 vectSize)
+{
+ Int32 i;
+
+ if (stkh) {
+ for (i = 0 ; i < vectSize ; i++)
+ (void) FreeStacki(stkh[i]);
+ FREE(stkh);
+ }
+
+ return NULL;
+
+} /* end of FreeStackiVector */
+
+/* -------------------------------------------- */
+/* resize d'un stack */
+/* */
+/* @function: ResizeStacki */
+/* -------------------------------------------- */
+
+Int32 ResizeStacki(StackiHdle stkh, Int32 size)
+{
+ Int32 resize = 0; /* assume error */
+ Int32 *val;
+
+ if ((val = REALLOC(Int32, (*stkh)->val, size))) {
+ (*stkh)->size = resize = size;
+ (*stkh)->val = val;
+ }
+
+ if (! resize)
+ sStkiLastError = kStkiMemErr;
+
+ return resize;
+
+} /* end of ResizeStacki */
+
+/* -------------------------------------------- */
+/* empilage(/lement) */
+/* */
+/* @function: PushiIn */
+/* -------------------------------------------- */
+
+Bool PushiIn(StackiHdle stkh, Int32 val)
+{
+ if (((*stkh)->top >= (*stkh)->size) && (! ExpandStack(stkh)))
+ return Faux;
+
+ (*stkh)->val[((*stkh)->top)++] = val;
+
+ return Vrai;
+
+} /* end of PushiIn */
+
+/* -------------------------------------------- */
+/* depilage(/lement) */
+/* */
+/* @function: PopiOut */
+/* -------------------------------------------- */
+
+Bool PopiOut(StackiHdle stkh, Int32 *val)
+{
+ if ((*stkh)->top <= 0)
+ return Faux;
+
+ *val = (*stkh)->val[--((*stkh)->top)];
+
+ if ( ((*stkh)->top < ((*stkh)->size >> 1))
+ && ((*stkh)->top > kMinStackiSize))
+
+ (void) ShrinkStack(stkh);
+
+ return Vrai;
+
+} /* end of PopiOut */
+
+/* -------------------------------------------- */
+/* lecture descendante */
+/* */
+/* @function: ReadiDown */
+/* -------------------------------------------- */
+
+Bool ReadiDown(StackiPtr stki, Int32 *val)
+{
+ if (stki->cursor <= 0)
+ return Faux;
+
+ *val = stki->val[--(stki->cursor)];
+
+ return Vrai;
+
+} /* end of ReadiDown */
+
+/* -------------------------------------------- */
+/* lecture ascendante */
+/* */
+/* @function: ReadiUp */
+/* -------------------------------------------- */
+
+Bool ReadiUp(StackiPtr stki, Int32 *val)
+{
+ if (stki->cursor >= stki->top)
+ return Faux;
+
+ *val = stki->val[(stki->cursor)++];
+
+ return Vrai;
+
+} /* end of ReadiUp */
+
+/* -------------------------------------------- */
+/* remontee/descente du curseur */
+/* */
+/* @function: CursiToTop */
+/* @function: CursiToBottom */
+/* -------------------------------------------- */
+
+void CursiToTop(StackiPtr stki)
+{
+ stki->cursor = stki->top;
+
+} /* end of CursiToTop */
+
+void CursiToBottom(stki)
+ StackiPtr stki;
+{
+ stki->cursor = 0;
+
+} /* end of CursiToBottom */
+
+/* -------------------------------------------- */
+/* echange des valeurs cursor <-> (top - 1) */
+/* */
+/* @function: CursiSwap */
+/* -------------------------------------------- */
+
+void CursiSwap(StackiPtr stki)
+{
+ Int32 tmp;
+
+ if ((stki->top <= 0) || (stki->cursor < 0))
+ return;
+
+ tmp = stki->val[stki->cursor];
+ stki->val[stki->cursor] = stki->val[stki->top - 1];
+ stki->val[stki->top - 1] = tmp;
+
+} /* end of CursiSwap */
+
+/* -------------------------------------------- */
+/* Recherche d'une valeur en stack a partir du */
+/* curseur courant en descendant. */
+/* on laisse le curseur a l'endroit trouve */
+/* */
+/* @function: SearchDownStacki */
+/* -------------------------------------------- */
+
+Bool SearchDownStacki(StackiPtr stki, Int32 sval)
+{
+ Int32 val;
+ Bool more;
+
+ while ((more = ReadiDown(stki, &val)))
+ if (val == sval)
+ break;
+
+ return more;
+
+} /* end of SearchDownStacki */
+
+/* -------------------------------------------- */
+/* Recherche dichotomique d'une valeur en stack */
+/* le stack est suppose trie par valeurs */
+/* croissantes. */
+/* on place le curseur a l'endroit trouve */
+/* */
+/* @function: BinSearchStacki */
+/* -------------------------------------------- */
+
+Bool BinSearchStacki(StackiPtr stki, Int32 sval)
+{
+ Int32 midd, low, high, span;
+
+ low = 0;
+ high = stki->top - 1;
+
+ while (high >= low) {
+
+ midd = (high + low) / 2;
+
+ span = stki->val[midd] - sval;
+
+ if (span == 0) {
+ stki->cursor = midd;
+ return Vrai;
+ }
+
+ if (span > 0)
+ high = midd - 1;
+ else
+ low = midd + 1;
+ }
+
+ return Faux;
+
+} /* end of BinSearchStacki */
+
+/* -------------------------------------------- */
+/* teste l'egalite *physique* de deux stacks */
+/* */
+/* @function: SameStacki */
+/* -------------------------------------------- */
+
+Bool SameStacki(StackiPtr stki1, StackiPtr stki2)
+{
+ if (stki1->top != stki2->top)
+ return Faux;
+
+ return ((memcmp(stki1->val, stki2->val,
+ stki1->top * sizeof(Int32)) == 0) ? Vrai : Faux);
+
+} /* end of SameStacki */
+
+
+/* -------------------------------------------- */
+/* inverse l'ordre des elements dans un stack */
+/* */
+/* @function: ReverseStacki */
+/* -------------------------------------------- */
+
+Bool ReverseStacki(StackiPtr stki)
+{
+ Int32 *t, *b, swp;
+
+ if (stki->top <= 0)
+ return Faux;
+
+ b = stki->val;
+ t = b + stki->top - 1;
+
+ while (t > b) {
+ swp = *t;
+ *t-- = *b;
+ *b++ = swp;
+ }
+
+ return Vrai;
+
+} /* end of ReverseStacki */
+
diff --git a/src/libapat/libstki.h b/src/libapat/libstki.h
new file mode 100644
index 0000000..6331ae7
--- /dev/null
+++ b/src/libapat/libstki.h
@@ -0,0 +1,87 @@
+/* ==================================================== */
+/* Copyright (c) Atelier de BioInformatique */
+/* Mar. 92 */
+/* File: libstki.h */
+/* Purpose: library of dynamic stacks holding */
+/* integer values */
+/* History: */
+/* 00/03/92 : first draft */
+/* 07/07/93 : complete revision */
+/* 10/03/94 : added xxxVector funcs */
+/* 14/05/99 : last revision */
+/* ==================================================== */
+
+#ifndef _H_Gtypes
+#include "Gtypes.h"
+#endif
+
+#define _H_libstki
+
+/* ==================================================== */
+/* Constantes de dimensionnement */
+/* ==================================================== */
+
+#ifndef kMinStackiSize
+#define kMinStackiSize 2 /* taille mini stack */
+#endif
+
+
+#define kStkiNoErr 0 /* ok */
+#define kStkiMemErr 1 /* not enough memory */
+
+#define kStkiReset Vrai
+#define kStkiGet Faux
+
+/* ==================================================== */
+/* Macros standards */
+/* ==================================================== */
+
+#ifndef NEW
+#define NEW(typ) (typ*)malloc(sizeof(typ))
+#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ))
+#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ))
+#define FREE(ptr) free((Ptr) ptr)
+#endif
+
+
+/* ==================================================== */
+/* Types & Structures de donnees */
+/* ==================================================== */
+
+ /* -------------------- */
+ /* structure : pile */
+ /* -------------------- */
+typedef struct Stacki {
+ /* ---------------------*/
+ Int32 size; /* stack size */
+ Int32 top; /* current free pos. */
+ Int32 cursor; /* current cursor */
+ Int32 *val; /* values */
+ /* ---------------------*/
+} Stacki, *StackiPtr, **StackiHdle;
+
+
+
+/* ==================================================== */
+/* Prototypes (generated by mproto) */
+/* ==================================================== */
+
+ /* libstki.c */
+
+Int16 StkiError (Bool reset );
+StackiPtr NewStacki (Int32 size );
+StackiPtr FreeStacki (StackiPtr stki );
+StackiHdle NewStackiVector (Int32 vectSize, Int32 stackSize );
+StackiHdle FreeStackiVector (StackiHdle stkh, Int32 vectSize );
+Int32 ResizeStacki (StackiHdle stkh , Int32 size );
+Bool PushiIn (StackiHdle stkh , Int32 val );
+Bool PopiOut (StackiHdle stkh , Int32 *val );
+Bool ReadiDown (StackiPtr stki , Int32 *val );
+Bool ReadiUp (StackiPtr stki , Int32 *val );
+void CursiToTop (StackiPtr stki );
+void CursiToBottom (StackiPtr stki );
+void CursiSwap (StackiPtr stki );
+Bool SearchDownStacki (StackiPtr stki , Int32 sval );
+Bool BinSearchStacki (StackiPtr stki , Int32 sval );
+Bool SameStacki (StackiPtr stki1 , StackiPtr stki2 );
+Bool ReverseStacki (StackiPtr stki );
diff --git a/src/libecoPCR/Makefile b/src/libecoPCR/Makefile
new file mode 100644
index 0000000..08f3745
--- /dev/null
+++ b/src/libecoPCR/Makefile
@@ -0,0 +1,31 @@
+
+SOURCES = ecoapat.c \
+ ecodna.c \
+ ecoError.c \
+ ecoIOUtils.c \
+ ecoMalloc.c \
+ ecorank.c \
+ ecoseq.c \
+ ecotax.c \
+ ecofilter.c \
+ econame.c
+
+SRCS=$(SOURCES)
+
+OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
+
+LIBFILE= libecoPCR.a
+RANLIB= ranlib
+
+
+include ../global.mk
+
+
+all: $(LIBFILE)
+
+clean:
+ rm -rf $(OBJECTS) $(LIBFILE)
+
+$(LIBFILE): $(OBJECTS)
+ ar -cr $@ $?
+ $(RANLIB) $@
diff --git a/src/libecoPCR/ecoError.c b/src/libecoPCR/ecoError.c
new file mode 100644
index 0000000..00bbfa2
--- /dev/null
+++ b/src/libecoPCR/ecoError.c
@@ -0,0 +1,26 @@
+#include "ecoPCR.h"
+#include
+#include
+
+/*
+ * print the message given as argument and exit the program
+ * @param error error number
+ * @param message the text explaining what's going on
+ * @param filename the file source where the program failed
+ * @param linenumber the line where it has failed
+ * filename and linenumber are written at pre-processing
+ * time by a macro
+ */
+void ecoError(int32_t error,
+ const char* message,
+ const char * filename,
+ int linenumber)
+{
+ fprintf(stderr,"Error %d in file %s line %d : %s\n",
+ error,
+ filename,
+ linenumber,
+ message);
+
+ abort();
+}
diff --git a/src/libecoPCR/ecoIOUtils.c b/src/libecoPCR/ecoIOUtils.c
new file mode 100644
index 0000000..8d7ce82
--- /dev/null
+++ b/src/libecoPCR/ecoIOUtils.c
@@ -0,0 +1,122 @@
+#include "ecoPCR.h"
+#include
+#include
+
+#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
+ (((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
+
+
+int32_t is_big_endian()
+{
+ int32_t i=1;
+
+ return (int32_t)((char*)&i)[0];
+}
+
+
+
+
+int32_t swap_int32_t(int32_t i)
+{
+ return SWAPINT32(i);
+}
+
+
+/**
+ * Read part of the file
+ * @param *f the database
+ * @param recordSize the size to be read
+ *
+ * @return buffer
+ */
+void *read_ecorecord(FILE *f,int32_t *recordSize)
+{
+ static void *buffer =NULL;
+ int32_t buffersize=0;
+ int32_t read;
+
+ if (!recordSize)
+ ECOERROR(ECO_ASSERT_ERROR,
+ "recordSize cannot be NULL");
+
+ read = fread(recordSize,
+ 1,
+ sizeof(int32_t),
+ f);
+
+ if (feof(f))
+ return NULL;
+
+ if (read != sizeof(int32_t))
+ ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+ if (is_big_endian())
+ *recordSize=swap_int32_t(*recordSize);
+
+ if (buffersize < *recordSize)
+ {
+ if (buffer)
+ buffer = ECOREALLOC(buffer,*recordSize,
+ "Increase size of record buffer");
+ else
+ buffer = ECOMALLOC(*recordSize,
+ "Allocate record buffer");
+ }
+
+ read = fread(buffer,
+ 1,
+ *recordSize,
+ f);
+
+ if (read != *recordSize)
+ ECOERROR(ECO_IO_ERROR,"Reading record data error");
+
+ return buffer;
+};
+
+
+
+
+
+/**
+ * Open the database and check it's readable
+ * @param filename name of the database (.sdx, .rdx, .tbx)
+ * @param sequencecount buffer - pointer to variable storing the number of occurence
+ * @param abort_on_open_error boolean to define the behaviour in case of error
+ * while opening the database
+ * @return FILE type
+ **/
+FILE *open_ecorecorddb(const char *filename,
+ int32_t *sequencecount,
+ int32_t abort_on_open_error)
+{
+ FILE *f;
+ int32_t read;
+
+ f = fopen(filename,"rb");
+
+ if (!f)
+ {
+ if (abort_on_open_error)
+ ECOERROR(ECO_IO_ERROR,"Cannot open file");
+ else
+ {
+ *sequencecount=0;
+ return NULL;
+ }
+ }
+
+ read = fread(sequencecount,
+ 1,
+ sizeof(int32_t),
+ f);
+
+ if (read != sizeof(int32_t))
+ ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+ if (is_big_endian())
+ *sequencecount=swap_int32_t(*sequencecount);
+
+ return f;
+}
+
diff --git a/src/libecoPCR/ecoMalloc.c b/src/libecoPCR/ecoMalloc.c
new file mode 100644
index 0000000..0ea8d3b
--- /dev/null
+++ b/src/libecoPCR/ecoMalloc.c
@@ -0,0 +1,79 @@
+#include "ecoPCR.h"
+#include
+
+static int eco_log_malloc = 0;
+
+void eco_trace_memory_allocation()
+{
+ eco_log_malloc=1;
+}
+
+void eco_untrace_memory_allocation()
+{
+ eco_log_malloc=0;
+}
+
+
+void *eco_malloc(int32_t chunksize,
+ const char *error_message,
+ const char *filename,
+ int32_t line)
+{
+ void * chunk;
+
+ chunk = calloc(1,chunksize);
+
+ if (!chunk)
+ ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+ if (eco_log_malloc)
+ fprintf(stderr,
+ "Memory segment located at %p of size %d is allocated (file : %s [%d])",
+ chunk,
+ chunksize,
+ filename,
+ line);
+
+ return chunk;
+}
+
+void *eco_realloc(void *chunk,
+ int32_t newsize,
+ const char *error_message,
+ const char *filename,
+ int32_t line)
+{
+ void *newchunk;
+
+ newchunk = realloc(chunk,newsize);
+
+ if (!newchunk)
+ ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+ if (eco_log_malloc)
+ fprintf(stderr,
+ "Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
+ chunk,
+ newchunk,
+ newsize,
+ filename,
+ line);
+
+ return newchunk;
+}
+
+void eco_free(void *chunk,
+ const char *error_message,
+ const char *filename,
+ int32_t line)
+{
+ free(chunk);
+
+ if (eco_log_malloc)
+ fprintf(stderr,
+ "Memory segment %p is released => %s (file : %s [%d])",
+ chunk,
+ error_message,
+ filename,
+ line);
+}
diff --git a/src/libecoPCR/ecoPCR.h b/src/libecoPCR/ecoPCR.h
new file mode 100644
index 0000000..3e13b18
--- /dev/null
+++ b/src/libecoPCR/ecoPCR.h
@@ -0,0 +1,269 @@
+#ifndef ECOPCR_H_
+#define ECOPCR_H_
+
+#include
+#include
+
+#ifndef H_apat
+#include "../libapat/apat.h"
+#endif
+
+/*****************************************************
+ *
+ * Data type declarations
+ *
+ *****************************************************/
+
+/*
+ *
+ * Sequence types
+ *
+ */
+
+typedef struct {
+
+ int32_t taxid;
+ char AC[20];
+ int32_t DE_length;
+ int32_t SQ_length;
+ int32_t CSQ_length;
+
+ char data[1];
+
+} ecoseqformat_t;
+
+typedef struct {
+ int32_t taxid;
+ int32_t SQ_length;
+ char *AC;
+ char *DE;
+ char *SQ;
+} ecoseq_t;
+
+/*
+ *
+ * Taxonomy taxon types
+ *
+ */
+
+
+typedef struct {
+ int32_t taxid;
+ int32_t rank;
+ int32_t parent;
+ int32_t namelength;
+ char name[1];
+
+} ecotxformat_t;
+
+typedef struct ecotxnode {
+ int32_t taxid;
+ int32_t rank;
+ struct ecotxnode *parent;
+ char *name;
+} ecotx_t;
+
+typedef struct {
+ int32_t count;
+ ecotx_t taxon[1];
+} ecotxidx_t;
+
+
+/*
+ *
+ * Taxonomy rank types
+ *
+ */
+
+typedef struct {
+ int32_t count;
+ char* label[1];
+} ecorankidx_t;
+
+/*
+ *
+ * Taxonomy name types
+ *
+ */
+
+typedef struct {
+ int32_t is_scientificname;
+ int32_t namelength;
+ int32_t classlength;
+ int32_t taxid;
+ char names[1];
+} econameformat_t;
+
+
+ typedef struct {
+ char *name;
+ char *classname;
+ int32_t is_scientificname;
+ struct ecotxnode *taxon;
+} econame_t;
+
+
+typedef struct {
+ int32_t count;
+ econame_t names[1];
+} econameidx_t;
+
+
+ typedef struct {
+ ecorankidx_t *ranks;
+ econameidx_t *names;
+ ecotxidx_t *taxons;
+} ecotaxonomy_t;
+
+
+/*****************************************************
+ *
+ * Function declarations
+ *
+ *****************************************************/
+
+/*
+ *
+ * Low level system functions
+ *
+ */
+
+int32_t is_big_endian();
+int32_t swap_int32_t(int32_t);
+
+void *eco_malloc(int32_t chunksize,
+ const char *error_message,
+ const char *filename,
+ int32_t line);
+
+
+void *eco_realloc(void *chunk,
+ int32_t chunksize,
+ const char *error_message,
+ const char *filename,
+ int32_t line);
+
+void eco_free(void *chunk,
+ const char *error_message,
+ const char *filename,
+ int32_t line);
+
+void eco_trace_memory_allocation();
+void eco_untrace_memory_allocation();
+
+#define ECOMALLOC(size,error_message) \
+ eco_malloc((size),(error_message),__FILE__,__LINE__)
+
+#define ECOREALLOC(chunk,size,error_message) \
+ eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
+
+#define ECOFREE(chunk,error_message) \
+ eco_free((chunk),(error_message),__FILE__,__LINE__)
+
+
+
+
+/*
+ *
+ * Error managment
+ *
+ */
+
+
+void ecoError(int32_t,const char*,const char *,int);
+
+#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
+
+#define ECO_IO_ERROR (1)
+#define ECO_MEM_ERROR (2)
+#define ECO_ASSERT_ERROR (3)
+#define ECO_NOTFOUND_ERROR (4)
+
+
+/*
+ *
+ * Low level Disk access functions
+ *
+ */
+
+FILE *open_ecorecorddb(const char *filename,
+ int32_t *sequencecount,
+ int32_t abort_on_open_error);
+
+void *read_ecorecord(FILE *,int32_t *recordSize);
+
+
+
+/*
+ * Read function in internal binary format
+ */
+
+FILE *open_ecoseqdb(const char *filename,
+ int32_t *sequencecount);
+
+ecoseq_t *readnext_ecoseq(FILE *);
+
+ecorankidx_t *read_rankidx(const char *filename);
+
+econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
+
+
+
+ /**
+ * Read taxonomy data as formated by the ecoPCRFormat.py script.
+ *
+ * This function is normaly uses internaly by the read_taxonomy
+ * function and should not be called directly.
+ *
+ * @arg filename path to the *.tdx file of the reformated db
+ *
+ * @return pointer to a taxonomy index structure
+ */
+
+ecotxidx_t *read_taxonomyidx(const char *filename);
+
+ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
+
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
+
+int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
+
+ecoseq_t *ecoseq_iterator(const char *prefix);
+
+
+
+ecoseq_t *new_ecoseq();
+int32_t delete_ecoseq(ecoseq_t *);
+ecoseq_t *new_ecoseq_with_data( char *AC,
+ char *DE,
+ char *SQ,
+ int32_t taxid
+ );
+
+
+int32_t delete_taxon(ecotx_t *taxon);
+int32_t delete_taxonomy(ecotxidx_t *index);
+
+
+int32_t rank_index(const char* label,ecorankidx_t* ranks);
+
+int32_t delete_apatseq(SeqPtr pseq);
+PatternPtr buildPattern(const char *pat, int32_t error_max);
+PatternPtr complementPattern(PatternPtr pat);
+
+SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
+
+char *ecoComplementPattern(char *nucAcSeq);
+char *ecoComplementSequence(char *nucAcSeq);
+char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
+
+ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+
+int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
+int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
+
+#endif /*ECOPCR_H_*/
diff --git a/src/libecoPCR/ecoapat.c b/src/libecoPCR/ecoapat.c
new file mode 100644
index 0000000..79a722e
--- /dev/null
+++ b/src/libecoPCR/ecoapat.c
@@ -0,0 +1,199 @@
+#include "../libapat/libstki.h"
+#include "../libapat/apat.h"
+
+#include "ecoPCR.h"
+
+#include
+
+static void EncodeSequence(SeqPtr seq);
+static void UpperSequence(char *seq);
+
+/* -------------------------------------------- */
+/* uppercase sequence */
+/* -------------------------------------------- */
+
+#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
+#define TO_UPPER(c) ((c) - 'a' + 'A')
+
+void UpperSequence(char *seq)
+{
+ char *cseq;
+
+ for (cseq = seq ; *cseq ; cseq++)
+ if (IS_LOWER(*cseq))
+ *cseq = TO_UPPER(*cseq);
+}
+
+#undef IS_LOWER
+#undef TO_UPPER
+
+
+
+
+/* -------------------------------------------- */
+/* encode sequence */
+/* IS_UPPER is slightly faster than isupper */
+/* -------------------------------------------- */
+
+#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
+
+
+
+void EncodeSequence(SeqPtr seq)
+{
+ int i;
+ UInt8 *data;
+ char *cseq;
+
+ data = seq->data;
+ cseq = seq->cseq;
+
+ while (*cseq) {
+
+ *data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
+ data++;
+ cseq++;
+ }
+
+ for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++)
+ *data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
+
+ for (i = 0 ; i < MAX_PATTERN ; i++)
+ seq->hitpos[i]->top = seq->hiterr[i]->top = 0;
+
+}
+
+#undef IS_UPPER
+
+
+SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular)
+{
+ int i;
+
+ if (!out)
+ {
+ out = ECOMALLOC(sizeof(Seq),
+ "Error in Allocation of a new Seq structure");
+
+ for (i = 0 ; i < MAX_PATTERN ; i++)
+ {
+
+ if (! (out->hitpos[i] = NewStacki(kMinStackiSize)))
+ ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation");
+
+ if (! (out->hiterr[i] = NewStacki(kMinStackiSize)))
+ ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation");
+ }
+ }
+
+
+ out->name = in->AC;
+ out->seqsiz = out->seqlen = in->SQ_length;
+ out->circular = circular;
+
+ if (!out->data)
+ {
+ out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8),
+ "Error in Allocation of a new Seq data member");
+ out->datsiz= out->seqlen+circular;
+ }
+ else if ((out->seqlen +circular) >= out->datsiz)
+ {
+ out->data = ECOREALLOC(out->data,(out->seqlen+circular),
+ "Error during Seq data buffer realloc");
+ out->datsiz= out->seqlen+circular;
+ }
+
+ out->cseq = in->SQ;
+
+ EncodeSequence(out);
+
+ return out;
+}
+
+int32_t delete_apatseq(SeqPtr pseq)
+{
+ int i;
+
+ if (pseq) {
+
+ if (pseq->data)
+ ECOFREE(pseq->data,"Freeing sequence data buffer");
+
+ for (i = 0 ; i < MAX_PATTERN ; i++) {
+ if (pseq->hitpos[i]) FreeStacki(pseq->hitpos[i]);
+ if (pseq->hiterr[i]) FreeStacki(pseq->hiterr[i]);
+ }
+
+ ECOFREE(pseq,"Freeing apat sequence structure");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+PatternPtr buildPattern(const char *pat, int32_t error_max)
+{
+ PatternPtr pattern;
+ int32_t patlen;
+
+ pattern = ECOMALLOC(sizeof(Pattern),
+ "Error in pattern allocation");
+
+ pattern->ok = Vrai;
+ pattern->hasIndel= Faux;
+ pattern->maxerr = error_max;
+ patlen = strlen(pat);
+
+ pattern->cpat = ECOMALLOC(sizeof(char)*patlen+1,
+ "Error in sequence pattern allocation");
+
+ strncpy(pattern->cpat,pat,patlen);
+ pattern->cpat[patlen]=0;
+ UpperSequence(pattern->cpat);
+
+ if (!CheckPattern(pattern))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
+
+ if (! EncodePattern(pattern, dna))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
+
+ if (! CreateS(pattern, ALPHA_LEN))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
+
+ return pattern;
+
+}
+
+PatternPtr complementPattern(PatternPtr pat)
+{
+ PatternPtr pattern;
+
+ pattern = ECOMALLOC(sizeof(Pattern),
+ "Error in pattern allocation");
+
+ pattern->ok = Vrai;
+ pattern->hasIndel= pat->hasIndel;
+ pattern->maxerr = pat->maxerr;
+ pattern->patlen = pat->patlen;
+
+ pattern->cpat = ECOMALLOC(sizeof(char)*(strlen(pat->cpat)+1),
+ "Error in sequence pattern allocation");
+
+ strcpy(pattern->cpat,pat->cpat);
+
+ ecoComplementPattern(pattern->cpat);
+
+ if (!CheckPattern(pattern))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
+
+ if (! EncodePattern(pattern, dna))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
+
+ if (! CreateS(pattern, ALPHA_LEN))
+ ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
+
+ return pattern;
+
+}
diff --git a/src/libecoPCR/ecodna.c b/src/libecoPCR/ecodna.c
new file mode 100644
index 0000000..7d29a0e
--- /dev/null
+++ b/src/libecoPCR/ecodna.c
@@ -0,0 +1,153 @@
+#include
+#include "ecoPCR.h"
+
+/*
+ * @doc: DNA alphabet (IUPAC)
+ */
+#define LX_BIO_DNA_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
+
+/*
+ * @doc: complementary DNA alphabet (IUPAC)
+ */
+#define LX_BIO_CDNA_ALPHA "TVGHEFCDIJMLKNOPQYSAABWXRZ#!]["
+
+
+static char sNuc[] = LX_BIO_DNA_ALPHA;
+static char sAnuc[] = LX_BIO_CDNA_ALPHA;
+
+static char LXBioBaseComplement(char nucAc);
+static char *LXBioSeqComplement(char *nucAcSeq);
+static char *reverseSequence(char *str,char isPattern);
+
+
+/* ---------------------------- */
+
+char LXBioBaseComplement(char nucAc)
+{
+ char *c;
+
+ if ((c = strchr(sNuc, nucAc)))
+ return sAnuc[(c - sNuc)];
+ else
+ return nucAc;
+}
+
+/* ---------------------------- */
+
+char *LXBioSeqComplement(char *nucAcSeq)
+{
+ char *s;
+
+ for (s = nucAcSeq ; *s ; s++)
+ *s = LXBioBaseComplement(*s);
+
+ return nucAcSeq;
+}
+
+
+char *reverseSequence(char *str,char isPattern)
+{
+ char *sb, *se, c;
+
+ if (! str)
+ return str;
+
+ sb = str;
+ se = str + strlen(str) - 1;
+
+ while(sb <= se) {
+ c = *sb;
+ *sb++ = *se;
+ *se-- = c;
+ }
+
+ sb = str;
+ se = str + strlen(str) - 1;
+
+ if (isPattern)
+ for (;sb < se; sb++)
+ {
+ if (*sb=='#')
+ {
+ if (((se - sb) > 2) && (*(sb+2)=='!'))
+ {
+ *sb='!';
+ sb+=2;
+ *sb='#';
+ }
+ else
+ {
+ *sb=*(sb+1);
+ sb++;
+ *sb='#';
+ }
+ }
+ else if (*sb=='!')
+ {
+ *sb=*(sb-1);
+ *(sb-1)='!';
+ }
+ }
+
+ return str;
+}
+
+char *ecoComplementPattern(char *nucAcSeq)
+{
+ return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
+}
+
+char *ecoComplementSequence(char *nucAcSeq)
+{
+ return reverseSequence(LXBioSeqComplement(nucAcSeq),0);
+}
+
+
+char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
+{
+ static char *buffer = NULL;
+ static int32_t buffSize= 0;
+ int32_t length;
+
+ if (begin < end)
+ {
+ length = end - begin;
+
+ if (length >= buffSize)
+ {
+ buffSize = length+1;
+ if (buffer)
+ buffer=ECOREALLOC(buffer,buffSize,
+ "Error in reallocating sub sequence buffer");
+ else
+ buffer=ECOMALLOC(buffSize,
+ "Error in allocating sub sequence buffer");
+
+ }
+
+ strncpy(buffer,nucAcSeq + begin,length);
+ buffer[length]=0;
+ }
+ else
+ {
+ length = end + strlen(nucAcSeq) - begin;
+
+ if (length >= buffSize)
+ {
+ buffSize = length+1;
+ if (buffer)
+ buffer=ECOREALLOC(buffer,buffSize,
+ "Error in reallocating sub sequence buffer");
+ else
+ buffer=ECOMALLOC(buffSize,
+ "Error in allocating sub sequence buffer");
+
+ }
+ strncpy(buffer,nucAcSeq+begin,length - end);
+ strncpy(buffer+(length-end),nucAcSeq ,end);
+ buffer[length]=0;
+ }
+
+ return buffer;
+}
+
diff --git a/src/libecoPCR/ecofilter.c b/src/libecoPCR/ecofilter.c
new file mode 100644
index 0000000..8a7dbb1
--- /dev/null
+++ b/src/libecoPCR/ecofilter.c
@@ -0,0 +1,19 @@
+#include "ecoPCR.h"
+
+int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
+ int32_t *restricted_taxid,
+ int32_t tab_len,
+ int32_t taxid)
+{
+ int i;
+ ecotx_t *taxon;
+
+ taxon = eco_findtaxonbytaxid(taxonomy, taxid);
+
+ for (i=0; i < tab_len; i++)
+ if ( (taxon->taxid == restricted_taxid[i]) ||
+ (eco_isundertaxon(taxon, restricted_taxid[i])) )
+ return 1;
+
+ return 0;
+}
\ No newline at end of file
diff --git a/src/libecoPCR/econame.c b/src/libecoPCR/econame.c
new file mode 100644
index 0000000..835d79c
--- /dev/null
+++ b/src/libecoPCR/econame.c
@@ -0,0 +1,61 @@
+#include "ecoPCR.h"
+#include
+#include
+
+static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
+
+econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
+{
+
+ int32_t count;
+ FILE *f;
+ econameidx_t *indexname;
+ int32_t i;
+
+ f = open_ecorecorddb(filename,&count,1);
+
+ indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
+
+ indexname->count=count;
+
+ for (i=0; i < count; i++){
+ readnext_econame(f,(indexname->names)+i,taxonomy);
+ }
+
+ return indexname;
+}
+
+econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
+{
+
+ econameformat_t *raw;
+ int32_t rs;
+
+ raw = read_ecorecord(f,&rs);
+
+ if (!raw)
+ return NULL;
+
+ if (is_big_endian())
+ {
+ raw->is_scientificname = swap_int32_t(raw->is_scientificname);
+ raw->namelength = swap_int32_t(raw->namelength);
+ raw->classlength = swap_int32_t(raw->classlength);
+ raw->taxid = swap_int32_t(raw->taxid);
+ }
+
+ name->is_scientificname=raw->is_scientificname;
+
+ name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name");
+ strncpy(name->name,raw->names,raw->namelength);
+ name->name[raw->namelength]=0;
+
+ name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname");
+ strncpy(name->classname,(raw->names+raw->namelength),raw->classlength);
+ name->classname[raw->classlength]=0;
+
+ name->taxon = taxonomy->taxons->taxon + raw->taxid;
+
+ return name;
+}
+
diff --git a/src/libecoPCR/ecorank.c b/src/libecoPCR/ecorank.c
new file mode 100644
index 0000000..4796088
--- /dev/null
+++ b/src/libecoPCR/ecorank.c
@@ -0,0 +1,52 @@
+#include "ecoPCR.h"
+#include
+#include
+
+static int compareRankLabel(const void *label1, const void *label2);
+
+ecorankidx_t *read_rankidx(const char *filename)
+{
+ int32_t count;
+ FILE *f;
+ ecorankidx_t *index;
+ int32_t i;
+ int32_t rs;
+ char *buffer;
+
+ f = open_ecorecorddb(filename,&count,1);
+
+ index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
+ "Allocate rank index");
+
+ index->count=count;
+
+ for (i=0; i < count; i++)
+ {
+ buffer = read_ecorecord(f,&rs);
+ index->label[i]=(char*) ECOMALLOC(rs+1,
+ "Allocate rank label");
+ strncpy(index->label[i],buffer,rs);
+ }
+
+ return index;
+}
+
+int32_t rank_index(const char* label,ecorankidx_t* ranks)
+{
+ char **rep;
+
+ rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
+
+ if (rep)
+ return rep-ranks->label;
+ else
+ ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
+
+ return -1;
+}
+
+
+int compareRankLabel(const void *label1, const void *label2)
+{
+ return strcmp((const char*)label1,*(const char**)label2);
+}
diff --git a/src/libecoPCR/ecoseq.c b/src/libecoPCR/ecoseq.c
new file mode 100644
index 0000000..2ec2614
--- /dev/null
+++ b/src/libecoPCR/ecoseq.c
@@ -0,0 +1,223 @@
+#include "ecoPCR.h"
+#include
+#include
+#include
+#include
+#include
+
+static FILE *open_seqfile(const char *prefix,int32_t index);
+
+
+ecoseq_t *new_ecoseq()
+{
+ void *tmp;
+
+ tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
+
+ return tmp;
+}
+
+int32_t delete_ecoseq(ecoseq_t * seq)
+{
+
+ if (seq)
+ {
+ if (seq->AC)
+ ECOFREE(seq->AC,"Free sequence AC");
+
+ if (seq->DE)
+ ECOFREE(seq->DE,"Free sequence DE");
+
+ if (seq->SQ)
+ ECOFREE(seq->SQ,"Free sequence SQ");
+
+ ECOFREE(seq,"Free sequence structure");
+
+ return 0;
+
+ }
+
+ return 1;
+}
+
+ecoseq_t *new_ecoseq_with_data( char *AC,
+ char *DE,
+ char *SQ,
+ int32_t taxid_idx
+ )
+{
+ ecoseq_t *tmp;
+ int32_t lstr;
+ tmp = new_ecoseq();
+
+ tmp->taxid=taxid_idx;
+
+ if (AC)
+ {
+ lstr =strlen(AC);
+ tmp->AC=ECOMALLOC((lstr+1) * sizeof(char),
+ "Allocate sequence accession");
+ strcpy(tmp->AC,AC);
+ }
+
+ if (DE)
+ {
+ lstr =strlen(DE);
+ tmp->DE=ECOMALLOC((lstr+1) * sizeof(char),
+ "Allocate sequence definition");
+ strcpy(tmp->DE,DE);
+ }
+
+ if (SQ)
+ {
+ lstr =strlen(SQ);
+ tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char),
+ "Allocate sequence data");
+ strcpy(tmp->SQ,SQ);
+ }
+ return tmp;
+
+}
+
+/**
+ * ?? used ??
+ **/
+FILE *open_ecoseqdb(const char *filename,
+ int32_t *sequencecount)
+{
+ return open_ecorecorddb(filename,sequencecount,1);
+}
+
+ecoseq_t *readnext_ecoseq(FILE *f)
+{
+ char *compressed=NULL;
+
+ ecoseqformat_t *raw;
+ ecoseq_t *seq;
+ int32_t comp_status;
+ unsigned long int seqlength;
+ int32_t rs;
+
+ raw = read_ecorecord(f,&rs);
+
+ if (!raw)
+ return NULL;
+
+ if (is_big_endian())
+ {
+ raw->CSQ_length = swap_int32_t(raw->CSQ_length);
+ raw->DE_length = swap_int32_t(raw->DE_length);
+ raw->SQ_length = swap_int32_t(raw->SQ_length);
+ raw->taxid = swap_int32_t(raw->taxid);
+ }
+
+ seq = new_ecoseq();
+
+ seq->taxid = raw->taxid;
+
+ seq->AC = ECOMALLOC(strlen(raw->AC) +1,
+ "Allocate Sequence Accesion number");
+ strncpy(seq->AC,raw->AC,strlen(raw->AC));
+
+
+ seq->DE = ECOMALLOC(raw->DE_length+1,
+ "Allocate Sequence definition");
+ strncpy(seq->DE,raw->data,raw->DE_length);
+
+ seqlength = seq->SQ_length = raw->SQ_length;
+
+ compressed = raw->data + raw->DE_length;
+
+ seq->SQ = ECOMALLOC(seqlength+1,
+ "Allocate sequence buffer");
+
+ comp_status = uncompress((unsigned char*)seq->SQ,
+ &seqlength,
+ (unsigned char*)compressed,
+ raw->CSQ_length);
+
+ if (comp_status != Z_OK)
+ ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
+
+ return seq;
+}
+
+/**
+ * Open the sequences database (.sdx file)
+ * @param prefix name of the database (radical without extension)
+ * @param index integer
+ *
+ * @return file object
+ */
+FILE *open_seqfile(const char *prefix,int32_t index)
+{
+ char filename_buffer[1024];
+ int32_t filename_length;
+ FILE *input;
+ int32_t seqcount;
+
+ filename_length = snprintf(filename_buffer,
+ 1023,
+ "%s_%03d.sdx",
+ prefix,
+ index);
+
+ fprintf(stderr,"# Coucou %s\n",filename_buffer);
+
+
+ if (filename_length >= 1024)
+ ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
+
+ filename_buffer[filename_length]=0;
+
+ input=open_ecorecorddb(filename_buffer,&seqcount,0);
+
+ if (input)
+ fprintf(stderr,"# Reading file %s containing %d sequences...\n",
+ filename_buffer,
+ seqcount);
+
+ return input;
+}
+
+ecoseq_t *ecoseq_iterator(const char *prefix)
+{
+ static FILE *current_seq_file= NULL;
+ static int32_t current_file_idx = 1;
+ static char current_prefix[1024];
+ ecoseq_t *seq;
+
+ if (prefix)
+ {
+ current_file_idx = 1;
+
+ if (current_seq_file)
+ fclose(current_seq_file);
+
+ strncpy(current_prefix,prefix,1023);
+ current_prefix[1024]=0;
+
+ current_seq_file = open_seqfile(current_prefix,
+ current_file_idx);
+
+ if (!current_seq_file)
+ return NULL;
+
+ }
+
+ seq = readnext_ecoseq(current_seq_file);
+
+ if (!seq && feof(current_seq_file))
+ {
+ current_file_idx++;
+ fclose(current_seq_file);
+ current_seq_file = open_seqfile(current_prefix,
+ current_file_idx);
+
+
+ if (current_seq_file)
+ seq = readnext_ecoseq(current_seq_file);
+ }
+
+ return seq;
+}
\ No newline at end of file
diff --git a/src/libecoPCR/ecotax.c b/src/libecoPCR/ecotax.c
new file mode 100644
index 0000000..89ac82b
--- /dev/null
+++ b/src/libecoPCR/ecotax.c
@@ -0,0 +1,329 @@
+#include "ecoPCR.h"
+#include
+#include
+#include
+
+static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
+
+ /**
+ * Open the taxonomy database
+ * @param pointer to the database (.tdx file)
+ * @return a ecotxidx_t structure
+ */
+ecotxidx_t *read_taxonomyidx(const char *filename)
+{
+ int32_t count;
+ FILE *f;
+ ecotxidx_t *index;
+ int32_t i;
+
+ f = open_ecorecorddb(filename,&count,1);
+
+ index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1),
+ "Allocate taxonomy");
+
+ index->count=count;
+ for (i=0; i < count; i++){
+ readnext_ecotaxon(f,&(index->taxon[i]));
+ index->taxon[i].parent=index->taxon + (int32_t)index->taxon[i].parent;
+ }
+ return index;
+}
+
+
+int32_t delete_taxonomy(ecotxidx_t *index)
+{
+ int32_t i;
+
+ if (index)
+ {
+ for (i=0; i< index->count; i++)
+ if (index->taxon[i].name)
+ ECOFREE(index->taxon[i].name,"Free scientific name");
+
+ ECOFREE(index,"Free Taxonomy");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+
+
+int32_t delete_taxon(ecotx_t *taxon)
+{
+ if (taxon)
+ {
+ if (taxon->name)
+ ECOFREE(taxon->name,"Free scientific name");
+
+ ECOFREE(taxon,"Free Taxon");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+
+/**
+ * Read the database for a given taxon a save the data
+ * into the taxon structure(if any found)
+ * @param *f pointer to FILE type returned by fopen
+ * @param *taxon pointer to the structure
+ *
+ * @return a ecotx_t structure if any taxon found else NULL
+ */
+ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
+{
+
+ ecotxformat_t *raw;
+ int32_t rs;
+
+ raw = read_ecorecord(f,&rs);
+
+ if (!raw)
+ return NULL;
+
+ if (is_big_endian())
+ {
+ raw->namelength = swap_int32_t(raw->namelength);
+ raw->parent = swap_int32_t(raw->parent);
+ raw->rank = swap_int32_t(raw->rank);
+ raw->taxid = swap_int32_t(raw->taxid);
+ }
+
+ taxon->parent = (ecotx_t*)raw->parent;
+ taxon->taxid = raw->taxid;
+ taxon->rank = raw->rank;
+
+ taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char),
+ "Allocate taxon scientific name");
+
+ strncpy(taxon->name,raw->name,raw->namelength);
+
+ return taxon;
+}
+
+
+ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
+{
+ ecotaxonomy_t *tax;
+ char *filename;
+ int buffsize;
+
+ tax = ECOMALLOC(sizeof(ecotaxonomy_t),
+ "Allocate taxonomy structure");
+
+ buffsize = strlen(prefix)+10;
+
+ filename = ECOMALLOC(buffsize,
+ "Allocate filename");
+
+ snprintf(filename,buffsize,"%s.rdx",prefix);
+
+ tax->ranks = read_rankidx(filename);
+
+ snprintf(filename,buffsize,"%s.tdx",prefix);
+
+ tax->taxons = read_taxonomyidx(filename);
+
+ if (readAlternativeName)
+ {
+ snprintf(filename,buffsize,"%s.ndx",prefix);
+ tax->names=read_nameidx(filename,tax);
+ }
+ else
+ tax->names=NULL;
+ return tax;
+
+}
+
+
+
+int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
+{
+ if (taxonomy)
+ {
+ if (taxonomy->ranks)
+ ECOFREE(taxonomy->ranks,"Free rank index");
+
+ if (taxonomy->taxons)
+ ECOFREE(taxonomy->taxons,"Free taxon index");
+
+ ECOFREE(taxonomy,"Free taxonomy structure");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
+ int32_t rankidx)
+{
+ ecotx_t *current_taxon;
+ ecotx_t *next_taxon;
+
+ current_taxon = taxon;
+ next_taxon = current_taxon->parent;
+
+ while ((current_taxon!=next_taxon) && // I' am the root node
+ (current_taxon->rank!=rankidx))
+ {
+ current_taxon = next_taxon;
+ next_taxon = current_taxon->parent;
+ }
+
+ if (current_taxon->rank==rankidx)
+ return current_taxon;
+ else
+ return NULL;
+}
+
+/**
+ * Get back information concerning a taxon from a taxonomic id
+ * @param *taxonomy the taxonomy database
+ * @param taxid the taxonomic id
+ *
+ * @result a ecotx_t structure containing the taxonimic information
+ **/
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
+ int32_t taxid)
+{
+ ecotx_t *current_taxon;
+ int32_t taxoncount;
+ int32_t i;
+
+ taxoncount=taxonomy->taxons->count;
+
+ for (current_taxon=taxonomy->taxons->taxon,
+ i=0;
+ i < taxoncount;
+ i++,
+ current_taxon++){
+ if (current_taxon->taxid==taxid){
+ return current_taxon;
+ }
+ }
+
+ return (ecotx_t*)NULL;
+}
+
+/**
+ * Find out if taxon is son of other taxon (identified by its taxid)
+ * @param *taxon son taxon
+ * @param parent_taxid taxonomic id of the other taxon
+ *
+ * @return 1 is the other taxid math a parent taxid, else 0
+ **/
+int eco_isundertaxon(ecotx_t *taxon,
+ int other_taxid)
+{
+ ecotx_t *next_parent;
+
+ next_parent = taxon->parent;
+
+ while ( (other_taxid != next_parent->taxid) &&
+ (strcmp(next_parent->name, "root")) )
+ {
+ next_parent = next_parent->parent;
+ }
+
+ if (other_taxid == next_parent->taxid)
+ return 1;
+ else
+ return 0;
+}
+
+ecotx_t *eco_getspecies(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("species",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
+
+ecotx_t *eco_getgenus(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("genus",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
+
+
+ecotx_t *eco_getfamily(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("family",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
+
+ecotx_t *eco_getkingdom(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("kingdom",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
+
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
+ ecotaxonomy_t *taxonomy)
+{
+ static ecotaxonomy_t *tax=NULL;
+ static int32_t rankindex=-1;
+
+ if (taxonomy && tax!=taxonomy)
+ {
+ rankindex = rank_index("superkingdom",taxonomy->ranks);
+ tax=taxonomy;
+ }
+
+ if (!tax || rankindex < 0)
+ ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+ return eco_findtaxonatrank(taxon,rankindex);
+}
\ No newline at end of file
diff --git a/src/libtm/tm.c b/src/libtm/tm.c
new file mode 100644
index 0000000..b143ea5
--- /dev/null
+++ b/src/libtm/tm.c
@@ -0,0 +1,31 @@
+
+/**
+ *
+ * J Jr SantaLucia.
+ * A uniÞed view of polymer, dumbbell, and oligonucleotide
+ * dna nearest-neighbor thermodynamics.
+ * Proc Natl Acad Sci U S A, 95(4):1460Ð1465, 1998 Feb 17.
+ */
+
+
+//Nearest-neighbor sequence
+//(5'-3'/5'-3') deltaH deltaS
+// kcal/mol cal/(moláK)
+//AA/TT -7.9 -22.2
+//AG/CT -7.8 -21.0
+//AT/AT -7.2 -20.4
+//AC/GT -8.4 -22.4
+//GA/TC -8.2 -22.2
+//GG/CC -8.0 -19.9
+//GC/GC -9.8 -24.4
+//TA/TA -7.2 -21.3
+//TG/CA -8.5 -22.7
+//CG/CG -10.6 -27.2
+//Terminal A-T base pair 2.3 4.1
+//Terminal G-C base pair 0.1 -2.8
+
+
+float nearestNeighborTm(const char *oligo,float probe,float target)
+{
+
+}