From 049129488ed96b03a5976512fe328dadeb72ace9 Mon Sep 17 00:00:00 2001 From: Celine Mercier Date: Thu, 16 Jul 2015 14:00:12 +0200 Subject: [PATCH] sumalibs first commit --- Licence_CeCILL_V2-en.txt | 506 ++++++++ Licence_CeCILL_V2-fr.txt | 512 ++++++++ global.mk | 10 + libfasta/.DS_Store | Bin 0 -> 6148 bytes libfasta/Makefile | 33 + libfasta/fasta_header_handler.c | 126 ++ libfasta/fasta_header_handler.h | 23 + libfasta/fasta_header_parser.c | 1954 +++++++++++++++++++++++++++++ libfasta/fasta_header_parser.h | 13 + libfasta/fasta_header_parser.l | 178 +++ libfasta/fasta_seq_writer.c | 76 ++ libfasta/fasta_seq_writer.h | 19 + libfasta/header_mem_handler.c | 93 ++ libfasta/header_mem_handler.h | 22 + libfasta/sequence.c | 450 +++++++ libfasta/sequence.h | 64 + libfile/.DS_Store | Bin 0 -> 6148 bytes libfile/Makefile | 25 + libfile/fileHandling.c | 88 ++ libfile/fileHandling.h | 20 + liblcs/.DS_Store | Bin 0 -> 6148 bytes liblcs/Makefile | 25 + liblcs/_lcs.ext.1.c | 168 +++ liblcs/_lcs.ext.2.c | 34 + liblcs/_lcs.ext.3.c | 34 + liblcs/_lcs.h | 29 + liblcs/_lcs_fast.h | 597 +++++++++ liblcs/banded_LCS_alignment.c | 211 ++++ liblcs/banded_LCS_alignment.h | 9 + liblcs/sse_banded_LCS_alignment.c | 724 +++++++++++ liblcs/sse_banded_LCS_alignment.h | 24 + liblcs/upperband.c | 382 ++++++ liblcs/upperband.h | 18 + libsse/.DS_Store | Bin 0 -> 6148 bytes libsse/_sse.h | 961 ++++++++++++++ libutils/.DS_Store | Bin 0 -> 6148 bytes libutils/Makefile | 25 + libutils/debug.c | 32 + libutils/debug.h | 25 + libutils/utilities.c | 230 ++++ libutils/utilities.h | 56 + 41 files changed, 7796 insertions(+) create mode 100644 Licence_CeCILL_V2-en.txt create mode 100644 Licence_CeCILL_V2-fr.txt create mode 100644 global.mk create mode 100644 libfasta/.DS_Store create mode 100644 libfasta/Makefile create mode 100644 libfasta/fasta_header_handler.c create mode 100644 libfasta/fasta_header_handler.h create mode 100644 libfasta/fasta_header_parser.c create mode 100644 
libfasta/fasta_header_parser.h create mode 100644 libfasta/fasta_header_parser.l create mode 100644 libfasta/fasta_seq_writer.c create mode 100644 libfasta/fasta_seq_writer.h create mode 100644 libfasta/header_mem_handler.c create mode 100644 libfasta/header_mem_handler.h create mode 100644 libfasta/sequence.c create mode 100644 libfasta/sequence.h create mode 100644 libfile/.DS_Store create mode 100644 libfile/Makefile create mode 100644 libfile/fileHandling.c create mode 100644 libfile/fileHandling.h create mode 100644 liblcs/.DS_Store create mode 100644 liblcs/Makefile create mode 100644 liblcs/_lcs.ext.1.c create mode 100644 liblcs/_lcs.ext.2.c create mode 100644 liblcs/_lcs.ext.3.c create mode 100644 liblcs/_lcs.h create mode 100644 liblcs/_lcs_fast.h create mode 100644 liblcs/banded_LCS_alignment.c create mode 100644 liblcs/banded_LCS_alignment.h create mode 100644 liblcs/sse_banded_LCS_alignment.c create mode 100644 liblcs/sse_banded_LCS_alignment.h create mode 100644 liblcs/upperband.c create mode 100644 liblcs/upperband.h create mode 100644 libsse/.DS_Store create mode 100644 libsse/_sse.h create mode 100644 libutils/.DS_Store create mode 100644 libutils/Makefile create mode 100644 libutils/debug.c create mode 100644 libutils/debug.h create mode 100644 libutils/utilities.c create mode 100644 libutils/utilities.h diff --git a/Licence_CeCILL_V2-en.txt b/Licence_CeCILL_V2-en.txt new file mode 100644 index 0000000..fcc8df2 --- /dev/null +++ b/Licence_CeCILL_V2-en.txt @@ -0,0 +1,506 @@ + +CeCILL FREE SOFTWARE LICENSE AGREEMENT + + + Notice + +This Agreement is a Free Software license agreement that is the result +of discussions between its authors in order to ensure compliance with +the two main principles guiding its drafting: + + * firstly, compliance with the principles governing the distribution + of Free Software: access to source code, broad rights granted to + users, + * secondly, the election of a governing law, French law, with which + it is 
conformant, both as regards the law of torts and + intellectual property law, and the protection that it offers to + both authors and holders of the economic rights over software. + +The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre]) +license are: + +Commissariat à l'Energie Atomique - CEA, a public scientific, technical +and industrial research establishment, having its principal place of +business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France. + +Centre National de la Recherche Scientifique - CNRS, a public scientific +and technological establishment, having its principal place of business +at 3 rue Michel-Ange, 75794 Paris cedex 16, France. + +Institut National de Recherche en Informatique et en Automatique - +INRIA, a public scientific and technological establishment, having its +principal place of business at Domaine de Voluceau, Rocquencourt, BP +105, 78153 Le Chesnay cedex, France. + + + Preamble + +The purpose of this Free Software license agreement is to grant users +the right to modify and redistribute the software governed by this +license within the framework of an open source distribution model. + +The exercising of these rights is conditional upon certain obligations +for users so as to preserve this status for all subsequent redistributions. + +In consideration of access to the source code and the rights to copy, +modify and redistribute granted by the license, users are provided only +with a limited warranty and the software's author, the holder of the +economic rights, and the successive licensors only have limited liability. + +In this respect, the risks associated with loading, using, modifying +and/or developing or reproducing the software by the user are brought to +the user's attention, given its Free Software status, which may make it +complicated to use, with the result that its use is reserved for +developers and experienced professionals having in-depth computer +knowledge. 
Users are therefore encouraged to load and test the +suitability of the software as regards their requirements in conditions +enabling the security of their systems and/or data to be ensured and, +more generally, to use and operate it in the same conditions of +security. This Agreement may be freely reproduced and published, +provided it is not altered, and that no provisions are either added or +removed herefrom. + +This Agreement may apply to any or all software for which the holder of +the economic rights decides to submit the use thereof to its provisions. + + + Article 1 - DEFINITIONS + +For the purpose of this Agreement, when the following expressions +commence with a capital letter, they shall have the following meaning: + +Agreement: means this license agreement, and its possible subsequent +versions and annexes. + +Software: means the software in its Object Code and/or Source Code form +and, where applicable, its documentation, "as is" when the Licensee +accepts the Agreement. + +Initial Software: means the Software in its Source Code and possibly its +Object Code form and, where applicable, its documentation, "as is" when +it is first distributed under the terms and conditions of the Agreement. + +Modified Software: means the Software modified by at least one +Contribution. + +Source Code: means all the Software's instructions and program lines to +which access is required so as to modify the Software. + +Object Code: means the binary files originating from the compilation of +the Source Code. + +Holder: means the holder(s) of the economic rights over the Initial +Software. + +Licensee: means the Software user(s) having accepted the Agreement. + +Contributor: means a Licensee having made at least one Contribution. + +Licensor: means the Holder, or any other individual or legal entity, who +distributes the Software under the Agreement. 
+ +Contribution: means any or all modifications, corrections, translations, +adaptations and/or new functions integrated into the Software by any or +all Contributors, as well as any or all Internal Modules. + +Module: means a set of sources files including their documentation that +enables supplementary functions or services in addition to those offered +by the Software. + +External Module: means any or all Modules, not derived from the +Software, so that this Module and the Software run in separate address +spaces, with one calling the other when they are run. + +Internal Module: means any or all Module, connected to the Software so +that they both execute in the same address space. + +GNU GPL: means the GNU General Public License version 2 or any +subsequent version, as published by the Free Software Foundation Inc. + +Parties: mean both the Licensee and the Licensor. + +These expressions may be used both in singular and plural form. + + + Article 2 - PURPOSE + +The purpose of the Agreement is the grant by the Licensor to the +Licensee of a non-exclusive, transferable and worldwide license for the +Software as set forth in Article 5 hereinafter for the whole term of the +protection granted by the rights over said Software. + + + Article 3 - ACCEPTANCE + +3.1 The Licensee shall be deemed as having accepted the terms and +conditions of this Agreement upon the occurrence of the first of the +following events: + + * (i) loading the Software by any or all means, notably, by + downloading from a remote server, or by loading from a physical + medium; + * (ii) the first time the Licensee exercises any of the rights + granted hereunder. 
+ +3.2 One copy of the Agreement, containing a notice relating to the +characteristics of the Software, to the limited warranty, and to the +fact that its use is restricted to experienced users has been provided +to the Licensee prior to its acceptance as set forth in Article 3.1 +hereinabove, and the Licensee hereby acknowledges that it has read and +understood it. + + + Article 4 - EFFECTIVE DATE AND TERM + + + 4.1 EFFECTIVE DATE + +The Agreement shall become effective on the date when it is accepted by +the Licensee as set forth in Article 3.1. + + + 4.2 TERM + +The Agreement shall remain in force for the entire legal term of +protection of the economic rights over the Software. + + + Article 5 - SCOPE OF RIGHTS GRANTED + +The Licensor hereby grants to the Licensee, who accepts, the following +rights over the Software for any or all use, and for the term of the +Agreement, on the basis of the terms and conditions set forth hereinafter. + +Besides, if the Licensor owns or comes to own one or more patents +protecting all or part of the functions of the Software or of its +components, the Licensor undertakes not to enforce the rights granted by +these patents against successive Licensees using, exploiting or +modifying the Software. If these patents are transferred, the Licensor +undertakes to have the transferees subscribe to the obligations set +forth in this paragraph. + + + 5.1 RIGHT OF USE + +The Licensee is authorized to use the Software, without any limitation +as to its fields of application, with it being hereinafter specified +that this comprises: + + 1. permanent or temporary reproduction of all or part of the Software + by any or all means and in any or all form. + + 2. loading, displaying, running, or storing the Software on any or + all medium. + + 3. entitlement to observe, study or test its operation so as to + determine the ideas and principles behind any or all constituent + elements of said Software. 
This shall apply when the Licensee + carries out any or all loading, displaying, running, transmission + or storage operation as regards the Software, that it is entitled + to carry out hereunder. + + + 5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS + +The right to make Contributions includes the right to translate, adapt, +arrange, or make any or all modifications to the Software, and the right +to reproduce the resulting software. + +The Licensee is authorized to make any or all Contributions to the +Software provided that it includes an explicit notice that it is the +author of said Contribution and indicates the date of the creation thereof. + + + 5.3 RIGHT OF DISTRIBUTION + +In particular, the right of distribution includes the right to publish, +transmit and communicate the Software to the general public on any or +all medium, and by any or all means, and the right to market, either in +consideration of a fee, or free of charge, one or more copies of the +Software by any means. + +The Licensee is further authorized to distribute copies of the modified +or unmodified Software to third parties according to the terms and +conditions set forth hereinafter. + + + 5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION + +The Licensee is authorized to distribute true copies of the Software in +Source Code or Object Code form, provided that said distribution +complies with all the provisions of the Agreement and is accompanied by: + + 1. a copy of the Agreement, + + 2. a notice relating to the limitation of both the Licensor's + warranty and liability as set forth in Articles 8 and 9, + +and that, in the event that only the Object Code of the Software is +redistributed, the Licensee allows future Licensees unhindered access to +the full Source Code of the Software by indicating how to access it, it +being understood that the additional cost of acquiring the Source Code +shall not exceed the cost of transferring the data. 
+ + + 5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE + +When the Licensee makes a Contribution to the Software, the terms and +conditions for the distribution of the resulting Modified Software +become subject to all the provisions of this Agreement. + +The Licensee is authorized to distribute the Modified Software, in +source code or object code form, provided that said distribution +complies with all the provisions of the Agreement and is accompanied by: + + 1. a copy of the Agreement, + + 2. a notice relating to the limitation of both the Licensor's + warranty and liability as set forth in Articles 8 and 9, + +and that, in the event that only the object code of the Modified +Software is redistributed, the Licensee allows future Licensees +unhindered access to the full source code of the Modified Software by +indicating how to access it, it being understood that the additional +cost of acquiring the source code shall not exceed the cost of +transferring the data. + + + 5.3.3 DISTRIBUTION OF EXTERNAL MODULES + +When the Licensee has developed an External Module, the terms and +conditions of this Agreement do not apply to said External Module, that +may be distributed under a separate license agreement. + + + 5.3.4 COMPATIBILITY WITH THE GNU GPL + +The Licensee can include a code that is subject to the provisions of one +of the versions of the GNU GPL in the Modified or unmodified Software, +and distribute that entire code under the terms of the same version of +the GNU GPL. + +The Licensee can include the Modified or unmodified Software in a code +that is subject to the provisions of one of the versions of the GNU GPL, +and distribute that entire code under the terms of the same version of +the GNU GPL. + + + Article 6 - INTELLECTUAL PROPERTY + + + 6.1 OVER THE INITIAL SOFTWARE + +The Holder owns the economic rights over the Initial Software. 
Any or +all use of the Initial Software is subject to compliance with the terms +and conditions under which the Holder has elected to distribute its work +and no one shall be entitled to modify the terms and conditions for the +distribution of said Initial Software. + +The Holder undertakes that the Initial Software will remain ruled at +least by this Agreement, for the duration set forth in Article 4.2. + + + 6.2 OVER THE CONTRIBUTIONS + +The Licensee who develops a Contribution is the owner of the +intellectual property rights over this Contribution as defined by +applicable law. + + + 6.3 OVER THE EXTERNAL MODULES + +The Licensee who develops an External Module is the owner of the +intellectual property rights over this External Module as defined by +applicable law and is free to choose the type of agreement that shall +govern its distribution. + + + 6.4 JOINT PROVISIONS + +The Licensee expressly undertakes: + + 1. not to remove, or modify, in any manner, the intellectual property + notices attached to the Software; + + 2. to reproduce said notices, in an identical manner, in the copies + of the Software modified or not. + +The Licensee undertakes not to directly or indirectly infringe the +intellectual property rights of the Holder and/or Contributors on the +Software and to take, where applicable, vis-à-vis its staff, any and all +measures required to ensure respect of said intellectual property rights +of the Holder and/or Contributors. + + + Article 7 - RELATED SERVICES + +7.1 Under no circumstances shall the Agreement oblige the Licensor to +provide technical assistance or maintenance services for the Software. + +However, the Licensor is entitled to offer this type of services. The +terms and conditions of such technical assistance, and/or such +maintenance, shall be set forth in a separate instrument. Only the +Licensor offering said maintenance and/or technical assistance services +shall incur liability therefor. 
+ +7.2 Similarly, any Licensor is entitled to offer to its licensees, under +its sole responsibility, a warranty, that shall only be binding upon +itself, for the redistribution of the Software and/or the Modified +Software, under terms and conditions that it is free to decide. Said +warranty, and the financial terms and conditions of its application, +shall be subject of a separate instrument executed between the Licensor +and the Licensee. + + + Article 8 - LIABILITY + +8.1 Subject to the provisions of Article 8.2, the Licensee shall be +entitled to claim compensation for any direct loss it may have suffered +from the Software as a result of a fault on the part of the relevant +Licensor, subject to providing evidence thereof. + +8.2 The Licensor's liability is limited to the commitments made under +this Agreement and shall not be incurred as a result of in particular: +(i) loss due the Licensee's total or partial failure to fulfill its +obligations, (ii) direct or consequential loss that is suffered by the +Licensee due to the use or performance of the Software, and (iii) more +generally, any consequential loss. In particular the Parties expressly +agree that any or all pecuniary or business loss (i.e. loss of data, +loss of profits, operating loss, loss of customers or orders, +opportunity cost, any disturbance to business activities) or any or all +legal proceedings instituted against the Licensee by a third party, +shall constitute consequential loss and shall not provide entitlement to +any or all compensation from the Licensor. + + + Article 9 - WARRANTY + +9.1 The Licensee acknowledges that the scientific and technical +state-of-the-art when the Software was distributed did not enable all +possible uses to be tested and verified, nor for the presence of +possible defects to be detected. 
In this respect, the Licensee's +attention has been drawn to the risks associated with loading, using, +modifying and/or developing and reproducing the Software which are +reserved for experienced users. + +The Licensee shall be responsible for verifying, by any or all means, +the suitability of the product for its requirements, its good working +order, and for ensuring that it shall not cause damage to either persons +or properties. + +9.2 The Licensor hereby represents, in good faith, that it is entitled +to grant all the rights over the Software (including in particular the +rights set forth in Article 5). + +9.3 The Licensee acknowledges that the Software is supplied "as is" by +the Licensor without any other express or tacit warranty, other than +that provided for in Article 9.2 and, in particular, without any warranty +as to its commercial value, its secured, safe, innovative or relevant +nature. + +Specifically, the Licensor does not warrant that the Software is free +from any error, that it will operate without interruption, that it will +be compatible with the Licensee's own equipment and software +configuration, nor that it will meet the Licensee's requirements. + +9.4 The Licensor does not either expressly or tacitly warrant that the +Software does not infringe any third party intellectual property right +relating to a patent, software or any other property right. Therefore, +the Licensor disclaims any and all liability towards the Licensee +arising out of any or all proceedings for infringement that may be +instituted in respect of the use, modification and redistribution of the +Software. Nevertheless, should such proceedings be instituted against +the Licensee, the Licensor shall provide it with technical and legal +assistance for its defense. Such technical and legal assistance shall be +decided on a case-by-case basis between the relevant Licensor and the +Licensee pursuant to a memorandum of understanding. 
The Licensor +disclaims any and all liability as regards the Licensee's use of the +name of the Software. No warranty is given as regards the existence of +prior rights over the name of the Software or as regards the existence +of a trademark. + + + Article 10 - TERMINATION + +10.1 In the event of a breach by the Licensee of its obligations +hereunder, the Licensor may automatically terminate this Agreement +thirty (30) days after notice has been sent to the Licensee and has +remained ineffective. + +10.2 A Licensee whose Agreement is terminated shall no longer be +authorized to use, modify or distribute the Software. However, any +licenses that it may have granted prior to termination of the Agreement +shall remain valid subject to their having been granted in compliance +with the terms and conditions hereof. + + + Article 11 - MISCELLANEOUS + + + 11.1 EXCUSABLE EVENTS + +Neither Party shall be liable for any or all delay, or failure to +perform the Agreement, that may be attributable to an event of force +majeure, an act of God or an outside cause, such as defective +functioning or interruptions of the electricity or telecommunications +networks, network paralysis following a virus attack, intervention by +government authorities, natural disasters, water damage, earthquakes, +fire, explosions, strikes and labor unrest, war, etc. + +11.2 Any failure by either Party, on one or more occasions, to invoke +one or more of the provisions hereof, shall under no circumstances be +interpreted as being a waiver by the interested Party of its right to +invoke said provision(s) subsequently. + +11.3 The Agreement cancels and replaces any or all previous agreements, +whether written or oral, between the Parties and having the same +purpose, and constitutes the entirety of the agreement between said +Parties concerning said purpose. 
No supplement or modification to the +terms and conditions hereof shall be effective as between the Parties +unless it is made in writing and signed by their duly authorized +representatives. + +11.4 In the event that one or more of the provisions hereof were to +conflict with a current or future applicable act or legislative text, +said act or legislative text shall prevail, and the Parties shall make +the necessary amendments so as to comply with said act or legislative +text. All other provisions shall remain effective. Similarly, invalidity +of a provision of the Agreement, for any reason whatsoever, shall not +cause the Agreement as a whole to be invalid. + + + 11.5 LANGUAGE + +The Agreement is drafted in both French and English and both versions +are deemed authentic. + + + Article 12 - NEW VERSIONS OF THE AGREEMENT + +12.1 Any person is authorized to duplicate and distribute copies of this +Agreement. + +12.2 So as to ensure coherence, the wording of this Agreement is +protected and may only be modified by the authors of the License, who +reserve the right to periodically publish updates or new versions of the +Agreement, each with a separate number. These subsequent versions may +address new issues encountered by Free Software. + +12.3 Any Software distributed under a given version of the Agreement may +only be subsequently distributed under the same version of the Agreement +or a subsequent version, subject to the provisions of Article 5.3.4. + + + Article 13 - GOVERNING LAW AND JURISDICTION + +13.1 The Agreement is governed by French law. The Parties agree to +endeavor to seek an amicable solution to any disagreements or disputes +that may arise during the performance of the Agreement. + +13.2 Failing an amicable solution within two (2) months as from their +occurrence, and unless emergency proceedings are necessary, the +disagreements or disputes shall be referred to the Paris Courts having +jurisdiction, by the more diligent Party. 
+ + +Version 2.0 dated 2006-09-05. diff --git a/Licence_CeCILL_V2-fr.txt b/Licence_CeCILL_V2-fr.txt new file mode 100644 index 0000000..1613fca --- /dev/null +++ b/Licence_CeCILL_V2-fr.txt @@ -0,0 +1,512 @@ + +CONTRAT DE LICENCE DE LOGICIEL LIBRE CeCILL + + + Avertissement + +Ce contrat est une licence de logiciel libre issue d'une concertation +entre ses auteurs afin que le respect de deux grands principes préside à +sa rédaction: + + * d'une part, le respect des principes de diffusion des logiciels + libres: accès au code source, droits étendus conférés aux + utilisateurs, + * d'autre part, la désignation d'un droit applicable, le droit + français, auquel elle est conforme, tant au regard du droit de la + responsabilité civile que du droit de la propriété intellectuelle + et de la protection qu'il offre aux auteurs et titulaires des + droits patrimoniaux sur un logiciel. + +Les auteurs de la licence CeCILL (pour Ce[a] C[nrs] I[nria] L[ogiciel] +L[ibre]) sont: + +Commissariat à l'Energie Atomique - CEA, établissement public de +recherche à caractère scientifique, technique et industriel, dont le +siège est situé 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris. + +Centre National de la Recherche Scientifique - CNRS, établissement +public à caractère scientifique et technologique, dont le siège est +situé 3 rue Michel-Ange, 75794 Paris cedex 16. + +Institut National de Recherche en Informatique et en Automatique - +INRIA, établissement public à caractère scientifique et technologique, +dont le siège est situé Domaine de Voluceau, Rocquencourt, BP 105, 78153 +Le Chesnay cedex. + + + Préambule + +Ce contrat est une licence de logiciel libre dont l'objectif est de +conférer aux utilisateurs la liberté de modification et de +redistribution du logiciel régi par cette licence dans le cadre d'un +modèle de diffusion en logiciel libre. 
+ +L'exercice de ces libertés est assorti de certains devoirs à la charge +des utilisateurs afin de préserver ce statut au cours des +redistributions ultérieures. + +L'accessibilité au code source et les droits de copie, de modification +et de redistribution qui en découlent ont pour contrepartie de n'offrir +aux utilisateurs qu'une garantie limitée et de ne faire peser sur +l'auteur du logiciel, le titulaire des droits patrimoniaux et les +concédants successifs qu'une responsabilité restreinte. + +A cet égard l'attention de l'utilisateur est attirée sur les risques +associés au chargement, à l'utilisation, à la modification et/ou au +développement et à la reproduction du logiciel par l'utilisateur étant +donné sa spécificité de logiciel libre, qui peut le rendre complexe à +manipuler et qui le réserve donc à des développeurs ou des +professionnels avertis possédant des connaissances informatiques +approfondies. Les utilisateurs sont donc invités à charger et tester +l'adéquation du logiciel à leurs besoins dans des conditions permettant +d'assurer la sécurité de leurs systèmes et/ou de leurs données et, plus +généralement, à l'utiliser et l'exploiter dans les mêmes conditions de +sécurité. Ce contrat peut être reproduit et diffusé librement, sous +réserve de le conserver en l'état, sans ajout ni suppression de clauses. + +Ce contrat est susceptible de s'appliquer à tout logiciel dont le +titulaire des droits patrimoniaux décide de soumettre l'exploitation aux +dispositions qu'il contient. + + + Article 1 - DEFINITIONS + +Dans ce contrat, les termes suivants, lorsqu'ils seront écrits avec une +lettre capitale, auront la signification suivante: + +Contrat: désigne le présent contrat de licence, ses éventuelles versions +postérieures et annexes. + +Logiciel: désigne le logiciel sous sa forme de Code Objet et/ou de Code +Source et le cas échéant sa documentation, dans leur état au moment de +l'acceptation du Contrat par le Licencié. 
+ +Logiciel Initial: désigne le Logiciel sous sa forme de Code Source et +éventuellement de Code Objet et le cas échéant sa documentation, dans +leur état au moment de leur première diffusion sous les termes du Contrat. + +Logiciel Modifié: désigne le Logiciel modifié par au moins une +Contribution. + +Code Source: désigne l'ensemble des instructions et des lignes de +programme du Logiciel et auquel l'accès est nécessaire en vue de +modifier le Logiciel. + +Code Objet: désigne les fichiers binaires issus de la compilation du +Code Source. + +Titulaire: désigne le ou les détenteurs des droits patrimoniaux d'auteur +sur le Logiciel Initial. + +Licencié: désigne le ou les utilisateurs du Logiciel ayant accepté le +Contrat. + +Contributeur: désigne le Licencié auteur d'au moins une Contribution. + +Concédant: désigne le Titulaire ou toute personne physique ou morale +distribuant le Logiciel sous le Contrat. + +Contribution: désigne l'ensemble des modifications, corrections, +traductions, adaptations et/ou nouvelles fonctionnalités intégrées dans +le Logiciel par tout Contributeur, ainsi que tout Module Interne. + +Module: désigne un ensemble de fichiers sources y compris leur +documentation qui permet de réaliser des fonctionnalités ou services +supplémentaires à ceux fournis par le Logiciel. + +Module Externe: désigne tout Module, non dérivé du Logiciel, tel que ce +Module et le Logiciel s'exécutent dans des espaces d'adressage +différents, l'un appelant l'autre au moment de leur exécution. + +Module Interne: désigne tout Module lié au Logiciel de telle sorte +qu'ils s'exécutent dans le même espace d'adressage. + +GNU GPL: désigne la GNU General Public License dans sa version 2 ou +toute version ultérieure, telle que publiée par Free Software Foundation +Inc. + +Parties: désigne collectivement le Licencié et le Concédant. + +Ces termes s'entendent au singulier comme au pluriel. 
+ + + Article 2 - OBJET + +Le Contrat a pour objet la concession par le Concédant au Licencié d'une +licence non exclusive, cessible et mondiale du Logiciel telle que +définie ci-après à l'article 5 pour toute la durée de protection des droits +portant sur ce Logiciel. + + + Article 3 - ACCEPTATION + +3.1 L'acceptation par le Licencié des termes du Contrat est réputée +acquise du fait du premier des faits suivants: + + * (i) le chargement du Logiciel par tout moyen notamment par + téléchargement à partir d'un serveur distant ou par chargement à + partir d'un support physique; + * (ii) le premier exercice par le Licencié de l'un quelconque des + droits concédés par le Contrat. + +3.2 Un exemplaire du Contrat, contenant notamment un avertissement +relatif aux spécificités du Logiciel, à la restriction de garantie et à +la limitation à un usage par des utilisateurs expérimentés a été mis à +disposition du Licencié préalablement à son acceptation telle que +définie à l'article 3.1 ci dessus et le Licencié reconnaît en avoir pris +connaissance. + + + Article 4 - ENTREE EN VIGUEUR ET DUREE + + + 4.1 ENTREE EN VIGUEUR + +Le Contrat entre en vigueur à la date de son acceptation par le Licencié +telle que définie en 3.1. + + + 4.2 DUREE + +Le Contrat produira ses effets pendant toute la durée légale de +protection des droits patrimoniaux portant sur le Logiciel. + + + Article 5 - ETENDUE DES DROITS CONCEDES + +Le Concédant concède au Licencié, qui accepte, les droits suivants sur +le Logiciel pour toutes destinations et pour la durée du Contrat dans +les conditions ci-après détaillées. + +Par ailleurs, si le Concédant détient ou venait à détenir un ou +plusieurs brevets d'invention protégeant tout ou partie des +fonctionnalités du Logiciel ou de ses composants, il s'engage à ne pas +opposer les éventuels droits conférés par ces brevets aux Licenciés +successifs qui utiliseraient, exploiteraient ou modifieraient le +Logiciel. 
En cas de cession de ces brevets, le Concédant s'engage à +faire reprendre les obligations du présent alinéa aux cessionnaires. + + + 5.1 DROIT D'UTILISATION + +Le Licencié est autorisé à utiliser le Logiciel, sans restriction quant +aux domaines d'application, étant ci-après précisé que cela comporte: + + 1. la reproduction permanente ou provisoire du Logiciel en tout ou + partie par tout moyen et sous toute forme. + + 2. le chargement, l'affichage, l'exécution, ou le stockage du + Logiciel sur tout support. + + 3. la possibilité d'en observer, d'en étudier, ou d'en tester le + fonctionnement afin de déterminer les idées et principes qui sont + à la base de n'importe quel élément de ce Logiciel; et ceci, + lorsque le Licencié effectue toute opération de chargement, + d'affichage, d'exécution, de transmission ou de stockage du + Logiciel qu'il est en droit d'effectuer en vertu du Contrat. + + + 5.2 DROIT D'APPORTER DES CONTRIBUTIONS + +Le droit d'apporter des Contributions comporte le droit de traduire, +d'adapter, d'arranger ou d'apporter toute autre modification au Logiciel +et le droit de reproduire le logiciel en résultant. + +Le Licencié est autorisé à apporter toute Contribution au Logiciel sous +réserve de mentionner, de façon explicite, son nom en tant qu'auteur de +cette Contribution et la date de création de celle-ci. + + + 5.3 DROIT DE DISTRIBUTION + +Le droit de distribution comporte notamment le droit de diffuser, de +transmettre et de communiquer le Logiciel au public sur tout support et +par tout moyen ainsi que le droit de mettre sur le marché à titre +onéreux ou gratuit, un ou des exemplaires du Logiciel par tout procédé. + +Le Licencié est autorisé à distribuer des copies du Logiciel, modifié ou +non, à des tiers dans les conditions ci-après détaillées. 
+ + + 5.3.1 DISTRIBUTION DU LOGICIEL SANS MODIFICATION + +Le Licencié est autorisé à distribuer des copies conformes du Logiciel, +sous forme de Code Source ou de Code Objet, à condition que cette +distribution respecte les dispositions du Contrat dans leur totalité et +soit accompagnée: + + 1. d'un exemplaire du Contrat, + + 2. d'un avertissement relatif à la restriction de garantie et de + responsabilité du Concédant telle que prévue aux articles 8 + et 9, + +et que, dans le cas où seul le Code Objet du Logiciel est redistribué, +le Licencié permette aux futurs Licenciés d'accéder facilement au Code +Source complet du Logiciel en indiquant les modalités d'accès, étant +entendu que le coût additionnel d'acquisition du Code Source ne devra +pas excéder le simple coût de transfert des données. + + + 5.3.2 DISTRIBUTION DU LOGICIEL MODIFIE + +Lorsque le Licencié apporte une Contribution au Logiciel, les conditions +de distribution du Logiciel Modifié en résultant sont alors soumises à +l'intégralité des dispositions du Contrat. + +Le Licencié est autorisé à distribuer le Logiciel Modifié, sous forme de +code source ou de code objet, à condition que cette distribution +respecte les dispositions du Contrat dans leur totalité et soit +accompagnée: + + 1. d'un exemplaire du Contrat, + + 2. d'un avertissement relatif à la restriction de garantie et de + responsabilité du Concédant telle que prévue aux articles 8 + et 9, + +et que, dans le cas où seul le code objet du Logiciel Modifié est +redistribué, le Licencié permette aux futurs Licenciés d'accéder +facilement au code source complet du Logiciel Modifié en indiquant les +modalités d'accès, étant entendu que le coût additionnel d'acquisition +du code source ne devra pas excéder le simple coût de transfert des données. 
+ + + 5.3.3 DISTRIBUTION DES MODULES EXTERNES + +Lorsque le Licencié a développé un Module Externe les conditions du +Contrat ne s'appliquent pas à ce Module Externe, qui peut être distribué +sous un contrat de licence différent. + + + 5.3.4 COMPATIBILITE AVEC LA LICENCE GNU GPL + +Le Licencié peut inclure un code soumis aux dispositions d'une des +versions de la licence GNU GPL dans le Logiciel modifié ou non et +distribuer l'ensemble sous les conditions de la même version de la +licence GNU GPL. + +Le Licencié peut inclure le Logiciel modifié ou non dans un code soumis +aux dispositions d'une des versions de la licence GNU GPL et distribuer +l'ensemble sous les conditions de la même version de la licence GNU GPL. + + + Article 6 - PROPRIETE INTELLECTUELLE + + + 6.1 SUR LE LOGICIEL INITIAL + +Le Titulaire est détenteur des droits patrimoniaux sur le Logiciel +Initial. Toute utilisation du Logiciel Initial est soumise au respect +des conditions dans lesquelles le Titulaire a choisi de diffuser son +oeuvre et nul autre n'a la faculté de modifier les conditions de +diffusion de ce Logiciel Initial. + +Le Titulaire s'engage à ce que le Logiciel Initial reste au moins régi +par le Contrat et ce, pour la durée visée à l'article 4.2. + + + 6.2 SUR LES CONTRIBUTIONS + +Le Licencié qui a développé une Contribution est titulaire sur celle-ci +des droits de propriété intellectuelle dans les conditions définies par +la législation applicable. + + + 6.3 SUR LES MODULES EXTERNES + +Le Licencié qui a développé un Module Externe est titulaire sur celui-ci +des droits de propriété intellectuelle dans les conditions définies par +la législation applicable et reste libre du choix du contrat régissant +sa diffusion. + + + 6.4 DISPOSITIONS COMMUNES + +Le Licencié s'engage expressément: + + 1. à ne pas supprimer ou modifier de quelque manière que ce soit les + mentions de propriété intellectuelle apposées sur le Logiciel; + + 2. 
à reproduire à l'identique lesdites mentions de propriété + intellectuelle sur les copies du Logiciel modifié ou non. + +Le Licencié s'engage à ne pas porter atteinte, directement ou +indirectement, aux droits de propriété intellectuelle du Titulaire et/ou +des Contributeurs sur le Logiciel et à prendre, le cas échéant, à +l'égard de son personnel toutes les mesures nécessaires pour assurer le +respect des dits droits de propriété intellectuelle du Titulaire et/ou +des Contributeurs. + + + Article 7 - SERVICES ASSOCIES + +7.1 Le Contrat n'oblige en aucun cas le Concédant à la réalisation de +prestations d'assistance technique ou de maintenance du Logiciel. + +Cependant le Concédant reste libre de proposer ce type de services. Les +termes et conditions d'une telle assistance technique et/ou d'une telle +maintenance seront alors déterminés dans un acte séparé. Ces actes de +maintenance et/ou assistance technique n'engageront que la seule +responsabilité du Concédant qui les propose. + +7.2 De même, tout Concédant est libre de proposer, sous sa seule +responsabilité, à ses licenciés une garantie, qui n'engagera que lui, +lors de la redistribution du Logiciel et/ou du Logiciel Modifié et ce, +dans les conditions qu'il souhaite. Cette garantie et les modalités +financières de son application feront l'objet d'un acte séparé entre le +Concédant et le Licencié. + + + Article 8 - RESPONSABILITE + +8.1 Sous réserve des dispositions de l'article 8.2, le Licencié a la +faculté, sous réserve de prouver la faute du Concédant concerné, de +solliciter la réparation du préjudice direct qu'il subirait du fait du +Logiciel et dont il apportera la preuve. 
+ +8.2 La responsabilité du Concédant est limitée aux engagements pris en +application du Contrat et ne saurait être engagée en raison notamment: +(i) des dommages dus à l'inexécution, totale ou partielle, de ses +obligations par le Licencié, (ii) des dommages directs ou indirects +découlant de l'utilisation ou des performances du Logiciel subis par le +Licencié et (iii) plus généralement d'un quelconque dommage indirect. En +particulier, les Parties conviennent expressément que tout préjudice +financier ou commercial (par exemple perte de données, perte de +bénéfices, perte d'exploitation, perte de clientèle ou de commandes, +manque à gagner, trouble commercial quelconque) ou toute action dirigée +contre le Licencié par un tiers, constitue un dommage indirect et +n'ouvre pas droit à réparation par le Concédant. + + + Article 9 - GARANTIE + +9.1 Le Licencié reconnaît que l'état actuel des connaissances +scientifiques et techniques au moment de la mise en circulation du +Logiciel ne permet pas d'en tester et d'en vérifier toutes les +utilisations ni de détecter l'existence d'éventuels défauts. L'attention +du Licencié a été attirée sur ce point sur les risques associés au +chargement, à l'utilisation, la modification et/ou au développement et à +la reproduction du Logiciel qui sont réservés à des utilisateurs avertis. + +Il relève de la responsabilité du Licencié de contrôler, par tous +moyens, l'adéquation du produit à ses besoins, son bon fonctionnement et +de s'assurer qu'il ne causera pas de dommages aux personnes et aux biens. + +9.2 Le Concédant déclare de bonne foi être en droit de concéder +l'ensemble des droits attachés au Logiciel (comprenant notamment les +droits visés à l'article 5). + +9.3 Le Licencié reconnaît que le Logiciel est fourni "en l'état" par le +Concédant sans autre garantie, expresse ou tacite, que celle prévue à +l'article 9.2 et notamment sans aucune garantie sur sa valeur commerciale, +son caractère sécurisé, innovant ou pertinent. 
+ +En particulier, le Concédant ne garantit pas que le Logiciel est exempt +d'erreur, qu'il fonctionnera sans interruption, qu'il sera compatible +avec l'équipement du Licencié et sa configuration logicielle ni qu'il +remplira les besoins du Licencié. + +9.4 Le Concédant ne garantit pas, de manière expresse ou tacite, que le +Logiciel ne porte pas atteinte à un quelconque droit de propriété +intellectuelle d'un tiers portant sur un brevet, un logiciel ou sur tout +autre droit de propriété. Ainsi, le Concédant exclut toute garantie au +profit du Licencié contre les actions en contrefaçon qui pourraient être +diligentées au titre de l'utilisation, de la modification, et de la +redistribution du Logiciel. Néanmoins, si de telles actions sont +exercées contre le Licencié, le Concédant lui apportera son aide +technique et juridique pour sa défense. Cette aide technique et +juridique est déterminée au cas par cas entre le Concédant concerné et +le Licencié dans le cadre d'un protocole d'accord. Le Concédant dégage +toute responsabilité quant à l'utilisation de la dénomination du +Logiciel par le Licencié. Aucune garantie n'est apportée quant à +l'existence de droits antérieurs sur le nom du Logiciel et sur +l'existence d'une marque. + + + Article 10 - RESILIATION + +10.1 En cas de manquement par le Licencié aux obligations mises à sa +charge par le Contrat, le Concédant pourra résilier de plein droit le +Contrat trente (30) jours après notification adressée au Licencié et +restée sans effet. + +10.2 Le Licencié dont le Contrat est résilié n'est plus autorisé à +utiliser, modifier ou distribuer le Logiciel. Cependant, toutes les +licences qu'il aura concédées antérieurement à la résiliation du Contrat +resteront valides sous réserve qu'elles aient été effectuées en +conformité avec le Contrat. 
+ + + Article 11 - DISPOSITIONS DIVERSES + + + 11.1 CAUSE EXTERIEURE + +Aucune des Parties ne sera responsable d'un retard ou d'une défaillance +d'exécution du Contrat qui serait dû à un cas de force majeure, un cas +fortuit ou une cause extérieure, telle que, notamment, le mauvais +fonctionnement ou les interruptions du réseau électrique ou de +télécommunication, la paralysie du réseau liée à une attaque +informatique, l'intervention des autorités gouvernementales, les +catastrophes naturelles, les dégâts des eaux, les tremblements de terre, +le feu, les explosions, les grèves et les conflits sociaux, l'état de +guerre... + +11.2 Le fait, par l'une ou l'autre des Parties, d'omettre en une ou +plusieurs occasions de se prévaloir d'une ou plusieurs dispositions du +Contrat, ne pourra en aucun cas impliquer renonciation par la Partie +intéressée à s'en prévaloir ultérieurement. + +11.3 Le Contrat annule et remplace toute convention antérieure, écrite +ou orale, entre les Parties sur le même objet et constitue l'accord +entier entre les Parties sur cet objet. Aucune addition ou modification +aux termes du Contrat n'aura d'effet à l'égard des Parties à moins +d'être faite par écrit et signée par leurs représentants dûment habilités. + +11.4 Dans l'hypothèse où une ou plusieurs des dispositions du Contrat +s'avèrerait contraire à une loi ou à un texte applicable, existants ou +futurs, cette loi ou ce texte prévaudrait, et les Parties feraient les +amendements nécessaires pour se conformer à cette loi ou à ce texte. +Toutes les autres dispositions resteront en vigueur. De même, la +nullité, pour quelque raison que ce soit, d'une des dispositions du +Contrat ne saurait entraîner la nullité de l'ensemble du Contrat. + + + 11.5 LANGUE + +Le Contrat est rédigé en langue française et en langue anglaise, ces +deux versions faisant également foi. 
+ + + Article 12 - NOUVELLES VERSIONS DU CONTRAT + +12.1 Toute personne est autorisée à copier et distribuer des copies de +ce Contrat. + +12.2 Afin d'en préserver la cohérence, le texte du Contrat est protégé +et ne peut être modifié que par les auteurs de la licence, lesquels se +réservent le droit de publier périodiquement des mises à jour ou de +nouvelles versions du Contrat, qui posséderont chacune un numéro +distinct. Ces versions ultérieures seront susceptibles de prendre en +compte de nouvelles problématiques rencontrées par les logiciels libres. + +12.3 Tout Logiciel diffusé sous une version donnée du Contrat ne pourra +faire l'objet d'une diffusion ultérieure que sous la même version du +Contrat ou une version postérieure, sous réserve des dispositions de +l'article 5.3.4. + + + Article 13 - LOI APPLICABLE ET COMPETENCE TERRITORIALE + +13.1 Le Contrat est régi par la loi française. Les Parties conviennent +de tenter de régler à l'amiable les différends ou litiges qui +viendraient à se produire par suite ou à l'occasion du Contrat. + +13.2 A défaut d'accord amiable dans un délai de deux (2) mois à compter +de leur survenance et sauf situation relevant d'une procédure d'urgence, +les différends ou litiges seront portés par la Partie la plus diligente +devant les Tribunaux compétents de Paris. + + +Version 2.0 du 2006-09-05. 
diff --git a/global.mk b/global.mk new file mode 100644 index 0000000..c58123e --- /dev/null +++ b/global.mk @@ -0,0 +1,10 @@ + +CC=gcc +LDFLAGS= + +CFLAGS = -O3 -w + +default: all + +%.o: %.c + $(CC) $(CFLAGS) -c -o $@ $< $(LIB) diff --git a/libfasta/.DS_Store b/libfasta/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 $@ + +dic_parser.c: dic_parser.l + lex -Phashtable_yy -t $< > $@ + +clean: + rm -rf $(OBJECTS) $(LIBFILE) + rm -f *.a + +$(LIBFILE): $(OBJECTS) + ar -cr $@ $? + $(RANLIB) $@ diff --git a/libfasta/fasta_header_handler.c b/libfasta/fasta_header_handler.c new file mode 100644 index 0000000..f57d8c7 --- /dev/null +++ b/libfasta/fasta_header_handler.c @@ -0,0 +1,126 @@ +#include +#include +#include +#include "sequence.h" +#include "fasta_header_parser.h" +#include "fasta_header_handler.h" + + +char* char_header_add_field(char* header, char* name, char* value) /* Appends the field "; name=value" to the heap-allocated string header, writing the ';' over a trailing '.' when there is one. Returns the reallocated string; the pointer passed in must not be used afterwards. */ +{ + int lheader = strlen(header); + header = (char*) realloc(header, (lheader+strlen(name)+strlen(value)+4)*sizeof(char)); /* 4 extra bytes: ';', ' ', '=' and the terminating NUL */ + if (lheader > 0 && header[lheader-1] == '.') /* guard: an empty header has no last character to inspect */ + { + strcpy(header+lheader-1,";"); + strcpy(header+lheader," "); + strcpy(header+lheader+1,name); + strcpy(header+lheader+1+strlen(name),"="); + strcpy(header+lheader+1+strlen(name)+1,value); + } + else + { + strcpy(header+lheader,";"); + strcpy(header+lheader+1," "); + strcpy(header+lheader+2,name); + strcpy(header+lheader+2+strlen(name),"="); + strcpy(header+lheader+2+strlen(name)+1,value); + } + return header; +} + + +char* fastaSeqPtr_header_add_field(fastaSeqPtr seq, char* name, char* value) /* Returns a newly malloc'ed copy of seq->rawheader with "name=value;" inserted right after the first space (a space is appended first when the header has none). seq->rawheader is only read; the caller owns the returned string. */ +{ + int lheader = strlen(seq->rawheader); + int i; + char* buffer; + char* rawheader; + + rawheader = (char*) malloc((lheader+strlen(name)+strlen(value)+5)*sizeof(char)); + strcpy(rawheader,
seq->rawheader); + + buffer = calloc(lheader+1, sizeof(char)); /* lheader+1 bytes: the saved tail is the whole header plus its NUL when the header starts with a space, and it must still be a valid empty string when the header is empty */ + + i=0; + + while ((rawheader[i] != ' ') && (rawheader[i] != 0)) + i++; + + if (rawheader[i] == ' ') + strcpy(buffer, rawheader+i); + else + strcpy(rawheader+i, " "); + + i++; + + strcpy(rawheader+i,name); + strcpy(rawheader+i+strlen(name),"="); + strcpy(rawheader+i+strlen(name)+1,value); + strcpy(rawheader+i+strlen(name)+1+strlen(value),";"); + strcpy(rawheader+i+strlen(name)+1+strlen(value)+1, buffer); + + free(buffer); + + return(rawheader); +} + + +element_from_header* table_header_add_field(element_from_header* header, char* name, char* value) /* Appends a copied (name, value) pair as a new last entry and rewrites the field count kept as decimal text in header[0].value. Returns the reallocated table; the pointer passed in must not be used afterwards. */ +{ + int nbf; + nbf = atoi(header[0].value); + nbf++; + header = (element_from_header*) realloc(header, (nbf+1)*sizeof(element_from_header)); + header[nbf].name = (char*) malloc((1+strlen(name))*sizeof(char)); + strcpy(header[nbf].name, name); + header[nbf].value = (char*) malloc((1+strlen(value))*sizeof(char)); + strcpy(header[nbf].value, value); + sprintf(header[0].value, "%d", nbf); /* NOTE(review): header[0].value is not resized here; confirm its allocator leaves room for an extra digit (e.g. 9 becoming 10) */ + return(header); +} + + +void free_header_table(element_from_header* header) /* Frees the name and value of entries 0..nbf (nbf is read from header[0].value) and then the table itself. */ +{ + int i; + int nbf = atoi(header[0].value); + + for (i = 0; i <= nbf; i++) + { + free((header[i]).name); + free((header[i]).value); + } + free(header); +} + + +char* getItemFromHeader(char* name, element_from_header* header) /* Returns the stored value pointer (not a copy) of the field called name, or 0 when no field matches. Entries 1..nbf are all scanned, so the last match wins. */ +{ + char* value = 0; + int nbf; + int i; + nbf = atoi(header[0].value); + for (i = 1; i <= nbf; i++) + { + if (strcmp(header[i].name,name)==0) + value = header[i].value; + } + return value; +} + + +void changeValue(element_from_header* header, char* name, char* newValue) /* Replaces the value of every field called name with a copy of newValue, resizing the stored string as needed. */ +{ + int i; + int nbf = atoi(header[0].value); + + for (i = 1; i <= nbf; i++) + { + if (strcmp(header[i].name, name)==0) + { + header[i].value = realloc(header[i].value, (1+strlen(newValue))*sizeof(char)); + strcpy(header[i].value, newValue); + } + } +} diff --git a/libfasta/fasta_header_handler.h b/libfasta/fasta_header_handler.h new file mode 100644 index 0000000..e68b81a --- /dev/null +++
b/libfasta/fasta_header_handler.h @@ -0,0 +1,23 @@ + +#ifndef FASTA_HEADER_HANDLER_H_ +#define FASTA_HEADER_HANDLER_H_ + + +#include "sequence.h" + + +char* char_header_add_field(char*,char*,char*); + +char* fastaSeqPtr_header_add_field(fastaSeqPtr seq, char* name, char* value); + +element_from_header* table_header_add_dic(element_from_header* header, char* name, struct hashtable *hashtab); + +element_from_header* table_header_add_field(element_from_header* header, char* name, char* value); + +void free_header_table(element_from_header*); + +char* getItemFromHeader(char*, element_from_header*); + +void changeValue(element_from_header* header, char* name, char* newValue); + +#endif diff --git a/libfasta/fasta_header_parser.c b/libfasta/fasta_header_parser.c new file mode 100644 index 0000000..ed53557 --- /dev/null +++ b/libfasta/fasta_header_parser.c @@ -0,0 +1,1954 @@ + +#line 3 "" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define yy_create_buffer header_yy_create_buffer +#define yy_delete_buffer header_yy_delete_buffer +#define yy_flex_debug header_yy_flex_debug +#define yy_init_buffer header_yy_init_buffer +#define yy_flush_buffer header_yy_flush_buffer +#define yy_load_buffer_state header_yy_load_buffer_state +#define yy_switch_to_buffer header_yy_switch_to_buffer +#define yyin header_yyin +#define yyleng header_yyleng +#define yylex header_yylex +#define yylineno header_yylineno +#define yyout header_yyout +#define yyrestart header_yyrestart +#define yytext header_yytext +#define yywrap header_yywrap +#define yyalloc header_yyalloc +#define yyrealloc header_yyrealloc +#define yyfree header_yyfree + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 +#define YY_FLEX_SUBMINOR_VERSION 37 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. 
*/ +#include +#include +#include +#include + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have . Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +#ifdef __cplusplus + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +/* C99 requires __STDC__ to be defined as 1. */ +#if defined (__STDC__) + +#define YY_USE_CONST + +#endif /* defined (__STDC__) */ +#endif /* ! __cplusplus */ + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + +/* Returned upon end-of-file. 
*/ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN (yy_start) = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START (((yy_start) - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE header_yyrestart(header_yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#define YY_BUF_SIZE 16384 +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. + */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +extern yy_size_t header_yyleng; + +extern FILE *header_yyin, *header_yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + #define YY_LESS_LINENO(n) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up header_yytext. 
*/ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = (yy_hold_char); \ + YY_RESTORE_YY_MORE_OFFSET \ + (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up header_yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, (yytext_ptr) ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + yy_size_t yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. 
+ * + * When we actually see the EOF, we change the status to "new" + * (via header_yyrestart()), so that the user can continue scanning by + * just pointing header_yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* Stack of input buffers. */ +static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */ +static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */ +static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \ + ? (yy_buffer_stack)[(yy_buffer_stack_top)] \ + : NULL) + +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. + */ +#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)] + +/* yy_hold_char holds the character lost when header_yytext is formed. */ +static char yy_hold_char; +static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */ +yy_size_t header_yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = (char *) 0; +static int yy_init = 0; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow header_yywrap()'s to do buffer switches + * instead of setting up a fresh header_yyin. A bit of a hack ... 
+ */ +static int yy_did_buffer_switch_on_eof; + +void header_yyrestart (FILE *input_file ); +void header_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ); +YY_BUFFER_STATE header_yy_create_buffer (FILE *file,int size ); +void header_yy_delete_buffer (YY_BUFFER_STATE b ); +void header_yy_flush_buffer (YY_BUFFER_STATE b ); +void header_yypush_buffer_state (YY_BUFFER_STATE new_buffer ); +void header_yypop_buffer_state (void ); + +static void header_yyensure_buffer_stack (void ); +static void header_yy_load_buffer_state (void ); +static void header_yy_init_buffer (YY_BUFFER_STATE b,FILE *file ); + +#define YY_FLUSH_BUFFER header_yy_flush_buffer(YY_CURRENT_BUFFER ) + +YY_BUFFER_STATE header_yy_scan_buffer (char *base,yy_size_t size ); +YY_BUFFER_STATE header_yy_scan_string (yyconst char *yy_str ); +YY_BUFFER_STATE header_yy_scan_bytes (yyconst char *bytes,yy_size_t len ); + +void *header_yyalloc (yy_size_t ); +void *header_yyrealloc (void *,yy_size_t ); +void header_yyfree (void * ); + +#define yy_new_buffer header_yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + header_yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + header_yy_create_buffer(header_yyin,YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
YY_CURRENT_BUFFER ){\ + header_yyensure_buffer_stack (); \ + YY_CURRENT_BUFFER_LVALUE = \ + header_yy_create_buffer(header_yyin,YY_BUF_SIZE ); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ + +typedef unsigned char YY_CHAR; + +FILE *header_yyin = (FILE *) 0, *header_yyout = (FILE *) 0; + +typedef int yy_state_type; + +extern int header_yylineno; + +int header_yylineno = 1; + +extern char *header_yytext; +#define yytext_ptr header_yytext + +static yy_state_type yy_get_previous_state (void ); +static yy_state_type yy_try_NUL_trans (yy_state_type current_state ); +static int yy_get_next_buffer (void ); +static void yy_fatal_error (yyconst char msg[] ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up header_yytext. + */ +#define YY_DO_BEFORE_ACTION \ + (yytext_ptr) = yy_bp; \ + header_yyleng = (size_t) (yy_cp - yy_bp); \ + (yy_hold_char) = *yy_cp; \ + *yy_cp = '\0'; \ + (yy_c_buf_p) = yy_cp; + +#define YY_NUM_RULES 12 +#define YY_END_OF_BUFFER 13 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static yyconst flex_int16_t yy_accept[29] = + { 0, + 0, 0, 0, 0, 0, 0, 0, 0, 13, 12, + 3, 2, 1, 5, 4, 7, 6, 9, 8, 10, + 11, 3, 2, 5, 4, 9, 8, 0 + } ; + +static yyconst flex_int32_t yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 3, 3, 1, 3, 3, 3, 3, + 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 1, + 5, 6, 1, 1, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 1, 3, 1, 3, 1, 3, 3, 3, 3, + + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst flex_int32_t yy_meta[7] = + { 0, + 1, 2, 3, 4, 4, 1 + } ; + +static yyconst flex_int16_t yy_base[35] = + { 0, + 0, 0, 22, 21, 6, 0, 12, 0, 26, 29, + 0, 0, 29, 0, 0, 29, 29, 0, 0, 29, + 29, 0, 0, 0, 0, 0, 0, 29, 23, 16, + 22, 20, 20, 18 + } ; + +static yyconst flex_int16_t yy_def[35] = + { 0, + 28, 1, 1, 1, 28, 5, 28, 7, 28, 28, + 29, 30, 28, 31, 32, 28, 28, 33, 34, 28, + 28, 29, 30, 31, 32, 33, 34, 0, 28, 28, + 28, 28, 28, 28 + } ; + +static yyconst flex_int16_t yy_nxt[36] = + { 0, + 10, 11, 12, 12, 12, 13, 10, 14, 15, 16, + 17, 10, 10, 18, 19, 20, 21, 10, 23, 23, + 27, 26, 25, 24, 22, 28, 10, 10, 9, 28, + 28, 28, 28, 28, 28 + } ; + +static yyconst flex_int16_t yy_chk[36] = + { 0, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, + 5, 5, 7, 7, 7, 7, 7, 7, 30, 30, + 34, 33, 32, 31, 29, 9, 4, 3, 28, 28, + 28, 28, 28, 28, 28 + } ; + +static yy_state_type yy_last_accepting_state; 
+static char *yy_last_accepting_cpos; + +extern int header_yy_flex_debug; +int header_yy_flex_debug = 0; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. + */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *header_yytext; +#line 1 "fasta_header_parser.l" +/* + * Add -ll in Makefile if you modify this file to convert to .c + */ + + + +#line 10 "fasta_header_parser.l" + +#include +#include +#include "header_mem_handler.h" +#include "fasta_header_handler.h" + +#define MEMALLOCATED 10 +#define BUFFER 5 + +#define YY_DECL int header_parser(int *nbf, int *memory_allocated, element_from_header **p_header) + + +#line 502 "" + +#define INITIAL 0 +#define REGID 1 +#define REGNAME 2 +#define REGVAL 3 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +static int yy_init_globals (void ); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. */ + +int header_yylex_destroy (void ); + +int header_yyget_debug (void ); + +void header_yyset_debug (int debug_flag ); + +YY_EXTRA_TYPE header_yyget_extra (void ); + +void header_yyset_extra (YY_EXTRA_TYPE user_defined ); + +FILE *header_yyget_in (void ); + +void header_yyset_in (FILE * in_str ); + +FILE *header_yyget_out (void ); + +void header_yyset_out (FILE * out_str ); + +yy_size_t header_yyget_leng (void ); + +char *header_yyget_text (void ); + +int header_yyget_lineno (void ); + +void header_yyset_lineno (int line_number ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int header_yywrap (void ); +#else +extern int header_yywrap (void ); +#endif +#endif + + static void yyunput (int c,char *buf_ptr ); + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ); +#endif + +#ifndef YY_NO_INPUT + +#ifdef __cplusplus +static int yyinput (void ); +#else +static int input (void ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO do { if (fwrite( header_yytext, header_yyleng, 1, header_yyout )) {} } while (0) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". + */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + size_t n; \ + for ( n = 0; n < max_size && \ + (c = getc( header_yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( header_yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = fread(buf, 1, max_size, header_yyin))==0 && ferror(header_yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(header_yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. 
+ */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int header_yylex (void); + +#define YY_DECL int header_yylex (void) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after header_yytext and header_yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. + */ +YY_DECL +{ + register yy_state_type yy_current_state; + register char *yy_cp, *yy_bp; + register int yy_act; + +#line 32 "fasta_header_parser.l" + + + int i; + int size_needed; + int free_size; + char* field; + + +#line 696 "" + + if ( !(yy_init) ) + { + (yy_init) = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! (yy_start) ) + (yy_start) = 1; /* first start state */ + + if ( ! header_yyin ) + header_yyin = stdin; + + if ( ! header_yyout ) + header_yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + header_yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + header_yy_create_buffer(header_yyin,YY_BUF_SIZE ); + } + + header_yy_load_buffer_state( ); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = (yy_c_buf_p); + + /* Support of header_yytext. */ + *yy_cp = (yy_hold_char); + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = (yy_start); +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + if ( yy_accept[yy_current_state] ) + { + (yy_last_accepting_state) = yy_current_state; + (yy_last_accepting_cpos) = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 29 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + ++yy_cp; + } + while ( yy_base[yy_current_state] != 29 ); + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + if ( yy_act == 0 ) + { /* have to back up */ + yy_cp = (yy_last_accepting_cpos); + yy_current_state = (yy_last_accepting_state); + yy_act = yy_accept[yy_current_state]; + } + + YY_DO_BEFORE_ACTION; + +do_action: /* This label is used only to access EOF actions. */ + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = (yy_hold_char); + yy_cp = (yy_last_accepting_cpos); + yy_current_state = (yy_last_accepting_state); + goto yy_find_action; + +case 1: +YY_RULE_SETUP +#line 40 "fasta_header_parser.l" +{ + /*printf("\n{SUP},%s",header_yytext);*/ + BEGIN(REGID); + } + YY_BREAK +case 2: +YY_RULE_SETUP +#line 45 "fasta_header_parser.l" +{ + i=0; + + field = malloc_field(&free_size); + (*p_header)[*nbf].name = (char*) malloc(3*sizeof(char)); + strcpy(((*p_header)[*nbf]).name,"id"); + + size_needed = strlen(header_yytext)+1; + (*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed); + strcpy(((*p_header)[*nbf]).value,header_yytext); + + (*nbf)++; + } + YY_BREAK +case 3: +YY_RULE_SETUP +#line 60 "fasta_header_parser.l" +{ + BEGIN(REGNAME); + } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 64 "fasta_header_parser.l" +{ + /*fprintf(stderr,"\n{WORD} **%s**",header_yytext);*/ + field = 
store_in_field(field,header_yytext,&free_size,&i); + } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 69 "fasta_header_parser.l" +{ + /*fprintf(stderr,"\n{SPACE} **%s**",header_yytext);*/ + if (i != 0) + field = store_in_field(field,header_yytext,&free_size,&i); + } + YY_BREAK +case 6: +YY_RULE_SETUP +#line 75 "fasta_header_parser.l" +{ + /*fprintf(stderr,"\n{EQUAL},%s",header_yytext);*/ + field = store_in_header_table(field, &((*p_header)[*nbf].name), &free_size, &i); + BEGIN(REGVAL); + } + YY_BREAK +case 7: +YY_RULE_SETUP +#line 81 "fasta_header_parser.l" +{ + /*fprintf(stderr,"\n{SEP},%s",header_yytext);*/ + (*p_header)[*nbf].name = (char*) malloc(19*sizeof(char)); + strcpy((*p_header)[*nbf].name,"definition"); + field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); + p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); + BEGIN(REGNAME); + } + YY_BREAK +case 8: +YY_RULE_SETUP +#line 90 "fasta_header_parser.l" +{ + /*fprintf(stderr,"\n{WORD} **%s**\n",header_yytext);*/ + field = store_in_field(field,header_yytext,&free_size,&i); + } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 95 "fasta_header_parser.l" +{ + /*fprintf(stderr,"\n{SPACE} **%s**\n",header_yytext);*/ + field = store_in_field(field,header_yytext,&free_size,&i); + } + YY_BREAK +case 10: +YY_RULE_SETUP +#line 100 "fasta_header_parser.l" +{ + /*fprintf(stderr,"\n{SEP},%s\n",header_yytext);*/ + + field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); + p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); + BEGIN(REGNAME); + } + YY_BREAK +case 11: +YY_RULE_SETUP +#line 109 "fasta_header_parser.l" +{ + /*fprintf(stderr, "\nWarning : separator ';' probably missing in header after %s",(*p_header)[*nbf].name);*/ + } + YY_BREAK +case YY_STATE_EOF(REGVAL): +#line 113 "fasta_header_parser.l" +{ + field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); + p_header = 
check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); + end_header_table(p_header, *nbf); + + free(field); + BEGIN(INITIAL); + return 0; + } + YY_BREAK +case YY_STATE_EOF(REGNAME): +#line 123 "fasta_header_parser.l" +{ + /*(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19); + strcpy((*p_header)[*nbf].name,"other_informations"); + field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); + p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); + */ + end_header_table(p_header, *nbf); + + free(field); + BEGIN(INITIAL); + return 0; + } + YY_BREAK +case 12: +YY_RULE_SETUP +#line 136 "fasta_header_parser.l" +ECHO; + YY_BREAK +#line 915 "" +case YY_STATE_EOF(INITIAL): +case YY_STATE_EOF(REGID): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = (yy_hold_char); + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed header_yyin at a new source and called + * header_yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = header_yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). 
Contrast this with the test + * in input(). + */ + if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++(yy_c_buf_p); + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = (yy_c_buf_p); + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_END_OF_FILE: + { + (yy_did_buffer_switch_on_eof) = 0; + + if ( header_yywrap( ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * header_yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = + (yytext_ptr) + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + (yy_c_buf_p) = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; + + yy_current_state = yy_get_previous_state( ); + + yy_cp = (yy_c_buf_p); + yy_bp = (yytext_ptr) + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ +} /* end of header_yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (void) +{ + register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + register char *source = (yytext_ptr); + register int number_to_move, i; + int ret_val; + + if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. 
*/ + number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; + + else + { + yy_size_t num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; + + int yy_c_buf_p_offset = + (int) ((yy_c_buf_p) - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + yy_size_t new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + header_yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - + number_to_move - 1; + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + (yy_n_chars), num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + if ( (yy_n_chars) == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + header_yyrestart(header_yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { + /* Extend the array by 50%, plus the number we really need. */ + yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) header_yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ); + if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); + } + + (yy_n_chars) += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; + + (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (void) +{ + register yy_state_type yy_current_state; + register char *yy_cp; + + yy_current_state = (yy_start); + + for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) + { + register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + (yy_last_accepting_state) = yy_current_state; + (yy_last_accepting_cpos) = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 29 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) +{ + register int yy_is_jam; + register char *yy_cp = (yy_c_buf_p); + + register YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + (yy_last_accepting_state) = yy_current_state; + (yy_last_accepting_cpos) = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 29 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + yy_is_jam = (yy_current_state == 28); + + return yy_is_jam ? 0 : yy_current_state; +} + + static void yyunput (int c, register char * yy_bp ) +{ + register char *yy_cp; + + yy_cp = (yy_c_buf_p); + + /* undo effects of setting up header_yytext */ + *yy_cp = (yy_hold_char); + + if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. 
*/ + register yy_size_t number_to_move = (yy_n_chars) + 2; + register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[ + YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2]; + register char *source = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]; + + while ( source > YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_buf_size; + + if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (char) c; + + (yytext_ptr) = yy_bp; + (yy_hold_char) = *yy_cp; + (yy_c_buf_p) = yy_cp; +} + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (void) +#else + static int input (void) +#endif + +{ + int c; + + *(yy_c_buf_p) = (yy_hold_char); + + if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) + /* This was really a NUL. */ + *(yy_c_buf_p) = '\0'; + + else + { /* need more input */ + yy_size_t offset = (yy_c_buf_p) - (yytext_ptr); + ++(yy_c_buf_p); + + switch ( yy_get_next_buffer( ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + header_yyrestart(header_yyin ); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( header_yywrap( ) ) + return EOF; + + if ( ! 
(yy_did_buffer_switch_on_eof) ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + (yy_c_buf_p) = (yytext_ptr) + offset; + break; + } + } + } + + c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ + *(yy_c_buf_p) = '\0'; /* preserve header_yytext */ + (yy_hold_char) = *++(yy_c_buf_p); + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * + * @note This function does not reset the start condition to @c INITIAL . + */ + void header_yyrestart (FILE * input_file ) +{ + + if ( ! YY_CURRENT_BUFFER ){ + header_yyensure_buffer_stack (); + YY_CURRENT_BUFFER_LVALUE = + header_yy_create_buffer(header_yyin,YY_BUF_SIZE ); + } + + header_yy_init_buffer(YY_CURRENT_BUFFER,input_file ); + header_yy_load_buffer_state( ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. + * + */ + void header_yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) +{ + + /* TODO. We should be able to replace this entire function body + * with + * header_yypop_buffer_state(); + * header_yypush_buffer_state(new_buffer); + */ + header_yyensure_buffer_stack (); + if ( YY_CURRENT_BUFFER == new_buffer ) + return; + + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + header_yy_load_buffer_state( ); + + /* We don't actually know whether we did this switch during + * EOF (header_yywrap()) processing, but the only time this flag + * is looked at is after header_yywrap() is called, so it's safe + * to go ahead and always set it. 
+ */ + (yy_did_buffer_switch_on_eof) = 1; +} + +static void header_yy_load_buffer_state (void) +{ + (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + header_yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + (yy_hold_char) = *(yy_c_buf_p); +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * + * @return the allocated buffer state. + */ + YY_BUFFER_STATE header_yy_create_buffer (FILE * file, int size ) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) header_yyalloc(sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in header_yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) header_yyalloc(b->yy_buf_size + 2 ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in header_yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + header_yy_init_buffer(b,file ); + + return b; +} + +/** Destroy the buffer. + * @param b a buffer created with header_yy_create_buffer() + * + */ + void header_yy_delete_buffer (YY_BUFFER_STATE b ) +{ + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + header_yyfree((void *) b->yy_ch_buf ); + + header_yyfree((void *) b ); +} + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a header_yyrestart() or at EOF. 
+ */ + static void header_yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) + +{ + int oerrno = errno; + + header_yy_flush_buffer(b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then header_yy_init_buffer was _probably_ + * called from header_yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * + */ + void header_yy_flush_buffer (YY_BUFFER_STATE b ) +{ + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) + header_yy_load_buffer_state( ); +} + +/** Pushes the new state onto the stack. The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. + * + */ +void header_yypush_buffer_state (YY_BUFFER_STATE new_buffer ) +{ + if (new_buffer == NULL) + return; + + header_yyensure_buffer_stack(); + + /* This block is copied from header_yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *(yy_c_buf_p) = (yy_hold_char); + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); + } + + /* Only push if top exists. Otherwise, replace top. 
*/ + if (YY_CURRENT_BUFFER) + (yy_buffer_stack_top)++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from header_yy_switch_to_buffer. */ + header_yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * + */ +void header_yypop_buffer_state (void) +{ + if (!YY_CURRENT_BUFFER) + return; + + header_yy_delete_buffer(YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + if ((yy_buffer_stack_top) > 0) + --(yy_buffer_stack_top); + + if (YY_CURRENT_BUFFER) { + header_yy_load_buffer_state( ); + (yy_did_buffer_switch_on_eof) = 1; + } +} + +/* Allocates the stack if it does not exist. + * Guarantees space for at least one push. + */ +static void header_yyensure_buffer_stack (void) +{ + yy_size_t num_to_alloc; + + if (!(yy_buffer_stack)) { + + /* First allocation is just for 2 elements, since we don't know if this + * scanner will even need a stack. We use 2 instead of 1 to avoid an + * immediate realloc on the next call. + */ + num_to_alloc = 1; + (yy_buffer_stack) = (struct yy_buffer_state**)header_yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + ); + if ( ! (yy_buffer_stack) ) + YY_FATAL_ERROR( "out of dynamic memory in header_yyensure_buffer_stack()" ); + + memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + (yy_buffer_stack_max) = num_to_alloc; + (yy_buffer_stack_top) = 0; + return; + } + + if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){ + + /* Increase the buffer to prepare for a possible push. */ + int grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = (yy_buffer_stack_max) + grow_size; + (yy_buffer_stack) = (struct yy_buffer_state**)header_yyrealloc + ((yy_buffer_stack), + num_to_alloc * sizeof(struct yy_buffer_state*) + ); + if ( ! 
(yy_buffer_stack) ) + YY_FATAL_ERROR( "out of dynamic memory in header_yyensure_buffer_stack()" ); + + /* zero only the new slots.*/ + memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*)); + (yy_buffer_stack_max) = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. + * @param base the character buffer + * @param size the size in bytes of the character buffer + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE header_yy_scan_buffer (char * base, yy_size_t size ) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return 0; + + b = (YY_BUFFER_STATE) header_yyalloc(sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in header_yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + header_yy_switch_to_buffer(b ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to header_yylex() will + * scan from a @e copy of @a str. + * @param yystr a NUL-terminated string to scan + * + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * header_yy_scan_bytes() instead. + */ +YY_BUFFER_STATE header_yy_scan_string (yyconst char * yystr ) +{ + + return header_yy_scan_bytes(yystr,strlen(yystr) ); +} + +/** Setup the input buffer state to scan the given bytes. The next call to header_yylex() will + * scan from a @e copy of @a bytes. 
+ * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. + * + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE header_yy_scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len ) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = _yybytes_len + 2; + buf = (char *) header_yyalloc(n ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in header_yy_scan_bytes()" ); + + for ( i = 0; i < _yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = header_yy_scan_buffer(buf,n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in header_yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +static void yy_fatal_error (yyconst char* msg ) +{ + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); +} + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up header_yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + header_yytext[header_yyleng] = (yy_hold_char); \ + (yy_c_buf_p) = header_yytext + yyless_macro_arg; \ + (yy_hold_char) = *(yy_c_buf_p); \ + *(yy_c_buf_p) = '\0'; \ + header_yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the current line number. + * + */ +int header_yyget_lineno (void) +{ + + return header_yylineno; +} + +/** Get the input stream. + * + */ +FILE *header_yyget_in (void) +{ + return header_yyin; +} + +/** Get the output stream. + * + */ +FILE *header_yyget_out (void) +{ + return header_yyout; +} + +/** Get the length of the current token. 
+ * + */ +yy_size_t header_yyget_leng (void) +{ + return header_yyleng; +} + +/** Get the current token. + * + */ + +char *header_yyget_text (void) +{ + return header_yytext; +} + +/** Set the current line number. + * @param line_number + * + */ +void header_yyset_lineno (int line_number ) +{ + + header_yylineno = line_number; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param in_str A readable stream. + * + * @see header_yy_switch_to_buffer + */ +void header_yyset_in (FILE * in_str ) +{ + header_yyin = in_str ; +} + +void header_yyset_out (FILE * out_str ) +{ + header_yyout = out_str ; +} + +int header_yyget_debug (void) +{ + return header_yy_flex_debug; +} + +void header_yyset_debug (int bdebug ) +{ + header_yy_flex_debug = bdebug ; +} + +static int yy_init_globals (void) +{ + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from header_yylex_destroy(), so don't allocate here. + */ + + (yy_buffer_stack) = 0; + (yy_buffer_stack_top) = 0; + (yy_buffer_stack_max) = 0; + (yy_c_buf_p) = (char *) 0; + (yy_init) = 0; + (yy_start) = 0; + +/* Defined in main.c */ +#ifdef YY_STDINIT + header_yyin = stdin; + header_yyout = stdout; +#else + header_yyin = (FILE *) 0; + header_yyout = (FILE *) 0; +#endif + + /* For future reference: Set errno on error, since we are called by + * header_yylex_init() + */ + return 0; +} + +/* header_yylex_destroy is for both reentrant and non-reentrant scanners. */ +int header_yylex_destroy (void) +{ + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + header_yy_delete_buffer(YY_CURRENT_BUFFER ); + YY_CURRENT_BUFFER_LVALUE = NULL; + header_yypop_buffer_state(); + } + + /* Destroy the stack itself. */ + header_yyfree((yy_buffer_stack) ); + (yy_buffer_stack) = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * header_yylex() is called, initialization will occur. 
*/ + yy_init_globals( ); + + return 0; +} + +/* + * Internal utility routines. + */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, yyconst char * s2, int n ) +{ + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * s ) +{ + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +void *header_yyalloc (yy_size_t size ) +{ + return (void *) malloc( size ); +} + +void *header_yyrealloc (void * ptr, yy_size_t size ) +{ + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. + */ + return (void *) realloc( (char *) ptr, size ); +} + +void header_yyfree (void * ptr ) +{ + free( (char *) ptr ); /* see header_yyrealloc() for (char *) cast */ +} + +#define YYTABLES_NAME "yytables" + +#line 136 "fasta_header_parser.l" + + + +int header_yywrap() +{ + return 1; +} + +element_from_header* header_parser_main(char *h) +{ + int nbfields,memory_allocated; + element_from_header* header; + char* nbfields_n; + char* nbfields_v; + + nbfields_n = (char*) malloc(9*sizeof(char)); + nbfields_v = (char*) malloc(5*sizeof(char)); + + memory_allocated=MEMALLOCATED; + + nbfields=1; + + strcpy(nbfields_n, "nbfields"); + strcpy(nbfields_v, "1"); + + header = (element_from_header*) malloc(memory_allocated * sizeof(element_from_header)); + + header[0].name = nbfields_n; + header[0].value = nbfields_v; + + YY_BUFFER_STATE state; + + state=header_yy_scan_string(h); + + header_parser(&nbfields, &memory_allocated, &header); + + header_yy_delete_buffer(state); + + return header; +} + + + + + diff --git a/libfasta/fasta_header_parser.h b/libfasta/fasta_header_parser.h new file mode 100644 index 
0000000..985b460 --- /dev/null +++ b/libfasta/fasta_header_parser.h @@ -0,0 +1,13 @@ + +#ifndef FASTA_HEADER_PARSER_H_ +#define FASTA_HEADER_PARSER_H_ + +typedef struct { + char *name; + void *value; +}element_from_header; + +element_from_header* header_parser_main(char*); + + +#endif diff --git a/libfasta/fasta_header_parser.l b/libfasta/fasta_header_parser.l new file mode 100644 index 0000000..e379d4e --- /dev/null +++ b/libfasta/fasta_header_parser.l @@ -0,0 +1,178 @@ +/* + * Add -ll in Makefile if you modify this file to convert to .c + */ + +%x REGID +%x REGNAME +%x REGVAL + +%{ + +#include +#include +#include "header_mem_handler.h" +#include "fasta_header_handler.h" + +#define MEMALLOCATED 10 +#define BUFFER 5 + +#define YY_DECL int header_parser(int *nbf, int *memory_allocated, element_from_header **p_header) + + +%} + +WORD [[:alnum:]:\-.{},'_()\#\[\]\|\&\"\'\/\%\+]+ +WORDID [[:alnum:]:\-.{},'_()\#\[\]\|\&\"\'\/\%\+=;]+ +SUP > +EOL \n +SEP ; +SPACE [[:blank:]]+ +EQUAL = + +%% + + int i; + int size_needed; + int free_size; + char* field; + + +{SUP} { + /*printf("\n{SUP},%s",yytext);*/ + BEGIN(REGID); + } + +{WORDID} { + i=0; + + field = malloc_field(&free_size); + (*p_header)[*nbf].name = (char*) malloc(3*sizeof(char)); + strcpy(((*p_header)[*nbf]).name,"id"); + + size_needed = strlen(yytext)+1; + (*p_header)[*nbf].value = (char*) malloc(sizeof(char)*size_needed); + strcpy(((*p_header)[*nbf]).value,yytext); + + (*nbf)++; + } + + +{SPACE} { + BEGIN(REGNAME); + } + +{WORD} { + /*fprintf(stderr,"\n{WORD} **%s**",yytext);*/ + field = store_in_field(field,yytext,&free_size,&i); + } + +{SPACE} { + /*fprintf(stderr,"\n{SPACE} **%s**",yytext);*/ + if (i != 0) + field = store_in_field(field,yytext,&free_size,&i); + } + +{EQUAL} { + /*fprintf(stderr,"\n{EQUAL},%s",yytext);*/ + field = store_in_header_table(field, &((*p_header)[*nbf].name), &free_size, &i); + BEGIN(REGVAL); + } + +{SEP} { + /*fprintf(stderr,"\n{SEP},%s",yytext);*/ + (*p_header)[*nbf].name = (char*) 
malloc(19*sizeof(char)); + strcpy((*p_header)[*nbf].name,"definition"); + field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); + p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); + BEGIN(REGNAME); + } + +{WORD} { + /*fprintf(stderr,"\n{WORD} **%s**\n",yytext);*/ + field = store_in_field(field,yytext,&free_size,&i); + } + +{SPACE} { + /*fprintf(stderr,"\n{SPACE} **%s**\n",yytext);*/ + field = store_in_field(field,yytext,&free_size,&i); + } + +{SEP} { + /*fprintf(stderr,"\n{SEP},%s\n",yytext);*/ + + field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); + p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); + BEGIN(REGNAME); + } + + +{EQUAL} { + /*fprintf(stderr, "\nWarning : separator ';' probably missing in header after %s",(*p_header)[*nbf].name);*/ + } + +<> { + field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); + p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); + end_header_table(p_header, *nbf); + + free(field); + BEGIN(INITIAL); + return 0; + } + +<> { + /*(*p_header)[*nbf].name = (char*) malloc(sizeof(char)*19); + strcpy((*p_header)[*nbf].name,"other_informations"); + field = store_in_header_table(field, &((*p_header)[*nbf].value), &free_size, &i); + p_header = check_and_realloc_mem_in_header_table(p_header, nbf, memory_allocated); + */ + end_header_table(p_header, *nbf); + + free(field); + BEGIN(INITIAL); + return 0; + } + +%% + +int header_yywrap() +{ + return 1; +} + +element_from_header* header_parser_main(char *h) +{ + int nbfields,memory_allocated; + element_from_header* header; + char* nbfields_n; + char* nbfields_v; + + nbfields_n = (char*) malloc(9*sizeof(char)); + nbfields_v = (char*) malloc(5*sizeof(char)); + + memory_allocated=MEMALLOCATED; + + nbfields=1; + + strcpy(nbfields_n, "nbfields"); + strcpy(nbfields_v, "1"); + + header = (element_from_header*) 
malloc(memory_allocated * sizeof(element_from_header)); + + header[0].name = nbfields_n; + header[0].value = nbfields_v; + + YY_BUFFER_STATE state; + + state=yy_scan_string(h); + + header_parser(&nbfields, &memory_allocated, &header); + + yy_delete_buffer(state); + + return header; +} + + + + diff --git a/libfasta/fasta_seq_writer.c b/libfasta/fasta_seq_writer.c new file mode 100644 index 0000000..ccee773 --- /dev/null +++ b/libfasta/fasta_seq_writer.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include "sequence.h" +#include "fasta_header_parser.h" + + +void printOnlySeqFromFastaSeqPtr(fastaSeqPtr seq, FILE* output) +{ + char nuc; + int n=60; + int l = strlen(seq->sequence); + for (n=60; nsequence[n]; + seq->sequence[n]=0; + fprintf(output,"%s\n",seq->sequence+n-60); + seq->sequence[n]=nuc; + } + fprintf(output,"%s\n",seq->sequence+n-60); +} + + +void printOnlySeqFromChar(char* seq, FILE* output) +{ + char nuc; + int n=60; + int l = strlen(seq); + for (n=60; n%s\n",seq->rawheader); +} + + +void printOnlyHeaderFromTable(element_from_header* header, FILE* output) +{ + int i; + int nbf; + + nbf = atoi(header[0].value); + + fprintf(output,">%s ",header[1].value); + + for (i = 2; i <= nbf; i++) + { + if (strcmp(header[i].name, "definition") != 0) + { + fprintf(output,"%s",header[i].name); + fprintf(output,"="); + fprintf(output,"%s; ",header[i].value); + } + } + + if (strcmp(header[nbf].name, "definition") == 0) + fprintf(output,"%s; ",header[nbf].value); + + fprintf(output,"\n"); +} + + +void printHeaderAndSeqFromFastaSeqPtr(fastaSeqPtr seq, FILE* output) +{ + printOnlyHeaderFromFastaSeqPtr(seq, output); + printOnlySeqFromFastaSeqPtr(seq, output); +} diff --git a/libfasta/fasta_seq_writer.h b/libfasta/fasta_seq_writer.h new file mode 100644 index 0000000..39f8212 --- /dev/null +++ b/libfasta/fasta_seq_writer.h @@ -0,0 +1,19 @@ + +#ifndef FASTA_SEQ_WRITER_H_ +#define FASTA_SEQ_WRITER_H_ + +#include "sequence.h" + + +void printOnlySeqFromFastaSeqPtr(fastaSeqPtr, 
#define FIELD_BUFFER 1024

/*
 * Reports an allocation failure on stderr and terminates the program.
 * The field helpers below have no way to report an error to their callers,
 * which use the returned pointer unconditionally.
 */
static void field_alloc_failure(void)
{
	fprintf(stderr, "\nOut of memory while parsing a sequence header.\n");
	exit(EXIT_FAILURE);
}

/*
 * Allocates an empty field buffer of FIELD_BUFFER bytes.
 * On return *free_size holds FIELD_BUFFER and the buffer contains "".
 */
char* malloc_field(int *free_size)
{
	char* field = malloc(FIELD_BUFFER);

	if (field == NULL)
		field_alloc_failure();

	field[0] = '\0';
	*free_size = FIELD_BUFFER;
	return field;
}

/*
 * Returns the number of FIELD_BUFFER-sized chunks allocated for
 * size_needed bytes (always at least one for a non-negative size).
 */
int check_mem_field(int size_needed)
{
	return size_needed / FIELD_BUFFER + 1;
}

/*
 * Resizes field to number_of_chunks_to_alloc * FIELD_BUFFER bytes, keeping
 * its content, and returns the (possibly moved) buffer.
 */
char* realloc_field(int number_of_chunks_to_alloc, char* field)
{
	size_t new_size = (size_t) number_of_chunks_to_alloc * FIELD_BUFFER;
	char* grown = realloc(field, new_size);

	/* realloc() leaves the old block untouched on failure: never overwrite
	 * the caller's pointer with NULL, stop instead. */
	if (grown == NULL && new_size != 0)
		field_alloc_failure();

	return grown;
}

/*
 * Grows field so that size_needed more bytes (terminator included) fit after
 * its current content. *free_size is set to the room left once those bytes
 * have been stored, counting the terminator slot as free.
 */
char* check_and_realloc_field(char* field, int size_needed, int* free_size)
{
	int number_of_chunks_to_alloc;

	size_needed += (int) strlen(field);
	number_of_chunks_to_alloc = check_mem_field(size_needed);

	/* realloc() also covers the empty-field case and keeps the buffer
	 * NUL-terminated, unlike a free()/malloc() pair. */
	field = realloc_field(number_of_chunks_to_alloc, field);

	*free_size = number_of_chunks_to_alloc * FIELD_BUFFER - size_needed + 1;
	return field;
}

/*
 * Appends yytext at offset *i of field, growing the buffer when needed.
 * *i is advanced by strlen(yytext) and *free_size is decreased by the same
 * amount. Returns the (possibly moved) buffer.
 */
char* store_in_field(char* field, char* yytext, int* free_size, int* i)
{
	int size_needed = (int) strlen(yytext) + 1;

	if (size_needed > *free_size)
		field = check_and_realloc_field(field, size_needed, free_size);
	else
		*free_size -= size_needed - 1;

	memcpy(field + *i, yytext, (size_t) size_needed);
	*i += size_needed - 1;
	return field;
}
free_size, int* i) +{ + int size_needed; + size_needed = strlen(field)+1; + *storing_place = (char*) malloc(size_needed*sizeof(char)); + strcpy(*storing_place,field); + (*i)=0; + free(field); + field = malloc_field(free_size); + return field; +} + + +element_from_header** check_and_realloc_mem_in_header_table(element_from_header** p_header, int* nbf, int* memory_allocated) +{ + (*nbf)++; + + if (*nbf == *memory_allocated) + { + (*memory_allocated)++; + *p_header = (element_from_header*) realloc(*p_header, (*memory_allocated) * sizeof(element_from_header)); + } + + return p_header; +} + +void end_header_table(element_from_header** p_header, int nbf) +{ + nbf = nbf - 1; + //fprintf(stderr, "nbf = %d", nbf); + sprintf((*p_header)->value, "%d", nbf); +} diff --git a/libfasta/header_mem_handler.h b/libfasta/header_mem_handler.h new file mode 100644 index 0000000..bfb591b --- /dev/null +++ b/libfasta/header_mem_handler.h @@ -0,0 +1,22 @@ +#ifndef HEADER_MEM_HANDLER_H_ +#define HEADER_MEM_HANDLER_H_ + +#include "fasta_header_parser.h" + +char* malloc_field(int*); + +int check_mem_field(int); + +char* realloc_field(int, char*); + +char* check_and_realloc_field(char*, int, int*); + +char* store_in_field(char*, char*, int*, int*); + +char* store_in_header_table(char*, char**, int*, int*); + +element_from_header** check_and_realloc_mem_in_header_table(element_from_header**, int*, int*); + +void end_header_table(element_from_header** p_header, int nbf); + +#endif diff --git a/libfasta/sequence.c b/libfasta/sequence.c new file mode 100644 index 0000000..4f903d9 --- /dev/null +++ b/libfasta/sequence.c @@ -0,0 +1,450 @@ +/** + * FileName: sequence.c + * Authors: Tiayyba Riaz, Celine Mercier + * Description: C file for sequence reading and parsing + * **/ + +#include +#include +#include +#include + +#include "../libutils/utilities.h" +#include "sequence.h" +#include "../libfile/fileHandling.h" +#include "fasta_header_handler.h" +#include "fasta_header_parser.h" + + +/* + * 
Function Name: seq_getNext(FILE *fp, char *fieldDelim) + * Description: Gets the next sequence from file by calling another function, passes the sequence + * to other function to get the header elements and nucleotide suquence into a strcuture of + * type fastaSeq and returns a pointer to this newly populated structure. + */ + +fastaSeqPtr seq_getNext(FILE *fp, char *fieldDelim, BOOL isStandardSeq, BOOL onlyATGC) +{ + char *seq; + char *header; + char *strTemp; + fastaSeqPtr seqElem; + int seqLen; + + seq = seq_readNextFromFilebyLine(fp); + if (seq == NULL) return NULL; + + /* Find header separator \n, if not found return NULL */ + strTemp = strchr(seq, '\n'); + if(strTemp == NULL) + return NULL; + + seqLen = strlen(strTemp); + header = (char*) util_malloc(1+(strlen(seq) - seqLen)*sizeof(char), __FILE__, __LINE__); + + /* Separate header in header variable */ + strncpy(header, seq, strTemp - seq); + header[strTemp - seq] = '\0'; + /* Get memory for new sequence structure element */ + seqElem = (fastaSeqPtr) util_malloc(sizeof(fastaSeq), __FILE__, __LINE__); + /* Parse header and assign values to structure fields */ + seq_fillHeader(header, fieldDelim, seqElem); + /* Get clean sequence and assign to structure field */ + if (isStandardSeq) + if (onlyATGC) + seq_fillSeqOnlyATGC(strTemp, seqElem, seqLen); + else + seq_fillSeq(strTemp, seqElem, seqLen); + else + seq_fillDigitSeq(strTemp, seqElem, seqLen); + /* Type cast the char * seq to void pointer and deallocate the memory pointed by this */ + util_free((void *)seq); + /* Return new sequence structure element */ + return seqElem; +} + + +char *seq_readNextFromFilebyLine(FILE* fp) +{ + char newc = '\0'; + BOOL seqCompleted = FALSE; + int length = 500; + int32_t len; + char tempstr[length]; + char* buffer; + + if (feof(fp)) return NULL; + newc = file_nextChar(fp); + if (newc != '>') ungetc(newc, fp); + + buffer = util_malloc(1*sizeof(char), __FILE__, __LINE__); + buffer[0] = '\0'; + + while(!seqCompleted) + { + newc = 
file_nextChar(fp); + if(newc == '>' || newc == '\0') + { + seqCompleted = TRUE; + if (newc == '>') + ungetc(newc, fp); // Make sure next time we start from sequence delimiter > + } + else + { + ungetc(newc, fp); + if(file_nextLine( fp, tempstr, length) != NULL) + { + len = strlen(tempstr) + strlen(buffer) + 1; + buffer = util_realloc(buffer, len, __FILE__, __LINE__); + strcat(buffer, tempstr); + } + else + { + seqCompleted = TRUE; + } + } + } + return buffer; +} + + +/* + * Function Name: seq_fillHeader(char* header, char *fieldDelim, fastaSeqPtr seqElem) + */ +void seq_fillHeader(char* header, char *fieldDelim, fastaSeqPtr seqElem) +{ + char* IdEnd; + int IdSize; + + seqElem->rawheader = strdup(header); + + IdEnd = strchr(header, ' '); + if (IdEnd == NULL) + IdSize = strlen(header); + else + IdSize = strlen(header) - strlen(IdEnd); + + seqElem->accession_id = (char*) util_malloc(1+IdSize*sizeof(char), __FILE__, __LINE__); + + strncpy(seqElem->accession_id, header, IdSize); + + (seqElem->accession_id)[IdSize] = '\0'; +} + + +/* + * Function Name: seq_fillSeq(char *seq, fastaSeqPtr seqElem) + * Description: Parses the whole sequences for actual nucleotide sequences and stores that + * sequence in the field of structure 'seqElem' . 
+ */ +void seq_fillSeq(char *seq, fastaSeqPtr seqElem, int seqLen) +{ + char* seqTemp; + char c; + int32_t index = 0, seqIndex = 0, len = strlen(seq); + char* seqAlphabets = "acgtACGT-nN"; + + seqTemp = (char*) util_malloc(seqLen*sizeof(char), __FILE__, __LINE__); + + while (index < len) + { + c = seq[index++]; + if (strchr(seqAlphabets, c) != NULL) + seqTemp[seqIndex++] = tolower(c); + } + seqTemp[seqIndex] = '\0'; + seqElem->length=seqIndex; + seqElem->sequence = strdup(seqTemp); +} + + +void seq_fillSeqOnlyATGC(char *seq, fastaSeqPtr seqElem, int seqLen) +{ + char* seqTemp; + char c; + int32_t index = 0, seqIndex = 0, len = strlen(seq); + char* seqAlphabets = "acgtACGT"; + int notAllATGC = 0; + + seqTemp = (char*) util_malloc(seqLen*sizeof(char), __FILE__, __LINE__); + + while (index < len) + { + c = seq[index++]; + if (strchr(seqAlphabets, c) != NULL) + seqTemp[seqIndex++] = tolower(c); + else if (c != '\n') + notAllATGC = 1; + } + + if (notAllATGC) + seqTemp[0] = '\0'; + else + { + seqTemp[seqIndex] = '\0'; + seqElem->length=seqIndex; + } + seqElem->sequence = strdup(seqTemp); +} + + +void seq_fillDigitSeq(char *seq, fastaSeqPtr seqElem, int seqLen) +{ + char* seqTemp; + char c; + int32_t index = 0, seqIndex = 0, len = strlen(seq); + + seqTemp = (char*) util_malloc(seqLen*sizeof(char), __FILE__, __LINE__); + + while (index < len) + { + c = seq[index++]; + if ((c >= '0' && c <= '9') || c == ' ') + seqTemp[seqIndex++] = c; + /*else + { + printf("Error in input file"); + exit(0); + }*/ + } + seqTemp[seqIndex] = '\0'; + seqElem->sequence = strdup(seqTemp); +} + + +fastaSeqCount seq_readAllSeq2(char *fileName, BOOL isStandardSeq, BOOL onlyATGC) +{ + FILE* fp; + fastaSeqPtr seqPtr; + fastaSeqPtr seqPtrAr; + + int32_t counter = 0; + int32_t slots = 1000; + fastaSeqCount allseqs; + int32_t discarded=0; + + fp = file_open(fileName, TRUE); + + if (fp == NULL) + { + fprintf(stderr, "\nCould not open file.\n"); + exit(1); + } + + exitIfEmptyFile(fp); + + seqPtrAr = 
(fastaSeqPtr) util_malloc(slots*sizeof(fastaSeq), __FILE__, __LINE__); + + seqPtr = seq_getNext(fp, " ", isStandardSeq, onlyATGC); + + while (seqPtr != NULL) + { + if (counter == slots) + { + slots += 1000; + seqPtrAr = (fastaSeqPtr)util_realloc(seqPtrAr, slots*sizeof(fastaSeq), __FILE__, __LINE__); + } + + if ((seqPtr->sequence)[0] != '\0') + seqPtrAr[counter++] = *seqPtr; + else + discarded++; + + util_free((void *)seqPtr); + seqPtr = seq_getNext(fp, " ", isStandardSeq, onlyATGC); + } + fclose(fp); + + if (counter != slots) + seqPtrAr = (fastaSeqPtr)util_realloc(seqPtrAr, counter*sizeof(fastaSeq), __FILE__, __LINE__); + + allseqs.count = counter; + allseqs.fastaSeqs = seqPtrAr; + + if (discarded) + fprintf(stderr, "\nDiscarded %d sequences that did not contain only 'AaTtGgCc' characters.", discarded); + + return allseqs; +} + + +int32_t seq_findSeqByAccId (char *accid, fastaSeqCountPtr allseqs) +{ + int32_t i; + + for (i = 0; i < allseqs->count; i++) + { + if (strcmp (accid, allseqs->fastaSeqs[i].accession_id) == 0) + return i; + } + return -1; +} + + +void seq_printSeqs (fastaSeqCountPtr allseq) +{ + int32_t i; + + for (i = 0; i < allseq->count; i++) + //for (i = 0; i < 4; i++) + { + if (allseq->fastaSeqs[i].sequence == NULL) continue; + if (allseq->fastaSeqs[i].rawheader) + printf (">%s\n", allseq->fastaSeqs[i].rawheader); + else + printf (">%s\n", allseq->fastaSeqs[i].accession_id); + printf ("%s\n", allseq->fastaSeqs[i].sequence); + } +} + + +int cleanDB(fastaSeqCount db) // replace not a/t/g/c with a's +{ + int32_t i; + char *seq; + BOOL changed; + int32_t seqchanged=0; + int32_t nucchanged=0; + + fprintf(stderr,"Cleaning dataset..."); + + for (i=0; i < db.count;i++) + { + + changed=FALSE; + for (seq = db.fastaSeqs[i].sequence; *seq!=0; seq++) + { + if (*seq!='a' && *seq!='c' && *seq!='g' && *seq!='t') + { + changed=TRUE; + nucchanged++; + *seq='a'; + } + } + if (changed) + seqchanged++; + } + + if (seqchanged) + fprintf(stderr," : %d nucleotides substituted 
in %d sequences\n",nucchanged,seqchanged); + else + fprintf(stderr," : Done\n"); + + return(db.count); +} + + +void addCounts(fastaSeqCount* db) +{ + int s; + char* count; + element_from_header* header; + char* count_n; + char* count_v; + + count_n = (char*) malloc(6*sizeof(char)); + count_v = (char*) malloc(2*sizeof(char)); + + strcpy(count_n, "count"); + strcpy(count_v, "1"); + + for (s=0; s < db->count; s++) + { + header = header_parser_main(db->fastaSeqs[s].rawheader); + count = getItemFromHeader("count", header); + if (count == 0) // no count field + { + header = table_header_add_field(header, count_n, count_v); + db->fastaSeqs[s].count = 1; + } + else + db->fastaSeqs[s].count = atoi(count); + db->fastaSeqs[s].header = header; + } +} + + +int uniqSeqsVector(fastaSeqCount* db, fastaSeqPtr** uniqSeqs) +{ + int i, j, k; + *(*(uniqSeqs)) = db->fastaSeqs; + db->fastaSeqs[0].uniqHead = TRUE; + + i = 0; + k = 1; + + for (j=1; j < db->count; j++) + { + if (strcmp(db->fastaSeqs[i].sequence, db->fastaSeqs[j].sequence) == 0) + { + db->fastaSeqs[i].count += db->fastaSeqs[j].count; + db->fastaSeqs[j].uniqHead = FALSE; + } + else + { + db->fastaSeqs[j].uniqHead = TRUE; + *(*(uniqSeqs)+k) = (db->fastaSeqs)+j; + k++; + i = j; + } + } + return(k); +} + + +void calculateMaxAndMinLen(fastaSeqPtr* db, int n, int* lmax, int* lmin) +{ + int i; + int l; + + *lmax = 0; + for (i=0; i < n; i++) + { + l = (*(db+i))->length; + if (l > *lmax) + *lmax = l; + } + + *lmin = *lmax; + for (i=0; i < n; i++) + { + l = (*(db+i))->length; + if (l < *lmin) + *lmin = l; + } +} + + +void calculateMaxAndMinLenDB(fastaSeqCount db, int* lmax, int* lmin) +{ + int i; + int l; + + *lmax = 0; + for (i=0; i < db.count; i++) + { + l = ((db.fastaSeqs)+i)->length; + if (l > *lmax) + *lmax = l; + } + + *lmin = *lmax; + for (i=0; i < db.count; i++) + { + l = ((db.fastaSeqs)+i)->length;; + if (l < *lmin) + *lmin = l; + } +} + + +int sortSeqsWithCounts(const void **s1, const void **s2) +{ + return(((fastaSeqPtr) 
*s2)->count - ((fastaSeqPtr) *s1)->count); +} + + +int reverseSortSeqsWithCounts(const void **s1, const void **s2) +{ + return(((fastaSeqPtr) *s1)->count - ((fastaSeqPtr) *s2)->count); +} diff --git a/libfasta/sequence.h b/libfasta/sequence.h new file mode 100644 index 0000000..fa2d782 --- /dev/null +++ b/libfasta/sequence.h @@ -0,0 +1,64 @@ +/** + * FileName: sequence.h + * Authors: Tiayyba Riaz, Celine Mercier + * Description: Prototypes and other declarations for sequences + * **/ +#ifndef SEQUENCE_H_ +#define SEQUENCE_H_ + +#include +#include +#include "../libutils/utilities.h" +#include "fasta_header_parser.h" + + +typedef struct { + char* accession_id; // identifier + char *rawheader; // not parsed header + element_from_header* header; // parsed header + char *sequence; // DNA sequence itself + int32_t length; // DNA sequence's length + int32_t count; // abundance of the sequence + unsigned char *table; // 4mer occurrence table build using function buildTable + int32_t over; // count of 4mer with occurrences greater than 255 (overflow) + struct fastaSeqPtr* next; // next unique sequence for example + BOOL cluster_center; // whether the sequence is a cluster center or not + int32_t cluster_weight; // cluster weight when sequence is cluster center + int32_t cluster_weight_unique_ids; // cluster weight when sequence is cluster center, counting the number sequence records + double score; // score with cluster center for example + struct fastaSeqPtr* center; // pointer to the sequence's cluster center + int32_t center_index; // index of the sequence's cluster center + BOOL uniqHead; // whether the sequence is a unique head or not + char* columns_BIOM; // to print in BIOM format + int columns_BIOM_size; // size allocated for columns_BIOM + char* line_OTU_table; // to print in OTU table format + int line_OTU_table_size; // size allocated for line_OTU_table + struct hashtable *sample_counts; // sample counts for sumaclean +}fastaSeq,*fastaSeqPtr; + + +typedef struct 
{ + int32_t count; + fastaSeqPtr fastaSeqs; +}fastaSeqCount, *fastaSeqCountPtr; + + +fastaSeqPtr seq_getNext(FILE *fp, char *fieldDelim, BOOL isStandardSeq, BOOL onlyATGC); +char *seq_readNextFromFilebyLine(FILE* fp); +void seq_fillSeq(char *seq, fastaSeqPtr seqElem, int seqLen); +void seq_fillSeqOnlyATGC(char *seq, fastaSeqPtr seqElem, int seqLen); +void seq_fillDigitSeq(char *seq, fastaSeqPtr seqElem, int seqLen); +void seq_fillHeader(char* header, char *fieldDelim, fastaSeqPtr seqElem); +fastaSeqCount seq_readAllSeq2(char *fileName, BOOL isStandardSeq, BOOL onlyATGC); +int32_t seq_findSeqByAccId (char *accid, fastaSeqCountPtr allseqs); +void seq_printSeqs (fastaSeqCountPtr allseq); +int cleanDB(fastaSeqCount); +void addCounts(fastaSeqCount* db); +int uniqSeqsVector(fastaSeqCount* db, fastaSeqPtr** uniqSeqs); +void calculateMaxAndMinLen(fastaSeqPtr* db, int n, int* lmax, int* lmin); +void calculateMaxAndMinLenDB(fastaSeqCount db, int* lmax, int* lmin); +int sortSeqsWithCounts(const void **s1, const void **s2); +int reverseSortSeqsWithCounts(const void **s1, const void **s2); +void readSampleCounts(fastaSeqCount* db, char* key_name); + +#endif /*SEQUENCE_H_*/ diff --git a/libfile/.DS_Store b/libfile/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 +#include +#include + +#include "../libutils/utilities.h" + +/* + * Function Name: fileOpen(char* fileName, BOOL abortOnError) + * Description: Opens the file and returns the pointer to file object + */ +FILE *file_open(char* fileName, BOOL abortOnError) +{ + FILE* fp; + + if (fileName == NULL && abortOnError) + ERRORABORT(FILE_OPENING_ERROR, "File name not given."); + + if (fileName == NULL) + return NULL; + + fp = fopen(fileName, "r"); + return fp; +} + +FILE 
/*
 * Function Name: file_nextChar(FILE* fp)
 * Description:   Reads the file and returns the next character. Returns '\0'
 *                if the file is NULL, if its end was already reached, or if
 *                this read reaches the end (or fails).
 */
char file_nextChar(FILE* fp)
{
	int c;

	if (fp == NULL || feof(fp))
		return '\0';

	/* feof() only turns true after a read has hit the end, so the read
	 * itself must be checked: fgetc() returns EOF, which must not be
	 * narrowed to a char and handed back as data. */
	c = fgetc(fp);
	if (c == EOF)
		return '\0';

	return (char) c;
}

/*
 * Function Name: file_nextLine(FILE *fp, char *buffer, int32_t bufferSize)
 * Description:   Reads the next line of the file into buffer (at most
 *                bufferSize - 1 characters, newline kept) and returns buffer.
 *                Returns NULL if the file is NULL, at end of file, or when
 *                nothing could be read.
 */
char *file_nextLine(FILE *fp, char *buffer, int32_t bufferSize)
{
	if (fp == NULL || feof(fp))
		return NULL;

	return fgets(buffer, bufferSize, fp);
}


/*
 * Function Name: exitIfEmptyFile(FILE *file)
 * Description:   Exits with status 1 and a message on stderr when the file
 *                has a size of 0. Otherwise the read position is restored
 *                and the function returns. A stream whose position cannot be
 *                queried or moved is left untouched.
 */
void exitIfEmptyFile(FILE *file)
{
	long savedOffset = ftell(file);

	/* not seekable: the size cannot be measured this way */
	if (savedOffset < 0 || fseek(file, 0, SEEK_END) != 0)
		return;

	if (ftell(file) == 0)
	{
		fprintf(stderr, "\nInput file is empty.\n");
		exit(1);
	}
	fseek(file, savedOffset, SEEK_SET);
}
binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 +#include +#include + +#include + + + +// Allocate a band allowing to align sequences of length : 'length' + +column_t* allocateColumn(int length,column_t *column, bool mode8bits) +{ + int size; + bool newc = false; + + // The band length should be equal to the length + // of the sequence + 7 for taking into account its + // shape + + size = (length+1) * ((mode8bits) ? sizeof(int8_t):sizeof(int16_t)); + + + // If the pointer to the old column is NULL we allocate + // a new column + + if (column==NULL) + { + + column = malloc(sizeof(column_t)); + if (!column) + return NULL; + + column->size = 0; + column->data.shrt=NULL; + column->score.shrt=NULL; + newc = true; + } + + // Otherwise we check if its size is sufficient + // or if it should be extended + + if (size > column->size) + { + int16_t *old = column->data.shrt; + int16_t *olds= column->score.shrt; + + column->data.shrt = malloc(size); + column->score.shrt= malloc(size); + + if (column->data.shrt==NULL || column->score.shrt==NULL) + { + fprintf(stderr,"Allocation Error on column for a size of %d\n" , size); + column->data.shrt = old; + column->score.shrt= olds; + + if (newc) + { + free(column); + column=NULL; + return NULL; + } + return NULL; + } + else + column->size = size; + } + + return column; +} + +void freeColumn(column_p column) +{ + if (column) + { + if (column->data.shrt) + free(column->data.shrt); + + if (column->score.shrt) + free(column->score.shrt); + + free(column); + } +} + +int fastLCSScore(const char* seq1, const char* seq2,column_pp column,int32_t* lpath) +{ + return fastLCSScore16(seq1,seq2,column,lpath); +} + +int simpleLCS(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath) +{ + int lseq1,lseq2; // length of the both sequences + int lcs; + int itmp; // tmp variables for swap + const char* 
stmp; // + int32_t *score; + int32_t *path; + column_t *column; + int32_t i,j; + int32_t sl,su,sd; + int32_t pl,pu,pd; + + // Made seq1 the longest sequences + lseq1=strlen(seq1); + lseq2=strlen(seq2); + + if (lseq1 < lseq2) + { + itmp=lseq1; + lseq1=lseq2; + lseq2=itmp; + + stmp=seq1; + seq1=seq2; + seq2=stmp; + } + + lseq1++; + lseq2++; + + // a band sized to the smallest sequence is allocated + + if (ppcolumn) + column = *ppcolumn; + else + column=NULL; + + column = allocateColumn(lseq1*2,column,0); + score = (int32_t*) column->score.shrt; + path = (int32_t*) column->data.shrt; + + memset(score,0,lseq1 * sizeof(int32_t)); + + for (j=0; j < lseq1; j++) + path[j]=j; + + for (i=1; i< lseq2; i++) + { + sl=0; + pl=i; + for (j=1; j < lseq1; j++) + { + sd=score[j-1] + (seq2[i-1]==seq1[j-1] ? 1:0); + pd=path[j-1] + 1; + + su=score[j]; + pu=path[j] + 1; + + score[j-1]=sl; + + if (su > sl) sl=su, pl=pu; + if (sd > sl) sl=sd, pl=pd; + } + } + + lcs = sl; + if(lpath) *lpath=pl; + + if (ppcolumn) + *ppcolumn=column; + else + freeColumn(column); + + return lcs; +} + diff --git a/liblcs/_lcs.ext.2.c b/liblcs/_lcs.ext.2.c new file mode 100644 index 0000000..381dc6a --- /dev/null +++ b/liblcs/_lcs.ext.2.c @@ -0,0 +1,34 @@ +#include "_lcs.h" + +#include +#include +#include + +#include + + + + +#define VSIZE (8) +#define VTYPE vInt16 +#define STYPE int16_t +#define CMENB shrt +#define VMODE false +#define FASTLCSSCORE fastLCSScore16 +#define INSERT_REG _MM_INSERT_EPI16 +#define EXTRACT_REG _MM_EXTRACT_EPI16 +#define EQUAL_REG _MM_CMPEQ_EPI16 +#define GREATER_REG _MM_CMPGT_EPI16 +#define SMALLER_REG _MM_CMPLT_EPI16 +#define ADD_REG _MM_ADD_EPI16 +#define SUB_REG _MM_SUB_EPI16 +#define AND_REG _MM_AND_SI128 +#define ANDNOT_REG _MM_ANDNOT_SI128 +#define OR_REG _MM_OR_SI128 +#define SET_CONST _MM_SET1_EPI16 +#define GET_MAX _MM_MAX_EPI16 +#define GET_MIN _MM_MIN_EPI16 +#define MIN_SCORE INT16_MIN +#define MAX_SCORE 32000 + +#include "_lcs_fast.h" diff --git a/liblcs/_lcs.ext.3.c 
b/liblcs/_lcs.ext.3.c new file mode 100644 index 0000000..5c3a150 --- /dev/null +++ b/liblcs/_lcs.ext.3.c @@ -0,0 +1,34 @@ +#include "_lcs.h" + +#include +#include +#include + +#include + + + + +#define VSIZE (16) +#define VTYPE vInt8 +#define STYPE int8_t +#define CMENB byte +#define VMODE true +#define FASTLCSSCORE fastLCSScore8 +#define INSERT_REG _MM_INSERT_EPI8 +#define EXTRACT_REG _MM_EXTRACT_EPI8 +#define EQUAL_REG _MM_CMPEQ_EPI8 +#define GREATER_REG _MM_CMPGT_EPI8 +#define SMALLER_REG _MM_CMPLT_EPI8 +#define ADD_REG _MM_ADD_EPI8 +#define SUB_REG _MM_SUB_EPI8 +#define AND_REG _MM_AND_SI128 +#define ANDNOT_REG _MM_ANDNOT_SI128 +#define OR_REG _MM_OR_SI128 +#define SET_CONST _MM_SET1_EPI8 +#define GET_MAX _MM_MAX_EPI8 +#define GET_MIN _MM_MIN_EPI8 +#define MIN_SCORE INT8_MIN +#define MAX_SCORE 127 + +#include "_lcs_fast.h" diff --git a/liblcs/_lcs.h b/liblcs/_lcs.h new file mode 100644 index 0000000..cfc032f --- /dev/null +++ b/liblcs/_lcs.h @@ -0,0 +1,29 @@ +#include "../libsse/_sse.h" + +#define bool char +#define false (1==0) +#define true (1==1) + +typedef struct { + int16_t size; + + union { int16_t *shrt; + int8_t *byte; + } data; + + union { int16_t *shrt; + int8_t *byte; + } score; + + +} column_t, **column_pp, *column_p; + +column_p allocateColumn(int length,column_t *column, bool mode8bits); + +void freeColumn(column_p column); + +int fastLCSScore16(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath); +int fastLCSScore8(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath); +int simpleLCS(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath); + +int fastLCSScore(const char* seq1, const char* seq2,column_pp column,int32_t* lpath); diff --git a/liblcs/_lcs_fast.h b/liblcs/_lcs_fast.h new file mode 100644 index 0000000..3d0ac00 --- /dev/null +++ b/liblcs/_lcs_fast.h @@ -0,0 +1,597 @@ + +/* + * Print a SSE register for debug purpose + */ + +#ifdef __SSE2__ + +static void printreg(VTYPE r) +{ + STYPE 
a0,a1,a2,a3,a4,a5,a6,a7; +#if VMODE + STYPE a8,a9,a10,a11,a12,a13,a14,a15; +#endif + + a0= EXTRACT_REG(r,0); + a1= EXTRACT_REG(r,1); + a2= EXTRACT_REG(r,2); + a3= EXTRACT_REG(r,3); + a4= EXTRACT_REG(r,4); + a5= EXTRACT_REG(r,5); + a6= EXTRACT_REG(r,6); + a7= EXTRACT_REG(r,7); +#if VMODE + a8= EXTRACT_REG(r,8); + a9= EXTRACT_REG(r,9); + a10= EXTRACT_REG(r,10); + a11= EXTRACT_REG(r,11); + a12= EXTRACT_REG(r,12); + a13= EXTRACT_REG(r,13); + a14= EXTRACT_REG(r,14); + a15= EXTRACT_REG(r,15); +#endif + +printf( "a00 :-> %7d %7d %7d %7d " + " %7d %7d %7d %7d " +#if VMODE + "%7d %7d %7d %7d " + " %7d %7d %7d %7d " +#endif + "\n" + , a0,a1,a2,a3,a4,a5,a6,a7 +#if VMODE + , a8,a9,a10,a11,a12,a13,a14,a15 +#endif +); +} + +/* + * set position p of a SSE register with the value v + */ + +static inline VTYPE insert_reg(VTYPE r, STYPE v, int p) +{ + switch (p) { + case 0: return INSERT_REG(r,v,0); + case 1: return INSERT_REG(r,v,1); + case 2: return INSERT_REG(r,v,2); + case 3: return INSERT_REG(r,v,3); + case 4: return INSERT_REG(r,v,4); + case 5: return INSERT_REG(r,v,5); + case 6: return INSERT_REG(r,v,6); + case 7: return INSERT_REG(r,v,7); +#if VMODE + case 8: return INSERT_REG(r,v,8); + case 9: return INSERT_REG(r,v,9); + case 10: return INSERT_REG(r,v,10); + case 11: return INSERT_REG(r,v,11); + case 12: return INSERT_REG(r,v,12); + case 13: return INSERT_REG(r,v,13); + case 14: return INSERT_REG(r,v,14); + case 15: return INSERT_REG(r,v,15); +#endif + } + return _MM_SETZERO_SI128(); +} + +static inline STYPE extract_reg(VTYPE r, int p) +{ + switch (p) { + case 0: return EXTRACT_REG(r,0); + case 1: return EXTRACT_REG(r,1); + case 2: return EXTRACT_REG(r,2); + case 3: return EXTRACT_REG(r,3); + case 4: return EXTRACT_REG(r,4); + case 5: return EXTRACT_REG(r,5); + case 6: return EXTRACT_REG(r,6); + case 7: return EXTRACT_REG(r,7); +#if VMODE + case 8: return EXTRACT_REG(r,8); + case 9: return EXTRACT_REG(r,9); + case 10: return EXTRACT_REG(r,10); + case 11: return 
EXTRACT_REG(r,11); + case 12: return EXTRACT_REG(r,12); + case 13: return EXTRACT_REG(r,13); + case 14: return EXTRACT_REG(r,14); + case 15: return EXTRACT_REG(r,15); +#endif + } + return 0; +} + +#define GET_H_SYMBOLE(s,p) ((p && p < lseq1) ? (s)[(p)-1]:255) +#define GET_V_SYMBOLE(s,p) ((p && p < lseq2) ? (s)[(p)-1]:0) + +#define LSHIFT_SCORE(r) { r = _MM_SLLI_SI128((r),sizeof(STYPE)); } +#define SET_H_SYMBOLE(r,p,s) { r = insert_reg((r),(STYPE)GET_H_SYMBOLE(seq1,(s)),(p)); } +#define PUSH_V_SYMBOLE(r,s) { r = insert_reg(_MM_SLLI_SI128((r),sizeof(STYPE)),(STYPE)GET_V_SYMBOLE(seq2,(s)),0); } +#define EQUAL(f1,f2) _MM_AND_SI128(EQUAL_REG((f1),(f2)),SET_CONST(1)) + +int FASTLCSSCORE(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath) +{ + int lseq1,lseq2; // length of the both sequences + + int itmp; // tmp variables for swap + const char* stmp; // + + int nbands; // Number of bands of width eight in the score matrix + int lastband; // width of the last band + + // Register for scanning the score matrix + VTYPE minus1; + VTYPE minus2; + VTYPE current; + + VTYPE left; + VTYPE top; + VTYPE diag; + + + VTYPE sminus1; + VTYPE sminus2; + VTYPE scurrent; + + VTYPE sleft; + VTYPE stop; + VTYPE sdiag; + + VTYPE way; + VTYPE onevect; + VTYPE maxvect; + + VTYPE fhseq; // The fragment of the horizontal sequence + // to consider for aligment + VTYPE fvseq; // The fragment of the horizontal sequence + // to consider for aligment + VTYPE match; + + int band; + int line; + int limit; + + int lcs; + + int h; + int i; + + column_t *column; + + + // Made seq1 the longest sequences + lseq1=strlen(seq1); + lseq2=strlen(seq2); + + if (lseq1 < 10 || lseq2 < 10) + return simpleLCS(seq1,seq2,ppcolumn,lpath); + + if (lseq1 < lseq2) + { + itmp=lseq1; + lseq1=lseq2; + lseq2=itmp; + + stmp=seq1; + seq1=seq2; + seq2=stmp; + } + + // we add one to both lengths for taking into + // account the extra line and column in the score + // matrix + + lseq1++; + lseq2++; + + // a band 
sized to the smallest sequence is allocated + + if (ppcolumn) + column = *ppcolumn; + else + column=NULL; + + column = allocateColumn(lseq2,column,VMODE); + + // Check memory allocation + if (column == NULL) + return -1; + + for (i=0; idata.CMENB[i]=MIN_SCORE; + column->score.CMENB[i]=-1; + } + + nbands = lseq1 / VSIZE; // You have VSIZE element in one SSE register + // Alignment will be realized in nbands + + lastband = lseq1 - (nbands * VSIZE); // plus one of width lastband except if + // lastband==0 + + if (lastband) nbands++; + else lastband=VSIZE; + + lastband--; + +// printf("seq1 : %s seq2 : %s\n",seq1,seq2); + + + minus2 = SET_CONST(MIN_SCORE); + minus1 = _MM_SETZERO_SI128(); + + sminus1= _MM_SETZERO_SI128(); + sminus2= _MM_SETZERO_SI128(); + onevect= SET_CONST(1); + maxvect= SET_CONST(MAX_SCORE); + + h=0; + + fhseq = _MM_SETZERO_SI128(); + fvseq = _MM_SETZERO_SI128(); + + // + // Beginning of the first band + // + + for (line = 0; line < VSIZE; line++,h++) // avant VSIZE - 1 + { +// printf("line= %4d h= %4d\n",line,h); + SET_H_SYMBOLE(fhseq,line,h) + PUSH_V_SYMBOLE(fvseq,line) + minus2 = insert_reg(minus2,0,h); + minus1 = insert_reg(minus1,MIN_SCORE,line); // 0 avant + match = EQUAL(fhseq,fvseq); + + if (lpath) + { + sminus2 = insert_reg(sminus2,line-1,line); // Je ne suis pas certain de l'initialisation + sminus1 = insert_reg(sminus1,0,line); + } + +// printreg(fvseq); +// printreg(fhseq); +// printreg(match); +// printf("================================\n"); + + current = minus1; // The best score is the upper one + // It cannot be the best as set to MIN_SCORE + + left = minus1; + +// printf("Vert = "); printreg(current); + + + LSHIFT_SCORE(minus1) // I shift minus1 so now I'll compare with the left position + minus1=insert_reg(minus1,(column)->data.CMENB[line],0); + + top=minus1; + + if (lpath) + { + sleft=sminus1; // I store the path length corresponding to the upper path + LSHIFT_SCORE(sminus1) // I shift to prepare the score coming from the left side 
+ sminus1=insert_reg(sminus1,(column)->score.CMENB[line],0); + stop=sminus1; + sdiag=sminus2; + + } + +// printf("Horz = "); printreg(minus1); + + current = GET_MAX(current,minus1); // Look for the best between upper and left + +// printf("BstHV= "); printreg(current); +// +// printf("Diag = "); printreg(ADD_REG(minus2,match)); + + diag=minus2; + + // minus2 = ; // Minus2 contains the diagonal score, so I add the match reward + // Diag score are setup to 0 so this one will win on the first iteration + current = GET_MAX(current,ADD_REG(minus2,match)); + + if (lpath) + { +// printf("\n"); +// printf("current: "); +// printreg(current); +// printf("current: "); +// printreg(SUB_REG(current,match)); +// printf("diag : "); +// printreg(diag); +// printf("left : "); +// printreg(left); +// printf("top : "); +// printreg(top); + + + way = EQUAL_REG(SUB_REG(current,match),diag); + scurrent= OR_REG(AND_REG(way,sdiag), + ANDNOT_REG(way,maxvect)); +// printf("sdiag : "); +// printreg(scurrent); + way = EQUAL_REG(current,left); + scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,sleft), + ANDNOT_REG(way,maxvect))); + +// printf("sleft : "); +// printreg(scurrent); + way = EQUAL_REG(current,top); + scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,stop), + ANDNOT_REG(way,maxvect))); +// printf("stop : "); +// printreg(scurrent); + + scurrent= ADD_REG(scurrent,onevect); + + sminus2=sminus1; + sminus1=scurrent; + } +// printf("line %d :Best = ",line); printreg(current); +// +// printf("================================\n"); + + minus2=minus1; + minus1=current; + +// printf("min2 = "); printreg(minus2); +// printf("min1 = "); printreg(minus1); +// printf("================================\n"); + +// printf("\n"); +// printf("sdiag : "); +// printreg(sminus2); +// printf("scur : "); +// printreg(scurrent); +// printf("current: "); +// printreg(current); +// printf("%8s\n",seq1); +// printf("%8s\n",seq2); +// printf("================================\n"); + + + } ///// <<<<<<<<------- Fin du 
debut de la premiere bande + + +// printf("================================\n"); + + (column)->data.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(current,VSIZE-1); + + + if (lpath) + (column)->score.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(scurrent,VSIZE-1); + + + + for (band=0; band < nbands; band++) + { +// SET_H_SYMBOLE(fhseq,line,h) +// minus2 = insert_reg(minus2,0,line); +// minus1 = insert_reg(minus1,MIN_SCORE,line); // 0 avant +// h++; + + for (; line < lseq2; line++) + { +// printf("Je tourne avec line= %d \n",line); + PUSH_V_SYMBOLE(fvseq,line) + + match = EQUAL(fhseq,fvseq); + +// printreg(fvseq); +// printreg(fhseq); +// printreg(match); +// printf("================================\n"); + + current = minus1; + + left = minus1; + + // Store the last current score in extra column + (column)->data.CMENB[line-VSIZE]=EXTRACT_REG(current,VSIZE-1); + LSHIFT_SCORE(minus1) + minus1=insert_reg(minus1,(column)->data.CMENB[line],0); + + top = minus1; + +// printf("Vert = "); printreg(current); + + if (lpath) + { + sleft= sminus1; + (column)->score.CMENB[line-VSIZE]=EXTRACT_REG(scurrent,VSIZE-1); + LSHIFT_SCORE(sminus1) + sminus1=insert_reg(sminus1,(column)->score.CMENB[line],0); + stop=sminus1; + sdiag=sminus2; + } + +// printf("line = %d --> get = %d\n",line,(column)->data.CMENB[line]); + +// printf("Horz = "); printreg(minus1); + + current = GET_MAX(current,minus1); + + diag=minus2; + + current = GET_MAX(current,ADD_REG(minus2,match)); + + if (lpath) + { +// printf("\n"); +// printf("current: "); +// printreg(current); +// printf("current: "); +// printreg(SUB_REG(current,match)); +// printf("diag : "); +// printreg(diag); +// printf("left : "); +// printreg(left); +// printf("top : "); +// printreg(top); + + way = EQUAL_REG(SUB_REG(current,match),diag); + scurrent= OR_REG(AND_REG(way,sdiag), + ANDNOT_REG(way,maxvect)); + +// printf("sdiag : "); +// printreg(scurrent); + + way = EQUAL_REG(current,left); + scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,sleft), + 
ANDNOT_REG(way,maxvect))); + +// printf("sleft : "); +// printreg(scurrent); + + way = EQUAL_REG(current,top); + scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,stop), + ANDNOT_REG(way,maxvect))); + +// printf("stop : "); +// printreg(scurrent); + + scurrent= ADD_REG(scurrent,onevect); + + sminus2=sminus1; + sminus1=scurrent; + } + + minus2=minus1; + minus1=current; + +// printf("\n"); +// printf("sdiag : "); +// printreg(sminus2); +// printf("scur : "); +// printreg(scurrent); +// printf("current: "); +// printreg(current); +// printf("%8s\n",seq1); +// printf("%8s\n",seq2); + } +// printf("================================\n"); + + // end of the band and beginnig of the next one + + limit=(band==(nbands-1)) ? lastband:VSIZE; + + for (line = 0; line < limit; line++,h++) + { +// printf("Je fini avec line= %d \n",line); + + SET_H_SYMBOLE(fhseq,line,h) + PUSH_V_SYMBOLE(fvseq,line) + + + minus2 = insert_reg(minus2,MIN_SCORE,line); + minus1 = insert_reg(minus1,MIN_SCORE,line); + current = minus1; + left=minus1; + + match = EQUAL(fhseq,fvseq); + + if (lpath) + { + sminus2 = insert_reg(sminus2,lseq2-VSIZE+line,line); + sminus1 = insert_reg(sminus1,h,line); + sleft= sminus1; + } + + +// printf("\n"); +// printf("fhseq = "); printreg(fhseq); +// printf("fvseq = "); printreg(fvseq); +// printf("----------------------------------------------------------------\n"); +// printf("match = "); printreg(match); + + + (column)->data.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(current,VSIZE-1); + LSHIFT_SCORE(minus1) + minus1=insert_reg(minus1,(column)->data.CMENB[line],0); + top=minus1; + + current = GET_MAX(current,minus1); + + if (lpath) + { + (column)->score.CMENB[lseq2-VSIZE+line]=EXTRACT_REG(scurrent,VSIZE-1); + LSHIFT_SCORE(sminus1) + sminus1=insert_reg(sminus1,(column)->score.CMENB[line],0); + stop=sminus1; + sdiag=sminus2; + + way = EQUAL_REG(current,minus1); + + scurrent= OR_REG(AND_REG(way,sminus1), + ANDNOT_REG(way,scurrent)); + } + + + diag=minus2; + + current = 
GET_MAX(current,ADD_REG(minus2,match)); + + if (lpath) + { + way = EQUAL_REG(SUB_REG(current,match),diag); + scurrent= OR_REG(AND_REG(way,sdiag), + ANDNOT_REG(way,maxvect)); + + way = EQUAL_REG(current,left); + scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,sleft), + ANDNOT_REG(way,maxvect))); + + way = EQUAL_REG(current,top); + scurrent= GET_MIN(scurrent,OR_REG(AND_REG(way,stop), + ANDNOT_REG(way,maxvect))); + + scurrent= ADD_REG(scurrent,onevect); + + sminus2=sminus1; + sminus1=scurrent; + } + +// printf("currt = "); printreg(current); + + minus2=minus1; + minus1=current; + +// printf("\n"); +// printf("sdiag : "); +// printreg(sminus2); +// printf("scur : "); +// printreg(scurrent); +// printf("current: "); +// printreg(current); +// printf("%8s\n",seq1); +// printf("%8s\n",seq2); + +// printf("Je stocke line= %d la valeur %d\n",lseq2-VSIZE+line,(column)->data.CMENB[lseq2-VSIZE+line]); + } + + } + +// printf("\n"); +// printf("line = %d, h= %d, lastband = %d\n",line,h,lastband); +// printf("currt = "); printreg(current); + lcs = extract_reg(current,lastband); + + if(lpath) + *lpath= extract_reg(scurrent,lastband); +// printf("lastband = %d (%d) lcs = %d\n",lastband,lseq2,lcs); + + if (ppcolumn) + *ppcolumn=column; + else + freeColumn(column); + + return lcs; +} + +#else +int FASTLCSSCORE(const char* seq1, const char* seq2,column_pp ppcolumn,int32_t* lpath) +{ + return simpleLCS(seq1,seq2,ppcolumn,lpath); +} + +#endif /* __SSE2__ */ + diff --git a/liblcs/banded_LCS_alignment.c b/liblcs/banded_LCS_alignment.c new file mode 100644 index 0000000..0fae829 --- /dev/null +++ b/liblcs/banded_LCS_alignment.c @@ -0,0 +1,211 @@ +/* + * banded_LCS_alignment.c + * + * Created on: 7 nov. 
2012 + * Author: merciece + */ + +#include +#include +#include +#include "../libutils/utilities.h" + + +typedef struct { + int score; + int l_path; +}infos; + + +int calculateScore(char nuc1, char nuc2) +{ + return(nuc1 == nuc2); +} + +infos** banded_align(char *seq1, char *seq2, int l1, int l2, int bandLengthRight, int bandLengthLeft) +{ + int i, j; + //int c; + //double id; + int start, end; + int diag_score, delete, insert, mismatch; + int l_path, l_path_i, l_path_d; + int bestScore; + int mismatch_margin; + int stop; + int diag_index; + infos **matrix; + + l1++; + l2++; + mismatch_margin = bandLengthLeft; // the biggest one + diag_index = l1-l2; // diagonal index + stop=0; + + //fprintf(stderr,"\nseq1 = %s, seq2=%s, bandLengthR = %d, bandLengthL = %d", seq1, seq2, bandLengthRight, bandLengthLeft); + + // Matrix initialization~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + matrix = (infos**) malloc(l1 * sizeof(infos*)); + for (i = 0; i < l1; i++) + matrix[i] = (infos*) malloc(l2 * sizeof(infos)); + + for (i = 0; i < l1; i++) + for (j = 0; j < l2; j++) + { + matrix[i][j].score = 0; + matrix[i][j].l_path = 0; + } + + for (i = 0; i < l1; i++) + matrix[i][0].l_path = i; + + for (j = 0; j < l2; j++) + matrix[0][j].l_path = j; + + // Matrix initialized~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + for (i = 1; i < l1; i++) + { + start = i - bandLengthLeft; + if (start < 1) + start = 1; + end = i+bandLengthRight+1; + if (end > l2) + end = l2; + + for (j = start; j < end; j++) + { + delete = matrix[i-1][j].score; + l_path_d = matrix[i-1][j].l_path + 1; + insert = matrix[i][j-1].score; + l_path_i = matrix[i][j-1].l_path + 1; + mismatch = 0; + + diag_score = calculateScore(seq1[i-1], seq2[j-1]); + bestScore = matrix[i-1][j-1].score + diag_score; + l_path = matrix[i-1][j-1].l_path + 1; + if (diag_score == 0) // mismatch + mismatch = 1; + + if ((insert > bestScore) || ((insert == bestScore) && (l_path_i < l_path))) + { + bestScore = matrix[i][j-1].score; + l_path = l_path_i; + mismatch = 0; + } 
+ + if ((delete > bestScore) || ((delete == bestScore) && (l_path_d < l_path))) + { + bestScore = delete; + l_path = l_path_d; + mismatch = 0; + } + + /*if (((i-j) - diag_index == 0) && (mismatch == 1)) + { + //fprintf(stderr, "\nR = %d, L = %d\n", bandLengthRight, bandLengthLeft); + if (bandLengthRight+bandLengthLeft == 0) + { + stop = 1; + //fprintf(stderr, "\nBREAKING LOOPS\n"); + break; + } + if (bandLengthRight != 0) + bandLengthRight = bandLengthRight - 1; + if (bandLengthLeft != 0) + bandLengthLeft = bandLengthLeft - 1; + }*/ + + (matrix[i][j]).score = bestScore; + (matrix[i][j]).l_path = l_path; + } + + //if ((bandLengthRight + bandLengthLeft == 0) && ((matrix[i][j].l_path - matrix[i][j].score) > mismatch_margin)) + if (stop==1) + break; + } + return(matrix); +} + + +void calculateBandLength(int l1, int l2, double threshold, int* bandLengthRight, int* bandLengthLeft) +{ + (*bandLengthLeft) = round(-l1 * threshold + l1); + (*bandLengthRight) = round(-l1 * threshold + l2); + +// fprintf(stderr,"\nR=%d, L=%d", (*bandLengthRight), (*bandLengthLeft)); +} + + +double calculateId(infos** matrix, int len1, int len2) +{ + double id; + int l_ali; + int l_lcs; + + l_lcs = matrix[len1][len2].score; + l_ali = matrix[len1][len2].l_path; + + if (l_lcs == 0) + id = 0.0; + else + id = (double) l_lcs / (double) l_ali; + + //fprintf(stderr, "\n%d, %d\n", l_lcs, l_ali); + return(id); +} + + +double banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, double threshold, BOOL n, int ref, BOOL lcsmode, int16_t* address) +{ + double id; + int bandLengthRight, bandLengthLeft; + int i,j; + + char* s1; + char* s2; + + s1 = (char*) malloc(l1*sizeof(char)+1); + s2 = (char*) malloc(l2*sizeof(char)+1); + + for (i=l1-1, j=0; i>=0, j +#include +#include +#include +#include "../libutils/utilities.h" +#include "../libsse/_sse.h" + + + +/*static void printreg(__m128i r) +{ + int16_t a0,a1,a2,a3,a4,a5,a6,a7; + + a0= _MM_EXTRACT_EPI16(r,0); + a1= _MM_EXTRACT_EPI16(r,1); + a2= 
_MM_EXTRACT_EPI16(r,2); + a3= _MM_EXTRACT_EPI16(r,3); + a4= _MM_EXTRACT_EPI16(r,4); + a5= _MM_EXTRACT_EPI16(r,5); + a6= _MM_EXTRACT_EPI16(r,6); + a7= _MM_EXTRACT_EPI16(r,7); + +fprintf(stderr, "a00 :-> %7d %7d %7d %7d " + " %7d %7d %7d %7d " + "\n" + , a0,a1,a2,a3,a4,a5,a6,a7 + ); +} +*/ + +static inline int extract_reg(__m128i r, int p) +{ + switch (p) { + case 0: return(_MM_EXTRACT_EPI16(r,0)); + case 1: return(_MM_EXTRACT_EPI16(r,1)); + case 2: return(_MM_EXTRACT_EPI16(r,2)); + case 3: return(_MM_EXTRACT_EPI16(r,3)); + case 4: return(_MM_EXTRACT_EPI16(r,4)); + case 5: return(_MM_EXTRACT_EPI16(r,5)); + case 6: return(_MM_EXTRACT_EPI16(r,6)); + case 7: return(_MM_EXTRACT_EPI16(r,7)); + } + return(0); +} + + +void sse_banded_align_lcs_and_ali_len(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal, int16_t* address, double* lcs_length, int* ali_length) +{ + register int j; + int k1, k2; + int max, diff; + int l_reg, l_loc; + int line; + int numberOfRegistersPerLine; + int numberOfRegistersFor3Lines; + + BOOL even_line; + BOOL odd_line; + BOOL even_BLL; + BOOL odd_BLL; + + um128* SSEregisters; + um128* p_diag; + um128* p_gap1; + um128* p_gap2; + um128* p_diag_j; + um128* p_gap1_j; + um128* p_gap2_j; + um128 current; + + um128* l_ali_SSEregisters; + um128* p_l_ali_diag; + um128* p_l_ali_gap1; + um128* p_l_ali_gap2; + um128* p_l_ali_diag_j; + um128* p_l_ali_gap1_j; + um128* p_l_ali_gap2_j; + um128 l_ali_current; + + um128 nucs1; + um128 nucs2; + um128 scores; + + um128 boolean_reg; + + // Initialisations + + odd_BLL = bandLengthLeft & 1; + even_BLL = !odd_BLL; + + max = INT16_MAX - l1; + + numberOfRegistersPerLine = bandLengthTotal / 8; + numberOfRegistersFor3Lines = 3 * numberOfRegistersPerLine; + + SSEregisters = (um128*) calloc(numberOfRegistersFor3Lines * 2, sizeof(um128)); + l_ali_SSEregisters = SSEregisters + numberOfRegistersFor3Lines; + + // preparer registres SSE + + for (j=0; ji, scores.i); + + // Computing alignment length 
+ + l_ali_current.i = p_l_ali_diag_j->i; + boolean_reg.i = _MM_CMPGT_EPI16(p_gap1_j->i, current.i); + l_ali_current.i = _MM_OR_SI128( + _MM_AND_SI128(p_l_ali_gap1_j->i, boolean_reg.i), + _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i)); + current.i = _MM_OR_SI128( + _MM_AND_SI128(p_gap1_j->i, boolean_reg.i), + _MM_ANDNOT_SI128(boolean_reg.i, current.i)); + boolean_reg.i = _MM_AND_SI128( + _MM_CMPEQ_EPI16(p_gap1_j->i, current.i), + _MM_CMPLT_EPI16(p_l_ali_gap1_j->i, l_ali_current.i)); + l_ali_current.i = _MM_OR_SI128( + _MM_AND_SI128(p_l_ali_gap1_j->i, boolean_reg.i), + _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i)); + current.i = _MM_OR_SI128( + _MM_AND_SI128(p_gap1_j->i, boolean_reg.i), + _MM_ANDNOT_SI128(boolean_reg.i, current.i)); + boolean_reg.i = _MM_CMPGT_EPI16(p_gap2_j->i, current.i); + l_ali_current.i = _MM_OR_SI128( + _MM_AND_SI128(p_l_ali_gap2_j->i, boolean_reg.i), + _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i)); + current.i = _MM_OR_SI128( + _MM_AND_SI128(p_gap2_j->i, boolean_reg.i), + _MM_ANDNOT_SI128(boolean_reg.i, current.i)); + boolean_reg.i = _MM_AND_SI128( + _MM_CMPEQ_EPI16(p_gap2_j->i, current.i), + _MM_CMPLT_EPI16(p_l_ali_gap2_j->i, l_ali_current.i)); + l_ali_current.i = _MM_OR_SI128( + _MM_AND_SI128(p_l_ali_gap2_j->i, boolean_reg.i), + _MM_ANDNOT_SI128(boolean_reg.i, l_ali_current.i)); + current.i = _MM_OR_SI128( + _MM_AND_SI128(p_gap2_j->i, boolean_reg.i), + _MM_ANDNOT_SI128(boolean_reg.i, current.i)); + + +/* fprintf(stderr, "\nline = %d", line); + fprintf(stderr, "\nDiag, r %d : ", j); + printreg((*(p_diag_j)).i); + fprintf(stderr, "Gap1 : "); + printreg((*(p_gap1_j)).i); + fprintf(stderr, "Gap2 : "); + printreg((*(p_gap2_j)).i); + fprintf(stderr, "current : "); + printreg(current.i); + fprintf(stderr, "L ALI\nDiag r %d : ", j); + printreg((*(p_l_ali_diag_j)).i); + fprintf(stderr, "Gap1 : "); + printreg((*(p_l_ali_gap1_j)).i); + fprintf(stderr, "Gap2 : "); + printreg((*(p_l_ali_gap2_j)).i); + fprintf(stderr, "current : "); + 
printreg(l_ali_current.i); +*/ + + // diag = gap1 and gap1 = current + p_diag_j->i = p_gap1_j->i; + p_gap1_j->i = current.i; + + // l_ali_diag = l_ali_gap1 and l_ali_gap1 = l_ali_current+1 + p_l_ali_diag_j->i = p_l_ali_gap1_j->i; + p_l_ali_gap1_j->i = _MM_ADD_EPI16(l_ali_current.i, _MM_SET1_EPI16(1)); + } + + // shifts for gap2, to do only once all the registers of a line have been computed Copier gap2 puis le charger depuis la copie? + + for (j=0; j < numberOfRegistersPerLine; j++) + { + if ((odd_line && even_BLL) || (even_line && odd_BLL)) + { + p_gap2[j].i = _MM_LOADU_SI128((p_gap1[j].s16)-1); + p_l_ali_gap2[j].i = _MM_LOADU_SI128((p_l_ali_gap1[j].s16)-1); + if (j == 0) + { + p_gap2[j].i = _MM_INSERT_EPI16(p_gap2[j].i, 0, 0); + p_l_ali_gap2[j].i = _MM_INSERT_EPI16(p_l_ali_gap2[j].i, max, 0); + } + } + else + { + p_gap2[j].i = _MM_LOADU_SI128(p_gap1[j].s16+1); + p_l_ali_gap2[j].i = _MM_LOADU_SI128(p_l_ali_gap1[j].s16+1); + if (j == numberOfRegistersPerLine - 1) + { + p_gap2[j].i = _MM_INSERT_EPI16(p_gap2[j].i, 0, 7); + p_l_ali_gap2[j].i = _MM_INSERT_EPI16(p_l_ali_gap2[j].i, max, 7); + } + } + } + // end shifts for gap2 + + } + +/* /// Recovering LCS and alignment lengths \\\ */ + + // finding the location of the results in the registers : + diff = l1-l2; + if ((diff & 1) && odd_BLL) + l_loc = (int) floor((double)(bandLengthLeft) / (double)2) - floor((double)(diff) / (double)2); + else + l_loc = (int) floor((double)(bandLengthLeft) / (double)2) - ceil((double)(diff) / (double)2); + + l_reg = (int)floor((double)l_loc/(double)8.0); + //fprintf(stderr, "\nl_reg = %d, l_loc = %d\n", l_reg, l_loc); + l_loc = l_loc - l_reg*8; + + // extracting the results from the registers : + *lcs_length = extract_reg(p_gap1[l_reg].i, l_loc); + *ali_length = extract_reg(p_l_ali_gap1[l_reg].i, l_loc) - 1; + + // freeing the registers + free(SSEregisters); +} + + +double sse_banded_align_just_lcs(int16_t* seq1, int16_t* seq2, int l1, int l2, int bandLengthLeft, int bandLengthTotal) +{ + 
register int j; + int k1, k2; + int diff; + int l_reg, l_loc; + int16_t l_lcs; + int line; + int numberOfRegistersPerLine; + int numberOfRegistersFor3Lines; + + BOOL even_line; + BOOL odd_line; + BOOL even_BLL; + BOOL odd_BLL; + + um128* SSEregisters; + um128* p_diag; + um128* p_gap1; + um128* p_gap2; + um128* p_diag_j; + um128* p_gap1_j; + um128* p_gap2_j; + um128 current; + + um128 nucs1; + um128 nucs2; + um128 scores; + + // Initialisations + + odd_BLL = bandLengthLeft & 1; + even_BLL = !odd_BLL; + + numberOfRegistersPerLine = bandLengthTotal / 8; + numberOfRegistersFor3Lines = 3 * numberOfRegistersPerLine; + + SSEregisters = malloc(numberOfRegistersFor3Lines * sizeof(um128)); + + // preparer registres SSE + + for (j=0; j 0) + { + if (normalize) + { + if (reference == MINLEN) + LCSmin = threshold*l2; + else // ref = maxlen or alilen + LCSmin = threshold*l1; + } + else if (lcsmode) + LCSmin = threshold; + else if ((reference == MINLEN)) // not lcsmode + LCSmin = l2 - threshold; + else // not lcsmode and ref = maxlen or alilen + LCSmin = l1 - threshold; + } + else + LCSmin = 0; + + return(LCSmin); +} + + +int calculateSSEBandLength(int bandLengthRight, int bandLengthLeft) +{ +// *bandLengthTotal= (double) floor(bandLengthRight + bandLengthLeft) / 2.0 + 1; + int bandLengthTotal= (double)(bandLengthRight + bandLengthLeft) / 2.0 + 1.0; + + return (bandLengthTotal & (~ (int)7)) + (( bandLengthTotal & (int)7) ? 8:0); // Calcule le multiple de 8 superieur +} + + +int calculateSizeToAllocate(int maxLen, int minLen, int LCSmin) +{ + int size; + int notUsed; + + calculateBandLengths(maxLen, minLen, ¬Used, &size, LCSmin); // max size = max left band length * 2 + + //fprintf(stderr, "\nsize for address before %8 = %d", size); + + size*= 2; + size = (size & (~ (int)7)) + (( size & (int)7) ? 
8:0); // Calcule le multiple de 8 superieur + size*= 3; + size+= 16; + + //fprintf(stderr, "\nsize for address = %d", size); + + return(size*sizeof(int16_t)); +} + + +void iniSeq(int16_t* seq, int size, int16_t iniValue) +{ + int16_t *target=seq; + int16_t *end = target + (size_t)size; + + for (; target < end; target++) + *target = iniValue; +} + + +void putSeqInSeq(int16_t* seq, char* s, int l, BOOL reverse) +{ + int16_t *target=seq; + int16_t *end = target + (size_t)l; + char *source=s; + + if (reverse) + for (source=s + (size_t)l-1; target < end; target++, source--) + *target=*source; + else + for (; target < end; source++,target++) + *target=*source; +} + + +void initializeAddressWithGaps(int16_t* address, int bandLengthTotal, int bandLengthLeft, int l1) +{ + int i; + int address_00, x_address_10, address_01, address_01_shifted; + int numberOfRegistersPerLine; + int bm; + int value=INT16_MAX-l1; + + numberOfRegistersPerLine = bandLengthTotal / 8; + bm = bandLengthLeft%2; + + for (i=0; i < (3*numberOfRegistersPerLine*8); i++) + address[i] = value; + + // 0,0 set to 1 and 0,1 and 1,0 set to 2 + + address_00 = bandLengthLeft / 2; + + x_address_10 = address_00 + bm - 1; + address_01 = numberOfRegistersPerLine*8 + x_address_10; + + address_01_shifted = numberOfRegistersPerLine*16 + address_00 - bm; + + // fill address_00, address_01,+1, address_01_shifted,+1 + + address[address_00] = 1; + address[address_01] = 2; + address[address_01+1] = 2; + address[address_01_shifted] = 2; + address[address_01_shifted+1] = 2; +} + + +double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, BOOL normalize, int reference, BOOL lcsmode, int16_t* address, int LCSmin) +{ + double id; + int bandLengthRight, bandLengthLeft, bandLengthTotal; + int ali_length; + + //fprintf(stderr, "\nl1 = %d, l2 = %d\n", l1, l2); + + calculateBandLengths(l1, l2, &bandLengthRight, &bandLengthLeft, LCSmin); + + //fprintf(stderr, "\nBLL = %d, BLR = %d, LCSmin = %d\n", bandLengthLeft, 
bandLengthRight, LCSmin); + + bandLengthTotal = calculateSSEBandLength(bandLengthRight, bandLengthLeft); + + //fprintf(stderr, "\nBLT = %d\n", bandLengthTotal); + + if ((reference == ALILEN) && (normalize || !lcsmode)) + { + initializeAddressWithGaps(address, bandLengthTotal, bandLengthLeft, l1); + sse_banded_align_lcs_and_ali_len(seq1, seq2, l1, l2, bandLengthLeft, bandLengthTotal, address, &id, &ali_length); + } + else + id = sse_banded_align_just_lcs(seq1, seq2, l1, l2, bandLengthLeft, bandLengthTotal); + + //fprintf(stderr, "\nid before normalizations = %f", id); + + //fprintf(stderr, "\nlcs = %f, ali = %d\n", id, ali_length); + + if (!lcsmode && !normalize) + switch(reference) { + case ALILEN: id = ali_length - id; + break; + case MAXLEN: id = l1 - id; + break; + case MINLEN: id = l2 - id; + } + + //fprintf(stderr, "\n2>>> %f, %d\n", id, ali_length); + if (normalize) + switch(reference) { + case ALILEN: id = id / (double) ali_length; + break; + case MAXLEN: id = id / (double) l1; + break; + case MINLEN: id = id / (double) l2; + } + + //fprintf(stderr, "\nid = %f\n", id); + return(id); +} + + +double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, int16_t** address, int* buffer_size, int16_t** iseq1, + int16_t** iseq2, int* buffer_sizeS) +{ + double id; + int l1; + int l2; + int lmax, lmin; + int sizeToAllocateForBand; + int maxBLL, notUsed; + int sizeToAllocateForSeqs; + int LCSmin; + + l1 = strlen(seq1); + l2 = strlen(seq2); + + if (l2 > l1) + { + lmax = l1; + lmin = l2; + } + else + { + lmax = l2; + lmin = l1; + } + + if (!lcsmode && (normalize==TRUE)) + { + threshold = 1.0 - threshold; + } + + LCSmin = calculateLCSmin(lmax, lmin, threshold, normalize, reference, lcsmode); + +// Allocating space for matrix band if the alignment must be computed + + if ((reference == ALILEN) && ((lcsmode && normalize) || (!lcsmode))) // checking if alignment must be computed + { + sizeToAllocateForBand = 
calculateSizeToAllocate(lmax, lmin, LCSmin); + + if (sizeToAllocateForBand > (*buffer_size)) + { + // reallocating if needed + address = reallocA16Address(*address, sizeToAllocateForBand); + } + } + +// Allocating space for the int16_t arrays representing the sequences + + calculateBandLengths(lmax, lmin, ¬Used, &maxBLL, LCSmin); + + sizeToAllocateForSeqs = 2*maxBLL+lmax; + + if (sizeToAllocateForSeqs > *buffer_sizeS) + { + (*(iseq1)) = realloc((*(iseq1)), sizeToAllocateForSeqs*sizeof(int16_t)); + (*(iseq2)) = realloc((*(iseq2)), sizeToAllocateForSeqs*sizeof(int16_t)); + } + + iniSeq(*(iseq1), maxBLL, 0); + iniSeq(*(iseq2), maxBLL, 255); + *(iseq1) = *(iseq1)+maxBLL; + *(iseq2) = *(iseq2)+maxBLL; + + // longest seq must be first argument of sse_align function + if (l2 > l1) + { + putSeqInSeq((*(iseq1)), seq2, l2, TRUE); + putSeqInSeq((*(iseq2)), seq1, l1, FALSE); + id = sse_banded_lcs_align(*(iseq1), *(iseq2), l2, l1, normalize, reference, lcsmode, *address, LCSmin); + } + else + { + putSeqInSeq((*(iseq1)), seq1, l1, TRUE); + putSeqInSeq((*(iseq2)), seq2, l2, FALSE); + id = sse_banded_lcs_align(*(iseq1), *(iseq2), l1, l2, normalize, reference, lcsmode, *address, LCSmin); + } + + return(id); +} + + +int prepareTablesForSumathings(int lmax, int lmin, double threshold, BOOL normalize, int reference, BOOL lcsmode, + int16_t** address, int16_t** iseq1, int16_t** iseq2) +{ + int sizeToAllocateForBand; + int maxBLL; + int notUsed; + int sizeToAllocateForSeqs; + int LCSmin; + + LCSmin = calculateLCSmin(lmax, lmin, threshold, normalize, reference, lcsmode); + + // Allocating space for matrix band if the alignment must be computed + + if ((reference == ALILEN) && (normalize || !lcsmode)) // checking if alignment must be computed + { + sizeToAllocateForBand = calculateSizeToAllocate(lmax, lmin, LCSmin); + (*(address)) = getA16Address(sizeToAllocateForBand); + } + + // Allocating space for the int16_t arrays representing the sequences + + calculateBandLengths(lmax, lmin, 
¬Used, &maxBLL, LCSmin); + + sizeToAllocateForSeqs = 2*maxBLL+lmax; + (*(iseq1)) = malloc(sizeToAllocateForSeqs*sizeof(int16_t)); + (*(iseq2)) = malloc(sizeToAllocateForSeqs*sizeof(int16_t)); + + iniSeq(*(iseq1), maxBLL, 0); + iniSeq(*(iseq2), maxBLL, 255); + *(iseq1) = *(iseq1)+maxBLL; + *(iseq2) = *(iseq2)+maxBLL; + + return(maxBLL+lmax); +} + + +double alignForSumathings(char* seq1, int16_t* iseq1, char* seq2, int16_t* iseq2, int l1, int l2, + BOOL normalize, int reference, BOOL lcsmode, int16_t* address, int sizeForSeqs, int LCSmin) +{ + double id; + + iniSeq(iseq1, sizeForSeqs, 0); + iniSeq(iseq2, sizeForSeqs, 255); + + if (l2 > l1) + { + putSeqInSeq(iseq1, seq2, l2, TRUE); + putSeqInSeq(iseq2, seq1, l1, FALSE); + id = sse_banded_lcs_align(iseq1, iseq2, l2, l1, normalize, reference, lcsmode, address, LCSmin); + } + else + { + putSeqInSeq(iseq1, seq1, l1, TRUE); + putSeqInSeq(iseq2, seq2, l2, FALSE); + id = sse_banded_lcs_align(iseq1, iseq2, l1, l2, normalize, reference, lcsmode, address, LCSmin); + } + + return(id); +} + diff --git a/liblcs/sse_banded_LCS_alignment.h b/liblcs/sse_banded_LCS_alignment.h new file mode 100644 index 0000000..95f50b0 --- /dev/null +++ b/liblcs/sse_banded_LCS_alignment.h @@ -0,0 +1,24 @@ +/* + * sse_banded_LCS_alignment.h + * + * Created on: november 29, 2012 + * Author: mercier + */ + +#ifndef SSE_BANDED_LCS_ALIGNMENT_H_ +#define SSE_BANDED_LCS_ALIGNMENT_H_ +#include + +double sse_banded_lcs_align(int16_t* seq1, int16_t* seq2, int l1, int l2, BOOL normalize, int reference, BOOL lcsmode, int16_t* address, int LCSmin); +int calculateSizeToAllocate(int maxLen, int minLen, int LCSmin); +void calculateThresholdFromErrorNumber(int error, int length, double* threshold); +void iniSeq(int16_t* seq, int size, int16_t iniValue); +void putSeqInSeq(int16_t* seq, char* s, int l, BOOL reverse); +double generic_sse_banded_lcs_align(char* seq1, char* seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, int16_t** address, int* 
buffer_size, int16_t** iseq1, + int16_t** iseq2, int* buffer_sizeS); +int prepareTablesForSumathings(int lmax, int lmin, double threshold, BOOL normalize, int reference, BOOL lcsmode, + int16_t** address, int16_t** iseq1, int16_t** iseq2); +double alignForSumathings(char* seq1, int16_t* iseq1, char* seq2, int16_t* iseq2, int l1, int l2, BOOL normalize, + int reference, BOOL lcsmode, int16_t* address, int sizeForSeqs, int LCSmin); +int calculateLCSmin(int l1, int l2, double threshold, BOOL normalize, int reference, BOOL lcsmode); +#endif diff --git a/liblcs/upperband.c b/liblcs/upperband.c new file mode 100644 index 0000000..4948bd0 --- /dev/null +++ b/liblcs/upperband.c @@ -0,0 +1,382 @@ +#include "../libsse/_sse.h" +#include +#include +#include "../libutils/utilities.h" +#include "../libfasta/sequence.h" +#include "sse_banded_LCS_alignment.h" + + +inline static uchar_v hash4m128(uchar_v frag) +{ + uchar_v words; + + vUInt8 mask_03= _MM_SET1_EPI8(0x03); // charge le registre avec 16x le meme octet + vUInt8 mask_FC= _MM_SET1_EPI8(0xFC); + + frag.m = _MM_SRLI_EPI64(frag.m,1); // shift logic a droite sur 2 x 64 bits + frag.m = _MM_AND_SI128(frag.m,mask_03); // and sur les 128 bits + + + words.m= _MM_SLLI_EPI64(frag.m,2); + words.m= _MM_AND_SI128(words.m,mask_FC); + frag.m = _MM_SRLI_SI128(frag.m,1); + words.m= _MM_OR_SI128(words.m,frag.m); + + words.m= _MM_SLLI_EPI64(words.m,2); + words.m= _MM_AND_SI128(words.m,mask_FC); + frag.m = _MM_SRLI_SI128(frag.m,1); + words.m= _MM_OR_SI128(words.m,frag.m); + + words.m= _MM_SLLI_EPI64(words.m,2); + words.m= _MM_AND_SI128(words.m,mask_FC); + frag.m = _MM_SRLI_SI128(frag.m,1); + words.m= _MM_OR_SI128(words.m,frag.m); + + return words; +} + +#ifdef __SSE2__ + +inline static int anyzerom128(vUInt8 data) +{ + vUInt8 mask_00= _MM_SETZERO_SI128(); + uint64_v tmp; + tmp.m = _MM_CMPEQ_EPI8(data,mask_00); + return (int)(tmp.c[0]!=0 || tmp.c[1]!=0); +} + +#else + +inline static int anyzerom128(vUInt8 data) +{ + int i; + um128 tmp; + tmp.i 
= data; + for (i=0;i<8;i++) + if (tmp.s8[i]==0) + return 1; + return 0; +} + +#endif + +inline static void dumpm128(unsigned short *table,vUInt8 data) +{ + memcpy(table,&data,16); +} + +/** + * Compute 4mer occurrence table from a DNA sequence + * + * sequence : a pointer to the null terminated nuc sequence + * table : a pointer to a 256 cells unisgned char table for + * storing the occurrence table + * count : pointer to an int value used as a return value + * containing the global word counted + * + * returns the number of words observed in the sequence with a + * count greater than 255. + */ + +int buildTable(const char* sequence, unsigned char *table, int *count) +{ + int overflow = 0; + int wc=0; + int i; + vUInt8 mask_00= _MM_SETZERO_SI128(); + + uchar_v frag; + uchar_v words; + uchar_v zero; + + char* s; + + s=(char*)sequence; + + memset(table,0,256*sizeof(unsigned char)); + + // encode ascii sequence with A : 00 C : 01 T: 10 G : 11 + + for(frag.m=_MM_LOADU_SI128((vUInt8*)s); + ! 
anyzerom128(frag.m); + s+=12,frag.m=_MM_LOADU_SI128((vUInt8*)s)) + { + words= hash4m128(frag); + + // printf("%d %d %d %d\n",words.c[0],words.c[1],words.c[2],words.c[3]); + + if (table[words.c[0]]<255) table[words.c[0]]++; else overflow++; + if (table[words.c[1]]<255) table[words.c[1]]++; else overflow++; + if (table[words.c[2]]<255) table[words.c[2]]++; else overflow++; + if (table[words.c[3]]<255) table[words.c[3]]++; else overflow++; + if (table[words.c[4]]<255) table[words.c[4]]++; else overflow++; + if (table[words.c[5]]<255) table[words.c[5]]++; else overflow++; + if (table[words.c[6]]<255) table[words.c[6]]++; else overflow++; + if (table[words.c[7]]<255) table[words.c[7]]++; else overflow++; + if (table[words.c[8]]<255) table[words.c[8]]++; else overflow++; + if (table[words.c[9]]<255) table[words.c[9]]++; else overflow++; + if (table[words.c[10]]<255) table[words.c[10]]++; else overflow++; + if (table[words.c[11]]<255) table[words.c[11]]++; else overflow++; + + wc+=12; + } + + zero.m=_MM_CMPEQ_EPI8(frag.m,mask_00); + //printf("frag=%d %d %d %d\n",frag.c[0],frag.c[1],frag.c[2],frag.c[3]); + //printf("zero=%d %d %d %d\n",zero.c[0],zero.c[1],zero.c[2],zero.c[3]); + words = hash4m128(frag); + + if (zero.c[0]+zero.c[1]+zero.c[2]+zero.c[3]==0) + for(i=0;zero.c[i+3]==0;i++,wc++) + if (table[words.c[i]]<255) table[words.c[i]]++; else overflow++; + + if (count) *count=wc; + return overflow; +} + +static inline vUInt16 partialminsum(vUInt8 ft1,vUInt8 ft2) +{ + vUInt8 mini; + vUInt16 minilo; + vUInt16 minihi; + vUInt8 mask_00= _MM_SETZERO_SI128(); + + mini = _MM_MIN_EPU8(ft1,ft2); + minilo = _MM_UNPACKLO_EPI8(mini,mask_00); + minihi = _MM_UNPACKHI_EPI8(mini,mask_00); + + return _MM_ADDS_EPU16(minilo,minihi); +} + +int compareTable(unsigned char *t1, int over1, unsigned char* t2, int over2) +{ + vUInt8 ft1; + vUInt8 ft2; + vUInt8 *table1=(vUInt8*)t1; + vUInt8 *table2=(vUInt8*)t2; + ushort_v summini; + int i; + int total; + + ft1 = _MM_LOADU_SI128(table1); + ft2 = 
_MM_LOADU_SI128(table2); + summini.m = partialminsum(ft1,ft2); + table1++; + table2++; + + + for (i=1;i<16;i++,table1++,table2++) + { + ft1 = _MM_LOADU_SI128(table1); + ft2 = _MM_LOADU_SI128(table2); + summini.m = _MM_ADDS_EPU16(summini.m,partialminsum(ft1,ft2)); + + } + + // Finishing the sum process + + summini.m = _MM_ADDS_EPU16(summini.m,_MM_SRLI_SI128(summini.m,8)); // sum the 4 firsts with the 4 lasts + summini.m = _MM_ADDS_EPU16(summini.m,_MM_SRLI_SI128(summini.m,4)); + + total = summini.c[0]+summini.c[1]; + total+= (over1 < over2) ? over1:over2; + + return total; +} + +int threshold4(int wordcount,double identity) +{ + int error; + int lmax; + + wordcount+=3; + error = (int)floor((double)wordcount * ((double)1.0-identity)); + lmax = (wordcount - error) / (error + 1); + if (lmax < 4) + return 0; + return (lmax - 3) \ + * (error + 1) \ + + ((wordcount - error) % (error + 1)); +} + + +int thresholdLCS4(int32_t reflen,int32_t lcs) +{ + int nbfrag; + int smin; + int R; + int common; + + nbfrag = (reflen - lcs)*2 + 1; + smin = lcs/nbfrag; + R = lcs - smin * nbfrag; + common = MAX(smin - 2,0) * R + MAX(smin - 3,0) * (nbfrag - R); + return common; +} + + +int hashDB(fastaSeqCount db) +{ + int32_t i; + int32_t count; + + fprintf(stderr,"Indexing dataset..."); + + for (i=0; i < db.count;i++) + { + db.fastaSeqs[i].table = util_malloc((256)*sizeof(unsigned char), __FILE__, __LINE__); + db.fastaSeqs[i].over = buildTable((const char*)(db.fastaSeqs[i].sequence), + db.fastaSeqs[i].table, + &count); + } + + fprintf(stderr," : Done\n"); + + return db.count; +} + + +BOOL isPossible(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode) +{ + int32_t reflen; + int32_t maxlen; + int32_t lcs; + int32_t mincount; + + if (seq1->length < 12 || seq2->length < 12) + return TRUE; + + maxlen = MAX(seq1->length,seq2->length); + + if (reference==ALILEN || reference==MAXLEN) + reflen = maxlen; + else + reflen = MIN(seq1->length,seq2->length); + + 
if (normalize) + { + if (! lcsmode) + threshold = 1. - threshold; + + lcs = (int32_t)ceil((double)reflen * threshold); + } + else + { + if (! lcsmode) + threshold = reflen - threshold; + lcs = (int32_t) threshold; + } + + if (lcs > MIN(seq1->length,seq2->length)) + return FALSE; + + mincount = thresholdLCS4(maxlen,lcs); + + return compareTable(seq1->table,seq1->over,seq2->table,seq2->over) >=mincount; +} + + +BOOL isPossibleSumathings(fastaSeqPtr seq1, fastaSeqPtr seq2, int l1, int l2, double threshold, BOOL normalize, int reference, BOOL lcsmode) +{ // optimized version of the filter for sumaclust and sumatra + + int32_t reflen; + int32_t lcs; + int32_t mincount; + + if (l1 < 12 || l2 < 12) + return TRUE; + + if (reference==ALILEN || reference==MAXLEN) + reflen = l1; + else + reflen = l2; + + if (normalize) + lcs = (int32_t)ceil((double)reflen * threshold); + else + { + if (! lcsmode) + threshold = reflen - threshold; + lcs = (int32_t) threshold; + } + + mincount = thresholdLCS4(l1,lcs); + + return compareTable(seq1->table,seq1->over,seq2->table,seq2->over) >=mincount; +} + + +void filters(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, double* score, int* LCSmin) +{ // score takes value -1 if filters are passed. score must be initialized in calling function. 
+ int l1; + int l2; + + l1 = seq1->length; + l2 = seq2->length; + + if (l1 >= l2) + { + *LCSmin = calculateLCSmin(l1, l2, threshold, normalize, reference, lcsmode); + if (l2 >= *LCSmin) + { + if (isPossibleSumathings(seq1, seq2, l1, l2, threshold, normalize, reference, lcsmode)) // 4-mers filter + *score = -1.0; + } + } + else + { + *LCSmin = calculateLCSmin(l2, l1, threshold, normalize, reference, lcsmode); + if (l1 >= *LCSmin) + { + if (isPossibleSumathings(seq2, seq1, l2, l1, threshold, normalize, reference, lcsmode)) // 4-mers filter + *score = -1.0; + } + } +} + + +void filtersSumatra(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, double* score, int* LCSmin) +{ // score takes value -2 if filters are not passed, -1 if filters are passed and >= 0 with max score if the 2 sequences are identical. + + int l1; + int l2; + l1 = seq1->length; + + *score = -2.0; + + if (strcmp(seq1->sequence, seq2->sequence) == 0) // the 2 sequences are identical + { + if (lcsmode && normalize) + *score = 1.0; + else if (!lcsmode) + *score = 0.0; + else + *score = l1; + } + + else if (threshold != 0) + { + l2 = seq2->length; + + if (l1 >= l2) + { + *LCSmin = calculateLCSmin(l1, l2, threshold, normalize, reference, lcsmode); + if (l2 >= *LCSmin) + { + if (isPossibleSumathings(seq1, seq2, l1, l2, threshold, normalize, reference, lcsmode)) // 4-mers filter + *score = -1.0; + } + } + else + { + *LCSmin = calculateLCSmin(l2, l1, threshold, normalize, reference, lcsmode); + if (l1 >= *LCSmin) + { + if (isPossibleSumathings(seq2, seq1, l2, l1, threshold, normalize, reference, lcsmode)) // 4-mers filter + *score = -1.0; + } + } + } + else + *LCSmin = 0; +} diff --git a/liblcs/upperband.h b/liblcs/upperband.h new file mode 100644 index 0000000..cded693 --- /dev/null +++ b/liblcs/upperband.h @@ -0,0 +1,18 @@ + +#ifndef UPPERBAND_H_ +#define UPPERBAND_H_ + + +int buildTable(const char *sequence, unsigned char *table, int *count); +int 
compareTable(unsigned char *t1, int over1, unsigned char* t2, int over2); +int threshold4(int wordcount,double identity); +int thresholdLCS4(int32_t reflen,int32_t lcs); + + +int hashDB(fastaSeqCount); +BOOL isPossible(fastaSeqPtr, fastaSeqPtr, BOOL, int, double, BOOL); +BOOL isPossibleSumathings(fastaSeqPtr seq1, fastaSeqPtr seq2, int l1, int l2, double threshold, BOOL normalize, int reference, BOOL lcsmode); +void filters(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, double* score, int* LCSmin); +void filtersSumatra(fastaSeqPtr seq1, fastaSeqPtr seq2, double threshold, BOOL normalize, int reference, BOOL lcsmode, double* score, int* LCSmin); +#endif + diff --git a/libsse/.DS_Store b/libsse/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 + +#include +#ifdef __SSE2__ +#include +#else +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +#endif /* __SSE2__ */ + +#ifndef MAX +#define MAX(x,y) (((x)>(y)) ? (x):(y)) +#define MIN(x,y) (((x)<(y)) ? 
(x):(y)) +#endif + +#define ALIGN __attribute__((aligned(16))) +typedef __m128i vUInt8; +typedef __m128i vInt8; + +typedef __m128i vUInt16; +typedef __m128i vInt16; + +typedef __m128i vUInt64; + +typedef union +{ + __m128i i; + int64_t s64[ 2]; + int16_t s16[ 8]; + int8_t s8 [16]; + uint8_t u8 [16]; + uint16_t u16[8 ]; + uint32_t u32[4 ]; + uint64_t u64[2 ]; +} um128; + +typedef union + { + vUInt8 m; + uint8_t c[16]; + } uchar_v; + +typedef union + { + vUInt16 m; + uint16_t c[8]; + } ushort_v; + +typedef union + { + vUInt64 m; + uint64_t c[2]; + } uint64_v; + + +#ifdef __SSE2__ + +static inline int8_t _s2_extract_epi8(__m128i r, const int p) +{ +#define ACTIONP(r,x) return _mm_extract_epi16(r,x) & 0xFF +#define ACTIONI(r,x) return _mm_extract_epi16(r,x) >> 8 + switch (p) { + case 0: ACTIONP(r,0); + case 1: ACTIONI(r,0); + case 2: ACTIONP(r,1); + case 3: ACTIONI(r,1); + case 4: ACTIONP(r,2); + case 5: ACTIONI(r,2); + case 6: ACTIONP(r,3); + case 7: ACTIONI(r,3); + case 8: ACTIONP(r,4); + case 9: ACTIONI(r,4); + case 10: ACTIONP(r,5); + case 11: ACTIONI(r,5); + case 12: ACTIONP(r,6); + case 13: ACTIONI(r,6); + case 14: ACTIONP(r,7); + case 15: ACTIONI(r,7); + } +#undef ACTIONP +#undef ACTIONI + + return 0; +} + +static inline __m128i _s2_max_epi8(__m128i a, __m128i b) +{ + __m128i mask = _mm_cmpgt_epi8( a, b ); + a = _mm_and_si128 (a,mask ); + b = _mm_andnot_si128(mask,b); + return _mm_or_si128(a,b); +} + +static inline __m128i _s2_min_epi8(__m128i a, __m128i b) +{ + __m128i mask = _mm_cmplt_epi8( a, b ); + a = _mm_and_si128 (a,mask ); + b = _mm_andnot_si128(mask,b); + return _mm_or_si128(a,b); +} + +static inline __m128i _s2_insert_epi8(__m128i r, int b, const int p) +{ +#define ACTIONP(r,x) return _mm_insert_epi16(r,(_mm_extract_epi16(r,x) & 0xFF00) | (b & 0x00FF),x) +#define ACTIONI(r,x) return _mm_insert_epi16(r,(_mm_extract_epi16(r,x) & 0x00FF) | ((b << 8)& 0xFF00),x) + switch (p) { + case 0: ACTIONP(r,0); + case 1: ACTIONI(r,0); + case 2: ACTIONP(r,1); + case 
3: ACTIONI(r,1); + case 4: ACTIONP(r,2); + case 5: ACTIONI(r,2); + case 6: ACTIONP(r,3); + case 7: ACTIONI(r,3); + case 8: ACTIONP(r,4); + case 9: ACTIONI(r,4); + case 10: ACTIONP(r,5); + case 11: ACTIONI(r,5); + case 12: ACTIONP(r,6); + case 13: ACTIONI(r,6); + case 14: ACTIONP(r,7); + case 15: ACTIONI(r,7); + } +#undef ACTIONP +#undef ACTIONI + + return _mm_setzero_si128(); +} + +// Fill a SSE Register with 16 time the same 8bits integer value +#define _MM_SET1_EPI8(x) _mm_set1_epi8(x) +#define _MM_INSERT_EPI8(r,x,i) _s2_insert_epi8((r),(x),(i)) +#define _MM_CMPEQ_EPI8(x,y) _mm_cmpeq_epi8((x),(y)) +#define _MM_CMPGT_EPI8(x,y) _mm_cmpgt_epi8((x),(y)) +#define _MM_CMPLT_EPI8(x,y) _mm_cmplt_epi8((x),(y)) +#define _MM_MAX_EPI8(x,y) _s2_max_epi8((x),(y)) +#define _MM_MIN_EPI8(x,y) _s2_min_epi8((x),(y)) +#define _MM_ADD_EPI8(x,y) _mm_add_epi8((x),(y)) +#define _MM_SUB_EPI8(x,y) _mm_sub_epi8((x),(y)) +#define _MM_EXTRACT_EPI8(r,p) _s2_extract_epi8((r),(p)) + +#define _MM_MIN_EPU8(x,y) _mm_min_epu8((x),(y)) + +// Fill a SSE Register with 8 time the same 16bits integer value +#define _MM_SET1_EPI16(x) _mm_set1_epi16(x) + +#define _MM_INSERT_EPI16(r,x,i) _mm_insert_epi16((r),(x),(i)) +#define _MM_CMPEQ_EPI16(x,y) _mm_cmpeq_epi16((x),(y)) +#define _MM_CMPGT_EPI16(x,y) _mm_cmpgt_epi16((x),(y)) +#define _MM_CMPGT_EPU16(x,y) _mm_cmpgt_epu16((x),(y)) // n'existe pas ?? 
+#define _MM_CMPLT_EPI16(x,y) _mm_cmplt_epi16((x),(y)) +#define _MM_MAX_EPI16(x,y) _mm_max_epi16((x),(y)) +#define _MM_MIN_EPI16(x,y) _mm_min_epi16((x),(y)) +#define _MM_ADD_EPI16(x,y) _mm_add_epi16((x),(y)) +#define _MM_SUB_EPI16(x,y) _mm_sub_epi16((x),(y)) +#define _MM_EXTRACT_EPI16(r,p) _mm_extract_epi16((r),(p)) +#define _MM_UNPACKLO_EPI8(a,b) _mm_unpacklo_epi8((a),(b)) +#define _MM_UNPACKHI_EPI8(a,b) _mm_unpackhi_epi8((a),(b)) +#define _MM_ADDS_EPU16(x,y) _mm_adds_epu16((x),(y)) + +// Multiplication +#define _MM_MULLO_EPI16(x,y) _mm_mullo_epi16((x), (y)) + +#define _MM_SRLI_EPI64(r,x) _mm_srli_epi64((r),(x)) +#define _MM_SLLI_EPI64(r,x) _mm_slli_epi64((r),(x)) + +// Set a SSE Register to 0 +#define _MM_SETZERO_SI128() _mm_setzero_si128() + +#define _MM_AND_SI128(x,y) _mm_and_si128((x),(y)) +#define _MM_ANDNOT_SI128(x,y) _mm_andnot_si128((x),(y)) +#define _MM_OR_SI128(x,y) _mm_or_si128((x),(y)) +#define _MM_XOR_SI128(x,y) _mm_xor_si128((x),(y)) +#define _MM_SLLI_SI128(r,s) _mm_slli_si128((r),(s)) +#define _MM_SRLI_SI128(r,s) _mm_srli_si128((r),(s)) + +// Load a SSE register from an unaligned address +#define _MM_LOADU_SI128(x) _mm_loadu_si128(x) + +// Load a SSE register from an aligned address (/!\ not defined when SSE not available) +#define _MM_LOAD_SI128(x) _mm_load_si128(x) + +// #define _MM_UNPACKLO_EPI8(x,y) _mm_unpacklo_epi8((x),(y)) + +#else /* __SSE2__ Not defined */ + +static inline __m128i _em_set1_epi8(int x) +{ + um128 a; + + x&=0xFF; + a.s8[0]=x; + a.s8[1]=x; + a.u16[1]=a.u16[0]; + a.u32[1]=a.u32[0]; + a.u64[1]=a.u64[0]; + + return a.i; +} + +static inline __m128i _em_insert_epi8(__m128i r, int x, const int i) +{ + um128 a; + a.i=r; + a.s8[i]=x & 0xFF; + return a.i; +} + +static inline __m128i _em_cmpeq_epi8(__m128i a, __m128i b) +{ + um128 x; + um128 y; + um128 r; + + x.i=a; + y.i=b; + +#define R(z) r.s8[z]=(x.s8[z]==y.s8[z]) ? 
0xFF:0 + R(0); + R(1); + R(2); + R(3); + R(4); + R(5); + R(6); + R(7); + R(8); + R(9); + R(10); + R(11); + R(12); + R(13); + R(14); + R(15); +#undef R + + return r.i; +} + +static inline __m128i _em_cmpgt_epi8(__m128i a, __m128i b) +{ + um128 x; + um128 y; + um128 r; + + x.i=a; + y.i=b; + +#define R(z) r.s8[z]=(x.s8[z]>y.s8[z]) ? 0xFF:0 + R(0); + R(1); + R(2); + R(3); + R(4); + R(5); + R(6); + R(7); + R(8); + R(9); + R(10); + R(11); + R(12); + R(13); + R(14); + R(15); +#undef R + + return r.i; +} + +static inline __m128i _em_cmplt_epi8(__m128i a, __m128i b) +{ + um128 x; + um128 y; + um128 r; + + x.i=a; + y.i=b; + +#define R(z) r.s8[z]=(x.s8[z]y.s16[z]) ? 0xFFFF:0 + R(0); + R(1); + R(2); + R(3); + R(4); + R(5); + R(6); + R(7); +#undef R + + return r.i; +} + +static inline __m128i _em_cmplt_epi16(__m128i a, __m128i b) +{ + um128 x; + um128 y; + um128 r; + + x.i=a; + y.i=b; + +#define R(z) r.s16[z]=(x.s16[z]>=b; + x.s64[1]>>=b; + + return x.i; +} + +static inline __m128i _em_slli_epi64(__m128i a, int b) +{ + um128 x; + + x.i=a; + + x.s64[0]<<=b; + x.s64[1]<<=b; + + return x.i; +} + +static inline __m128i _em_setzero_si128() +{ + um128 x; + + x.s64[0]=x.s64[1]=0; + + return x.i; +} + +static inline __m128i _em_and_si128(__m128i a, __m128i b) +{ + um128 x; + um128 y; + um128 r; + + x.i=a; + y.i=b; + + +#define R(z) r.u64[z]=x.u64[z] & y.u64[z] + R(0); + R(1); +#undef R + + return r.i; +} + +static inline __m128i _em_andnot_si128(__m128i a, __m128i b) +{ + um128 x; + um128 y; + um128 r; + + x.i=a; + y.i=b; + + +#define R(z) r.u64[z]=(~x.u64[z]) & y.u64[z] + R(0); + R(1); +#undef R + + return r.i; +} + +static inline __m128i _em_or_si128(__m128i a, __m128i b) +{ + um128 x; + um128 y; + um128 r; + + x.i=a; + y.i=b; + +#define R(z) r.u64[z]=x.u64[z] | y.u64[z] + R(0); + R(1); +#undef R + + return r.i; +} + +static inline __m128i _em_xor_si128(__m128i a, __m128i b) +{ + um128 x; + um128 y; + um128 r; + + x.i=a; + y.i=b; + +#define R(z) r.u64[z]=x.u64[z] ^ y.u64[z] + R(0); + 
R(1); +#undef R + + return r.i; +} + +static inline __m128i _em_slli_si128(__m128i a, int b) +{ + um128 x; + + x.i=a; + +#define R(z) x.u8[z]=(z>=b) ? x.u8[z-b]:0 + R(15); + R(14); + R(13); + R(12); + R(11); + R(10); + R(9); + R(8); + R(7); + R(6); + R(5); + R(4); + R(3); + R(2); + R(1); + R(0); +#undef R + + return x.i; +} + +static inline __m128i _em_srli_si128(__m128i a, int b) +{ + um128 x; + + x.i=a; + +#define R(z) x.u8[z]=((b+z) > 15) ? 0:x.u8[z+b] + R(0); + R(1); + R(2); + R(3); + R(4); + R(5); + R(6); + R(7); + R(8); + R(9); + R(10); + R(11); + R(12); + R(13); + R(14); + R(15); +#undef R + + return x.i; +} + +inline static __m128i _em_loadu_si128(__m128i const *P) +{ + um128 tmp; + um128 *pp=(um128*)P; + + tmp.u8[0]=(*pp).u8[0]; + tmp.u8[1]=(*pp).u8[1]; + tmp.u8[2]=(*pp).u8[2]; + tmp.u8[3]=(*pp).u8[3]; + tmp.u8[4]=(*pp).u8[4]; + tmp.u8[5]=(*pp).u8[5]; + tmp.u8[6]=(*pp).u8[6]; + tmp.u8[7]=(*pp).u8[7]; + tmp.u8[8]=(*pp).u8[8]; + tmp.u8[9]=(*pp).u8[9]; + tmp.u8[10]=(*pp).u8[10]; + tmp.u8[11]=(*pp).u8[11]; + tmp.u8[12]=(*pp).u8[12]; + tmp.u8[13]=(*pp).u8[13]; + tmp.u8[14]=(*pp).u8[14]; + tmp.u8[15]=(*pp).u8[15]; + return tmp.i; +} + + +#define _MM_SET1_EPI8(x) _em_set1_epi8(x) +#define _MM_INSERT_EPI8(r,x,i) _em_insert_epi8((r),(x),(i)) +#define _MM_CMPEQ_EPI8(x,y) _em_cmpeq_epi8((x),(y)) +#define _MM_CMPGT_EPI8(x,y) _em_cmpgt_epi8((x),(y)) +#define _MM_CMPLT_EPI8(x,y) _em_cmplt_epi8((x),(y)) +#define _MM_MAX_EPI8(x,y) _em_max_epi8((x),(y)) +#define _MM_MIN_EPI8(x,y) _em_min_epi8((x),(y)) +#define _MM_ADD_EPI8(x,y) _em_add_epi8((x),(y)) +#define _MM_SUB_EPI8(x,y) _em_sub_epi8((x),(y)) +#define _MM_EXTRACT_EPI8(r,p) _em_extract_epi8((r),(p)) + +#define _MM_MIN_EPU8(x,y) _em_min_epu8((x),(y)) + +#define _MM_SET1_EPI16(x) _em_set1_epi16(x) +#define _MM_INSERT_EPI16(r,x,i) _em_insert_epi16((r),(x),(i)) +#define _MM_CMPEQ_EPI16(x,y) _em_cmpeq_epi16((x),(y)) +#define _MM_CMPGT_EPI16(x,y) _em_cmpgt_epi16((x),(y)) +#define _MM_CMPLT_EPI16(x,y) _em_cmplt_epi16((x),(y)) 
+#define _MM_MAX_EPI16(x,y) _em_max_epi16((x),(y)) +#define _MM_MIN_EPI16(x,y) _em_min_epi16((x),(y)) +#define _MM_ADD_EPI16(x,y) _em_add_epi16((x),(y)) +#define _MM_SUB_EPI16(x,y) _em_sub_epi16((x),(y)) +#define _MM_EXTRACT_EPI16(r,p) _em_extract_epi16((r),(p)) +#define _MM_UNPACKLO_EPI8(a,b) _em_unpacklo_epi8((a),(b)) +#define _MM_UNPACKHI_EPI8(a,b) _em_unpackhi_epi8((a),(b)) +#define _MM_ADDS_EPU16(x,y) _em_adds_epu16((x),(y)) + +#define _MM_SRLI_EPI64(r,x) _em_srli_epi64((r),(x)) +#define _MM_SLLI_EPI64(r,x) _em_slli_epi64((r),(x)) + +#define _MM_SETZERO_SI128() _em_setzero_si128() + +#define _MM_AND_SI128(x,y) _em_and_si128((x),(y)) +#define _MM_ANDNOT_SI128(x,y) _em_andnot_si128((x),(y)) +#define _MM_OR_SI128(x,y) _em_or_si128((x),(y)) +#define _MM_XOR_SI128(x,y) _em_xor_si128((x),(y)) +#define _MM_SLLI_SI128(r,s) _em_slli_si128((r),(s)) +#define _MM_SRLI_SI128(r,s) _em_srli_si128((r),(s)) + +#define _MM_LOADU_SI128(x) _em_loadu_si128(x) +#define _MM_LOAD_SI128(x) _em_loadu_si128(x) + + +#endif /* __SSE2__ */ + +#define _MM_NOT_SI128(x) _MM_XOR_SI128((x),(_MM_SET1_EPI8(0xFFFF))) + +#endif diff --git a/libutils/.DS_Store b/libutils/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 +#include +#include "debug.h" + +char* int2bin(int64_t i,size_t bits) +{ + static char str[65]; + uint64_t u; + + if (bits > 64) + return NULL; + + str[bits] = 0; + + // type punning because signed shift is implementation-defined + u = *(unsigned *)&i; + + for(; bits--; u >>= 1) + str[bits] = u & 1 ? '1' : '0'; + + return str; +} + + + diff --git a/libutils/debug.h b/libutils/debug.h new file mode 100644 index 0000000..6462cb5 --- /dev/null +++ b/libutils/debug.h @@ -0,0 +1,25 @@ +/* + * debug.h + * + * Created on: 4 sept. 
2012 + * Author: coissac + */ + +#ifndef DEBUG_H_ +#define DEBUG_H_ + +#ifdef DEBUG +#undef DEBUG +#endif + +#ifdef DEBUG_ON +#define DEBUG(format,...) fprintf(stderr,"[%s:%d] : "format"\n",__FILE__,__LINE__,__VA_ARGS__) +#else +#define DEBUG(format,...) +#endif +#include + +char * int2bin(int64_t i,size_t bits); + + +#endif /* DEBUG_H_ */ diff --git a/libutils/utilities.c b/libutils/utilities.c new file mode 100644 index 0000000..71c3ce6 --- /dev/null +++ b/libutils/utilities.c @@ -0,0 +1,230 @@ +/** + * FileName: utilities.c + * Author: Tiayyba Riaz + * Description: C file for miscellenious functions and macros + * **/ + +#include "utilities.h" +#include +#include +#include + +/* + * Function Name: errorAbort(int errorCode, char* errorMsg, char* fileName, int lineNumber) + * Description: Reports an error on standard error and aborts + */ +void errorAbort(int32_t errorCode, char* errorMsg, char* fileName, int32_t lineNumber) +{ + fprintf(stderr,"Error %d in file %s line %d : %s\n", + errorCode, + fileName, + lineNumber, + errorMsg); + + abort(); +} + +void *util_malloc(size_t chunksize, const char *filename, int32_t line) +{ + void * chunk; + + chunk = calloc(1,chunksize); + + if (!chunk) + errorAbort(MEM_ALLOC_ERROR,"Could not allocate memory.",filename,line); + + return chunk; +} + +/* + * Function Name: util_realloc(void *chunk, int32_t newsize, const char *filename, int32_t line) + * Description: Overloading realloc funstion, changes the size of the memory object pointed to by chunk + * to the size specified by newsize. If memory cannot be allocated, gives the error on stderr and aborts. 
+ */ +void *util_realloc(void *chunk, size_t newsize, const char *filename, int32_t line) +{ + void *newchunk; + + newchunk = realloc(chunk,newsize); + + if (!newchunk) + { + errorAbort(MEM_ALLOC_ERROR,"Could not allocate memory.",filename,line); + } + + return newchunk; +} + +/* + * Function Name: util_free(void *chunk) + * Description: Returns the memory specified by chunk back to operating syste. + */ +void util_free(void *chunk) +{ + free(chunk); +} + +BOOL util_findInArr(int32_t tempArr[], int seqNo, int32_t noOfSeqs) +{ + int index; + + for(index = 0; index < noOfSeqs; index++) + { + if(tempArr[index] == seqNo) return TRUE; + } + + return FALSE; +} + + +/** + * + * String handling utilities + * + **/ + +/* + * Function Name: str_chopAtDelim(char *dest, char *src, char *delim, BOOL includeDelim) + * Description: chops the string startig from source to the delimeter specified. + */ +char *str_chopAtDelim(char *dest, char *src, char *delim, BOOL includeDelim) +{ + char *temp; + int32_t len; + + /* returns a pointer to the first occurance of delim in src*/ + temp = strstr(src, delim); + + if (temp == NULL) + return NULL; + + if (includeDelim) + { + /* temp - src + strlen(delim) -> a string between src and delimeter including delimeter*/ + len = temp - src + strlen(delim); + strncpy(dest, src, len); + } + else + { + len = temp - src; + strncpy(dest, src, temp - src); + } + dest[len] = '\0'; + + return dest; +} + +/* + * Function Name: str_sepNameValue(char *name, char *value, char *src, char *delim) + * Description: . + */ +void str_sepNameValue(char *name, char *value, char *src, char *delim) +{ + char *temp; + + temp = strstr(src, delim); + + if(temp != NULL) + { + strncpy(name, src, temp - src); + strcpy(value, temp + strlen(delim)); + } + else + { + strcpy(name, src); + strcpy(value, ""); + } +} + +/* + * Function Name: str_removeSpaces(char *src) + * Description: Removes the spaces from the start and end of the string. 
+ */ +int str_isSpace (char ch) +{ + switch (ch) + { + case ' ': + case '\t': + case '\n': + return 1; + } + return 0; +} + +void str_removeSpaces(char *src) +{ + int32_t start = 0, end = strlen(src) - 1; + int32_t index = 0; + + if (src == NULL || end < 0) return; + + while(str_isSpace(src[start]) && start < end) start++; + while(str_isSpace(src[end]) && end > start) end--; + + if ( start == end && src[start] == ' ') + { + src[0] = '\0'; + return; + } + if (start > 0) + { + while(start <= end) + { + src[index] = src[start]; + index++; + start++; + } + src[index] = '\0'; + return; + } + src[end+1] = '\0'; +} + +/* + * Function Name: str_strrstr(char *src, char *delim) + * Description: Searches the position of last occurence of string delim in src. + */ +char *str_strrstr(char *src, char *delim) +{ + char *last, *next; + next = strstr(src, delim); + last = next; + while(next != NULL) + { + last = next; + next = strstr(last + 1, delim); + } + return last; +} + + +void* getA16Address(int size) +{ + void* address; + address = (void*) malloc(size); + while ((((long long unsigned int) (address))%16) != 0) + address++; + return(address); +} + + +void** reallocA16Address(void** address, int size) +{ + if (*(address) == NULL) + *(address) = malloc(size); + *(address) = realloc(address, size); + while ((((long long unsigned int) (*(address)))%16) != 0) + (*(address))++; + return(address); +} + + + + + + + + + + diff --git a/libutils/utilities.h b/libutils/utilities.h new file mode 100644 index 0000000..36138f9 --- /dev/null +++ b/libutils/utilities.h @@ -0,0 +1,56 @@ +/** + * FileName: utilities.h + * Author: Tiayyba Riaz + * Description: Header file for miscellenious functions and macros + * **/ + +#ifndef UTILITIES_H_ +#define UTILITIES_H_ + +#include +#include +#include + + + +//static char *basecodes = "00100020000000000003000000"; + +//#define BASEIDX(ch) basecodes[ch - 'a'] - 48 + +#ifndef MAX +#define MAX(x,y) (((x)>(y)) ? (x):(y)) +#define MIN(x,y) (((x)<(y)) ? 
(x):(y)) +#endif + +typedef char BOOL; +#define TRUE (3==3) +#define FALSE (!TRUE) +#define ALILEN (0) +#define MAXLEN (1) +#define MINLEN (2) + + +/* Error Codes */ +#define FILE_OPENING_ERROR (1) +#define MEM_ALLOC_ERROR (2) + +/* Prototypes */ +void errorAbort(int32_t code, char* errorMsg, char* fileName, int32_t lineNumber); +char *str_strrstr(char *src, char *delim); +void str_removeSpaces(char *src); +void str_sepNameValue(char *name, char *value, char *src, char *delim); +char *str_chopAtDelim(char *dest, char *src, char *delim, BOOL includeDelim); +void util_free(void *chunk); +void *util_realloc(void *chunk, size_t newsize, const char *filename, int32_t line); +void *util_malloc(size_t chunksize, const char *filename, int32_t line); +BOOL util_findInArr(int32_t tempArr[], int seqNo, int32_t noOfSeqs); +void* getA16Address(int size); +void** reallocA16Address(void** address, int size); + +/* Macros */ +#define ERRORABORT(code, msg) errorAbort((code), (msg), __FILE__, __LINE__) + +#endif /*UTILITIES_H_*/ + + +