Backup with sets and ahocorasick
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/ecoPrimers-2.1@298 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
221
.cproject
Normal file
221
.cproject
Normal file
@ -0,0 +1,221 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="ecoPrimers" buildProperties="" description="" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg">
|
||||
<folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077" name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.766054112" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.2057035265" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/>
|
||||
<builder id="cdt.managedbuild.target.gnu.builder.macosx.base.783726363" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.914103467" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.c.linker.input.62980206" superClass="cdt.managedbuild.tool.macosx.c.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.691108439" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.695639877" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base">
|
||||
<option id="gnu.both.asm.option.include.paths.1544375094" name="Include paths (-I)" superClass="gnu.both.asm.option.include.paths" valueType="includePath"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1507665054" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1786370580" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.454329831" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base">
|
||||
<option id="gnu.c.compiler.option.include.paths.823251305" superClass="gnu.c.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/usr/include"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.330854350" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.language.mapping"/>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="makefileGenerator">
|
||||
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396;cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077;cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909;cdt.managedbuild.tool.gnu.c.compiler.input.330854350">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="makefileGenerator">
|
||||
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="ecoPrimers.null.1292969001" name="ecoPrimers"/>
|
||||
</storageModule>
|
||||
</cproject>
|
83
.project
Normal file
83
.project
Normal file
@ -0,0 +1,83 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>ecoPrimers</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.python.pydev.PyDevBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
<dictionary>
|
||||
<key>?name?</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.append_environment</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildArguments</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildCommand</key>
|
||||
<value>make</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
|
||||
<value>clean</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.contents</key>
|
||||
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
|
||||
<value>false</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.stopOnError</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.python.pydev.pythonNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
7
.pydevproject
Normal file
7
.pydevproject
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?eclipse-pydev version="1.0"?>
|
||||
|
||||
<pydev_project>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Python 2.6</pydev_property>
|
||||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
|
||||
</pydev_project>
|
506
Licence_CeCILL_V2-en.txt
Normal file
506
Licence_CeCILL_V2-en.txt
Normal file
@ -0,0 +1,506 @@
|
||||
|
||||
CeCILL FREE SOFTWARE LICENSE AGREEMENT
|
||||
|
||||
|
||||
Notice
|
||||
|
||||
This Agreement is a Free Software license agreement that is the result
|
||||
of discussions between its authors in order to ensure compliance with
|
||||
the two main principles guiding its drafting:
|
||||
|
||||
* firstly, compliance with the principles governing the distribution
|
||||
of Free Software: access to source code, broad rights granted to
|
||||
users,
|
||||
* secondly, the election of a governing law, French law, with which
|
||||
it is conformant, both as regards the law of torts and
|
||||
intellectual property law, and the protection that it offers to
|
||||
both authors and holders of the economic rights over software.
|
||||
|
||||
The authors of the CeCILL (for Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
|
||||
license are:
|
||||
|
||||
Commissariat à l'Energie Atomique - CEA, a public scientific, technical
|
||||
and industrial research establishment, having its principal place of
|
||||
business at 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris, France.
|
||||
|
||||
Centre National de la Recherche Scientifique - CNRS, a public scientific
|
||||
and technological establishment, having its principal place of business
|
||||
at 3 rue Michel-Ange, 75794 Paris cedex 16, France.
|
||||
|
||||
Institut National de Recherche en Informatique et en Automatique -
|
||||
INRIA, a public scientific and technological establishment, having its
|
||||
principal place of business at Domaine de Voluceau, Rocquencourt, BP
|
||||
105, 78153 Le Chesnay cedex, France.
|
||||
|
||||
|
||||
Preamble
|
||||
|
||||
The purpose of this Free Software license agreement is to grant users
|
||||
the right to modify and redistribute the software governed by this
|
||||
license within the framework of an open source distribution model.
|
||||
|
||||
The exercising of these rights is conditional upon certain obligations
|
||||
for users so as to preserve this status for all subsequent redistributions.
|
||||
|
||||
In consideration of access to the source code and the rights to copy,
|
||||
modify and redistribute granted by the license, users are provided only
|
||||
with a limited warranty and the software's author, the holder of the
|
||||
economic rights, and the successive licensors only have limited liability.
|
||||
|
||||
In this respect, the risks associated with loading, using, modifying
|
||||
and/or developing or reproducing the software by the user are brought to
|
||||
the user's attention, given its Free Software status, which may make it
|
||||
complicated to use, with the result that its use is reserved for
|
||||
developers and experienced professionals having in-depth computer
|
||||
knowledge. Users are therefore encouraged to load and test the
|
||||
suitability of the software as regards their requirements in conditions
|
||||
enabling the security of their systems and/or data to be ensured and,
|
||||
more generally, to use and operate it in the same conditions of
|
||||
security. This Agreement may be freely reproduced and published,
|
||||
provided it is not altered, and that no provisions are either added or
|
||||
removed herefrom.
|
||||
|
||||
This Agreement may apply to any or all software for which the holder of
|
||||
the economic rights decides to submit the use thereof to its provisions.
|
||||
|
||||
|
||||
Article 1 - DEFINITIONS
|
||||
|
||||
For the purpose of this Agreement, when the following expressions
|
||||
commence with a capital letter, they shall have the following meaning:
|
||||
|
||||
Agreement: means this license agreement, and its possible subsequent
|
||||
versions and annexes.
|
||||
|
||||
Software: means the software in its Object Code and/or Source Code form
|
||||
and, where applicable, its documentation, "as is" when the Licensee
|
||||
accepts the Agreement.
|
||||
|
||||
Initial Software: means the Software in its Source Code and possibly its
|
||||
Object Code form and, where applicable, its documentation, "as is" when
|
||||
it is first distributed under the terms and conditions of the Agreement.
|
||||
|
||||
Modified Software: means the Software modified by at least one
|
||||
Contribution.
|
||||
|
||||
Source Code: means all the Software's instructions and program lines to
|
||||
which access is required so as to modify the Software.
|
||||
|
||||
Object Code: means the binary files originating from the compilation of
|
||||
the Source Code.
|
||||
|
||||
Holder: means the holder(s) of the economic rights over the Initial
|
||||
Software.
|
||||
|
||||
Licensee: means the Software user(s) having accepted the Agreement.
|
||||
|
||||
Contributor: means a Licensee having made at least one Contribution.
|
||||
|
||||
Licensor: means the Holder, or any other individual or legal entity, who
|
||||
distributes the Software under the Agreement.
|
||||
|
||||
Contribution: means any or all modifications, corrections, translations,
|
||||
adaptations and/or new functions integrated into the Software by any or
|
||||
all Contributors, as well as any or all Internal Modules.
|
||||
|
||||
Module: means a set of sources files including their documentation that
|
||||
enables supplementary functions or services in addition to those offered
|
||||
by the Software.
|
||||
|
||||
External Module: means any or all Modules, not derived from the
|
||||
Software, so that this Module and the Software run in separate address
|
||||
spaces, with one calling the other when they are run.
|
||||
|
||||
Internal Module: means any or all Module, connected to the Software so
|
||||
that they both execute in the same address space.
|
||||
|
||||
GNU GPL: means the GNU General Public License version 2 or any
|
||||
subsequent version, as published by the Free Software Foundation Inc.
|
||||
|
||||
Parties: mean both the Licensee and the Licensor.
|
||||
|
||||
These expressions may be used both in singular and plural form.
|
||||
|
||||
|
||||
Article 2 - PURPOSE
|
||||
|
||||
The purpose of the Agreement is the grant by the Licensor to the
|
||||
Licensee of a non-exclusive, transferable and worldwide license for the
|
||||
Software as set forth in Article 5 hereinafter for the whole term of the
|
||||
protection granted by the rights over said Software.
|
||||
|
||||
|
||||
Article 3 - ACCEPTANCE
|
||||
|
||||
3.1 The Licensee shall be deemed as having accepted the terms and
|
||||
conditions of this Agreement upon the occurrence of the first of the
|
||||
following events:
|
||||
|
||||
* (i) loading the Software by any or all means, notably, by
|
||||
downloading from a remote server, or by loading from a physical
|
||||
medium;
|
||||
* (ii) the first time the Licensee exercises any of the rights
|
||||
granted hereunder.
|
||||
|
||||
3.2 One copy of the Agreement, containing a notice relating to the
|
||||
characteristics of the Software, to the limited warranty, and to the
|
||||
fact that its use is restricted to experienced users has been provided
|
||||
to the Licensee prior to its acceptance as set forth in Article 3.1
|
||||
hereinabove, and the Licensee hereby acknowledges that it has read and
|
||||
understood it.
|
||||
|
||||
|
||||
Article 4 - EFFECTIVE DATE AND TERM
|
||||
|
||||
|
||||
4.1 EFFECTIVE DATE
|
||||
|
||||
The Agreement shall become effective on the date when it is accepted by
|
||||
the Licensee as set forth in Article 3.1.
|
||||
|
||||
|
||||
4.2 TERM
|
||||
|
||||
The Agreement shall remain in force for the entire legal term of
|
||||
protection of the economic rights over the Software.
|
||||
|
||||
|
||||
Article 5 - SCOPE OF RIGHTS GRANTED
|
||||
|
||||
The Licensor hereby grants to the Licensee, who accepts, the following
|
||||
rights over the Software for any or all use, and for the term of the
|
||||
Agreement, on the basis of the terms and conditions set forth hereinafter.
|
||||
|
||||
Besides, if the Licensor owns or comes to own one or more patents
|
||||
protecting all or part of the functions of the Software or of its
|
||||
components, the Licensor undertakes not to enforce the rights granted by
|
||||
these patents against successive Licensees using, exploiting or
|
||||
modifying the Software. If these patents are transferred, the Licensor
|
||||
undertakes to have the transferees subscribe to the obligations set
|
||||
forth in this paragraph.
|
||||
|
||||
|
||||
5.1 RIGHT OF USE
|
||||
|
||||
The Licensee is authorized to use the Software, without any limitation
|
||||
as to its fields of application, with it being hereinafter specified
|
||||
that this comprises:
|
||||
|
||||
1. permanent or temporary reproduction of all or part of the Software
|
||||
by any or all means and in any or all form.
|
||||
|
||||
2. loading, displaying, running, or storing the Software on any or
|
||||
all medium.
|
||||
|
||||
3. entitlement to observe, study or test its operation so as to
|
||||
determine the ideas and principles behind any or all constituent
|
||||
elements of said Software. This shall apply when the Licensee
|
||||
carries out any or all loading, displaying, running, transmission
|
||||
or storage operation as regards the Software, that it is entitled
|
||||
to carry out hereunder.
|
||||
|
||||
|
||||
5.2 ENTITLEMENT TO MAKE CONTRIBUTIONS
|
||||
|
||||
The right to make Contributions includes the right to translate, adapt,
|
||||
arrange, or make any or all modifications to the Software, and the right
|
||||
to reproduce the resulting software.
|
||||
|
||||
The Licensee is authorized to make any or all Contributions to the
|
||||
Software provided that it includes an explicit notice that it is the
|
||||
author of said Contribution and indicates the date of the creation thereof.
|
||||
|
||||
|
||||
5.3 RIGHT OF DISTRIBUTION
|
||||
|
||||
In particular, the right of distribution includes the right to publish,
|
||||
transmit and communicate the Software to the general public on any or
|
||||
all medium, and by any or all means, and the right to market, either in
|
||||
consideration of a fee, or free of charge, one or more copies of the
|
||||
Software by any means.
|
||||
|
||||
The Licensee is further authorized to distribute copies of the modified
|
||||
or unmodified Software to third parties according to the terms and
|
||||
conditions set forth hereinafter.
|
||||
|
||||
|
||||
5.3.1 DISTRIBUTION OF SOFTWARE WITHOUT MODIFICATION
|
||||
|
||||
The Licensee is authorized to distribute true copies of the Software in
|
||||
Source Code or Object Code form, provided that said distribution
|
||||
complies with all the provisions of the Agreement and is accompanied by:
|
||||
|
||||
1. a copy of the Agreement,
|
||||
|
||||
2. a notice relating to the limitation of both the Licensor's
|
||||
warranty and liability as set forth in Articles 8 and 9,
|
||||
|
||||
and that, in the event that only the Object Code of the Software is
|
||||
redistributed, the Licensee allows future Licensees unhindered access to
|
||||
the full Source Code of the Software by indicating how to access it, it
|
||||
being understood that the additional cost of acquiring the Source Code
|
||||
shall not exceed the cost of transferring the data.
|
||||
|
||||
|
||||
5.3.2 DISTRIBUTION OF MODIFIED SOFTWARE
|
||||
|
||||
When the Licensee makes a Contribution to the Software, the terms and
|
||||
conditions for the distribution of the resulting Modified Software
|
||||
become subject to all the provisions of this Agreement.
|
||||
|
||||
The Licensee is authorized to distribute the Modified Software, in
|
||||
source code or object code form, provided that said distribution
|
||||
complies with all the provisions of the Agreement and is accompanied by:
|
||||
|
||||
1. a copy of the Agreement,
|
||||
|
||||
2. a notice relating to the limitation of both the Licensor's
|
||||
warranty and liability as set forth in Articles 8 and 9,
|
||||
|
||||
and that, in the event that only the object code of the Modified
|
||||
Software is redistributed, the Licensee allows future Licensees
|
||||
unhindered access to the full source code of the Modified Software by
|
||||
indicating how to access it, it being understood that the additional
|
||||
cost of acquiring the source code shall not exceed the cost of
|
||||
transferring the data.
|
||||
|
||||
|
||||
5.3.3 DISTRIBUTION OF EXTERNAL MODULES
|
||||
|
||||
When the Licensee has developed an External Module, the terms and
|
||||
conditions of this Agreement do not apply to said External Module, that
|
||||
may be distributed under a separate license agreement.
|
||||
|
||||
|
||||
5.3.4 COMPATIBILITY WITH THE GNU GPL
|
||||
|
||||
The Licensee can include a code that is subject to the provisions of one
|
||||
of the versions of the GNU GPL in the Modified or unmodified Software,
|
||||
and distribute that entire code under the terms of the same version of
|
||||
the GNU GPL.
|
||||
|
||||
The Licensee can include the Modified or unmodified Software in a code
|
||||
that is subject to the provisions of one of the versions of the GNU GPL,
|
||||
and distribute that entire code under the terms of the same version of
|
||||
the GNU GPL.
|
||||
|
||||
|
||||
Article 6 - INTELLECTUAL PROPERTY
|
||||
|
||||
|
||||
6.1 OVER THE INITIAL SOFTWARE
|
||||
|
||||
The Holder owns the economic rights over the Initial Software. Any or
|
||||
all use of the Initial Software is subject to compliance with the terms
|
||||
and conditions under which the Holder has elected to distribute its work
|
||||
and no one shall be entitled to modify the terms and conditions for the
|
||||
distribution of said Initial Software.
|
||||
|
||||
The Holder undertakes that the Initial Software will remain ruled at
|
||||
least by this Agreement, for the duration set forth in Article 4.2.
|
||||
|
||||
|
||||
6.2 OVER THE CONTRIBUTIONS
|
||||
|
||||
The Licensee who develops a Contribution is the owner of the
|
||||
intellectual property rights over this Contribution as defined by
|
||||
applicable law.
|
||||
|
||||
|
||||
6.3 OVER THE EXTERNAL MODULES
|
||||
|
||||
The Licensee who develops an External Module is the owner of the
|
||||
intellectual property rights over this External Module as defined by
|
||||
applicable law and is free to choose the type of agreement that shall
|
||||
govern its distribution.
|
||||
|
||||
|
||||
6.4 JOINT PROVISIONS
|
||||
|
||||
The Licensee expressly undertakes:
|
||||
|
||||
1. not to remove, or modify, in any manner, the intellectual property
|
||||
notices attached to the Software;
|
||||
|
||||
2. to reproduce said notices, in an identical manner, in the copies
|
||||
of the Software modified or not.
|
||||
|
||||
The Licensee undertakes not to directly or indirectly infringe the
|
||||
intellectual property rights of the Holder and/or Contributors on the
|
||||
Software and to take, where applicable, vis-à-vis its staff, any and all
|
||||
measures required to ensure respect of said intellectual property rights
|
||||
of the Holder and/or Contributors.
|
||||
|
||||
|
||||
Article 7 - RELATED SERVICES
|
||||
|
||||
7.1 Under no circumstances shall the Agreement oblige the Licensor to
|
||||
provide technical assistance or maintenance services for the Software.
|
||||
|
||||
However, the Licensor is entitled to offer this type of services. The
|
||||
terms and conditions of such technical assistance, and/or such
|
||||
maintenance, shall be set forth in a separate instrument. Only the
|
||||
Licensor offering said maintenance and/or technical assistance services
|
||||
shall incur liability therefor.
|
||||
|
||||
7.2 Similarly, any Licensor is entitled to offer to its licensees, under
|
||||
its sole responsibility, a warranty, that shall only be binding upon
|
||||
itself, for the redistribution of the Software and/or the Modified
|
||||
Software, under terms and conditions that it is free to decide. Said
|
||||
warranty, and the financial terms and conditions of its application,
|
||||
shall be subject of a separate instrument executed between the Licensor
|
||||
and the Licensee.
|
||||
|
||||
|
||||
Article 8 - LIABILITY
|
||||
|
||||
8.1 Subject to the provisions of Article 8.2, the Licensee shall be
|
||||
entitled to claim compensation for any direct loss it may have suffered
|
||||
from the Software as a result of a fault on the part of the relevant
|
||||
Licensor, subject to providing evidence thereof.
|
||||
|
||||
8.2 The Licensor's liability is limited to the commitments made under
|
||||
this Agreement and shall not be incurred as a result of in particular:
|
||||
(i) loss due the Licensee's total or partial failure to fulfill its
|
||||
obligations, (ii) direct or consequential loss that is suffered by the
|
||||
Licensee due to the use or performance of the Software, and (iii) more
|
||||
generally, any consequential loss. In particular the Parties expressly
|
||||
agree that any or all pecuniary or business loss (i.e. loss of data,
|
||||
loss of profits, operating loss, loss of customers or orders,
|
||||
opportunity cost, any disturbance to business activities) or any or all
|
||||
legal proceedings instituted against the Licensee by a third party,
|
||||
shall constitute consequential loss and shall not provide entitlement to
|
||||
any or all compensation from the Licensor.
|
||||
|
||||
|
||||
Article 9 - WARRANTY
|
||||
|
||||
9.1 The Licensee acknowledges that the scientific and technical
|
||||
state-of-the-art when the Software was distributed did not enable all
|
||||
possible uses to be tested and verified, nor for the presence of
|
||||
possible defects to be detected. In this respect, the Licensee's
|
||||
attention has been drawn to the risks associated with loading, using,
|
||||
modifying and/or developing and reproducing the Software which are
|
||||
reserved for experienced users.
|
||||
|
||||
The Licensee shall be responsible for verifying, by any or all means,
|
||||
the suitability of the product for its requirements, its good working
|
||||
order, and for ensuring that it shall not cause damage to either persons
|
||||
or properties.
|
||||
|
||||
9.2 The Licensor hereby represents, in good faith, that it is entitled
|
||||
to grant all the rights over the Software (including in particular the
|
||||
rights set forth in Article 5).
|
||||
|
||||
9.3 The Licensee acknowledges that the Software is supplied "as is" by
|
||||
the Licensor without any other express or tacit warranty, other than
|
||||
that provided for in Article 9.2 and, in particular, without any warranty
|
||||
as to its commercial value, its secured, safe, innovative or relevant
|
||||
nature.
|
||||
|
||||
Specifically, the Licensor does not warrant that the Software is free
|
||||
from any error, that it will operate without interruption, that it will
|
||||
be compatible with the Licensee's own equipment and software
|
||||
configuration, nor that it will meet the Licensee's requirements.
|
||||
|
||||
9.4 The Licensor does not either expressly or tacitly warrant that the
|
||||
Software does not infringe any third party intellectual property right
|
||||
relating to a patent, software or any other property right. Therefore,
|
||||
the Licensor disclaims any and all liability towards the Licensee
|
||||
arising out of any or all proceedings for infringement that may be
|
||||
instituted in respect of the use, modification and redistribution of the
|
||||
Software. Nevertheless, should such proceedings be instituted against
|
||||
the Licensee, the Licensor shall provide it with technical and legal
|
||||
assistance for its defense. Such technical and legal assistance shall be
|
||||
decided on a case-by-case basis between the relevant Licensor and the
|
||||
Licensee pursuant to a memorandum of understanding. The Licensor
|
||||
disclaims any and all liability as regards the Licensee's use of the
|
||||
name of the Software. No warranty is given as regards the existence of
|
||||
prior rights over the name of the Software or as regards the existence
|
||||
of a trademark.
|
||||
|
||||
|
||||
Article 10 - TERMINATION
|
||||
|
||||
10.1 In the event of a breach by the Licensee of its obligations
|
||||
hereunder, the Licensor may automatically terminate this Agreement
|
||||
thirty (30) days after notice has been sent to the Licensee and has
|
||||
remained ineffective.
|
||||
|
||||
10.2 A Licensee whose Agreement is terminated shall no longer be
|
||||
authorized to use, modify or distribute the Software. However, any
|
||||
licenses that it may have granted prior to termination of the Agreement
|
||||
shall remain valid subject to their having been granted in compliance
|
||||
with the terms and conditions hereof.
|
||||
|
||||
|
||||
Article 11 - MISCELLANEOUS
|
||||
|
||||
|
||||
11.1 EXCUSABLE EVENTS
|
||||
|
||||
Neither Party shall be liable for any or all delay, or failure to
|
||||
perform the Agreement, that may be attributable to an event of force
|
||||
majeure, an act of God or an outside cause, such as defective
|
||||
functioning or interruptions of the electricity or telecommunications
|
||||
networks, network paralysis following a virus attack, intervention by
|
||||
government authorities, natural disasters, water damage, earthquakes,
|
||||
fire, explosions, strikes and labor unrest, war, etc.
|
||||
|
||||
11.2 Any failure by either Party, on one or more occasions, to invoke
|
||||
one or more of the provisions hereof, shall under no circumstances be
|
||||
interpreted as being a waiver by the interested Party of its right to
|
||||
invoke said provision(s) subsequently.
|
||||
|
||||
11.3 The Agreement cancels and replaces any or all previous agreements,
|
||||
whether written or oral, between the Parties and having the same
|
||||
purpose, and constitutes the entirety of the agreement between said
|
||||
Parties concerning said purpose. No supplement or modification to the
|
||||
terms and conditions hereof shall be effective as between the Parties
|
||||
unless it is made in writing and signed by their duly authorized
|
||||
representatives.
|
||||
|
||||
11.4 In the event that one or more of the provisions hereof were to
|
||||
conflict with a current or future applicable act or legislative text,
|
||||
said act or legislative text shall prevail, and the Parties shall make
|
||||
the necessary amendments so as to comply with said act or legislative
|
||||
text. All other provisions shall remain effective. Similarly, invalidity
|
||||
of a provision of the Agreement, for any reason whatsoever, shall not
|
||||
cause the Agreement as a whole to be invalid.
|
||||
|
||||
|
||||
11.5 LANGUAGE
|
||||
|
||||
The Agreement is drafted in both French and English and both versions
|
||||
are deemed authentic.
|
||||
|
||||
|
||||
Article 12 - NEW VERSIONS OF THE AGREEMENT
|
||||
|
||||
12.1 Any person is authorized to duplicate and distribute copies of this
|
||||
Agreement.
|
||||
|
||||
12.2 So as to ensure coherence, the wording of this Agreement is
|
||||
protected and may only be modified by the authors of the License, who
|
||||
reserve the right to periodically publish updates or new versions of the
|
||||
Agreement, each with a separate number. These subsequent versions may
|
||||
address new issues encountered by Free Software.
|
||||
|
||||
12.3 Any Software distributed under a given version of the Agreement may
|
||||
only be subsequently distributed under the same version of the Agreement
|
||||
or a subsequent version, subject to the provisions of Article 5.3.4.
|
||||
|
||||
|
||||
Article 13 - GOVERNING LAW AND JURISDICTION
|
||||
|
||||
13.1 The Agreement is governed by French law. The Parties agree to
|
||||
endeavor to seek an amicable solution to any disagreements or disputes
|
||||
that may arise during the performance of the Agreement.
|
||||
|
||||
13.2 Failing an amicable solution within two (2) months as from their
|
||||
occurrence, and unless emergency proceedings are necessary, the
|
||||
disagreements or disputes shall be referred to the Paris Courts having
|
||||
jurisdiction, by the more diligent Party.
|
||||
|
||||
|
||||
Version 2.0 dated 2006-09-05.
|
512
Licence_CeCILL_V2-fr.txt
Normal file
512
Licence_CeCILL_V2-fr.txt
Normal file
@ -0,0 +1,512 @@
|
||||
|
||||
CONTRAT DE LICENCE DE LOGICIEL LIBRE CeCILL
|
||||
|
||||
|
||||
Avertissement
|
||||
|
||||
Ce contrat est une licence de logiciel libre issue d'une concertation
|
||||
entre ses auteurs afin que le respect de deux grands principes préside à
|
||||
sa rédaction:
|
||||
|
||||
* d'une part, le respect des principes de diffusion des logiciels
|
||||
libres: accès au code source, droits étendus conférés aux
|
||||
utilisateurs,
|
||||
* d'autre part, la désignation d'un droit applicable, le droit
|
||||
français, auquel elle est conforme, tant au regard du droit de la
|
||||
responsabilité civile que du droit de la propriété intellectuelle
|
||||
et de la protection qu'il offre aux auteurs et titulaires des
|
||||
droits patrimoniaux sur un logiciel.
|
||||
|
||||
Les auteurs de la licence CeCILL (pour Ce[a] C[nrs] I[nria] L[ogiciel]
|
||||
L[ibre]) sont:
|
||||
|
||||
Commissariat à l'Energie Atomique - CEA, établissement public de
|
||||
recherche à caractère scientifique, technique et industriel, dont le
|
||||
siège est situé 25 rue Leblanc, immeuble Le Ponant D, 75015 Paris.
|
||||
|
||||
Centre National de la Recherche Scientifique - CNRS, établissement
|
||||
public à caractère scientifique et technologique, dont le siège est
|
||||
situé 3 rue Michel-Ange, 75794 Paris cedex 16.
|
||||
|
||||
Institut National de Recherche en Informatique et en Automatique -
|
||||
INRIA, établissement public à caractère scientifique et technologique,
|
||||
dont le siège est situé Domaine de Voluceau, Rocquencourt, BP 105, 78153
|
||||
Le Chesnay cedex.
|
||||
|
||||
|
||||
Préambule
|
||||
|
||||
Ce contrat est une licence de logiciel libre dont l'objectif est de
|
||||
conférer aux utilisateurs la liberté de modification et de
|
||||
redistribution du logiciel régi par cette licence dans le cadre d'un
|
||||
modèle de diffusion en logiciel libre.
|
||||
|
||||
L'exercice de ces libertés est assorti de certains devoirs à la charge
|
||||
des utilisateurs afin de préserver ce statut au cours des
|
||||
redistributions ultérieures.
|
||||
|
||||
L'accessibilité au code source et les droits de copie, de modification
|
||||
et de redistribution qui en découlent ont pour contrepartie de n'offrir
|
||||
aux utilisateurs qu'une garantie limitée et de ne faire peser sur
|
||||
l'auteur du logiciel, le titulaire des droits patrimoniaux et les
|
||||
concédants successifs qu'une responsabilité restreinte.
|
||||
|
||||
A cet égard l'attention de l'utilisateur est attirée sur les risques
|
||||
associés au chargement, à l'utilisation, à la modification et/ou au
|
||||
développement et à la reproduction du logiciel par l'utilisateur étant
|
||||
donné sa spécificité de logiciel libre, qui peut le rendre complexe à
|
||||
manipuler et qui le réserve donc à des développeurs ou des
|
||||
professionnels avertis possédant des connaissances informatiques
|
||||
approfondies. Les utilisateurs sont donc invités à charger et tester
|
||||
l'adéquation du logiciel à leurs besoins dans des conditions permettant
|
||||
d'assurer la sécurité de leurs systèmes et/ou de leurs données et, plus
|
||||
généralement, à l'utiliser et l'exploiter dans les mêmes conditions de
|
||||
sécurité. Ce contrat peut être reproduit et diffusé librement, sous
|
||||
réserve de le conserver en l'état, sans ajout ni suppression de clauses.
|
||||
|
||||
Ce contrat est susceptible de s'appliquer à tout logiciel dont le
|
||||
titulaire des droits patrimoniaux décide de soumettre l'exploitation aux
|
||||
dispositions qu'il contient.
|
||||
|
||||
|
||||
Article 1 - DEFINITIONS
|
||||
|
||||
Dans ce contrat, les termes suivants, lorsqu'ils seront écrits avec une
|
||||
lettre capitale, auront la signification suivante:
|
||||
|
||||
Contrat: désigne le présent contrat de licence, ses éventuelles versions
|
||||
postérieures et annexes.
|
||||
|
||||
Logiciel: d<>signe le logiciel sous sa forme de Code Objet et/ou de Code
|
||||
Source et le cas <20>ch<63>ant sa documentation, dans leur <20>tat au moment de
|
||||
l'acceptation du Contrat par le Licenci<63>.
|
||||
|
||||
Logiciel Initial: d<>signe le Logiciel sous sa forme de Code Source et
|
||||
<EFBFBD>ventuellement de Code Objet et le cas <20>ch<63>ant sa documentation, dans
|
||||
leur <20>tat au moment de leur premi<6D>re diffusion sous les termes du Contrat.
|
||||
|
||||
Logiciel Modifi<66>: d<>signe le Logiciel modifi<66> par au moins une
|
||||
Contribution.
|
||||
|
||||
Code Source: d<>signe l'ensemble des instructions et des lignes de
|
||||
programme du Logiciel et auquel l'acc<63>s est n<>cessaire en vue de
|
||||
modifier le Logiciel.
|
||||
|
||||
Code Objet: d<>signe les fichiers binaires issus de la compilation du
|
||||
Code Source.
|
||||
|
||||
Titulaire: d<>signe le ou les d<>tenteurs des droits patrimoniaux d'auteur
|
||||
sur le Logiciel Initial.
|
||||
|
||||
Licenci<EFBFBD>: d<>signe le ou les utilisateurs du Logiciel ayant accept<70> le
|
||||
Contrat.
|
||||
|
||||
Contributeur: d<>signe le Licenci<63> auteur d'au moins une Contribution.
|
||||
|
||||
Conc<EFBFBD>dant: d<>signe le Titulaire ou toute personne physique ou morale
|
||||
distribuant le Logiciel sous le Contrat.
|
||||
|
||||
Contribution: d<>signe l'ensemble des modifications, corrections,
|
||||
traductions, adaptations et/ou nouvelles fonctionnalit<69>s int<6E>gr<67>es dans
|
||||
le Logiciel par tout Contributeur, ainsi que tout Module Interne.
|
||||
|
||||
Module: d<>signe un ensemble de fichiers sources y compris leur
|
||||
documentation qui permet de r<>aliser des fonctionnalit<69>s ou services
|
||||
suppl<EFBFBD>mentaires <20> ceux fournis par le Logiciel.
|
||||
|
||||
Module Externe: d<>signe tout Module, non d<>riv<69> du Logiciel, tel que ce
|
||||
Module et le Logiciel s'ex<65>cutent dans des espaces d'adressage
|
||||
diff<EFBFBD>rents, l'un appelant l'autre au moment de leur ex<65>cution.
|
||||
|
||||
Module Interne: d<>signe tout Module li<6C> au Logiciel de telle sorte
|
||||
qu'ils s'ex<65>cutent dans le m<>me espace d'adressage.
|
||||
|
||||
GNU GPL: d<>signe la GNU General Public License dans sa version 2 ou
|
||||
toute version ult<6C>rieure, telle que publi<6C>e par Free Software Foundation
|
||||
Inc.
|
||||
|
||||
Parties: d<>signe collectivement le Licenci<63> et le Conc<6E>dant.
|
||||
|
||||
Ces termes s'entendent au singulier comme au pluriel.
|
||||
|
||||
|
||||
Article 2 - OBJET
|
||||
|
||||
Le Contrat a pour objet la concession par le Conc<6E>dant au Licenci<63> d'une
|
||||
licence non exclusive, cessible et mondiale du Logiciel telle que
|
||||
d<EFBFBD>finie ci-apr<70>s <20> l'article 5 pour toute la dur<75>e de protection des droits
|
||||
portant sur ce Logiciel.
|
||||
|
||||
|
||||
Article 3 - ACCEPTATION
|
||||
|
||||
3.1 L'acceptation par le Licenci<63> des termes du Contrat est r<>put<75>e
|
||||
acquise du fait du premier des faits suivants:
|
||||
|
||||
* (i) le chargement du Logiciel par tout moyen notamment par
|
||||
t<>l<EFBFBD>chargement <20> partir d'un serveur distant ou par chargement <20>
|
||||
partir d'un support physique;
|
||||
* (ii) le premier exercice par le Licenci<63> de l'un quelconque des
|
||||
droits conc<6E>d<EFBFBD>s par le Contrat.
|
||||
|
||||
3.2 Un exemplaire du Contrat, contenant notamment un avertissement
|
||||
relatif aux sp<73>cificit<69>s du Logiciel, <20> la restriction de garantie et <20>
|
||||
la limitation <20> un usage par des utilisateurs exp<78>riment<6E>s a <20>t<EFBFBD> mis <20>
|
||||
disposition du Licenci<63> pr<70>alablement <20> son acceptation telle que
|
||||
d<EFBFBD>finie <20> l'article 3.1 ci dessus et le Licenci<63> reconna<6E>t en avoir pris
|
||||
connaissance.
|
||||
|
||||
|
||||
Article 4 - ENTREE EN VIGUEUR ET DUREE
|
||||
|
||||
|
||||
4.1 ENTREE EN VIGUEUR
|
||||
|
||||
Le Contrat entre en vigueur <20> la date de son acceptation par le Licenci<63>
|
||||
telle que d<>finie en 3.1.
|
||||
|
||||
|
||||
4.2 DUREE
|
||||
|
||||
Le Contrat produira ses effets pendant toute la dur<75>e l<>gale de
|
||||
protection des droits patrimoniaux portant sur le Logiciel.
|
||||
|
||||
|
||||
Article 5 - ETENDUE DES DROITS CONCEDES
|
||||
|
||||
Le Conc<6E>dant conc<6E>de au Licenci<63>, qui accepte, les droits suivants sur
|
||||
le Logiciel pour toutes destinations et pour la dur<75>e du Contrat dans
|
||||
les conditions ci-apr<70>s d<>taill<6C>es.
|
||||
|
||||
Par ailleurs, si le Conc<6E>dant d<>tient ou venait <20> d<>tenir un ou
|
||||
plusieurs brevets d'invention prot<6F>geant tout ou partie des
|
||||
fonctionnalit<EFBFBD>s du Logiciel ou de ses composants, il s'engage <20> ne pas
|
||||
opposer les <20>ventuels droits conf<6E>r<EFBFBD>s par ces brevets aux Licenci<63>s
|
||||
successifs qui utiliseraient, exploiteraient ou modifieraient le
|
||||
Logiciel. En cas de cession de ces brevets, le Conc<6E>dant s'engage <20>
|
||||
faire reprendre les obligations du pr<70>sent alin<69>a aux cessionnaires.
|
||||
|
||||
|
||||
5.1 DROIT D'UTILISATION
|
||||
|
||||
Le Licenci<63> est autoris<69> <20> utiliser le Logiciel, sans restriction quant
|
||||
aux domaines d'application, <20>tant ci-apr<70>s pr<70>cis<69> que cela comporte:
|
||||
|
||||
1. la reproduction permanente ou provisoire du Logiciel en tout ou
|
||||
partie par tout moyen et sous toute forme.
|
||||
|
||||
2. le chargement, l'affichage, l'ex<65>cution, ou le stockage du
|
||||
Logiciel sur tout support.
|
||||
|
||||
3. la possibilit<69> d'en observer, d'en <20>tudier, ou d'en tester le
|
||||
fonctionnement afin de d<>terminer les id<69>es et principes qui sont
|
||||
<20> la base de n'importe quel <20>l<EFBFBD>ment de ce Logiciel; et ceci,
|
||||
lorsque le Licenci<63> effectue toute op<6F>ration de chargement,
|
||||
d'affichage, d'ex<65>cution, de transmission ou de stockage du
|
||||
Logiciel qu'il est en droit d'effectuer en vertu du Contrat.
|
||||
|
||||
|
||||
5.2 DROIT D'APPORTER DES CONTRIBUTIONS
|
||||
|
||||
Le droit d'apporter des Contributions comporte le droit de traduire,
|
||||
d'adapter, d'arranger ou d'apporter toute autre modification au Logiciel
|
||||
et le droit de reproduire le logiciel en r<>sultant.
|
||||
|
||||
Le Licenci<63> est autoris<69> <20> apporter toute Contribution au Logiciel sous
|
||||
r<EFBFBD>serve de mentionner, de fa<66>on explicite, son nom en tant qu'auteur de
|
||||
cette Contribution et la date de cr<63>ation de celle-ci.
|
||||
|
||||
|
||||
5.3 DROIT DE DISTRIBUTION
|
||||
|
||||
Le droit de distribution comporte notamment le droit de diffuser, de
|
||||
transmettre et de communiquer le Logiciel au public sur tout support et
|
||||
par tout moyen ainsi que le droit de mettre sur le march<63> <20> titre
|
||||
on<EFBFBD>reux ou gratuit, un ou des exemplaires du Logiciel par tout proc<6F>d<EFBFBD>.
|
||||
|
||||
Le Licenci<63> est autoris<69> <20> distribuer des copies du Logiciel, modifi<66> ou
|
||||
non, <20> des tiers dans les conditions ci-apr<70>s d<>taill<6C>es.
|
||||
|
||||
|
||||
5.3.1 DISTRIBUTION DU LOGICIEL SANS MODIFICATION
|
||||
|
||||
Le Licenci<63> est autoris<69> <20> distribuer des copies conformes du Logiciel,
|
||||
sous forme de Code Source ou de Code Objet, <20> condition que cette
|
||||
distribution respecte les dispositions du Contrat dans leur totalit<69> et
|
||||
soit accompagn<67>e:
|
||||
|
||||
1. d'un exemplaire du Contrat,
|
||||
|
||||
2. d'un avertissement relatif <20> la restriction de garantie et de
|
||||
responsabilit<69> du Conc<6E>dant telle que pr<70>vue aux articles 8
|
||||
et 9,
|
||||
|
||||
et que, dans le cas o<> seul le Code Objet du Logiciel est redistribu<62>,
|
||||
le Licenci<63> permette aux futurs Licenci<63>s d'acc<63>der facilement au Code
|
||||
Source complet du Logiciel en indiquant les modalit<69>s d'acc<63>s, <20>tant
|
||||
entendu que le co<63>t additionnel d'acquisition du Code Source ne devra
|
||||
pas exc<78>der le simple co<63>t de transfert des donn<6E>es.
|
||||
|
||||
|
||||
5.3.2 DISTRIBUTION DU LOGICIEL MODIFIE
|
||||
|
||||
Lorsque le Licenci<63> apporte une Contribution au Logiciel, les conditions
|
||||
de distribution du Logiciel Modifi<66> en r<>sultant sont alors soumises <20>
|
||||
l'int<6E>gralit<69> des dispositions du Contrat.
|
||||
|
||||
Le Licenci<63> est autoris<69> <20> distribuer le Logiciel Modifi<66>, sous forme de
|
||||
code source ou de code objet, <20> condition que cette distribution
|
||||
respecte les dispositions du Contrat dans leur totalit<69> et soit
|
||||
accompagn<EFBFBD>e:
|
||||
|
||||
1. d'un exemplaire du Contrat,
|
||||
|
||||
2. d'un avertissement relatif <20> la restriction de garantie et de
|
||||
responsabilit<69> du Conc<6E>dant telle que pr<70>vue aux articles 8
|
||||
et 9,
|
||||
|
||||
et que, dans le cas o<> seul le code objet du Logiciel Modifi<66> est
|
||||
redistribu<EFBFBD>, le Licenci<63> permette aux futurs Licenci<63>s d'acc<63>der
|
||||
facilement au code source complet du Logiciel Modifi<66> en indiquant les
|
||||
modalit<EFBFBD>s d'acc<63>s, <20>tant entendu que le co<63>t additionnel d'acquisition
|
||||
du code source ne devra pas exc<78>der le simple co<63>t de transfert des donn<6E>es.
|
||||
|
||||
|
||||
5.3.3 DISTRIBUTION DES MODULES EXTERNES
|
||||
|
||||
Lorsque le Licenci<63> a d<>velopp<70> un Module Externe les conditions du
|
||||
Contrat ne s'appliquent pas <20> ce Module Externe, qui peut <20>tre distribu<62>
|
||||
sous un contrat de licence diff<66>rent.
|
||||
|
||||
|
||||
5.3.4 COMPATIBILITE AVEC LA LICENCE GNU GPL
|
||||
|
||||
Le Licenci<63> peut inclure un code soumis aux dispositions d'une des
|
||||
versions de la licence GNU GPL dans le Logiciel modifi<66> ou non et
|
||||
distribuer l'ensemble sous les conditions de la m<>me version de la
|
||||
licence GNU GPL.
|
||||
|
||||
Le Licenci<63> peut inclure le Logiciel modifi<66> ou non dans un code soumis
|
||||
aux dispositions d'une des versions de la licence GNU GPL et distribuer
|
||||
l'ensemble sous les conditions de la m<>me version de la licence GNU GPL.
|
||||
|
||||
|
||||
Article 6 - PROPRIETE INTELLECTUELLE
|
||||
|
||||
|
||||
6.1 SUR LE LOGICIEL INITIAL
|
||||
|
||||
Le Titulaire est d<>tenteur des droits patrimoniaux sur le Logiciel
|
||||
Initial. Toute utilisation du Logiciel Initial est soumise au respect
|
||||
des conditions dans lesquelles le Titulaire a choisi de diffuser son
|
||||
oeuvre et nul autre n'a la facult<6C> de modifier les conditions de
|
||||
diffusion de ce Logiciel Initial.
|
||||
|
||||
Le Titulaire s'engage <20> ce que le Logiciel Initial reste au moins r<>gi
|
||||
par le Contrat et ce, pour la dur<75>e vis<69>e <20> l'article 4.2.
|
||||
|
||||
|
||||
6.2 SUR LES CONTRIBUTIONS
|
||||
|
||||
Le Licenci<63> qui a d<>velopp<70> une Contribution est titulaire sur celle-ci
|
||||
des droits de propri<72>t<EFBFBD> intellectuelle dans les conditions d<>finies par
|
||||
la l<>gislation applicable.
|
||||
|
||||
|
||||
6.3 SUR LES MODULES EXTERNES
|
||||
|
||||
Le Licenci<63> qui a d<>velopp<70> un Module Externe est titulaire sur celui-ci
|
||||
des droits de propri<72>t<EFBFBD> intellectuelle dans les conditions d<>finies par
|
||||
la l<>gislation applicable et reste libre du choix du contrat r<>gissant
|
||||
sa diffusion.
|
||||
|
||||
|
||||
6.4 DISPOSITIONS COMMUNES
|
||||
|
||||
Le Licenci<63> s'engage express<73>ment:
|
||||
|
||||
1. <20> ne pas supprimer ou modifier de quelque mani<6E>re que ce soit les
|
||||
mentions de propri<72>t<EFBFBD> intellectuelle appos<6F>es sur le Logiciel;
|
||||
|
||||
2. <20> reproduire <20> l'identique lesdites mentions de propri<72>t<EFBFBD>
|
||||
intellectuelle sur les copies du Logiciel modifi<66> ou non.
|
||||
|
||||
Le Licenci<63> s'engage <20> ne pas porter atteinte, directement ou
|
||||
indirectement, aux droits de propri<72>t<EFBFBD> intellectuelle du Titulaire et/ou
|
||||
des Contributeurs sur le Logiciel et <20> prendre, le cas <20>ch<63>ant, <20>
|
||||
l'<27>gard de son personnel toutes les mesures n<>cessaires pour assurer le
|
||||
respect des dits droits de propri<72>t<EFBFBD> intellectuelle du Titulaire et/ou
|
||||
des Contributeurs.
|
||||
|
||||
|
||||
Article 7 - SERVICES ASSOCIES
|
||||
|
||||
7.1 Le Contrat n'oblige en aucun cas le Conc<6E>dant <20> la r<>alisation de
|
||||
prestations d'assistance technique ou de maintenance du Logiciel.
|
||||
|
||||
Cependant le Conc<6E>dant reste libre de proposer ce type de services. Les
|
||||
termes et conditions d'une telle assistance technique et/ou d'une telle
|
||||
maintenance seront alors d<>termin<69>s dans un acte s<>par<61>. Ces actes de
|
||||
maintenance et/ou assistance technique n'engageront que la seule
|
||||
responsabilit<EFBFBD> du Conc<6E>dant qui les propose.
|
||||
|
||||
7.2 De m<>me, tout Conc<6E>dant est libre de proposer, sous sa seule
|
||||
responsabilit<EFBFBD>, <20> ses licenci<63>s une garantie, qui n'engagera que lui,
|
||||
lors de la redistribution du Logiciel et/ou du Logiciel Modifi<66> et ce,
|
||||
dans les conditions qu'il souhaite. Cette garantie et les modalit<69>s
|
||||
financi<EFBFBD>res de son application feront l'objet d'un acte s<>par<61> entre le
|
||||
Conc<EFBFBD>dant et le Licenci<63>.
|
||||
|
||||
|
||||
Article 8 - RESPONSABILITE
|
||||
|
||||
8.1 Sous r<>serve des dispositions de l'article 8.2, le Licenci<63> a la
|
||||
facult<EFBFBD>, sous r<>serve de prouver la faute du Conc<6E>dant concern<72>, de
|
||||
solliciter la r<>paration du pr<70>judice direct qu'il subirait du fait du
|
||||
Logiciel et dont il apportera la preuve.
|
||||
|
||||
8.2 La responsabilit<69> du Conc<6E>dant est limit<69>e aux engagements pris en
|
||||
application du Contrat et ne saurait <20>tre engag<61>e en raison notamment:
|
||||
(i) des dommages dus <20> l'inex<65>cution, totale ou partielle, de ses
|
||||
obligations par le Licenci<63>, (ii) des dommages directs ou indirects
|
||||
d<EFBFBD>coulant de l'utilisation ou des performances du Logiciel subis par le
|
||||
Licenci<EFBFBD> et (iii) plus g<>n<EFBFBD>ralement d'un quelconque dommage indirect. En
|
||||
particulier, les Parties conviennent express<73>ment que tout pr<70>judice
|
||||
financier ou commercial (par exemple perte de donn<6E>es, perte de
|
||||
b<EFBFBD>n<EFBFBD>fices, perte d'exploitation, perte de client<6E>le ou de commandes,
|
||||
manque <20> gagner, trouble commercial quelconque) ou toute action dirig<69>e
|
||||
contre le Licenci<63> par un tiers, constitue un dommage indirect et
|
||||
n'ouvre pas droit <20> r<>paration par le Conc<6E>dant.
|
||||
|
||||
|
||||
Article 9 - GARANTIE
|
||||
|
||||
9.1 Le Licenci<63> reconna<6E>t que l'<27>tat actuel des connaissances
|
||||
scientifiques et techniques au moment de la mise en circulation du
|
||||
Logiciel ne permet pas d'en tester et d'en v<>rifier toutes les
|
||||
utilisations ni de d<>tecter l'existence d'<27>ventuels d<>fauts. L'attention
|
||||
du Licenci<63> a <20>t<EFBFBD> attir<69>e sur ce point sur les risques associ<63>s au
|
||||
chargement, <20> l'utilisation, la modification et/ou au d<>veloppement et <20>
|
||||
la reproduction du Logiciel qui sont r<>serv<72>s <20> des utilisateurs avertis.
|
||||
|
||||
Il rel<65>ve de la responsabilit<69> du Licenci<63> de contr<74>ler, par tous
|
||||
moyens, l'ad<61>quation du produit <20> ses besoins, son bon fonctionnement et
|
||||
de s'assurer qu'il ne causera pas de dommages aux personnes et aux biens.
|
||||
|
||||
9.2 Le Conc<6E>dant d<>clare de bonne foi <20>tre en droit de conc<6E>der
|
||||
l'ensemble des droits attach<63>s au Logiciel (comprenant notamment les
|
||||
droits vis<69>s <20> l'article 5).
|
||||
|
||||
9.3 Le Licenci<63> reconna<6E>t que le Logiciel est fourni "en l'<27>tat" par le
|
||||
Conc<EFBFBD>dant sans autre garantie, expresse ou tacite, que celle pr<70>vue <20>
|
||||
l'article 9.2 et notamment sans aucune garantie sur sa valeur commerciale,
|
||||
son caract<63>re s<>curis<69>, innovant ou pertinent.
|
||||
|
||||
En particulier, le Conc<6E>dant ne garantit pas que le Logiciel est exempt
|
||||
d'erreur, qu'il fonctionnera sans interruption, qu'il sera compatible
|
||||
avec l'<27>quipement du Licenci<63> et sa configuration logicielle ni qu'il
|
||||
remplira les besoins du Licenci<63>.
|
||||
|
||||
9.4 Le Conc<6E>dant ne garantit pas, de mani<6E>re expresse ou tacite, que le
|
||||
Logiciel ne porte pas atteinte <20> un quelconque droit de propri<72>t<EFBFBD>
|
||||
intellectuelle d'un tiers portant sur un brevet, un logiciel ou sur tout
|
||||
autre droit de propri<72>t<EFBFBD>. Ainsi, le Conc<6E>dant exclut toute garantie au
|
||||
profit du Licenci<63> contre les actions en contrefa<66>on qui pourraient <20>tre
|
||||
diligent<EFBFBD>es au titre de l'utilisation, de la modification, et de la
|
||||
redistribution du Logiciel. N<>anmoins, si de telles actions sont
|
||||
exerc<EFBFBD>es contre le Licenci<63>, le Conc<6E>dant lui apportera son aide
|
||||
technique et juridique pour sa d<>fense. Cette aide technique et
|
||||
juridique est d<>termin<69>e au cas par cas entre le Conc<6E>dant concern<72> et
|
||||
le Licenci<63> dans le cadre d'un protocole d'accord. Le Conc<6E>dant d<>gage
|
||||
toute responsabilit<69> quant <20> l'utilisation de la d<>nomination du
|
||||
Logiciel par le Licenci<63>. Aucune garantie n'est apport<72>e quant <20>
|
||||
l'existence de droits ant<6E>rieurs sur le nom du Logiciel et sur
|
||||
l'existence d'une marque.
|
||||
|
||||
|
||||
Article 10 - RESILIATION
|
||||
|
||||
10.1 En cas de manquement par le Licenci<63> aux obligations mises <20> sa
|
||||
charge par le Contrat, le Conc<6E>dant pourra r<>silier de plein droit le
|
||||
Contrat trente (30) jours apr<70>s notification adress<73>e au Licenci<63> et
|
||||
rest<EFBFBD>e sans effet.
|
||||
|
||||
10.2 Le Licenci<63> dont le Contrat est r<>sili<6C> n'est plus autoris<69> <20>
|
||||
utiliser, modifier ou distribuer le Logiciel. Cependant, toutes les
|
||||
licences qu'il aura conc<6E>d<EFBFBD>es ant<6E>rieurement <20> la r<>siliation du Contrat
|
||||
resteront valides sous r<>serve qu'elles aient <20>t<EFBFBD> effectu<74>es en
|
||||
conformit<EFBFBD> avec le Contrat.
|
||||
|
||||
|
||||
Article 11 - DISPOSITIONS DIVERSES
|
||||
|
||||
|
||||
11.1 CAUSE EXTERIEURE
|
||||
|
||||
Aucune des Parties ne sera responsable d'un retard ou d'une d<>faillance
|
||||
d'ex<65>cution du Contrat qui serait d<> <20> un cas de force majeure, un cas
|
||||
fortuit ou une cause ext<78>rieure, telle que, notamment, le mauvais
|
||||
fonctionnement ou les interruptions du r<>seau <20>lectrique ou de
|
||||
t<EFBFBD>l<EFBFBD>communication, la paralysie du r<>seau li<6C>e <20> une attaque
|
||||
informatique, l'intervention des autorit<69>s gouvernementales, les
|
||||
catastrophes naturelles, les d<>g<EFBFBD>ts des eaux, les tremblements de terre,
|
||||
le feu, les explosions, les gr<67>ves et les conflits sociaux, l'<27>tat de
|
||||
guerre...
|
||||
|
||||
11.2 Le fait, par l'une ou l'autre des Parties, d'omettre en une ou
|
||||
plusieurs occasions de se pr<70>valoir d'une ou plusieurs dispositions du
|
||||
Contrat, ne pourra en aucun cas impliquer renonciation par la Partie
|
||||
int<EFBFBD>ress<EFBFBD>e <20> s'en pr<70>valoir ult<6C>rieurement.
|
||||
|
||||
11.3 Le Contrat annule et remplace toute convention ant<6E>rieure, <20>crite
|
||||
ou orale, entre les Parties sur le m<>me objet et constitue l'accord
|
||||
entier entre les Parties sur cet objet. Aucune addition ou modification
|
||||
aux termes du Contrat n'aura d'effet <20> l'<27>gard des Parties <20> moins
|
||||
d'<27>tre faite par <20>crit et sign<67>e par leurs repr<70>sentants d<>ment habilit<69>s.
|
||||
|
||||
11.4 Dans l'hypoth<74>se o<> une ou plusieurs des dispositions du Contrat
|
||||
s'av<61>rerait contraire <20> une loi ou <20> un texte applicable, existants ou
|
||||
futurs, cette loi ou ce texte pr<70>vaudrait, et les Parties feraient les
|
||||
amendements n<>cessaires pour se conformer <20> cette loi ou <20> ce texte.
|
||||
Toutes les autres dispositions resteront en vigueur. De m<>me, la
|
||||
nullit<EFBFBD>, pour quelque raison que ce soit, d'une des dispositions du
|
||||
Contrat ne saurait entra<72>ner la nullit<69> de l'ensemble du Contrat.
|
||||
|
||||
|
||||
11.5 LANGUE
|
||||
|
||||
Le Contrat est r<>dig<69> en langue fran<61>aise et en langue anglaise, ces
|
||||
deux versions faisant <20>galement foi.
|
||||
|
||||
|
||||
Article 12 - NOUVELLES VERSIONS DU CONTRAT
|
||||
|
||||
12.1 Toute personne est autoris<69>e <20> copier et distribuer des copies de
|
||||
ce Contrat.
|
||||
|
||||
12.2 Afin d'en pr<70>server la coh<6F>rence, le texte du Contrat est prot<6F>g<EFBFBD>
|
||||
et ne peut <20>tre modifi<66> que par les auteurs de la licence, lesquels se
|
||||
r<EFBFBD>servent le droit de publier p<>riodiquement des mises <20> jour ou de
|
||||
nouvelles versions du Contrat, qui poss<73>deront chacune un num<75>ro
|
||||
distinct. Ces versions ult<6C>rieures seront susceptibles de prendre en
|
||||
compte de nouvelles probl<62>matiques rencontr<74>es par les logiciels libres.
|
||||
|
||||
12.3 Tout Logiciel diffus<75> sous une version donn<6E>e du Contrat ne pourra
|
||||
faire l'objet d'une diffusion ult<6C>rieure que sous la m<>me version du
|
||||
Contrat ou une version post<73>rieure, sous r<>serve des dispositions de
|
||||
l'article 5.3.4.
|
||||
|
||||
|
||||
Article 13 - LOI APPLICABLE ET COMPETENCE TERRITORIALE
|
||||
|
||||
13.1 Le Contrat est régi par la loi française. Les Parties conviennent
|
||||
de tenter de régler à l'amiable les différends ou litiges qui
|
||||
viendraient à se produire par suite ou à l'occasion du Contrat.
|
||||
|
||||
13.2 A défaut d'accord amiable dans un délai de deux (2) mois à compter
|
||||
de leur survenance et sauf situation relevant d'une procédure d'urgence,
|
||||
les différends ou litiges seront portés par la Partie la plus diligente
|
||||
devant les Tribunaux compétents de Paris.
|
||||
|
||||
|
||||
Version 2.0 du 2006-09-05.
|
1389
ahoc_metazoas.gv
Normal file
1389
ahoc_metazoas.gv
Normal file
File diff suppressed because it is too large
Load Diff
15
ecoPrimerCommands
Normal file
15
ecoPrimerCommands
Normal file
@ -0,0 +1,15 @@
|
||||
|
||||
|
||||
./ecoPrimer -d /Groups/Barcode-Leca/eubacteria-gr -l 10 -L 1000 -e 3 > euBactResults.txt
|
||||
./ecoPrimer -d ChloroDB/chloroplast -l 5 -L 120 -r 58023 -e 3 > chloroVascularPlantsEric.txt
|
||||
./ecoPrimer -d /Users/tiayyba/Documents/Data/mitochondrion/mitochondrion -q 0.4 -s 0.5 -l 10 -L 60 -r 1 -i 1 -T 0.2 -p > setsRoot.txt
|
||||
|
||||
|
||||
./ecoPCR -d /Users/tiayyba/Documents/workspace/ecoPrimers/src/mitochondrion/mitochondrion -l 50 -L 120 -r 7742 TAGAACAGGCTCCTCTAG TTAGATACCCCACTATGC > 12SV5.ecoPCR
|
||||
ecoTaxSpecificity -d /Users/tiayyba/Documents/workspace/ecoPrimers/src/mitochondrion/mitochondrion /Users/tiayyba/Documents/workspace/ecoPCR/src/12SV5.ecoPCR
|
||||
|
||||
|
||||
|
||||
149 ./ecoPrimer -d /Users/tiayyba/Documents/Data/mitochondrion/mitochondrion -r 40674 -E 9606 -l 10 -L 100 > MamalsNotHomoSapien.primers
|
||||
150 ./ecoPrimer -d /Users/tiayyba/Documents/Data/mitochondrion/mitochondrion -q 0.4 -s 0.5 -r 40674 -E 9606 -l 10 -L 100 > MamalsNotHomoSapien.primers
|
||||
151 ./ecoPrimer -d /Users/tiayyba/Documents/Data/mitochondrion/mitochondrion -q 0.5 -s 0.6 -r 40674 -E 9606 -l 10 -L 100 > MamalsNotHomoSapien1.primers
|
BIN
src copy.zip
Normal file
BIN
src copy.zip
Normal file
Binary file not shown.
250
src/.Rhistory
Normal file
250
src/.Rhistory
Normal file
@ -0,0 +1,250 @@
|
||||
plot(s$seq, s$size, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(400, 0.7e+09, "nSeq = 273")
|
||||
plot(t$seq, t$size, xlab="Sequence Count", ylab="Memory used[B]", main="b. memory used vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 700, "nSeq = 273")
|
||||
plot(s$seq, s$time, xlab="Sequence Count", ylab="Time[s]", main="c. time vs sequence count withount data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 300, "nSeq = 273")
|
||||
plot(t$seq, t$time, xlab="Sequence Count", ylab="Time[s]", main="d. time vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(350, 20, "nSeq = 273")
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$seq, s$size, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(400, 0.6e+09, "nSeq = 273")
|
||||
plot(t$seq, t$size, xlab="Sequence Count", ylab="Memory used[B]", main="b. memory used vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(400, 600, "nSeq = 273")
|
||||
plot(s$seq, s$time, xlab="Sequence Count", ylab="Time[s]", main="c. time vs sequence count withount data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(400, 300, "nSeq = 273")
|
||||
plot(t$seq, t$time, xlab="Sequence Count", ylab="Time[s]", main="d. time vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(400, 15, "nSeq = 273")
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$seq, s$size, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(400, 0.6e+09, "Nseq = 273")
|
||||
plot(t$seq, t$size, xlab="Sequence Count", ylab="Memory used[B]", main="b. memory used vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(400, 600, "Nseq = 273")
|
||||
plot(s$seq, s$time, xlab="Sequence Count", ylab="Time[s]", main="c. time vs sequence count withount data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(400, 300, "Nseq = 273")
|
||||
plot(t$seq, t$time, xlab="Sequence Count", ylab="Time[s]", main="d. time vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(400, 15, "Nseq = 273")
|
||||
s = read.table('/Users/tiayyba/Desktop/euBact/ecoprimer_71493.log', header= T)
|
||||
t = read.table('/Users/tiayyba/Desktop/euBact/ecoprimer_84784.log', header= T)
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$seq, s$size, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
plot(s$seq, s$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 3e+09, "nSeq = 273")
|
||||
plot(t$seq, t$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="b. memory used vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 1150, "nSeq = 273")
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$seq, s$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 3e+09, "nSeq = 273")
|
||||
plot(t$seq, t$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="b. memory used vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 1150, "nSeq = 273")
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$seq, s$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 1.7e+09, "nSeq = 273")
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$seq, s$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 1.4e+09, "nSeq = 273")
|
||||
plot(t$seq, t$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="b. memory used vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 500, "nSeq = 273")
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$seq, s$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 1.4e+09, "nSeq = 273")
|
||||
plot(t$seq, t$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="b. memory used vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 500, "nSeq = 273")
|
||||
text(450, 1000, "nSeq = 273")
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$seq, s$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="a. memory used vs sequence count without data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 1.3e+09, "nSeq = 273")
|
||||
plot(t$seq, t$size*1.5, xlab="Sequence Count", ylab="Memory used[B]", main="b. memory used vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 1000, "nSeq = 273")
|
||||
plot(s$seq, s$time, xlab="Sequence Count", ylab="Time[s]", main="c. time vs sequence count withount data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 700, "nSeq = 273")
|
||||
plot(t$seq, t$time, xlab="Sequence Count", ylab="Time[s]", main="d. time vs sequence count with data mining")
|
||||
abline(v = 273, col = "Blue")
|
||||
text(450, 30, "nSeq = 273")
|
||||
s = read.table('/Users/tiayyba/Desktop/UsedistRef1.txt', header = T)
|
||||
s
|
||||
plot(s$Distance, s$Count)
|
||||
plot(s$Distance, log(s$Count))
|
||||
plot(log(s$Distance), log(s$Count))
|
||||
plot(s$Distance, log(s$Count))
|
||||
s = read.table('/Users/tiayyba/Desktop/UU_results/UU_F83/plots/F83_distRef1.old.txt', header = T)
|
||||
plot(s$Distance, log(s$Count))
|
||||
s = read.table('/Users/tiayyba/Desktop/UsedistRef1.txt', header = T)
|
||||
plot(s$Distance*100, log(s$Count))
|
||||
s = read.table('/Users/tiayyba/Desktop/UU_results/UU_F83/plots/F83_distRef1.old.txt', header = T)
|
||||
plot(s$Distance, log(s$Count))
|
||||
s = read.table('/Users/tiayyba/Desktop/UsedistRef1.txt', header = T)
|
||||
plot(s$Distance, log(s$Count))
|
||||
s = read.table('/Users/tiayyba/Desktop/UU_results/UU_F83/plots/F83_distRef1.old.txt', header = T)
|
||||
plot(s$Distance, log(s$Count))
|
||||
s = read.table('/Users/tiayyba/Desktop/UsedistRef1.txt', header = T)
|
||||
plot(s$Distance, log(s$Count))
|
||||
plot(s$Distance*1000, log(s$Count))
|
||||
s = read.table('/Users/tiayyba/Desktop/UsedistRef1.txt', header = T)
|
||||
plot(s$Distance*1000, log(s$Count))
|
||||
u = read.table('/Users/tiayyba/Desktop/UU_FdistRef2.txt', header = T)
|
||||
u
|
||||
max(u$Count)
|
||||
max(u$Distance)
|
||||
plot(u$Distance, u$Count)
|
||||
plot(log(u$Distance), log(u$Count))
|
||||
plot(u$Distance, u$Count, log(xy))
|
||||
plot(u$Distance, u$Count, log=xy)
|
||||
plot(u$Distance, u$Count, log='xy')
|
||||
plot(u$Distance+1, u$Count, log='xy')
|
||||
plot(u$Distance, u$Count, log='xy')
|
||||
?plot
|
||||
s = read.table('/Users/tiayyba/Desktop/UU_FdistRef1.txt', header = T)
|
||||
plot(s$Distance, s$Count)
|
||||
plot(log(s$Distance), log(s$Count))
|
||||
plot(s$Distance, s$Count, log='xy')
|
||||
plot(s$Distance, log(s$Count))
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_distRef1.old.txt', header = T)
|
||||
s
|
||||
sNew = s[order(s$Count),]
|
||||
sNew
|
||||
plot(s$Distance, s$Count)
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_distRef1.old.txt', header = T)
|
||||
t = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_distRef2.old.txt', header = T)
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_distRef2.old.txt', header = T)
|
||||
u = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_distRef1.old.txt', header = T)
|
||||
t = data.frame(count=s$Count,uu=u$Distance,cs=s$Distance,col=(s$Distance < u$Distance)+1)
|
||||
plot(s$Distance,s$Count,log='xy',col=t$col)
|
||||
par(mfrow=c(1,2))
|
||||
plot(s$Distance+1,s$Count,log='xy',col=t$col)
|
||||
plot(u$Distance+1,u$Count,log='xy',col=t$col)
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_distRef2.old.txt', header = T)
|
||||
u = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_distRef1.old.txt', header = T)
|
||||
t = data.frame(count=s$Count,uu=u$Distance,cs=s$Distance,col=(s$Distance < u$Distance)+1)
|
||||
plot(u$Distance+1,u$Count,log='xy',col=t$col)
|
||||
par(mfrow=c(1,2))
|
||||
plot(s$Distance+1,s$Count,log='xy',col=t$col)
|
||||
plot(u$Distance+1,u$Count,log='xy',col=t$col)
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F82/plots/F82_distRef1.txt', header = F)
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F82/plots/F82_distRef1.txt', header = T)
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F82/plots/F82_distRef2.txt', header = T)
|
||||
u = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F82/plots/F82_distRef1.txt', header = T)
|
||||
t = data.frame(count=s$Count,uu=u$Distance,cs=s$Distance,col=(s$Distance < u$Distance)+1)
|
||||
t = data.frame(count=u$Count,uu=u$Distance,cs=s$Distance,col=(s$Distance < u$Distance)+1)
|
||||
s = read.table('/Users/tiayyba/Desktop/abc'header = F)
|
||||
s = read.table('/Users/tiayyba/Desktop/abc', header = F)
|
||||
plot(s)
|
||||
s
|
||||
s = read.table('/Users/tiayyba/Desktop/abc', header = F)
|
||||
s
|
||||
plot(s$V2, s$V1)
|
||||
plot(s$V1, s$V2)
|
||||
plot(s$V1, s$V2, xlab="sequence number", ylab="sequence count")
|
||||
s = read.table('/Users/tiayyba/Desktop/abc', header = F)
|
||||
s
|
||||
s = read.table('/Users/tiayyba/Desktop/abc', header = F)
|
||||
s
|
||||
Sr tacg gcta ctag actg
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$V1, s$V2, xlab="sequence number", ylab="sequence count", main = "sample: tacg")
|
||||
plot(s$V1, s$V3, xlab="sequence number", ylab="sequence count", main = "sample: gcta")
|
||||
plot(s$V1, s$V4, xlab="sequence number", ylab="sequence count", main = "sample: ctag")
|
||||
plot(s$V1, s$V5, xlab="sequence number", ylab="sequence count", main = "sample: actg")
|
||||
s = read.table('/Users/tiayyba/Desktop/abc', header = F)
|
||||
s
|
||||
plot(s$V1, s$V2, xlab="sequence number", ylab="sequence count", main = "sample: Ranunculus_acris")
|
||||
Sr tacg gcta ctag actg
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$V1, s$V2, xlab="sequence number", ylab="sequence count", main = "sample: Ranunculus_acris")
|
||||
plot(s$V1, s$V3, xlab="sequence number", ylab="sequence count", main = "sample: Luzula_sudetica")
|
||||
plot(s$V1, s$V4, xlab="sequence number", ylab="sequence count", main = "sample: Deschampsia_cespitosa")
|
||||
plot(s$V1, s$V5, xlab="sequence number", ylab="sequence count", main = "sample: Cardamine_pratensis_paludosa")
|
||||
s = read.table('/Users/tiayyba/Desktop/abc', header = F)
|
||||
par(mfrow=c(2,2))
|
||||
plot(s$V1, s$V2, xlab="sequence number", ylab="sequence count", main = "sample 1: Ranunculus_acris")
|
||||
plot(s$V1, s$V3, xlab="sequence number", ylab="sequence count", main = "sample 2: Luzula_sudetica")
|
||||
plot(s$V1, s$V4, xlab="sequence number", ylab="sequence count", main = "sample 3: Deschampsia_cespitosa")
|
||||
plot(s$V1, s$V5, xlab="sequence number", ylab="sequence count", main = "sample 4: Cardamine_pratensis_paludosa")
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_dist.txt', header = F)
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_dist.txt', header = F, row.names = 1)
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_dist.txt', header = F, row.names=1)
|
||||
s = read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_dist.txt',row.names=1)
|
||||
s
|
||||
plot(s$count, s$uu)
|
||||
plot(s$uu, s$count)
|
||||
plot(s$uu, s$count, col = s$col)
|
||||
plot(s$cs, s$count, col = s$col)
|
||||
plot(s$cs, s$count, col = s$col, col = (s$uu < s$cs) +1)
|
||||
plot(s$uu, s$count)
|
||||
plot(log(s$uu), log(s$count))
|
||||
plot(s$uu, log(s$count))
|
||||
plot(log(s$uu), log(s$count))
|
||||
plot(s$uu, s$count)
|
||||
plot(s$uu, log(s$count))
|
||||
plot(s$uu, log(s$count), xlab = "distance from Uncia uncia", ylab = "log(SequenceCount)")
|
||||
= read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_dist.txt', header = T)
|
||||
s= read.table('/Users/tiayyba/Desktop/ErrModel/Borneo/UnciaUncia/UU_F83/plots/F83_dist.txt', header = T)
|
||||
s
|
||||
par(mfrow = c(1,2))
|
||||
plot(s$cs, s$count, col = s$col)
|
||||
plot(s$cs, s$count,log = 'xy', col = s$col)
|
||||
par(mfrow = c(1,2))
|
||||
plot(s$cs, s$count,log = 'xy', col = s$col)
|
||||
plot(s$uu, s$count,log = 'xy', col = s$col)
|
||||
score = (0.407)*0.890*exp(-sqrt(692.33/1.247))
|
||||
score
|
||||
score1 = (0.511)*0.904*exp(-sqrt(481/148.5))
|
||||
score1
|
||||
score1 = sqrt(481/148.5)
|
||||
score1
|
||||
score = sqrt(692.33/1.247)
|
||||
score
|
||||
score = (0.407)^2*0.890*sqrt(692.33/1.247)
|
||||
score
|
||||
score1 = (0.511)^2*0.904*sqrt(481/148.5)
|
||||
score1
|
||||
score = (0.407)*0.890*sqrt(692.33/1.247)
|
||||
score
|
||||
score1 = (0.511)*0.904*sqrt(481/148.5)
|
||||
score1
|
||||
score = (0.407)*0.890*692.33/sqrt(1.247)
|
||||
score
|
||||
score1 = (0.511)*0.904*481/sqrt(148.5)
|
||||
score1
|
||||
sum = 714 + 710 + 699
|
||||
sum
|
||||
2123/3
|
||||
1760+3210+4950+2090
|
||||
12010+3300+220
|
||||
4400+385+55+495+715+800+450+2200+1540+1210+550
|
||||
944 - 220
|
||||
724 - 40
|
||||
684 -10
|
||||
364-278
|
||||
944-657
|
||||
250+220
|
||||
7*9
|
||||
q
|
||||
quit
|
||||
s = read.table('metazoas_Mdyn_T50.gv')
|
||||
s = read.table('metazoas_Mdyn_T50.gv.hist.gv')
|
||||
t = read.table('test.gv')
|
BIN
src/Documents/CalculTM.xls
Normal file
BIN
src/Documents/CalculTM.xls
Normal file
Binary file not shown.
78
src/Makefile
Normal file
78
src/Makefile
Normal file
@ -0,0 +1,78 @@
|
||||
EXEC=ecoPrimers
|
||||
|
||||
PRIMER_SRC= ecoprimer.c
|
||||
PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC))
|
||||
|
||||
|
||||
SRCS= $(PRIMER_SRC)
|
||||
|
||||
LIB= -lecoprimer -lecoPCR -lthermo -lz -lm
|
||||
|
||||
LIBFILE= libecoPCR/libecoPCR.a \
|
||||
libecoprimer/libecoprimer.a \
|
||||
libthermo/libthermo.a \
|
||||
|
||||
|
||||
|
||||
include global.mk
|
||||
|
||||
all: $(EXEC)
|
||||
|
||||
|
||||
########
|
||||
#
|
||||
# ecoPrimer compilation
|
||||
#
|
||||
########
|
||||
|
||||
# executable compilation and link
|
||||
|
||||
ecoPrimers: $(PRIMER_OBJ) $(LIBFILE)
|
||||
$(CC) -g $(LDFLAGS) -O5 -m64 -o $@ $< $(LIBPATH) $(LIB)
|
||||
|
||||
|
||||
########
|
||||
#
|
||||
# library compilation
|
||||
#
|
||||
########
|
||||
|
||||
libecoPCR/libecoPCR.a:
|
||||
$(MAKE) -C libecoPCR
|
||||
|
||||
libecoprimer/libecoprimer.a:
|
||||
$(MAKE) -C libecoprimer
|
||||
|
||||
libthermo/libthermo.a:
|
||||
$(MAKE) -C libthermo
|
||||
|
||||
########
|
||||
#
|
||||
# project management
|
||||
#
|
||||
########
|
||||
|
||||
clean:
|
||||
rm -f *.o
|
||||
rm -f $(EXEC)
|
||||
$(MAKE) -C libecoPCR clean
|
||||
$(MAKE) -C libecoprimer clean
|
||||
$(MAKE) -C libthermo clean
|
||||
|
||||
|
||||
|
||||
########
|
||||
#
|
||||
# clean for k2 to remove .o and .P files
|
||||
#
|
||||
########
|
||||
|
||||
k2clean:
|
||||
rm -f *.o
|
||||
rm -f *.P
|
||||
rm -f libecoPCR/*.o
|
||||
rm -f libecoPCR/*.P
|
||||
rm -f libecoprimer/*.o
|
||||
rm -f libecoprimer/*.P
|
||||
rm -f libthermo/*.o
|
||||
rm -f libthermo/*.P
|
BIN
src/ecoPrimers
Executable file
BIN
src/ecoPrimers
Executable file
Binary file not shown.
32
src/ecoprimer.P
Normal file
32
src/ecoprimer.P
Normal file
@ -0,0 +1,32 @@
|
||||
ecoprimer.o ecoprimer.P : ecoprimer.c libecoprimer/ecoprimer.h /usr/include/inttypes.h \
|
||||
/usr/include/sys/cdefs.h /usr/include/_types.h \
|
||||
/usr/include/sys/_types.h /usr/include/machine/_types.h \
|
||||
/usr/include/i386/_types.h \
|
||||
/usr/lib/gcc/i686-apple-darwin10/4.2.1/include/stdint.h \
|
||||
/usr/include/stdlib.h /usr/include/Availability.h \
|
||||
/usr/include/AvailabilityInternal.h /usr/include/sys/wait.h \
|
||||
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
|
||||
/usr/include/machine/signal.h /usr/include/i386/signal.h \
|
||||
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
|
||||
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
|
||||
/usr/include/sys/resource.h /usr/include/machine/endian.h \
|
||||
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
|
||||
/usr/include/libkern/_OSByteOrder.h \
|
||||
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
|
||||
/usr/include/machine/types.h /usr/include/i386/types.h \
|
||||
/usr/include/i386/_types.h /usr/include/stdio.h \
|
||||
/usr/include/secure/_stdio.h /usr/include/secure/_common.h \
|
||||
libecoprimer/ecotype.h libecoprimer/../libecoPCR/ecoPCR.h \
|
||||
libecoprimer/../libthermo/nnparams.h /usr/include/math.h \
|
||||
/usr/include/architecture/i386/math.h /usr/include/string.h \
|
||||
/usr/include/secure/_string.h libecoprimer/apat.h \
|
||||
libecoprimer/libstki.h libecoprimer/debug.h libecoprimer/PrimerSets.h \
|
||||
libecoprimer/ecoprimer.h libecoprimer/ahocorasick.h \
|
||||
/usr/include/ctype.h /usr/include/runetype.h /usr/include/getopt.h \
|
||||
/usr/include/unistd.h /usr/include/sys/unistd.h \
|
||||
/usr/include/sys/select.h /usr/include/sys/_select.h \
|
||||
/usr/include/time.h /usr/include/_structs.h /usr/include/sys/time.h \
|
||||
/usr/include/dlfcn.h \
|
||||
/usr/lib/gcc/i686-apple-darwin10/4.2.1/include/stdbool.h \
|
||||
/usr/include/AvailabilityMacros.h libthermo/nnparams.h \
|
||||
libthermo/thermostats.h libthermo/../libecoprimer/ecoprimer.h
|
1019
src/ecoprimer.c
Executable file
1019
src/ecoprimer.c
Executable file
File diff suppressed because it is too large
Load Diff
21
src/global.mk
Normal file
21
src/global.mk
Normal file
@ -0,0 +1,21 @@
|
||||
MACHINE=MAC_OS_X
|
||||
LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer -Llibthermo
|
||||
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
|
||||
|
||||
CC=gcc
|
||||
CFLAGS= -W -Wall -m64 -g
|
||||
#CFLAGS= -W -Wall -O5 -m64 -g
|
||||
#CFLAGS= -W -Wall -O0 -m64 -g
|
||||
#CFLAGS= -W -Wall -O5 -fast -g
|
||||
|
||||
default: all
|
||||
|
||||
%.o: %.c
|
||||
$(CC) -D$(MACHINE) $(CFLAGS) -c -o $@ $<
|
||||
|
||||
%.P : %.c
|
||||
$(MAKEDEPEND)
|
||||
@sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' < $*.d > $@; \
|
||||
rm -f $*.d; [ -s $@ ] || rm -f $@
|
||||
|
||||
include $(SRCS:.c=.P)
|
30
src/libecoPCR/Makefile
Normal file
30
src/libecoPCR/Makefile
Normal file
@ -0,0 +1,30 @@
|
||||
|
||||
SOURCES = ecodna.c \
|
||||
ecoError.c \
|
||||
ecoIOUtils.c \
|
||||
ecoMalloc.c \
|
||||
ecorank.c \
|
||||
ecoseq.c \
|
||||
ecotax.c \
|
||||
ecofilter.c \
|
||||
econame.c
|
||||
|
||||
SRCS=$(SOURCES)
|
||||
|
||||
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
|
||||
|
||||
LIBFILE= libecoPCR.a
|
||||
RANLIB= ranlib
|
||||
|
||||
|
||||
include ../global.mk
|
||||
|
||||
|
||||
all: $(LIBFILE)
|
||||
|
||||
clean:
|
||||
rm -rf $(OBJECTS) $(LIBFILE)
|
||||
|
||||
$(LIBFILE): $(OBJECTS)
|
||||
ar -cr $@ $?
|
||||
$(RANLIB) $@
|
26
src/libecoPCR/ecoError.c
Normal file
26
src/libecoPCR/ecoError.c
Normal file
@ -0,0 +1,26 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
 * Report a fatal error on stderr and terminate the program.
 *
 * The report has the form
 *     Error <error> in file <filename> line <linenumber> : <message>
 * and the process is then ended with abort(), so this function never
 * returns to its caller.
 *
 * @param error       error number
 * @param message     the text explaining what went wrong
 * @param filename    the source file where the failure was detected
 * @param linenumber  the source line where the failure was detected
 *
 * filename and linenumber are normally filled in at pre-processing time
 * by a wrapper macro rather than passed by hand.
 */
void ecoError(int32_t error,
              const char* message,
              const char * filename,
              int linenumber)
{
    fprintf(stderr, "Error %d in file %s line %d : %s\n", error, filename, linenumber, message);

    abort();
}
|
122
src/libecoPCR/ecoIOUtils.c
Normal file
122
src/libecoPCR/ecoIOUtils.c
Normal file
@ -0,0 +1,122 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \
|
||||
(((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF))
|
||||
|
||||
|
||||
/*
 * Byte-order probe used to decide whether 32-bit values read from disk
 * have to be byte-swapped.
 *
 * Returns the lowest-addressed byte of the integer 1.
 *
 * NOTE(review): despite the name, that value is 1 on a little-endian host
 * and 0 on a big-endian one. Callers swap when the result is non-zero, so
 * "fixing" the return value to match the name would change which hosts
 * swap. Confirm the on-disk byte order before touching this.
 */
int32_t is_big_endian()
{
    const int32_t probe = 1;
    const char   *lowest_byte = (const char *)&probe;

    return (int32_t)*lowest_byte;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Reverse the byte order of a 32-bit integer
 * (byte 0 <-> byte 3, byte 1 <-> byte 2).
 *
 * @param i  value to convert
 * @return   the same four bytes in the opposite order
 */
int32_t swap_int32_t(int32_t i)
{
    /* Shift the unsigned representation: left-shifting a signed value
     * into or past the sign bit is undefined behaviour in C. */
    const uint32_t u = (uint32_t)i;

    const uint32_t swapped = ((u << 24) & 0xFF000000u)
                           | ((u <<  8) & 0x00FF0000u)
                           | ((u >>  8) & 0x0000FF00u)
                           | ((u >> 24) & 0x000000FFu);

    return (int32_t)swapped;
}
|
||||
|
||||
|
||||
/**
|
||||
* Read part of the file
|
||||
* @param *f the database
|
||||
* @param recordSize the size to be read
|
||||
*
|
||||
* @return buffer
|
||||
*/
|
||||
void *read_ecorecord(FILE *f,int32_t *recordSize)
|
||||
{
|
||||
static void *buffer =NULL;
|
||||
int32_t buffersize=0;
|
||||
int32_t read;
|
||||
|
||||
if (!recordSize)
|
||||
ECOERROR(ECO_ASSERT_ERROR,
|
||||
"recordSize cannot be NULL");
|
||||
|
||||
read = fread(recordSize,
|
||||
1,
|
||||
sizeof(int32_t),
|
||||
f);
|
||||
|
||||
if (feof(f))
|
||||
return NULL;
|
||||
|
||||
if (read != sizeof(int32_t))
|
||||
ECOERROR(ECO_IO_ERROR,"Reading record size error");
|
||||
|
||||
if (is_big_endian())
|
||||
*recordSize=swap_int32_t(*recordSize);
|
||||
|
||||
if (buffersize < *recordSize)
|
||||
{
|
||||
if (buffer)
|
||||
buffer = ECOREALLOC(buffer,*recordSize,
|
||||
"Increase size of record buffer");
|
||||
else
|
||||
buffer = ECOMALLOC(*recordSize,
|
||||
"Allocate record buffer");
|
||||
}
|
||||
|
||||
read = fread(buffer,
|
||||
1,
|
||||
*recordSize,
|
||||
f);
|
||||
|
||||
if (read != *recordSize)
|
||||
ECOERROR(ECO_IO_ERROR,"Reading record data error");
|
||||
|
||||
return buffer;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Open the database and check it's readable
|
||||
* @param filename name of the database (.sdx, .rdx, .tbx)
|
||||
* @param sequencecount buffer - pointer to variable storing the number of occurence
|
||||
* @param abort_on_open_error boolean to define the behaviour in case of error
|
||||
* while opening the database
|
||||
* @return FILE type
|
||||
**/
|
||||
FILE *open_ecorecorddb(const char *filename,
|
||||
int32_t *sequencecount,
|
||||
int32_t abort_on_open_error)
|
||||
{
|
||||
FILE *f;
|
||||
int32_t read;
|
||||
|
||||
f = fopen(filename,"rb");
|
||||
|
||||
if (!f)
|
||||
{
|
||||
if (abort_on_open_error)
|
||||
ECOERROR(ECO_IO_ERROR,"Cannot open file");
|
||||
else
|
||||
{
|
||||
*sequencecount=0;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
read = fread(sequencecount,
|
||||
1,
|
||||
sizeof(int32_t),
|
||||
f);
|
||||
|
||||
if (read != sizeof(int32_t))
|
||||
ECOERROR(ECO_IO_ERROR,"Reading record size error");
|
||||
|
||||
if (is_big_endian())
|
||||
*sequencecount=swap_int32_t(*sequencecount);
|
||||
|
||||
return f;
|
||||
}
|
||||
|
96
src/libecoPCR/ecoMalloc.c
Normal file
96
src/libecoPCR/ecoMalloc.c
Normal file
@ -0,0 +1,96 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
static int eco_log_malloc = 0;
|
||||
static size_t eco_amount_malloc=0;
|
||||
static size_t eco_chunk_malloc=0;
|
||||
|
||||
void eco_trace_memory_allocation()
|
||||
{
|
||||
eco_log_malloc=1;
|
||||
}
|
||||
|
||||
void eco_untrace_memory_allocation()
|
||||
{
|
||||
eco_log_malloc=0;
|
||||
}
|
||||
|
||||
/*
 * Placeholder: currently does nothing. The only statement in the body is
 * commented out.
 */
void ecoMallocedMemory(void)
{
    //eco_amount_malloc;
}
|
||||
|
||||
void *eco_malloc(int64_t chunksize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line)
|
||||
{
|
||||
void * chunk;
|
||||
|
||||
chunk = calloc(1,chunksize);
|
||||
|
||||
|
||||
if (!chunk)
|
||||
ecoError(ECO_MEM_ERROR,error_message,filename,line);
|
||||
|
||||
eco_chunk_malloc++;
|
||||
|
||||
if (eco_log_malloc)
|
||||
fprintf(stderr,
|
||||
"Memory segment located at %p of size %d is allocated (file : %s [%d])",
|
||||
chunk,
|
||||
chunksize,
|
||||
filename,
|
||||
line);
|
||||
|
||||
return chunk;
|
||||
}
|
||||
|
||||
void *eco_realloc(void *chunk,
|
||||
int64_t newsize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line)
|
||||
{
|
||||
void *newchunk;
|
||||
|
||||
newchunk = realloc(chunk,newsize);
|
||||
|
||||
|
||||
if (!newchunk)
|
||||
{
|
||||
ecoError(ECO_MEM_ERROR,error_message,filename,line);
|
||||
fprintf(stderr,"Requested memory : %d\n",newsize);
|
||||
}
|
||||
if (!chunk)
|
||||
eco_chunk_malloc++;
|
||||
|
||||
if (eco_log_malloc)
|
||||
fprintf(stderr,
|
||||
"Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
|
||||
chunk,
|
||||
newchunk,
|
||||
newsize,
|
||||
filename,
|
||||
line);
|
||||
|
||||
return newchunk;
|
||||
}
|
||||
|
||||
void eco_free(void *chunk,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line)
|
||||
{
|
||||
free(chunk);
|
||||
|
||||
if (eco_log_malloc)
|
||||
fprintf(stderr,
|
||||
"Memory segment %p is released => %s (file : %s [%d])",
|
||||
chunk,
|
||||
error_message,
|
||||
filename,
|
||||
line);
|
||||
|
||||
eco_chunk_malloc--;
|
||||
}
|
270
src/libecoPCR/ecoPCR.h
Normal file
270
src/libecoPCR/ecoPCR.h
Normal file
@ -0,0 +1,270 @@
|
||||
#ifndef ECOPCR_H_
|
||||
#define ECOPCR_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/*****************************************************
|
||||
*
|
||||
* Data type declarations
|
||||
*
|
||||
*****************************************************/
|
||||
|
||||
/*
|
||||
*
|
||||
* Sequence types
|
||||
*
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
|
||||
int32_t taxid;
|
||||
char AC[20];
|
||||
int32_t DE_length;
|
||||
int32_t SQ_length;
|
||||
int32_t CSQ_length; /*what is this CSQ_length ? */
|
||||
|
||||
char data[1];
|
||||
|
||||
} ecoseqformat_t;
|
||||
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
int32_t SQ_length;
|
||||
int32_t isexample;
|
||||
char *AC;
|
||||
char *DE;
|
||||
char *SQ;
|
||||
|
||||
int32_t ranktaxonid;/*TR: taxon id to which the sequence belongs*/
|
||||
} ecoseq_t, *pecoseq_t;
|
||||
|
||||
/*
|
||||
*
|
||||
* Taxonomy taxon types
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
int32_t rank;
|
||||
int32_t parent;
|
||||
int32_t namelength;
|
||||
char name[1];
|
||||
|
||||
} ecotxformat_t;
|
||||
|
||||
typedef struct ecotxnode {
|
||||
int32_t taxid;
|
||||
int32_t rank;
|
||||
struct ecotxnode *parent;
|
||||
char *name;
|
||||
} ecotx_t;
|
||||
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
ecotx_t taxon[1];
|
||||
} ecotxidx_t;
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* Taxonomy rank types
|
||||
*
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
char* label[1];
|
||||
} ecorankidx_t;
|
||||
|
||||
/*
|
||||
*
|
||||
* Taxonomy name types
|
||||
*
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
int32_t is_scientificname;
|
||||
int32_t namelength;
|
||||
int32_t classlength;
|
||||
int32_t taxid;
|
||||
char names[1];
|
||||
} econameformat_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
char *name;
|
||||
char *classname;
|
||||
int32_t is_scientificname;
|
||||
struct ecotxnode *taxon;
|
||||
} econame_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
int32_t count;
|
||||
econame_t names[1];
|
||||
} econameidx_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
ecorankidx_t *ranks;
|
||||
econameidx_t *names;
|
||||
ecotxidx_t *taxons;
|
||||
} ecotaxonomy_t;
|
||||
|
||||
|
||||
/*****************************************************
|
||||
*
|
||||
* Function declarations
|
||||
*
|
||||
*****************************************************/
|
||||
|
||||
/*
|
||||
*
|
||||
* Low level system functions
|
||||
*
|
||||
*/
|
||||
|
||||
int32_t is_big_endian();
|
||||
int32_t swap_int32_t(int32_t);
|
||||
|
||||
void *eco_malloc(int64_t chunksize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line);
|
||||
|
||||
|
||||
void *eco_realloc(void *chunk,
|
||||
int64_t chunksize,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line);
|
||||
|
||||
void eco_free(void *chunk,
|
||||
const char *error_message,
|
||||
const char *filename,
|
||||
int32_t line);
|
||||
|
||||
void eco_trace_memory_allocation();
|
||||
void eco_untrace_memory_allocation();
|
||||
|
||||
#define ECOMALLOC(size,error_message) \
|
||||
eco_malloc((size),(error_message),__FILE__,__LINE__)
|
||||
|
||||
#define ECOREALLOC(chunk,size,error_message) \
|
||||
eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
|
||||
|
||||
#define ECOFREE(chunk,error_message) \
|
||||
eco_free((chunk),(error_message),__FILE__,__LINE__)
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
 * Error management
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
void ecoError(int32_t,const char*,const char *,int);
|
||||
|
||||
#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
|
||||
|
||||
#define ECO_IO_ERROR (1)
|
||||
#define ECO_MEM_ERROR (2)
|
||||
#define ECO_ASSERT_ERROR (3)
|
||||
#define ECO_NOTFOUND_ERROR (4)
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* Low level Disk access functions
|
||||
*
|
||||
*/
|
||||
|
||||
FILE *open_ecorecorddb(const char *filename,
|
||||
int32_t *sequencecount,
|
||||
int32_t abort_on_open_error);
|
||||
|
||||
void *read_ecorecord(FILE *,int32_t *recordSize);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Read function in internal binary format
|
||||
*/
|
||||
|
||||
FILE *open_ecoseqdb(const char *filename,
|
||||
int32_t *sequencecount);
|
||||
|
||||
ecoseq_t *readnext_ecoseq(FILE *);
|
||||
|
||||
ecorankidx_t *read_rankidx(const char *filename);
|
||||
|
||||
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Read taxonomy data as formated by the ecoPCRFormat.py script.
|
||||
*
|
||||
 * This function is normally used internally by the read_taxonomy
|
||||
* function and should not be called directly.
|
||||
*
|
||||
* @arg filename path to the *.tdx file of the reformated db
|
||||
*
|
||||
* @return pointer to a taxonomy index structure
|
||||
*/
|
||||
|
||||
ecotxidx_t *read_taxonomyidx(const char *filename);
|
||||
|
||||
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
|
||||
|
||||
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
|
||||
|
||||
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx);
|
||||
|
||||
int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
|
||||
|
||||
ecoseq_t *ecoseq_iterator(const char *prefix);
|
||||
|
||||
|
||||
|
||||
ecoseq_t *new_ecoseq();
|
||||
int32_t delete_ecoseq(ecoseq_t *);
|
||||
ecoseq_t *new_ecoseq_with_data( char *AC,
|
||||
char *DE,
|
||||
char *SQ,
|
||||
int32_t taxid
|
||||
);
|
||||
|
||||
|
||||
int32_t delete_taxon(ecotx_t *taxon);
|
||||
int32_t delete_taxonomy(ecotxidx_t *index);
|
||||
|
||||
|
||||
int32_t rank_index(const char* label,ecorankidx_t* ranks);
|
||||
|
||||
//int32_t delete_apatseq(SeqPtr pseq);
|
||||
//PatternPtr buildPattern(const char *pat, int32_t error_max);
|
||||
//PatternPtr complementPattern(PatternPtr pat);
|
||||
//
|
||||
//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
|
||||
|
||||
char *ecoComplementPattern(char *nucAcSeq);
|
||||
char *ecoComplementSequence(char *nucAcSeq);
|
||||
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
|
||||
|
||||
ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
|
||||
|
||||
int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
|
||||
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
|
||||
|
||||
#endif /*ECOPCR_H_*/
|
202
src/libecoPCR/ecoapat.c
Normal file
202
src/libecoPCR/ecoapat.c
Normal file
@ -0,0 +1,202 @@
|
||||
#include "../libapat/libstki.h"
|
||||
#include "../libapat/apat.h"
|
||||
|
||||
#include "ecoPCR.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
static void EncodeSequence(SeqPtr seq);
|
||||
static void UpperSequence(char *seq);
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* uppercase sequence */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
#define TO_UPPER(c) ((c) - 'a' + 'A')

/*
 * Convert, in place, every ASCII lower-case letter ('a'..'z') of the
 * NUL-terminated string `seq` to upper case. Any other character is
 * left untouched.
 */
void UpperSequence(char *seq)
{
    char *cursor = seq;

    while (*cursor)
    {
        if (IS_LOWER(*cursor))
            *cursor = TO_UPPER(*cursor);
        cursor++;
    }
}

#undef IS_LOWER
#undef TO_UPPER
|
||||
|
||||
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* encode sequence */
|
||||
/* IS_UPPER is slightly faster than isupper */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
|
||||
|
||||
|
||||
|
||||
void EncodeSequence(SeqPtr seq)
|
||||
{
|
||||
int i;
|
||||
UInt8 *data;
|
||||
char *cseq;
|
||||
|
||||
data = seq->data;
|
||||
cseq = seq->cseq;
|
||||
|
||||
while (*cseq) {
|
||||
|
||||
*data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
|
||||
data++;
|
||||
cseq++;
|
||||
}
|
||||
|
||||
for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++)
|
||||
*data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
|
||||
|
||||
for (i = 0 ; i < MAX_PATTERN ; i++)
|
||||
seq->hitpos[i]->top = seq->hiterr[i]->top = 0;
|
||||
|
||||
}
|
||||
|
||||
#undef IS_UPPER
|
||||
|
||||
|
||||
/*
 * Wrap the ecoseq_t `in` into an apat Seq structure.
 *
 * in        sequence to convert; its AC and SQ strings are referenced,
 *           not copied
 * out       Seq structure to reuse, or NULL to have a new one allocated
 *           (together with its MAX_PATTERN hit stacks)
 * circular  number of leading symbols appended again at the end of the
 *           encoded data
 *
 * Returns `out` (the reused or the newly allocated structure) with its
 * data buffer encoded by EncodeSequence().
 *
 * NOTE(review): the `!out->data` test on a freshly allocated structure
 * presumably relies on ECOMALLOC returning zeroed memory - confirm.
 */
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular)
{
    int slot;

    if (out == NULL)
    {
        out = ECOMALLOC(sizeof(Seq),
                        "Error in Allocation of a new Seq structure");

        for (slot = 0 ; slot < MAX_PATTERN ; slot++)
        {
            out->hitpos[slot] = NewStacki(kMinStackiSize);
            if (! out->hitpos[slot])
                ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation");

            out->hiterr[slot] = NewStacki(kMinStackiSize);
            if (! out->hiterr[slot])
                ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation");
        }
    }

    out->name     = in->AC;
    out->seqsiz   = out->seqlen = in->SQ_length;
    out->circular = circular;

    if (out->data)
    {
        /* grow the existing buffer only when it is too small */
        if ((out->seqlen + circular) >= out->datsiz)
        {
            out->data = ECOREALLOC(out->data,(out->seqlen+circular),
                                   "Error during Seq data buffer realloc");
            out->datsiz= out->seqlen+circular;
        }
    }
    else
    {
        out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8),
                              "Error in Allocation of a new Seq data member");
        out->datsiz= out->seqlen+circular;
    }

    out->cseq = in->SQ;

    EncodeSequence(out);

    return out;
}
|
||||
|
||||
/*
 * Release an apat sequence: its data buffer, every hit-position and
 * hit-error stack, and the structure itself.
 *
 * Returns 0 when something was freed, 1 when `pseq` is NULL.
 */
int32_t delete_apatseq(SeqPtr pseq)
{
    int slot;

    if (!pseq)
        return 1;

    if (pseq->data)
        ECOFREE(pseq->data,"Freeing sequence data buffer");

    for (slot = 0 ; slot < MAX_PATTERN ; slot++)
    {
        if (pseq->hitpos[slot])
            FreeStacki(pseq->hitpos[slot]);

        if (pseq->hiterr[slot])
            FreeStacki(pseq->hiterr[slot]);
    }

    ECOFREE(pseq,"Freeing apat sequence structure");

    return 0;
}
|
||||
|
||||
/*
|
||||
|
||||
PatternPtr buildPattern(const char *pat, int32_t error_max)
|
||||
{
|
||||
PatternPtr pattern;
|
||||
int32_t patlen;
|
||||
|
||||
pattern = ECOMALLOC(sizeof(Pattern),
|
||||
"Error in pattern allocation");
|
||||
|
||||
pattern->ok = Vrai;
|
||||
pattern->hasIndel= Faux;
|
||||
pattern->maxerr = error_max;
|
||||
patlen = strlen(pat);
|
||||
|
||||
pattern->cpat = ECOMALLOC(sizeof(char)*patlen+1,
|
||||
"Error in sequence pattern allocation");
|
||||
|
||||
strncpy(pattern->cpat,pat,patlen);
|
||||
pattern->cpat[patlen]=0;
|
||||
UpperSequence(pattern->cpat);
|
||||
|
||||
if (!CheckPattern(pattern))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
|
||||
|
||||
if (! EncodePattern(pattern, dna))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
|
||||
|
||||
if (! CreateS(pattern, ALPHA_LEN))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
|
||||
|
||||
return pattern;
|
||||
|
||||
}
|
||||
|
||||
PatternPtr complementPattern(PatternPtr pat)
|
||||
{
|
||||
PatternPtr pattern;
|
||||
|
||||
pattern = ECOMALLOC(sizeof(Pattern),
|
||||
"Error in pattern allocation");
|
||||
|
||||
pattern->ok = Vrai;
|
||||
pattern->hasIndel= pat->hasIndel;
|
||||
pattern->maxerr = pat->maxerr;
|
||||
pattern->patlen = pat->patlen;
|
||||
|
||||
pattern->cpat = ECOMALLOC(sizeof(char)*(strlen(pat->cpat)+1),
|
||||
"Error in sequence pattern allocation");
|
||||
|
||||
strcpy(pattern->cpat,pat->cpat);
|
||||
|
||||
ecoComplementPattern(pattern->cpat);
|
||||
|
||||
if (!CheckPattern(pattern))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
|
||||
|
||||
if (! EncodePattern(pattern, dna))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
|
||||
|
||||
if (! CreateS(pattern, ALPHA_LEN))
|
||||
ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
|
||||
|
||||
return pattern;
|
||||
|
||||
}
|
||||
*/
|
153
src/libecoPCR/ecodna.c
Normal file
153
src/libecoPCR/ecodna.c
Normal file
@ -0,0 +1,153 @@
|
||||
#include <string.h>
|
||||
#include "ecoPCR.h"
|
||||
|
||||
/*
|
||||
* @doc: DNA alphabet (IUPAC)
|
||||
*/
|
||||
#define LX_BIO_DNA_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
|
||||
|
||||
/*
|
||||
* @doc: complementary DNA alphabet (IUPAC)
|
||||
*/
|
||||
#define LX_BIO_CDNA_ALPHA "TVGHEFCDIJMLKNOPQYSAABWXRZ#!]["
|
||||
|
||||
|
||||
static char sNuc[] = LX_BIO_DNA_ALPHA;
|
||||
static char sAnuc[] = LX_BIO_CDNA_ALPHA;
|
||||
|
||||
static char LXBioBaseComplement(char nucAc);
|
||||
static char *LXBioSeqComplement(char *nucAcSeq);
|
||||
static char *reverseSequence(char *str,char isPattern);
|
||||
|
||||
|
||||
/* ---------------------------- */
|
||||
|
||||
char LXBioBaseComplement(char nucAc)
|
||||
{
|
||||
char *c;
|
||||
|
||||
if ((c = strchr(sNuc, nucAc)))
|
||||
return sAnuc[(c - sNuc)];
|
||||
else
|
||||
return nucAc;
|
||||
}
|
||||
|
||||
/* ---------------------------- */
|
||||
|
||||
/*
 * Complement, in place, every character of the NUL-terminated string
 * `nucAcSeq` with LXBioBaseComplement(). The order of the characters is
 * not changed. Returns `nucAcSeq`.
 */
char *LXBioSeqComplement(char *nucAcSeq)
{
    char *cursor = nucAcSeq;

    while (*cursor)
    {
        *cursor = LXBioBaseComplement(*cursor);
        cursor++;
    }

    return nucAcSeq;
}
|
||||
|
||||
|
||||
/*
 * Reverse the NUL-terminated string `str` in place.
 *
 * When `isPattern` is non-zero the modifier characters '#' and '!' are
 * then moved back to the side of the symbol they were attached to
 * before the reversal:
 *   "#X!" becomes "!X#", "#X" becomes "X#" and "X!" becomes "!X".
 *
 * Returns `str` (NULL is returned as is, an empty string is untouched).
 *
 * NOTE(review): the fix-up loop stops before the last character, so a
 * modifier that ends up at the very end of the reversed string is left
 * where it is - confirm that this is intended.
 */
char *reverseSequence(char *str,char isPattern)
{
    char   *sb, *se, c;
    size_t  len;

    if (! str)
        return str;

    len = strlen(str);

    /* nothing to do, and `str + len - 1` would point before the buffer */
    if (len == 0)
        return str;

    sb = str;
    se = str + len - 1;

    while (sb < se) {
        c     = *sb;
        *sb++ = *se;
        *se-- = c;
    }

    if (! isPattern)
        return str;

    sb = str;
    se = str + len - 1;

    for (; sb < se; sb++)
    {
        if (*sb=='#')
        {
            if (((se - sb) > 2) && (*(sb+2)=='!'))
            {
                /* "#X!" -> "!X#" */
                *sb='!';
                sb+=2;
                *sb='#';
            }
            else
            {
                /* "#X" -> "X#" */
                *sb=*(sb+1);
                sb++;
                *sb='#';
            }
        }
        else if ((*sb=='!') && (sb > str))
        {
            /* "X!" -> "!X"; a leading '!' has no symbol to swap with */
            *sb=*(sb-1);
            *(sb-1)='!';
        }
    }

    return str;
}
|
||||
|
||||
/*
 * Reverse-complement a pattern in place: every symbol is complemented,
 * then the string is reversed in pattern mode. Returns `nucAcSeq`.
 */
char *ecoComplementPattern(char *nucAcSeq)
{
    char *complemented = LXBioSeqComplement(nucAcSeq);

    return reverseSequence(complemented,1);
}
|
||||
|
||||
/*
 * Reverse-complement a plain sequence in place: every symbol is
 * complemented, then the string is reversed without pattern fix-up.
 * Returns `nucAcSeq`.
 */
char *ecoComplementSequence(char *nucAcSeq)
{
    char *complemented = LXBioSeqComplement(nucAcSeq);

    return reverseSequence(complemented,0);
}
|
||||
|
||||
|
||||
/*
 * Copy the part of `nucAcSeq` lying between `begin` (included) and
 * `end` (excluded) into an internal buffer and return that buffer.
 *
 * When begin >= end the region wraps around: the characters from
 * `begin` to the end of the string are followed by the first `end`
 * characters of the string.
 *
 * The returned buffer is a function-level static that is reused and
 * possibly reallocated by the next call: the caller must not free it
 * and must copy the result if it has to survive another call.
 */
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
{
    static char    *buffer   = NULL;
    static int32_t  buffSize = 0;
    int32_t         length;
    int             wraps    = !(begin < end);

    if (wraps)
        length = end + strlen(nucAcSeq) - begin;
    else
        length = end - begin;

    /* make room for `length` characters plus the terminator */
    if (length >= buffSize)
    {
        buffSize = length+1;

        if (!buffer)
            buffer=ECOMALLOC(buffSize,
                             "Error in allocating sub sequence buffer");
        else
            buffer=ECOREALLOC(buffer,buffSize,
                              "Error in reallocating sub sequence buffer");
    }

    if (wraps)
    {
        /* tail of the sequence first, then its first `end` characters */
        strncpy(buffer,nucAcSeq+begin,length - end);
        strncpy(buffer+(length-end),nucAcSeq ,end);
    }
    else
        strncpy(buffer,nucAcSeq + begin,length);

    buffer[length]=0;

    return buffer;
}
|
||||
|
20
src/libecoPCR/ecofilter.c
Normal file
20
src/libecoPCR/ecofilter.c
Normal file
@ -0,0 +1,20 @@
|
||||
#include "ecoPCR.h"
|
||||
|
||||
/*
 * Tell whether `taxid` is one of the restricted taxa or lies under one
 * of them in the taxonomy.
 *
 * taxonomy          taxonomy used to look `taxid` up
 * restricted_taxid  array of `tab_len` taxonomic ids
 * tab_len           number of entries in `restricted_taxid`
 * taxid             taxonomic id to test
 *
 * Returns 1 when `taxid` equals, or is a descendant of, one of the
 * restricted taxids, 0 otherwise. A taxid that is not present in the
 * taxonomy is reported as not included.
 */
int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
                           int32_t *restricted_taxid,
                           int32_t tab_len,
                           int32_t taxid)
{
    int      i;
    ecotx_t *taxon;

    taxon = eco_findtaxonbytaxid(taxonomy, taxid);

    /* eco_findtaxonbytaxid returns NULL for an unknown taxid */
    if (!taxon)
        return 0;

    for (i=0; i < tab_len; i++)
        if ( (taxon->taxid == restricted_taxid[i]) ||
             (eco_isundertaxon(taxon, restricted_taxid[i])) )
            return 1;

    return 0;
}
|
||||
|
61
src/libecoPCR/econame.c
Normal file
61
src/libecoPCR/econame.c
Normal file
@ -0,0 +1,61 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
|
||||
|
||||
/*
 * Load the name index stored in `filename`.
 *
 * filename  path of the name record file
 * taxonomy  taxonomy whose taxon table each name is linked to
 *
 * Returns a newly allocated index holding `count` names, `count` being
 * the number of records announced by open_ecorecorddb().
 */
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
{
    int32_t       count;
    FILE         *f;
    econameidx_t *indexname;
    int32_t       i;

    f = open_ecorecorddb(filename,&count,1);

    /* econameidx_t already embeds one econame_t, hence count-1 */
    indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");

    indexname->count=count;

    for (i=0; i < count; i++){
        readnext_econame(f,(indexname->names)+i,taxonomy);
    }

    /* every name was copied out of the records: release the file */
    if (f)
        fclose(f);

    return indexname;
}
|
||||
|
||||
/*
 * Read the next name record from `f` and store it in `name`.
 *
 * The integer fields of the record are byte-swapped when
 * is_big_endian() is true. The name and its class name are copied into
 * newly allocated, NUL-terminated strings; the `taxid` field of the
 * record is used as an index into taxonomy->taxons->taxon.
 *
 * Returns `name`, or NULL when no record could be read.
 */
econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
{
    econameformat_t *record;
    int32_t          recordSize;
    char            *classStart;

    record = read_ecorecord(f,&recordSize);

    if (record == NULL)
        return NULL;

    if (is_big_endian())
    {
        record->is_scientificname = swap_int32_t(record->is_scientificname);
        record->namelength        = swap_int32_t(record->namelength);
        record->classlength       = swap_int32_t(record->classlength);
        record->taxid             = swap_int32_t(record->taxid);
    }

    name->is_scientificname=record->is_scientificname;

    /* the record stores the name immediately followed by the class name */
    name->name = ECOMALLOC((record->namelength+1) * sizeof(char),"Allocate name");
    strncpy(name->name,record->names,record->namelength);
    name->name[record->namelength]=0;

    classStart = record->names + record->namelength;

    name->classname = ECOMALLOC((record->classlength+1) * sizeof(char),"Allocate classname");
    strncpy(name->classname,classStart,record->classlength);
    name->classname[record->classlength]=0;

    name->taxon = taxonomy->taxons->taxon + record->taxid;

    return name;
}
|
||||
|
52
src/libecoPCR/ecorank.c
Normal file
52
src/libecoPCR/ecorank.c
Normal file
@ -0,0 +1,52 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
static int compareRankLabel(const void *label1, const void *label2);
|
||||
|
||||
ecorankidx_t *read_rankidx(const char *filename)
|
||||
{
|
||||
int32_t count;
|
||||
FILE *f;
|
||||
ecorankidx_t *index;
|
||||
int32_t i;
|
||||
int32_t rs;
|
||||
char *buffer;
|
||||
|
||||
f = open_ecorecorddb(filename,&count,1);
|
||||
|
||||
index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
|
||||
"Allocate rank index");
|
||||
|
||||
index->count=count;
|
||||
|
||||
for (i=0; i < count; i++)
|
||||
{
|
||||
buffer = read_ecorecord(f,&rs);
|
||||
index->label[i]=(char*) ECOMALLOC(rs+1,
|
||||
"Allocate rank label");
|
||||
strncpy(index->label[i],buffer,rs);
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
/*
 * Return the position of `label` in the rank index `ranks`.
 *
 * The lookup is a bsearch() over ranks->label, so the labels are
 * presumably stored in strcmp() order - confirm against the code that
 * writes the index.
 *
 * When the label is unknown an ECO_NOTFOUND_ERROR is raised and -1 is
 * returned.
 */
int32_t rank_index(const char* label,ecorankidx_t* ranks)
{
    char **hit;

    hit = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);

    if (!hit)
    {
        ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
        return -1;
    }

    return hit-ranks->label;
}
|
||||
|
||||
|
||||
/*
 * bsearch() comparison callback: `label1` is the searched key (a plain
 * C string), `label2` points to an element of the label array (a
 * pointer to a C string). Returns the strcmp() ordering of the two.
 */
int compareRankLabel(const void *label1, const void *label2)
{
    const char *key   = (const char*)label1;
    const char *entry = *(const char**)label2;

    return strcmp(key,entry);
}
|
233
src/libecoPCR/ecoseq.c
Normal file
233
src/libecoPCR/ecoseq.c
Normal file
@ -0,0 +1,233 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <zlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static FILE *open_seqfile(const char *prefix,int32_t index);
|
||||
|
||||
|
||||
ecoseq_t *new_ecoseq()
|
||||
{
|
||||
void *tmp;
|
||||
|
||||
tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/*
 * Release a sequence: its AC, DE and SQ strings when they are set, then
 * the structure itself.
 *
 * Returns 0 when something was freed, 1 when `seq` is NULL.
 */
int32_t delete_ecoseq(ecoseq_t * seq)
{
    if (!seq)
        return 1;

    if (seq->AC)
        ECOFREE(seq->AC,"Free sequence AC");

    if (seq->DE)
        ECOFREE(seq->DE,"Free sequence DE");

    if (seq->SQ)
        ECOFREE(seq->SQ,"Free sequence SQ");

    ECOFREE(seq,"Free sequence structure");

    return 0;
}
|
||||
|
||||
ecoseq_t *new_ecoseq_with_data( char *AC,
|
||||
char *DE,
|
||||
char *SQ,
|
||||
int32_t taxid_idx
|
||||
)
|
||||
{
|
||||
ecoseq_t *tmp;
|
||||
int32_t lstr;
|
||||
tmp = new_ecoseq();
|
||||
|
||||
tmp->taxid=taxid_idx;
|
||||
|
||||
if (AC)
|
||||
{
|
||||
lstr =strlen(AC);
|
||||
tmp->AC=ECOMALLOC((lstr+1) * sizeof(char),
|
||||
"Allocate sequence accession");
|
||||
strcpy(tmp->AC,AC);
|
||||
}
|
||||
|
||||
if (DE)
|
||||
{
|
||||
lstr =strlen(DE);
|
||||
tmp->DE=ECOMALLOC((lstr+1) * sizeof(char),
|
||||
"Allocate sequence definition");
|
||||
strcpy(tmp->DE,DE);
|
||||
}
|
||||
|
||||
if (SQ)
|
||||
{
|
||||
lstr =strlen(SQ);
|
||||
tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char),
|
||||
"Allocate sequence data");
|
||||
strcpy(tmp->SQ,SQ);
|
||||
}
|
||||
|
||||
tmp->isexample=1;
|
||||
|
||||
return tmp;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* ?? used ??
|
||||
**/
|
||||
/*
 * Open a sequence record file: a thin wrapper around
 * open_ecorecorddb() called with its abort-on-open-error argument set
 * to 1. The number of records is stored in *sequencecount.
 */
FILE *open_ecoseqdb(const char *filename,
                    int32_t    *sequencecount)
{
    FILE *db = open_ecorecorddb(filename,sequencecount,1);

    return db;
}
|
||||
|
||||
ecoseq_t *readnext_ecoseq(FILE *f)
|
||||
{
|
||||
char *compressed=NULL;
|
||||
|
||||
ecoseqformat_t *raw;
|
||||
ecoseq_t *seq;
|
||||
int32_t comp_status;
|
||||
unsigned long int seqlength;
|
||||
int32_t rs;
|
||||
char *c;
|
||||
int32_t i;
|
||||
|
||||
raw = read_ecorecord(f,&rs);
|
||||
|
||||
if (!raw)
|
||||
return NULL;
|
||||
|
||||
if (is_big_endian())
|
||||
{
|
||||
raw->CSQ_length = swap_int32_t(raw->CSQ_length);
|
||||
raw->DE_length = swap_int32_t(raw->DE_length);
|
||||
raw->SQ_length = swap_int32_t(raw->SQ_length);
|
||||
raw->taxid = swap_int32_t(raw->taxid);
|
||||
}
|
||||
|
||||
seq = new_ecoseq();
|
||||
|
||||
seq->taxid = raw->taxid;
|
||||
|
||||
seq->AC = ECOMALLOC(strlen(raw->AC) +1,
|
||||
"Allocate Sequence Accesion number");
|
||||
strncpy(seq->AC,raw->AC,strlen(raw->AC));
|
||||
|
||||
|
||||
seq->DE = ECOMALLOC(raw->DE_length+1,
|
||||
"Allocate Sequence definition");
|
||||
strncpy(seq->DE,raw->data,raw->DE_length);
|
||||
|
||||
seqlength = seq->SQ_length = raw->SQ_length;
|
||||
|
||||
compressed = raw->data + raw->DE_length;
|
||||
|
||||
seq->SQ = ECOMALLOC(seqlength+1,
|
||||
"Allocate sequence buffer");
|
||||
|
||||
seq->isexample=1;
|
||||
|
||||
comp_status = uncompress((unsigned char*)seq->SQ,
|
||||
&seqlength,
|
||||
(unsigned char*)compressed,
|
||||
raw->CSQ_length);
|
||||
|
||||
if (comp_status != Z_OK)
|
||||
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
|
||||
|
||||
for (c=seq->SQ,i=0;i<seqlength;c++,i++)
|
||||
*c=toupper(*c);
|
||||
|
||||
// fprintf(stderr,"seq name : %30s seq size : %d\n",seq->DE,seq->SQ_length);
|
||||
return seq;
|
||||
}
|
||||
|
||||
/**
|
||||
* Open the sequences database (.sdx file)
|
||||
* @param prefix name of the database (radical without extension)
|
||||
* @param index integer
|
||||
*
|
||||
* @return file object
|
||||
*/
|
||||
FILE *open_seqfile(const char *prefix,int32_t index)
|
||||
{
|
||||
char filename_buffer[1024];
|
||||
int32_t filename_length;
|
||||
FILE *input;
|
||||
int32_t seqcount;
|
||||
|
||||
filename_length = snprintf(filename_buffer,
|
||||
1023,
|
||||
"%s_%03d.sdx",
|
||||
prefix,
|
||||
index);
|
||||
|
||||
|
||||
|
||||
if (filename_length >= 1024)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
|
||||
|
||||
filename_buffer[filename_length]=0;
|
||||
|
||||
input=open_ecorecorddb(filename_buffer,&seqcount,0);
|
||||
|
||||
if (input)
|
||||
fprintf(stderr,"# Reading file %s containing %d sequences...\n",
|
||||
filename_buffer,
|
||||
seqcount);
|
||||
|
||||
return input;
|
||||
}
|
||||
|
||||
/*
 * Iterate over every sequence of a database split into the files
 * "<prefix>_001.sdx", "<prefix>_002.sdx", ...
 *
 * prefix  database name to (re)start the iteration from its first
 *         file, or NULL to continue the iteration in progress
 *
 * Returns the next sequence, or NULL when the first file cannot be
 * opened, when no iteration is in progress, or when every file has been
 * read.
 *
 * The iteration state lives in function-level statics, so only one
 * iteration can be in progress at a time.
 */
ecoseq_t *ecoseq_iterator(const char *prefix)
{
    static FILE    *current_seq_file= NULL;
    static int32_t  current_file_idx = 1;
    static char     current_prefix[1024];
    ecoseq_t       *seq;

    if (prefix)
    {
        current_file_idx = 1;

        if (current_seq_file)
            fclose(current_seq_file);

        /* the last valid index of the buffer is sizeof - 1 */
        strncpy(current_prefix,prefix,sizeof(current_prefix)-1);
        current_prefix[sizeof(current_prefix)-1]=0;

        current_seq_file = open_seqfile(current_prefix,
                                        current_file_idx);

        if (!current_seq_file)
            return NULL;
    }
    else if (!current_seq_file)
        /* never started, or the last file was already read to its end */
        return NULL;

    seq = readnext_ecoseq(current_seq_file);

    if (!seq && feof(current_seq_file))
    {
        /* end of this file: carry on with the next one, if any */
        current_file_idx++;
        fclose(current_seq_file);
        current_seq_file = open_seqfile(current_prefix,
                                        current_file_idx);

        if (current_seq_file)
            seq = readnext_ecoseq(current_seq_file);
    }

    return seq;
}
|
329
src/libecoPCR/ecotax.c
Normal file
329
src/libecoPCR/ecotax.c
Normal file
@ -0,0 +1,329 @@
|
||||
#include "ecoPCR.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
|
||||
|
||||
/**
|
||||
* Open the taxonomy database
|
||||
* @param filename path to the database (.tdx file)
|
||||
* @return a ecotxidx_t structure
|
||||
*/
|
||||
ecotxidx_t *read_taxonomyidx(const char *filename)
|
||||
{
|
||||
int32_t count;
|
||||
FILE *f;
|
||||
ecotxidx_t *index;
|
||||
int32_t i;
|
||||
|
||||
f = open_ecorecorddb(filename,&count,1);
|
||||
|
||||
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1),
|
||||
"Allocate taxonomy");
|
||||
|
||||
index->count=count;
|
||||
for (i=0; i < count; i++){
|
||||
readnext_ecotaxon(f,&(index->taxon[i]));
|
||||
index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Release a taxon table: the name of every taxon that has one, then the
 * table itself.
 *
 * Returns 0 when something was freed, 1 when `index` is NULL.
 */
int32_t delete_taxonomy(ecotxidx_t *index)
{
    int32_t pos;

    if (!index)
        return 1;

    for (pos = 0; pos < index->count; pos++)
    {
        if (index->taxon[pos].name)
            ECOFREE(index->taxon[pos].name,"Free scientific name");
    }

    ECOFREE(index,"Free Taxonomy");

    return 0;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Release one taxon: its name when it has one, then the taxon itself.
 *
 * Returns 0 when something was freed, 1 when `taxon` is NULL.
 */
int32_t delete_taxon(ecotx_t *taxon)
{
    if (!taxon)
        return 1;

    if (taxon->name)
        ECOFREE(taxon->name,"Free scientific name");

    ECOFREE(taxon,"Free Taxon");

    return 0;
}
|
||||
|
||||
|
||||
/**
|
||||
* Read the database for a given taxon and save the data
|
||||
* into the taxon structure(if any found)
|
||||
* @param *f pointer to FILE type returned by fopen
|
||||
* @param *taxon pointer to the structure
|
||||
*
|
||||
* @return a ecotx_t structure if any taxon found else NULL
|
||||
*/
|
||||
/*
 * Read the next taxon record from `f` and store it in `taxon`.
 *
 * The integer fields of the record are byte-swapped when
 * is_big_endian() is true. The `parent` member receives the parent
 * INDEX of the record cast to a pointer; the caller is expected to turn
 * it into a real pointer. The name is copied into a newly allocated,
 * NUL-terminated string.
 *
 * Returns `taxon`, or NULL when no record could be read.
 */
ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
{
    ecotxformat_t *raw;
    int32_t        rs;

    raw = read_ecorecord(f,&rs);

    if (!raw)
        return NULL;

    if (is_big_endian())
    {
        raw->namelength = swap_int32_t(raw->namelength);
        raw->parent     = swap_int32_t(raw->parent);
        raw->rank       = swap_int32_t(raw->rank);
        raw->taxid      = swap_int32_t(raw->taxid);
    }

    taxon->parent = (ecotx_t*)(size_t)raw->parent;
    taxon->taxid  = raw->taxid;
    taxon->rank   = raw->rank;

    taxon->name   = ECOMALLOC((raw->namelength+1) * sizeof(char),
                              "Allocate taxon scientific name");

    strncpy(taxon->name,raw->name,raw->namelength);
    /* strncpy does not terminate the copy when namelength bytes are used */
    taxon->name[raw->namelength]=0;

    return taxon;
}
|
||||
|
||||
|
||||
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
|
||||
{
|
||||
ecotaxonomy_t *tax;
|
||||
char *filename;
|
||||
int buffsize;
|
||||
|
||||
tax = ECOMALLOC(sizeof(ecotaxonomy_t),
|
||||
"Allocate taxonomy structure");
|
||||
|
||||
buffsize = strlen(prefix)+10;
|
||||
|
||||
filename = ECOMALLOC(buffsize,
|
||||
"Allocate filename");
|
||||
|
||||
snprintf(filename,buffsize,"%s.rdx",prefix);
|
||||
|
||||
tax->ranks = read_rankidx(filename);
|
||||
|
||||
snprintf(filename,buffsize,"%s.tdx",prefix);
|
||||
|
||||
tax->taxons = read_taxonomyidx(filename);
|
||||
|
||||
if (readAlternativeName)
|
||||
{
|
||||
snprintf(filename,buffsize,"%s.ndx",prefix);
|
||||
tax->names=read_nameidx(filename,tax);
|
||||
}
|
||||
else
|
||||
tax->names=NULL;
|
||||
return tax;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Release a taxonomy built by read_taxonomy(): the rank index and its
 * labels, the name index and its strings, the taxon table and its
 * names, then the structure itself.
 *
 * Returns 0 when something was freed, 1 when `taxonomy` is NULL.
 *
 * NOTE(review): the NULL tests on individual strings presumably rely on
 * ECOMALLOC returning zeroed memory for entries that were never
 * filled - confirm.
 */
int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
{
    int32_t i;

    if (!taxonomy)
        return 1;

    if (taxonomy->ranks)
    {
        /* each label is a separate allocation made by read_rankidx() */
        for (i=0; i < taxonomy->ranks->count; i++)
            if (taxonomy->ranks->label[i])
                ECOFREE(taxonomy->ranks->label[i],"Free rank label");

        ECOFREE(taxonomy->ranks,"Free rank index");
    }

    if (taxonomy->names)
    {
        /* the taxon pointer of a name is not owned by the name index */
        for (i=0; i < taxonomy->names->count; i++)
        {
            if (taxonomy->names->names[i].name)
                ECOFREE(taxonomy->names->names[i].name,"Free name");

            if (taxonomy->names->names[i].classname)
                ECOFREE(taxonomy->names->names[i].classname,"Free classname");
        }

        ECOFREE(taxonomy->names,"Free name index");
    }

    /* delete_taxonomy() also frees the name of every taxon */
    if (taxonomy->taxons)
        delete_taxonomy(taxonomy->taxons);

    ECOFREE(taxonomy,"Free taxonomy structure");

    return 0;
}
|
||||
|
||||
/*
 * Walk up the parent chain starting at `taxon` (included) and return
 * the first taxon whose rank is `rankidx`.
 *
 * The walk stops on the taxon that is its own parent (the root node).
 * Returns NULL when no taxon of the chain has the requested rank.
 */
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
                             int32_t rankidx)
{
    ecotx_t *node = taxon;

    /* climb until the rank matches or the root node is reached */
    while ((node->parent != node) && (node->rank != rankidx))
        node = node->parent;

    return (node->rank == rankidx) ? node : NULL;
}
|
||||
|
||||
/**
|
||||
* Get back information concerning a taxon from a taxonomic id
|
||||
* @param *taxonomy the taxonomy database
|
||||
* @param taxid the taxonomic id
|
||||
*
|
||||
* @result a ecotx_t structure containing the taxonomic information
|
||||
**/
|
||||
/*
 * Look a taxon up by taxonomic id with a linear scan of the taxon
 * table. Returns the first matching taxon, or NULL when no taxon has
 * this taxid.
 */
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
                              int32_t taxid)
{
    ecotx_t *table = taxonomy->taxons->taxon;
    int32_t  total = taxonomy->taxons->count;
    int32_t  pos;

    for (pos = 0; pos < total; pos++)
    {
        if (table[pos].taxid == taxid)
            return table + pos;
    }

    return (ecotx_t*)NULL;
}
|
||||
|
||||
/**
|
||||
* Find out if taxon is son of other taxon (identified by its taxid)
|
||||
* @param *taxon son taxon
|
||||
* @param parent_taxid taxonomic id of the other taxon
|
||||
*
|
||||
* @return 1 if the other taxid matches a parent taxid, else 0
|
||||
**/
|
||||
/*
 * Tell whether one of the ancestors of `taxon` has the taxonomic id
 * `other_taxid`.
 *
 * The walk starts at the parent of `taxon` (the taxon itself is not
 * tested) and stops on the taxon named "root".
 * Returns 1 when a matching ancestor is found, 0 otherwise.
 */
int eco_isundertaxon(ecotx_t *taxon,
                     int other_taxid)
{
    ecotx_t *ancestor;

    for (ancestor = taxon->parent;
         (ancestor->taxid != other_taxid) && (strcmp(ancestor->name, "root") != 0);
         ancestor = ancestor->parent)
        ;

    return (ancestor->taxid == other_taxid) ? 1 : 0;
}
|
||||
|
||||
ecotx_t *eco_getspecies(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("species",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
||||
|
||||
ecotx_t *eco_getgenus(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("genus",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
||||
|
||||
|
||||
ecotx_t *eco_getfamily(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("family",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
||||
|
||||
ecotx_t *eco_getkingdom(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("kingdom",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
||||
|
||||
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
|
||||
ecotaxonomy_t *taxonomy)
|
||||
{
|
||||
static ecotaxonomy_t *tax=NULL;
|
||||
static int32_t rankindex=-1;
|
||||
|
||||
if (taxonomy && tax!=taxonomy)
|
||||
{
|
||||
rankindex = rank_index("superkingdom",taxonomy->ranks);
|
||||
tax=taxonomy;
|
||||
}
|
||||
|
||||
if (!tax || rankindex < 0)
|
||||
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
|
||||
|
||||
return eco_findtaxonatrank(taxon,rankindex);
|
||||
}
|
39
src/libecoprimer/Makefile
Normal file
39
src/libecoprimer/Makefile
Normal file
@ -0,0 +1,39 @@
|
||||
|
||||
# Sources compiled into the libecoprimer static library.
SOURCES = goodtaxon.c \
          readdnadb.c \
          smothsort.c \
          sortword.c \
          hashsequence.c \
          strictprimers.c \
          aproxpattern.c \
          merge.c \
          queue.c \
          libstki.c \
          sortmatch.c \
          pairtree.c \
          pairs.c \
          taxstats.c \
          apat_search.c \
          filtering.c \
          PrimerSets.c \
          ahocorasick.c

SRCS=$(SOURCES)

OBJECTS= $(patsubst %.c,%.o,$(SOURCES))

LIBFILE= libecoprimer.a
RANLIB= ranlib


include ../global.mk


# `all` and `clean` name actions, not files: keep them working even if a
# file with the same name appears in this directory.
.PHONY: all clean

all: $(LIBFILE)

clean:
	rm -rf $(OBJECTS) $(LIBFILE)

# $? only lists the objects newer than the archive.
$(LIBFILE): $(OBJECTS)
	ar -cr $@ $?
	$(RANLIB) $@
|
1770
src/libecoprimer/PrimerSets.c
Normal file
1770
src/libecoprimer/PrimerSets.c
Normal file
File diff suppressed because it is too large
Load Diff
58
src/libecoprimer/PrimerSets.h
Normal file
58
src/libecoprimer/PrimerSets.h
Normal file
@ -0,0 +1,58 @@
|
||||
#ifndef PRIMERSETS_H_
#define PRIMERSETS_H_

#include "ecoprimer.h"

/* Capacity of the set_pairs array of a pairset. */
#define PRIMERS_IN_SET_COUNT 10

/*
 * A set of primer pairs together with its statistics.
 * NOTE(review): the meaning of each statistic is defined by
 * PrimerSets.c, which is not visible here - the names are the only
 * guide.
 */
typedef struct {
    int     *set_wellIdentifiedTaxa;
    int32_t  set_pairs[PRIMERS_IN_SET_COUNT];
    float    set_specificity;
    float    set_coverage;
    float    set_lmean;
    float    set_lcov;
    float    set_score;
    int32_t  set_intaxa;
    int32_t  set_wi_cnt;
}pairset;

/* Inputs shared by the set-building functions. */
typedef struct{
    ppair_t     *sortedpairs;
    int32_t      sorted_count;
    pecodnadb_t  seqdb;
    poptions_t   options;
}SetParams;

/* Score components of one primer pair. */
typedef struct{
    float t_spc;   //specificity contribution
    float t_cov;   //coverage contribution
    float t_lmd;   //link spread difference
    float len;     //length
    float score;   //score
}primerscore;

void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams);
void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams);
float get_links_distribution (int prb_idx, pairset *prob_set, SetParams *pparams);
pairset build_primers_set_greedy_spc (SetParams *pparams);
void get_set_mean_cov_stats (pairset *prob_set, SetParams *pparams);

void some_other_set_possibilities (pairset *pair_set,
        ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
void sets_by_SimulatedAnealing (pairset *pair_set,
        ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
void sets_by_TabuSearch (pairset *pair_set,
        ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
pairset * sets_by_BruteForce (ppair_t * sortedpairs,
        int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
pairset * extend_set_randomly (pairset *pair_set, SetParams *params, int extend_to_cnt);
void build_and_print_sets (ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);

int32_t get_next_option_increasing_cov (pairset *pair_set, SetParams *pparams);
void reset_set_props (pairset *pair_set, SetParams *pparams);

void primers_graph_graphviz (ppair_t * sortedpairs,
        int32_t sorted_count, poptions_t options);
size_t primers_changeSortedArray (ppair_t ** pairs,
        size_t sorted_count, poptions_t options);
size_t primers_filterWithGivenLinks (ppair_t ** pairs,
        size_t sorted_count, poptions_t options);

#endif
|
479
src/libecoprimer/ahocorasick.c
Executable file
479
src/libecoprimer/ahocorasick.c
Executable file
@ -0,0 +1,479 @@
|
||||
/*
|
||||
* ahocorasick.c
|
||||
*
|
||||
* Created on: 26 march 2011
|
||||
* Author: tiayyba
|
||||
*/
|
||||
#include <inttypes.h>
|
||||
#include "hashencoder.h"
|
||||
#include "ahocorasick.h"
|
||||
|
||||
void ahoc_graphKeywordTree (aho_state *root);
|
||||
aho_state *groot = NULL; //just for graph testing
|
||||
|
||||
// Extract the 2-bit code found at 1-based position i of the packed word w of length l.
// The field for position i sits ((l)-(i))*2 bits above the least significant bit,
// so i==1 is the highest field and i==l is the lowest one. The result is masked to 0..3.
#define BASEATINDEX(w, l, i) (uint8_t)((((w)&(0x3LLU<<(((l)-(i))*2)))>>(((l)-(i))*2)) & 0x3LLU)
|
||||
|
||||
/*
 * Append one (word index, strand) entry to the output set of an automaton state.
 *
 * node     : state receiving the entry; a NULL node is silently ignored
 * isdirect : strand flag stored with the entry
 * idx      : index of the word the entry refers to
 *
 * The output array grows by exactly one element per call.
 */
void ahoc_addOutputElement (aho_state *node, bool_t isdirect, uint32_t idx)
{
    aho_output *slot;

    if (node == NULL)
        return;

    //an empty set needs a fresh array, a non empty one is extended by one slot
    if (node->output.count != 0)
        node->output.out_set = ECOREALLOC(node->output.out_set, (node->output.count+1)*sizeof(aho_output),
                "Cannot allocate memory for aho-corasick state output element");
    else
        node->output.out_set = ECOMALLOC(sizeof(aho_output),
                "Cannot allocate memory for aho-corasick state output element");

    //fill the new last slot, then publish it through the counter
    slot = node->output.out_set + node->output.count;
    slot->wordidx = idx;
    slot->isdirect = isdirect;
    node->output.count++;
}
|
||||
|
||||
//is the passed output element in the set
|
||||
/*
 * Tell whether the output set of node already holds an entry equal to ot,
 * i.e. same word index and same strand flag.
 * Returns TRUE when such an entry exists, FALSE otherwise.
 */
bool_t ahoc_isOutputIn (aho_state *node, aho_output ot)
{
    uint32_t k = 0;

    while (k < node->output.count)
    {
        const aho_output *cand = &node->output.out_set[k++];

        if (cand->isdirect != ot.isdirect)
            continue;
        if (cand->wordidx == ot.wordidx)
            return TRUE;
    }

    return FALSE;
}
|
||||
|
||||
//take union of output of the two nodes and put in node1
|
||||
/*
 * Merge the output set of node2 into node1: every entry of node2 that node1
 * does not hold yet is appended to node1. node2 itself is left untouched.
 */
void ahoc_unionOutputElements (aho_state *node1, aho_state *node2)
{
    uint32_t k;
    aho_output entry;

    for (k = 0; k < node2->output.count; k++)
    {
        entry = node2->output.out_set[k];

        //already known by node1, nothing to copy
        if (ahoc_isOutputIn (node1, entry) != FALSE)
            continue;

        ahoc_addOutputElement (node1, entry.isdirect, entry.wordidx);
    }
}
|
||||
|
||||
/*
 * Insert one packed word as a keyword of the tree rooted at root.
 *
 * The word is walked from position 1 to options->primer_length; a new state is
 * created for every missing edge. The state reached after the last position
 * receives the output entry (idx, isdirect).
 *
 * State ids come from a function-local static counter, so they keep growing
 * across calls (and across trees) for the whole life of the program.
 */
void ahoc_addKeyword (aho_state *root, word_t w, bool_t isdirect, uint32_t idx, poptions_t options)
{
    static uint32_t state_id = 0;
    aho_state *cur = root;
    aho_state *child;
    uint32_t depth;
    uint8_t sym;

    for (depth = 1; depth <= options->primer_length; depth++)
    {
        sym = BASEATINDEX (w, options->primer_length, depth);
        child = cur->next[sym];

        if (child == NULL)
        {
            //no edge for this symbol yet: create and initialise the target state
            child = ECOMALLOC(sizeof(aho_state),
                    "Cannot allocate memory for aho-corasick state");
            cur->next[sym] = child;

            child->id = ++state_id;
            child->next[0] = NULL;
            child->next[1] = NULL;
            child->next[2] = NULL;
            child->next[3] = NULL;
            child->fail = NULL;
            child->output.count = 0;
        }

        cur = child;
    }

    //the whole word has been spelled: record it on the final state
    ahoc_addOutputElement (cur, isdirect, idx);
}
|
||||
|
||||
/*
 * Build the keyword tree for all words of the word table.
 *
 * root    : caller-provided root state, (re)initialised here; NULL is ignored
 * words   : word table; every word is inserted twice, once as is (direct) and
 *           once after ecoComplementWord (reverse), both under the same index
 * options : supplies primer_length
 *
 * Once all keywords are in, every symbol without an edge leaving the root is
 * looped back onto the root.
 */
void ahoc_buildKeywordTree (aho_state *root, pwordcount_t words, poptions_t options)
{
    uint32_t n;
    word_t direct;

    if (root == NULL)
        return;

    //blank root state
    root->id = 0;
    root->fail = NULL;
    root->output.count = 0;
    for (n = 0; n < 4; n++)
        root->next[n] = NULL;

    //one direct and one reverse pattern per word
    for (n = 0; n < words->size; n++)
    {
        direct = WORD(words->words[n]);
        ahoc_addKeyword (root, direct, TRUE, n, options);
        ahoc_addKeyword (root, ecoComplementWord(direct,options->primer_length), FALSE, n, options);
    }

    //symbols unknown at the root keep the automaton on the root
    for (n = 0; n < 4; n++)
    {
        if (!root->next[n])
            root->next[n] = root;
    }
}
|
||||
|
||||
/*
 * Append a state at the tail of the FIFO queue.
 * A NULL state is ignored; the queue cell is allocated here and released by ahoc_dequeue.
 */
void ahoc_enqueue (aho_queue *ahoqueue, aho_state *node)
{
    queue_node *cell;

    if (!node)
        return;

    cell = ECOMALLOC(sizeof(queue_node),
            "Cannot allocate memory for aho-corasick queue node");
    cell->next = NULL;
    cell->state_node = node;

    //link behind the current tail, or start the list when the queue is empty
    if (ahoqueue->first != NULL)
        ahoqueue->last->next = cell;
    else
        ahoqueue->first = cell;

    ahoqueue->last = cell;
}
|
||||
|
||||
/*
 * Remove and return the state at the head of the FIFO queue.
 *
 * Returns NULL when the queue is empty. The queue cell holding the state is
 * released here; the state itself is not.
 */
aho_state *ahoc_dequeue (aho_queue *ahoqueue)
{
    queue_node *head = ahoqueue->first;
    aho_state *node;

    if (head == NULL)
        return NULL;

    ahoqueue->first = head->next;

    //queue just became empty: do not leave 'last' pointing at the cell freed below
    if (ahoqueue->first == NULL)
        ahoqueue->last = NULL;

    node = head->state_node;
    ECOFREE (head, "Cannot free memory for aho-corasick queue node");

    return node;
}
|
||||
|
||||
//set fail links and output sets for the keyword tree
|
||||
/*
 * Compute the fail link of every state of the keyword tree and complete the
 * output sets, walking the tree breadth first from the root.
 *
 * Expects a tree prepared by ahoc_buildKeywordTree: the search for a fail
 * target follows fail links until a state with an edge for the symbol is found,
 * which relies on the root having an edge (possibly onto itself) for all 4 symbols.
 */
void ahoc_updateForFailAndOutput (aho_state *root)
{
    aho_queue pending;
    aho_state *parent;
    aho_state *child;
    aho_state *fallback;
    int32_t sym;

    pending.first = NULL;
    pending.last = NULL;

    //depth 1: real children of the root fail onto the root
    for (sym = 0; sym < 4; sym++)
    {
        child = root->next[sym];
        if (child == NULL || child == root)
            continue;

        child->fail = root;
        ahoc_enqueue (&pending, child);
    }

    //deeper states, in breadth first order
    while (pending.first != NULL)
    {
        parent = ahoc_dequeue (&pending);

        for (sym = 0; sym < 4; sym++)
        {
            child = parent->next[sym];
            if (child == NULL)
                continue;

            ahoc_enqueue (&pending, child);

            //climb the fail chain of the parent up to a state that knows this symbol
            fallback = parent->fail;
            while (fallback->next[sym] == NULL)
                fallback = fallback->fail;

            child->fail = fallback->next[sym];

            //everything recognised at the fail target is also recognised here
            ahoc_unionOutputElements (child, child->fail);
        }
    }
}
|
||||
|
||||
/*
 * Release node, its output array and, recursively, every state reachable
 * through its next[] edges.
 *
 * Must not be called on a state that has an edge onto itself (the recursion
 * would never end); node must not be NULL.
 */
void ahoc_freeKeywordTree (aho_state *node)
{
    int branch = 0;

    //children first
    while (branch < 4)
    {
        if (node->next[branch] != NULL)
            ahoc_freeKeywordTree (node->next[branch]);
        branch++;
    }

    //the output array only exists once at least one entry was added
    if (node->output.count > 0)
        ECOFREE (node->output.out_set, "Free failed for node output");

    ECOFREE (node, "Free failed for node");
}
|
||||
|
||||
pprimercount_t ahoc_lookforStrictPrimers (pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||
pwordcount_t words,poptions_t options)
|
||||
{
|
||||
aho_state automaton_root;
|
||||
aho_state *curr_state;
|
||||
//uint32_t inSequenceQuorum;
|
||||
uint32_t outSequenceQuorum;
|
||||
pprimer_t data;
|
||||
pprimercount_t primers;
|
||||
uint32_t i, j, k;
|
||||
int32_t pos;
|
||||
uint32_t lmax;
|
||||
char *base;
|
||||
int8_t code;
|
||||
uint32_t goodPrimers=0;
|
||||
static int iii=0;
|
||||
|
||||
|
||||
//inSequenceQuorum = (uint32_t)floor((float)exampleCount * options->sensitivity_quorum);
|
||||
outSequenceQuorum = (uint32_t)floor((float)(seqdbsize-exampleCount) * options->false_positive_quorum);
|
||||
|
||||
//fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",inSequenceQuorum,exampleCount);
|
||||
fprintf(stderr," Primers should not be present in more than %d/%d counterexample sequences\n",outSequenceQuorum,(seqdbsize-exampleCount));
|
||||
|
||||
data = ECOMALLOC(words->size * sizeof(primer_t),
|
||||
"Cannot allocate memory for fuzzy matching results");
|
||||
for (i=0; i < words->size; i++)
|
||||
{
|
||||
data[i].word=WORD(words->words[i]);
|
||||
data[i].inexample = 0;
|
||||
data[i].outexample= 0;
|
||||
|
||||
data[i].directCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[i].directPos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[i].reverseCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[i].reversePos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
}
|
||||
|
||||
//build keywords automaton
|
||||
ahoc_buildKeywordTree (&automaton_root, words, options);
|
||||
//set fail links and output sets
|
||||
ahoc_updateForFailAndOutput (&automaton_root);
|
||||
|
||||
//debug; print keywordtree in a gv file
|
||||
//ahoc_graphKeywordTree (&automaton_root);
|
||||
|
||||
//loop on each sequence for its each base and find words
|
||||
for (i=0; i < seqdbsize; i++)
|
||||
{
|
||||
if(database[i]->SQ_length <= options->primer_length) continue;
|
||||
|
||||
lmax = database[i]->SQ_length;
|
||||
if (!options->circular)
|
||||
lmax += options->primer_length-1;
|
||||
curr_state = &automaton_root;
|
||||
|
||||
for (j=0,base=database[i]->SQ; j<lmax; j++,base++)
|
||||
{
|
||||
if (i==(uint32_t)database[i]->SQ_length) base=database[i]->SQ;
|
||||
|
||||
//code = encoder[(*base) - 'A'];
|
||||
code = *base;
|
||||
//if (iii++ < 30)
|
||||
// fprintf (stderr, "%d:%d,", *base, code);
|
||||
if (code < 0 || code > 3)
|
||||
{
|
||||
//if error char, start from root for next character
|
||||
//+forget any incomplete words
|
||||
curr_state = &automaton_root;
|
||||
continue;
|
||||
}
|
||||
while (curr_state->next[code] == NULL) curr_state = curr_state->fail;
|
||||
curr_state = curr_state->next[code];
|
||||
|
||||
//start position of primer is options->primer_length-1 chars back
|
||||
pos = j-options->primer_length+1;
|
||||
if (pos < 0) pos = database[i]->SQ_length+pos;
|
||||
|
||||
//set output, if there is some output on this state then
|
||||
//+all words in the output set complete here, so increment their
|
||||
//+found properties for current sequence
|
||||
for (k=0; k<curr_state->output.count; k++)
|
||||
{
|
||||
if (curr_state->output.out_set[k].isdirect)
|
||||
data[curr_state->output.out_set[k].wordidx].directCount[i]++;
|
||||
else
|
||||
data[curr_state->output.out_set[k].wordidx].reverseCount[i]++;
|
||||
|
||||
if (options->no_multi_match)
|
||||
{
|
||||
if ((data[curr_state->output.out_set[k].wordidx].directCount[i] +
|
||||
data[curr_state->output.out_set[k].wordidx].reverseCount[i]) > 1)
|
||||
//since multimach not allowd, set an indication on 1st seq position that
|
||||
//+ a multimatch was found, so that this word will be filtered out
|
||||
//+ and because of first postion we wont have to search the whole array
|
||||
//+ to find if it voilated nomultimatch constraint for some seq
|
||||
data[curr_state->output.out_set[k].wordidx].directCount[0] = 2;
|
||||
else
|
||||
{
|
||||
if (curr_state->output.out_set[k].isdirect)
|
||||
//direct word found on jth position of ith sequence
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].value = (uint32_t)pos;
|
||||
else
|
||||
//reverse word found on jth position of ith sequence
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].value = (uint32_t)pos;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//okay multi match allowed
|
||||
if (curr_state->output.out_set[k].isdirect)
|
||||
{
|
||||
if (data[curr_state->output.out_set[k].wordidx].directCount[i] == 1)
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].value = (uint32_t)pos;
|
||||
else
|
||||
{
|
||||
//need to create or extend the positions list
|
||||
if (data[curr_state->output.out_set[k].wordidx].directCount[i] == 2)
|
||||
{
|
||||
//for second element, first was put in .value, so dont forget to copy that in the array too
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer = ECOMALLOC(2 * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[0] = data[curr_state->output.out_set[k].wordidx].directPos[i].value;
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[1] = (uint32_t)pos;
|
||||
}
|
||||
else
|
||||
{
|
||||
//for third or greater element
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer = ECOREALLOC(data[curr_state->output.out_set[k].wordidx].directPos[i].pointer,
|
||||
data[curr_state->output.out_set[k].wordidx].directCount[i] * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[data[curr_state->output.out_set[k].wordidx].directCount[i]-1] = (uint32_t)pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (data[curr_state->output.out_set[k].wordidx].reverseCount[i] == 1)
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].value = (uint32_t)pos;
|
||||
else
|
||||
{
|
||||
//need to create or extend the positions list
|
||||
if (data[curr_state->output.out_set[k].wordidx].reverseCount[i] == 2)
|
||||
{
|
||||
//for second element, first was put in .value, so dont forget to copy that in the array too
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer = ECOMALLOC(2 * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[0] = data[curr_state->output.out_set[k].wordidx].reversePos[i].value;
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[1] = (uint32_t)pos;
|
||||
}
|
||||
else
|
||||
{
|
||||
//for third or greater element
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer = ECOREALLOC(data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer,
|
||||
data[curr_state->output.out_set[k].wordidx].reverseCount[i] * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[data[curr_state->output.out_set[k].wordidx].reverseCount[i]-1] = (uint32_t)pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//dont forget to increment inexample or outexample count, but only once for a sequence
|
||||
if ((data[curr_state->output.out_set[k].wordidx].directCount[i] +
|
||||
data[curr_state->output.out_set[k].wordidx].reverseCount[i]) == 1)
|
||||
{
|
||||
if (database[i]->isexample)
|
||||
data[curr_state->output.out_set[k].wordidx].inexample++;
|
||||
else
|
||||
data[curr_state->output.out_set[k].wordidx].outexample++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Only thing that remains is to remove the failed words
|
||||
for (i=0,j=0; i<words->size; i++)
|
||||
{
|
||||
fprintf(stderr,"Primers %5d/%lld analyzed => sequence : %s in %d example and %d counterexample sequences \r",
|
||||
i+1,words->size,ecoUnhashWord(data[i].word,options->primer_length),
|
||||
data[i].inexample,data[i].outexample);
|
||||
|
||||
//if (data[i].inexample < inSequenceQuorum || (data[i].directCount[0] == 2 && options->no_multi_match))
|
||||
if (data[i].directCount[0] == 2 && options->no_multi_match)
|
||||
{
|
||||
//bad word, delete from the array
|
||||
for (k=0; k<seqdbsize; k++)
|
||||
{
|
||||
if (data[i].directCount[k] > 1)
|
||||
ECOFREE (data[i].directPos[k].pointer, "Cannot free position pointer.");
|
||||
if (data[i].reverseCount[k] > 1)
|
||||
ECOFREE (data[i].reversePos[k].pointer, "Cannot free position pointer.");
|
||||
}
|
||||
ECOFREE (data[i].directCount, "Cannot free position pointer.");
|
||||
ECOFREE (data[i].directPos, "Cannot free position pointer.");
|
||||
ECOFREE (data[i].reverseCount, "Cannot free position pointer.");
|
||||
ECOFREE (data[i].reversePos, "Cannot free position pointer.");
|
||||
}
|
||||
else
|
||||
{
|
||||
//data[i].good = data[i].inexample >= inSequenceQuorum && data[i].outexample <= outSequenceQuorum;
|
||||
data[i].good = data[i].outexample <= outSequenceQuorum;
|
||||
goodPrimers+=data[i].good? 1:0;
|
||||
if (j < i)
|
||||
data[j] = data[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
fprintf(stderr,"\n\nOn %lld analyzed primers %d respect quorum conditions\n",words->size,goodPrimers);
|
||||
fprintf(stderr,"Conserved primers for further analysis : %d/%lld\n",j,words->size);
|
||||
|
||||
primers = ECOMALLOC(sizeof(primercount_t),"Cannot allocate memory for primer table");
|
||||
primers->primers=ECOREALLOC(data,
|
||||
j * sizeof(primer_t),
|
||||
"Cannot reallocate memory for fuzzy matching results");
|
||||
primers->size=j;
|
||||
|
||||
//free memory of keyword table
|
||||
for (i=0; i<4; i++)
|
||||
if (automaton_root.next[i] != &automaton_root)
|
||||
ahoc_freeKeywordTree (automaton_root.next[i]);
|
||||
|
||||
return primers;
|
||||
}
|
||||
|
||||
/*
 * Debug helper: write the graphviz node statement of node, then of all states
 * below it, to gfile. The label shows the state id followed by its output
 * entries as <word index><d|r>.
 */
void ahoc_graphPrintNodesInfo (aho_state *node, FILE* gfile)
{
    uint32_t n;
    aho_state *child;

    fprintf (gfile, "\"%d\"[\n", node->id);
    fprintf (gfile, "label=\"%d\\n", node->id);

    n = 0;
    while (n < node->output.count)
    {
        fprintf (gfile, "%d%c,", node->output.out_set[n].wordidx, node->output.out_set[n].isdirect?'d':'r');
        n++;
    }
    fprintf (gfile, "\"\n];\n");

    //descend, skipping missing edges and edges looping onto the state itself
    for (n = 0; n < 4; n++)
    {
        child = node->next[n];
        if (child == NULL || child == node)
            continue;
        ahoc_graphPrintNodesInfo (child, gfile);
    }
}
|
||||
|
||||
/*
 * Debug helper: write the graphviz edges of node and of all states below it.
 *
 * Tree edges are labelled with their base letter. Fail links are drawn in red,
 * but only during the first 40 calls of this function (counted over the whole
 * program run) and only when they do not point at the root remembered in groot.
 */
void ahoc_graphPrintNodesLinks (aho_state *node, FILE* gfile)
{
    static int visited = 0;
    uint32_t n;
    aho_state *child;

    for (n = 0; n < 4; n++)
    {
        child = node->next[n];
        if (child == NULL || child == node)
            continue;

        fprintf (gfile, "\"%d\" -> \"%d\" [\n", node->id, child->id);
        fprintf (gfile, "label=\"%c\"\n];\n", "ACGT"[n]);
    }

    //the call counter advances on every call, whether or not a link is printed
    if (visited++ < 40 && node->fail != NULL && node->fail != groot)
    {
        fprintf (gfile, "\"%d\" -> \"%d\" [\n", node->id, node->fail->id);
        fprintf (gfile, "color= \"red\"\n];\n");
    }

    for (n = 0; n < 4; n++)
    {
        child = node->next[n];
        if (child == NULL || child == node)
            continue;
        ahoc_graphPrintNodesLinks (child, gfile);
    }
}
|
||||
|
||||
/*
 * Debug helper: dump the keyword tree rooted at root as a graphviz digraph
 * into the file "keywordtree.gv" of the current directory.
 *
 * root is also stored in the global groot, which the link printer uses to
 * leave out fail links pointing at the root. When the file cannot be opened a
 * message is written to stderr and nothing is dumped.
 */
void ahoc_graphKeywordTree (aho_state *root)
{
    FILE *gfile;

    groot=root;
    gfile = fopen ("keywordtree.gv", "w");
    if (gfile == NULL)
    {
        //fopen failed: writing through a NULL stream would crash
        fprintf (stderr, "Cannot open keywordtree.gv for writing\n");
        return;
    }

    fprintf (gfile, "digraph keywordtree {\n");
    ahoc_graphPrintNodesInfo (root, gfile);
    ahoc_graphPrintNodesLinks (root, gfile);
    fprintf (gfile, "}\n");
    fclose(gfile);
}
|
||||
|
43
src/libecoprimer/ahocorasick.h
Executable file
43
src/libecoprimer/ahocorasick.h
Executable file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* ahocorasick.h
|
||||
*
|
||||
* Created on: 26 march 2011
|
||||
* Author: tiayyba
|
||||
*/
|
||||
|
||||
#ifndef H_ahocorasick
|
||||
#define H_ahocorasick
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
// One entry of a state's output set: a keyword recognised at that state,
// identified by its index and by the strand it was inserted for.
typedef struct aho_output_t{
|
||||
uint32_t wordidx; //index of the strict word (the word itself is not stored here)
|
||||
bool_t isdirect; //TRUE for the direct word, FALSE for the reverse one: both are searched so we must know which one it is
|
||||
}aho_output;
|
||||
|
||||
// Output set of a state: a counted array of aho_output entries.
typedef struct aho_output_count_t{
|
||||
uint32_t count; //number of entries in out_set
|
||||
aho_output *out_set; //entry array; only allocated once count is greater than 0
|
||||
}aho_output_count;
|
||||
|
||||
// One state of the keyword automaton.
typedef struct aho_state_t{
|
||||
int32_t id; //state number, 0 for the root
|
||||
struct aho_state_t *next[4]; //for labels A=0,C=1,G=2 and T=3
|
||||
struct aho_state_t *fail; //state to fall back to when next[] has no edge for the current symbol
|
||||
aho_output_count output; //keywords recognised when this state is reached
|
||||
}aho_state;
|
||||
|
||||
// Cell of the singly linked FIFO queue of states.
typedef struct queue_node_t {
|
||||
aho_state *state_node; //queued state (not owned by the cell)
|
||||
struct queue_node_t *next; //following cell, NULL for the last one
|
||||
}queue_node;
|
||||
|
||||
// FIFO queue of states; first == NULL means the queue is empty.
typedef struct{
|
||||
queue_node *first; //head cell, removed by dequeue
|
||||
queue_node *last; //tail cell, where enqueue appends
|
||||
}aho_queue;
|
||||
|
||||
pprimercount_t ahoc_lookforStrictPrimers (pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||
pwordcount_t words,poptions_t options);
|
||||
#endif /* H_ahocorasick */
|
||||
|
131
src/libecoprimer/amplifiatree.c
Normal file
131
src/libecoprimer/amplifiatree.c
Normal file
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* amplifiatree.c
|
||||
*
|
||||
* Created on: 7 mars 2009
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <search.h>
|
||||
|
||||
static void cleanamplifia(pamplifia_t amplifia);
|
||||
static void deleteamplifialist(pamplifialist_t list);
|
||||
static int cmpamplifia(const void* p1,const void*p2);
|
||||
|
||||
|
||||
/*
 * Release the amplifias array held by list, when there is one.
 * The list structure itself is not freed.
 */
static void cleanamplifiatlist(pamplifiacount_t list)
{
    if (!list->amplifias)
        return;

    ECOFREE(list->amplifias,
            "Free amplifia list");
}
|
||||
|
||||
/*
 * Release what an amplifia owns, i.e. the content of its pcr member.
 */
static void cleanamplifia(pamplifia_t amplifia)
{
    pamplifiacount_t reactions = &(amplifia->pcr);

    cleanamplifiatlist(reactions);
}
|
||||
|
||||
/*
 * Allocate an empty chunk able to hold 'size' amplifias and, when parent is
 * not NULL, hook it behind parent through parent->next.
 *
 * The allocation is sized as the chunk structure plus size-1 extra amplifia
 * slots, so size is expected to be at least 1.
 * Returns the new chunk.
 */
static pamplifialist_t newamplifialist(pamplifialist_t parent, size_t size)
{
    pamplifialist_t chunk;

    chunk=ECOMALLOC(sizeof(amplifialist_t)+sizeof(amplifia_t)*(size-1),
            "Cannot allocate new amplifia list");

    chunk->next=NULL;
    chunk->amplifiacount=0;
    chunk->amplifiaslots=size;

    if (parent)
        parent->next=(void*)chunk;

    return chunk;
}
|
||||
|
||||
/*
 * Free a chain of amplifia chunks starting at list (NULL is accepted).
 * The chunks that follow are deleted first, then every used amplifia of this
 * chunk is cleaned and the chunk itself is released.
 */
static void deleteamplifialist(pamplifialist_t list)
{
    size_t slot;

    if (!list)
        return;

    if (list->next)
    {
        deleteamplifialist(list->next);
        list->next=NULL;
    }

    slot = 0;
    while (slot < list->amplifiacount)
    {
        cleanamplifia((list->amplifias)+slot);
        slot++;
    }

    ECOFREE(list,"Delete amplifia list");
}
|
||||
|
||||
static int cmpamplifia(const void* p1,const void*p2)
|
||||
{
|
||||
pamplifia_t pr1,pr2;
|
||||
|
||||
pr1=(pamplifia_t)p1;
|
||||
pr2=(pamplifia_t)p2;
|
||||
|
||||
if (pr1->p1 < pr2->p1) return -1;
|
||||
if (pr1->p1 > pr2->p1) return 1;
|
||||
|
||||
if (pr1->asdirect1 < pr2->asdirect1) return -1;
|
||||
if (pr1->asdirect1 > pr2->asdirect1) return 1;
|
||||
|
||||
if (pr1->p2 < pr2->p2) return -1;
|
||||
if (pr1->p2 > pr2->p2) return 1;
|
||||
|
||||
if (pr1->asdirect2 < pr2->asdirect2) return -1;
|
||||
if (pr1->asdirect2 > pr2->asdirect2) return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
pamplifia_t amplifiaintree (amplifia_t key,
|
||||
pamplifiatree_t amplifialist)
|
||||
{
|
||||
if (!amplifialist->tree)
|
||||
return NULL;
|
||||
|
||||
return *((pamplifia_t*)tsearch((const void *)(&key),
|
||||
&(amplifialist->tree),
|
||||
cmpamplifia
|
||||
));
|
||||
}
|
||||
|
||||
pamplifia_t insertamplifia(amplifia_t key,
|
||||
pamplifiatree_t list)
|
||||
{
|
||||
pamplifia_t current;
|
||||
pamplifia_t found;
|
||||
|
||||
if (list->last->amplifiacount==list->last->amplifiaslots)
|
||||
{
|
||||
list->last->next=newamplifialist(list,100);
|
||||
list->last=list->last->next;
|
||||
}
|
||||
|
||||
current = list->last->amplifias + list->last->amplifiacount;
|
||||
*current=key;
|
||||
|
||||
found = *((pamplifia_t*)tsearch((const void *)current,
|
||||
&(list->tree),
|
||||
cmpamplifia));
|
||||
if (found==current)
|
||||
list->last->amplifiacount++;
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
 * Initialise an amplifia tree: one empty 500-slot storage chunk and an empty
 * search tree.
 *
 * tree : structure to initialise, or NULL to have one allocated here
 *
 * Returns the initialised tree.
 */
pamplifiatree_t initamplifiatree(pamplifiatree_t tree)
{
    if (!tree)
        tree = ECOMALLOC(sizeof(amplifiatree_t),"Cannot allocate amplifia tree");

    tree->first=newamplifialist(NULL,500);
    tree->last=tree->first;

    tree->tree=NULL;

    //the function is declared to return the tree: without this statement the
    //caller would read an undefined value
    return tree;
}
|
120
src/libecoprimer/apat.h
Normal file
120
src/libecoprimer/apat.h
Normal file
@ -0,0 +1,120 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Dec. 94 */
|
||||
/* File: apat.h */
|
||||
/* Purpose: pattern scan */
|
||||
/* History: */
|
||||
/* 28/12/94 : <Gloup> ascan first version */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* ==================================================== */
|
||||
|
||||
|
||||
#ifndef H_apat
|
||||
#define H_apat
|
||||
|
||||
|
||||
#include "libstki.h"
|
||||
#include "inttypes.h"
|
||||
#include "../libecoPCR/ecoPCR.h"
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* constantes */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
#ifndef BUFSIZ
|
||||
#define BUFSIZ 1024 /* io buffer size */
|
||||
#endif
|
||||
|
||||
#define MAX_NAME_LEN BUFSIZ /* max length of sequence name */
|
||||
|
||||
#define ALPHA_LEN 4 /* alphabet length */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define MAX_PATTERN 4 /* max # of patterns */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define MAX_PAT_LEN 32 /* max pattern length */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define MAX_PAT_ERR 32 /* max # of errors */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define PATMASK 0x3ffffff /* mask for 26 symbols */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
#define OBLIBIT 0x4000000 /* bit 27 to 1 -> oblig. pos */
|
||||
/* *DO NOT* modify */
|
||||
|
||||
/* mask for position */
|
||||
#define ONEMASK 0x80000000 /* mask for highest position */
|
||||
|
||||
/* masks for Levenhstein edit */
|
||||
#define OPER_IDT 0x00000000 /* identity */
|
||||
#define OPER_INS 0x40000000 /* insertion */
|
||||
#define OPER_DEL 0x80000000 /* deletion */
|
||||
#define OPER_SUB 0xc0000000 /* substitution */
|
||||
|
||||
#define OPER_SHFT 30 /* <unused> shift */
|
||||
|
||||
/* Levenhstein Opcodes */
|
||||
#define SOPER_IDT 0x0 /* identity */
|
||||
#define SOPER_INS 0x1 /* insertion */
|
||||
#define SOPER_DEL 0x2 /* deletion */
|
||||
#define SOPER_SUB 0x3 /* substitution */
|
||||
|
||||
/* Levenhstein Opcodes masks */
|
||||
#define OPERMASK 0xc0000000 /* mask for Opcodes */
|
||||
#define NOPERMASK 0x3fffffff /* negate of previous */
|
||||
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* data structures */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
|
||||
typedef uint32_t pattern_t[ALPHA_LEN], *ppattern_t;
|
||||
|
||||
/* -------------------- */
|
||||
/* Search parameters handed to the Manber scan functions together with the encoded pattern. */
typedef struct { /* pattern */
|
||||
/* -------------------- */
|
||||
int patlen; /* pattern length */
|
||||
int maxerr; /* max # of errors */
|
||||
uint32_t omask; /* oblig. bits mask */
|
||||
bool_t circular; /* is circular sequence */
|
||||
} patternParam_t, *ppatternParam_t;
|
||||
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* macros */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
#ifndef NEW
/* Typed allocation helpers built on malloc/realloc/free.                      */
/* Arguments and whole expansions are parenthesized so that the macros can be  */
/* used with arbitrary expressions and inside larger expressions.              */
#define NEW(typ) ((typ*)malloc(sizeof(typ)))                 /* one object of type typ   */
#define NEWN(typ, dim) ((typ*)malloc((unsigned long)(dim) * sizeof(typ)))  /* dim objects */
#define REALLOC(typ, ptr, dim) ((typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ)))
#define FREE(ptr) free((void *) (ptr))
#endif
|
||||
|
||||
/* ----------------------------------------------- */
|
||||
/* prototypes */
|
||||
/* ----------------------------------------------- */
|
||||
|
||||
/* apat_search.c */
|
||||
|
||||
int32_t ManberNoErr(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos);
|
||||
|
||||
int32_t ManberSub(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos);
|
||||
|
||||
int32_t ManberAll(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos);
|
||||
|
||||
|
||||
#endif /* H_apat */
|
||||
|
65
src/libecoprimer/apat_parse.c
Normal file
65
src/libecoprimer/apat_parse.c
Normal file
@ -0,0 +1,65 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Mar. 92 */
|
||||
/* File: apat_parse.c */
|
||||
/* Purpose: Codage du pattern */
|
||||
/* History: */
|
||||
/* 00/07/94 : <Gloup> first version (stanford) */
|
||||
/* 00/11/94 : <Gloup> revised for DNA/PROTEIN */
|
||||
/* 30/12/94 : <Gloup> modified EncodePattern */
|
||||
/* for manber search */
|
||||
/* 14/05/99 : <Gloup> indels added */
|
||||
/* ==================================================== */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "apat.h"
|
||||
#include "ecoprimer.h"
|
||||
|
||||
|
||||
/* IUPAC Dna */
|
||||
/* One bit mask per letter, listed from A to Z (26 entries).                     */
/* The four bases use one bit each: A=0x1, C=0x2, G=0x4, T=0x8 (U has T's code). */
/* An ambiguity letter is the OR of the bases it stands for, e.g. M=0x3 (A|C),   */
/* R=0x5 (A|G), N=0xF (any base). Letters without a code are 0.                  */
/* Presumably indexed by (letter - 'A') -- confirm against the encoding code.    */
static int32_t sDnaCode[] = {
|
||||
/* IUPAC */
|
||||
|
||||
0x00000001 /* A */, 0x0000000E /* B */, 0x00000002 /* C */,
|
||||
0x0000000D /* D */, 0x00000000 /* E */, 0x00000000 /* F */,
|
||||
0x00000004 /* G */, 0x0000000B /* H */, 0x00000000 /* I */,
|
||||
0x00000000 /* J */, 0x0000000C /* K */, 0x00000000 /* L */,
|
||||
0x00000003 /* M */, 0x0000000F /* N */, 0x00000000 /* O */,
|
||||
0x00000000 /* P */, 0x00000000 /* Q */, 0x00000005 /* R */,
|
||||
0x00000006 /* S */, 0x00000008 /* T */, 0x00000008 /* U */,
|
||||
0x00000007 /* V */, 0x00000009 /* W */, 0x00000000 /* X */,
|
||||
0x0000000A /* Y */, 0x00000000 /* Z */
|
||||
};
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* internal replacement of gets */
|
||||
/* -------------------------------------------- */
|
||||
/* Read one line from stdin into buffer (fgets is given size-1 as limit) */
/* and strip every trailing '\n' or '\r'.                                */
/* Returns buffer, or NULL when fgets reads nothing.                     */
static char *sGets(char *buffer, int size) {

    size_t len;

    if (fgets(buffer, size-1, stdin) == NULL)
        return NULL;

    /* cut the line terminators off the end */

    len = strlen(buffer);

    while (len > 0 && (buffer[len-1] == '\n' || buffer[len-1] == '\r'))
        buffer[--len] = '\000';

    return buffer;
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Interface */
|
||||
/* -------------------------------------------- */
|
155
src/libecoprimer/apat_search.c
Normal file
155
src/libecoprimer/apat_search.c
Normal file
@ -0,0 +1,155 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Dec. 94 */
|
||||
/* File: apat_search.c */
|
||||
/* Purpose: recherche du pattern */
|
||||
/* algorithme de Baeza-Yates/Gonnet */
|
||||
/* Manber (agrep) */
|
||||
/* History: */
|
||||
/* 07/12/94 : <MFS> first version */
|
||||
/* 28/12/94 : <Gloup> revised version */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* ==================================================== */
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libstki.h"
|
||||
#include "apat.h"
|
||||
|
||||
#define POP PopiOut
|
||||
#define PUSH(s,v) PushiIn(&(s),(v))
|
||||
#define TOPCURS CursiToTop
|
||||
#define DOWNREAD ReadiDown
|
||||
|
||||
#define KRONECK(x, msk) ((~x & msk) ? 0 : 1)
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* NoError */
|
||||
/* -------------------------------------------- */
|
||||
int32_t ManberNoErr(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos)
|
||||
{
|
||||
int32_t pos;
|
||||
uint32_t smask, r;
|
||||
uint8_t *data;
|
||||
int32_t end;
|
||||
|
||||
end = (size_t)(pseq->SQ_length);
|
||||
|
||||
if (param->circular)
|
||||
end+=param->patlen - 1;
|
||||
|
||||
|
||||
/* create local masks */
|
||||
smask = r = 0x1L << param->patlen;
|
||||
/* init. scan */
|
||||
data = (uint8_t*)(pseq->SQ);
|
||||
|
||||
/* loop on text data */
|
||||
for (pos = 0 ; pos < end ; pos++,data++) {
|
||||
if (pos==pseq->SQ_length)
|
||||
data=(uint8_t*)(pseq->SQ);
|
||||
|
||||
if (*data < 4)
|
||||
r = (r >> 1) & pat[*data];
|
||||
else
|
||||
r=0;
|
||||
|
||||
if (r & 0x1L) {
|
||||
PUSH(stkpos, pos - param->patlen + 1);
|
||||
}
|
||||
|
||||
r |= smask;
|
||||
}
|
||||
return stkpos->top; /* aka # of hits */
|
||||
}
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* Substitution only */
|
||||
/* */
|
||||
/* Note : r array is stored as : */
|
||||
/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */
|
||||
/* */
|
||||
/* -------------------------------------------- */
|
||||
int32_t ManberSub(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos)
|
||||
{
|
||||
int e, found;
|
||||
int32_t pos;
|
||||
uint32_t smask, cmask, sindx;
|
||||
uint32_t *pr, r[2 * MAX_PAT_ERR + 2];
|
||||
uint8_t *data;
|
||||
int32_t end;
|
||||
|
||||
end = (size_t)(pseq->SQ_length);
|
||||
|
||||
if (param->circular)
|
||||
end+=param->patlen - 1;
|
||||
|
||||
/* create local masks */
|
||||
r[0] = r[1] = 0x0;
|
||||
|
||||
cmask = smask = 0x1L << param->patlen;
|
||||
|
||||
for (e = 0, pr = r + 3 ; e <= param->maxerr ; e++, pr += 2)
|
||||
*pr = cmask;
|
||||
|
||||
cmask = ~ param->omask;
|
||||
/* init. scan */
|
||||
data = (uint8_t*)(pseq->SQ);
|
||||
|
||||
/* loop on text data */
|
||||
|
||||
for (pos = 0 ; pos < end ; pos++,data++) {
|
||||
if (pos==pseq->SQ_length)
|
||||
data=(uint8_t*)(pseq->SQ);
|
||||
|
||||
sindx = (*data==4) ? 0:pat[*data];
|
||||
|
||||
for (e = found = 0, pr = r ; e <= param->maxerr ; e++, pr += 2) {
|
||||
|
||||
pr[2] = pr[3] | smask;
|
||||
|
||||
pr[3] = ((pr[0] >> 1) & cmask) /* sub */
|
||||
| ((pr[2] >> 1) & sindx); /* ident */
|
||||
|
||||
if (pr[3] & 0x1L) { /* found */
|
||||
if (! found) {
|
||||
PUSH(stkpos, pos - param->patlen + 1);
|
||||
}
|
||||
found++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return stkpos->top; /* aka # of hits */
|
||||
}
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Baeza-Yates/Manber algorithm */
|
||||
/* API call to previous functions */
|
||||
/* -------------------------------------------- */
|
||||
int32_t ManberAll(pecoseq_t pseq,ppattern_t pat,
|
||||
ppatternParam_t param,
|
||||
StackiPtr stkpos)
|
||||
{
|
||||
if (param->maxerr == 0)
|
||||
return ManberNoErr(pseq,
|
||||
pat, param,
|
||||
stkpos);
|
||||
else
|
||||
return ManberSub(pseq,
|
||||
pat, param,
|
||||
stkpos);
|
||||
}
|
||||
|
237
src/libecoprimer/aproxpattern.c
Normal file
237
src/libecoprimer/aproxpattern.c
Normal file
@ -0,0 +1,237 @@
|
||||
/*
|
||||
* aproxpattern.c
|
||||
*
|
||||
* Created on: 20 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include "apat.h"
|
||||
#include <math.h>
|
||||
|
||||
/* Nucleotide code of each uppercase letter, indexed by (letter - 'A'): */
/* A -> 0, C -> 1, G -> 2, T and U -> 3, any other letter -> 4.         */
static uint8_t encoder[] = {
    0, 4, 1, 4, 4, 4, 2,    /* A B C D E F G */
    4, 4, 4, 4, 4, 4,       /* H I J K L M   */
    4, 4, 4, 4, 4, 4,       /* N O P Q R S   */
    3, 3,                   /* T U           */
    4, 4, 4, 4, 4           /* V W X Y Z     */
};
|
||||
|
||||
|
||||
/*
 * Build the per-nucleotide bit masks of a hashed word.
 *
 * word   : hashed word, read two bits at a time starting from its least
 *          significant bits.
 * patlen : number of 2-bit codes to read.
 *
 * For the i-th code c read from word, bit i of pattern[c] is set.
 *
 * Returns a pointer to a static table that is cleared and refilled on
 * every call: the result must be used (or copied) before the next call.
 */
ppattern_t buildPatternFromWord(word_t word, uint32_t patlen)
{
    static pattern_t pattern;
    uint32_t i;

    for (i = 0 ; i < ALPHA_LEN ; i++)
        pattern[i] = 0x0;

    for (i = 0 ; i < patlen ; i++)
    {
        /* unsigned shift: (1 << 31) on a signed int is undefined */
        pattern[word & 3LLU] |= (uint32_t)1 << i;
        word >>= 2;
    }

    return pattern;
}
|
||||
|
||||
|
||||
#ifdef IS_UPPER
|
||||
#undef IS_UPPER
|
||||
#endif
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* encode sequence */
|
||||
/* IS_UPPER is slightly faster than isupper */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
|
||||
|
||||
/*
 * Replace, in place, every character of seq->SQ by its nucleotide code
 * taken from the encoder table.  Characters that are not uppercase
 * letters are looked up as 'Z'.
 */
void encodeSequence(ecoseq_t *seq)
{
    char    *letters = seq->SQ;
    uint8_t *codes   = (uint8_t*)(seq->SQ);
    int      n;

    for (n = 0 ; n < seq->SQ_length ; n++)
    {
        char letter = letters[n];

        if (!IS_UPPER(letter))
            letter = 'Z';

        codes[n] = encoder[letter - 'A'];
    }
}
|
||||
|
||||
pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||
pwordcount_t words,poptions_t options)
|
||||
{
|
||||
pprimer_t data;
|
||||
pprimercount_t primers;
|
||||
ppattern_t pattern;
|
||||
patternParam_t params;
|
||||
uint32_t i;
|
||||
uint32_t w;
|
||||
uint32_t j;
|
||||
Stacki positions;
|
||||
uint32_t count=1;
|
||||
uint32_t goodPrimers=0;
|
||||
|
||||
uint32_t inSequenceQuorum;
|
||||
uint32_t outSequenceQuorum;
|
||||
bool_t conserved = TRUE;
|
||||
|
||||
//poslist_t ttt;
|
||||
|
||||
|
||||
inSequenceQuorum = (uint32_t)floor((float)exampleCount * options->sensitivity_quorum);
|
||||
outSequenceQuorum = (uint32_t)floor((float)(seqdbsize-exampleCount) * options->false_positive_quorum);
|
||||
|
||||
fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",inSequenceQuorum,exampleCount);
|
||||
fprintf(stderr," Primers should not be present in more than %d/%d counterexample sequences\n",outSequenceQuorum,(seqdbsize-exampleCount));
|
||||
|
||||
data = ECOMALLOC(words->size * sizeof(primer_t),
|
||||
"Cannot allocate memory for fuzzy matching results");
|
||||
|
||||
params.circular = options->circular;
|
||||
params.maxerr = options->error_max;
|
||||
// params.omask = (1 << options->strict_three_prime) -1;
|
||||
params.omask = 0;
|
||||
params.patlen = options->primer_length;
|
||||
|
||||
positions.val=NULL;
|
||||
|
||||
for (i=0,w=0; i < words->size; i++)
|
||||
{
|
||||
data[w].word=WORD(words->words[i]);
|
||||
data[w].inexample = 0;
|
||||
data[w].outexample= 0;
|
||||
count = 1;
|
||||
|
||||
if (conserved)
|
||||
{
|
||||
data[w].directCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[w].directPos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[w].reverseCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[w].reversePos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
}
|
||||
|
||||
pattern = buildPatternFromWord(data[w].word,options->primer_length);
|
||||
positions.val=NULL;
|
||||
|
||||
for (j=0; j < seqdbsize && (count < 2 || !options->no_multi_match); j++)
|
||||
{
|
||||
positions.cursor=0;
|
||||
positions.top =0;
|
||||
if (!positions.val)
|
||||
{
|
||||
positions.size=1;
|
||||
positions.val = ECOMALLOC(sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
}
|
||||
|
||||
|
||||
count = ManberAll(database[j],pattern,¶ms,&positions);
|
||||
data[w].directCount[j]=count;
|
||||
|
||||
|
||||
if (count>1)
|
||||
{
|
||||
data[w].directPos[j].pointer = (uint32_t*)positions.val;
|
||||
positions.val=NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
data[w].directPos[j].pointer=NULL;
|
||||
if (count==1)
|
||||
data[w].directPos[j].value = (uint32_t)*(positions.val);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
pattern = buildPatternFromWord(ecoComplementWord(data[w].word,options->primer_length),
|
||||
options->primer_length);
|
||||
|
||||
for (j=0; j < seqdbsize && (count < 2 || !options->no_multi_match); j++)
|
||||
{
|
||||
positions.cursor=0;
|
||||
positions.top =0;
|
||||
if (!positions.val)
|
||||
{
|
||||
positions.size=1;
|
||||
positions.val = ECOMALLOC(sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
}
|
||||
|
||||
count = ManberAll(database[j],pattern,¶ms,&positions);
|
||||
data[w].reverseCount[j]=count;
|
||||
|
||||
if (count>1)
|
||||
{
|
||||
data[w].reversePos[j].pointer = (uint32_t*)positions.val;
|
||||
positions.val=NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
data[w].reversePos[j].pointer=NULL;
|
||||
if (count==1)
|
||||
data[w].reversePos[j].value = (uint32_t)*(positions.val);
|
||||
}
|
||||
|
||||
if (database[j]->isexample)
|
||||
{
|
||||
data[w].inexample+=(data[w].directCount[j] || data[w].reverseCount[j])? 1:0;
|
||||
}
|
||||
else
|
||||
{
|
||||
data[w].outexample+=(data[w].directCount[j] || data[w].reverseCount[j])? 1:0;
|
||||
|
||||
}
|
||||
|
||||
count+=data[w].directCount[j];
|
||||
}
|
||||
|
||||
data[w].good = data[w].inexample >= inSequenceQuorum && data[w].outexample <= outSequenceQuorum;
|
||||
goodPrimers+=data[w].good? 1:0;
|
||||
|
||||
fprintf(stderr,"Primers %5d/%d analyzed => sequence : %s in %d example and %d counterexample sequences \r",
|
||||
i+1,words->size,ecoUnhashWord(data[w].word,options->primer_length),
|
||||
data[w].inexample,data[w].outexample);
|
||||
|
||||
|
||||
conserved=data[w].inexample >= inSequenceQuorum;
|
||||
conserved=conserved && (count < 2 || !options->no_multi_match);
|
||||
|
||||
if (conserved)
|
||||
w++;
|
||||
}
|
||||
|
||||
if (positions.val)
|
||||
ECOFREE(positions.val,"Free stack position pointer");
|
||||
|
||||
if (!conserved)
|
||||
{
|
||||
ECOFREE(data[w].directCount,"Free direct count table");
|
||||
ECOFREE(data[w].directPos,"Free direct count table");
|
||||
ECOFREE(data[w].reverseCount,"Free direct count table");
|
||||
ECOFREE(data[w].reversePos,"Free direct count table");
|
||||
}
|
||||
|
||||
fprintf(stderr,"\n\nOn %d analyzed primers %d respect quorum conditions\n",words->size,goodPrimers);
|
||||
fprintf(stderr,"Conserved primers for further analysis : %d/%d\n",w,words->size);
|
||||
|
||||
primers = ECOMALLOC(sizeof(primercount_t),"Cannot allocate memory for primer table");
|
||||
primers->primers=ECOREALLOC(data,
|
||||
w * sizeof(primer_t),
|
||||
"Cannot reallocate memory for fuzzy matching results");
|
||||
primers->size=w;
|
||||
|
||||
return primers;
|
||||
}
|
29
src/libecoprimer/debug.h
Normal file
29
src/libecoprimer/debug.h
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* debug.h
|
||||
*
|
||||
* Created on: 12 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#ifndef DEBUG_H_
|
||||
#define DEBUG_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
/* Print a printf-style debug message on stderr, prefixed with the file */
/* name and line number of the call site.  When asprintf fails the      */
/* buffer content is undefined, so nothing is printed nor freed.        */
#define DEBUG_LOG(message,...) { \
        char *text = NULL; \
        if (asprintf(&text,(message),##__VA_ARGS__) >= 0) \
        { \
            fprintf(stderr,"DEBUG %s (line %d) : %s\n",__FILE__,__LINE__,(text)); \
            free(text); \
        } \
    }
|
||||
|
||||
#else
|
||||
|
||||
#define DEBUG_LOG(message, ...)
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* DEBUG_H_ */
|
366
src/libecoprimer/ecoprimer.h
Executable file
366
src/libecoprimer/ecoprimer.h
Executable file
@ -0,0 +1,366 @@
|
||||
/*
|
||||
* ecoprimer.h (the include guard is still named EPSORT_H_)
|
||||
*
|
||||
* Created on: 6 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#ifndef EPSORT_H_
|
||||
#define EPSORT_H_
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "ecotype.h"
|
||||
#include "../libecoPCR/ecoPCR.h"
|
||||
#include "../libthermo/nnparams.h"
|
||||
#include "apat.h"
|
||||
|
||||
#define DEBUG
|
||||
#include "debug.h"
|
||||
|
||||
/****
|
||||
* Word format used :
|
||||
*
|
||||
* bit 63 : bad word -> this word should not be used
|
||||
* bit 62 : multi word -> this word is not unique in at least one sequence
|
||||
* bits 0-61 : hashed DNA word of at most 31 bp (two bits per base)
|
||||
* code used for a : 00
|
||||
* code used for c : 01
|
||||
* code used for g : 10
|
||||
* code used for t : 11
|
||||
*/
|
||||
|
||||
typedef uint64_t word_t, *pword_t;
|
||||
|
||||
#define WORD(x) ((x) & 0x3FFFFFFFFFFFFFFFLLU)
|
||||
#define WORD(x) ((x) & 0x3FFFFFFFFFFFFFFFLLU)
|
||||
|
||||
#define ISBADWORD(x) (((x) & 0x8000000000000000LLU) >> 63)
|
||||
#define SETBADWORD(x) ((x) | 0x8000000000000000LLU)
|
||||
#define RESETBADWORD(x) ((x) & 0x7FFFFFFFFFFFFFFFLLU)
|
||||
|
||||
#define ISMULTIWORD(x) (((x) & 0x4000000000000000LLU) >> 62)
|
||||
#define SETMULTIWORD(x) ((x) | 0x4000000000000000LLU)
|
||||
#define RESETMULTIWORD(x) ((x) & 0xBFFFFFFFFFFFFFFFLLU)
|
||||
|
||||
|
||||
#define WORDMASK(s) ((1LLU << ((s) * 2)) -1)
|
||||
#define LSHIFTWORD(x,s) (((x) << 2) & WORDMASK(s))
|
||||
#define RSHIFTWORD(x,s) (((x) & WORDMASK(s))>> 2)
|
||||
#define ERRORMASK(s) ((int32_t)((1LLU << (s)) -1))
|
||||
|
||||
#define RAPPENDBASE(x,s,c) (LSHIFTWORD((x),(s)) | (word_t)(c))
|
||||
#define LAPPENDBASE(x,s,c) (RSHIFTWORD((x),(s)) | ((word_t)((~(c)) & 3) << (((s)-1) *2)))
|
||||
|
||||
|
||||
#define ECO_ASSERT(x,message) if (!(x)) \
|
||||
{ \
|
||||
fprintf(stderr,"Assertion Error in %s (line %d): %s\n", \
|
||||
__FILE__,\
|
||||
__LINE__,\
|
||||
message\
|
||||
); \
|
||||
exit(ECO_ASSERT_ERROR); \
|
||||
}
|
||||
|
||||
#define MINI(x,y) (((x) < (y)) ? (x):(y))
|
||||
#define MAXI(x,y) (((x) < (y)) ? (y):(x))
|
||||
|
||||
#define FWORDSIZE (13)
|
||||
#define FWORDMASK WORDMASK(FWORDSIZE)
|
||||
#define FILTERWORD(x) ((uint32_t)((x) & FWORDMASK))
|
||||
#define CFILTERWORD(x,s) ((uint32_t)(((x) >> (((s)-FWORDSIZE)*2)) & FWORDMASK))
|
||||
|
||||
|
||||
|
||||
typedef struct {
|
||||
pword_t words;
|
||||
uint32_t *strictcount;
|
||||
uint32_t inseqcount;
|
||||
uint32_t outseqcount;
|
||||
uint64_t size;
|
||||
} wordcount_t, *pwordcount_t;
|
||||
|
||||
|
||||
typedef union {
|
||||
uint32_t *pointer;
|
||||
uint32_t value;
|
||||
} poslist_t, *pposlist_t;
|
||||
|
||||
|
||||
/**
|
||||
* primer_t structure store fuzzy match positions for a primer
|
||||
* on all sequences
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
word_t word; //< code for the primer
|
||||
uint32_t *directCount; //< Occurrence count on direct strand
|
||||
pposlist_t directPos; //< list of position list on direct strand
|
||||
|
||||
uint32_t *reverseCount; //< Occurrence count on reverse strand
|
||||
pposlist_t reversePos; //< list of position list on reverse strand
|
||||
|
||||
bool_t good; //< primer match more than quorum example and no
|
||||
// more counterexample quorum.
|
||||
|
||||
uint32_t inexample; //< count of example sequences matching primer
|
||||
uint32_t outexample; //< count of counterexample sequences matching primer
|
||||
} primer_t, *pprimer_t;
|
||||
|
||||
/**
|
||||
* primercount_t structure store fuzzy match positions for all primers
|
||||
* on all sequences as a list of primer_t
|
||||
*/
|
||||
typedef struct {
|
||||
pprimer_t primers;
|
||||
uint32_t size;
|
||||
} primercount_t, *pprimercount_t;
|
||||
|
||||
typedef struct {
|
||||
pprimer_t primer;
|
||||
uint32_t position;
|
||||
bool_t strand;
|
||||
} primermatch_t, *pprimermatch_t;
|
||||
|
||||
/*TR: Added*/
|
||||
typedef struct {
|
||||
pprimermatch_t matches;
|
||||
uint32_t matchcount;
|
||||
} primermatchcount_t, *pprimermatchcount_t;
|
||||
|
||||
typedef struct {
|
||||
pecoseq_t sequence;
|
||||
bool_t strand;
|
||||
const char *amplifia;
|
||||
int32_t length;
|
||||
uint32_t begin;
|
||||
uint32_t end;
|
||||
} amplifia_t, *pamplifia_t;
|
||||
|
||||
typedef struct {
|
||||
pamplifia_t amplifias;
|
||||
uint32_t ampcount;
|
||||
uint32_t ampslot;
|
||||
} amplifiacount_t, *pamplifiacount_t;
|
||||
|
||||
typedef struct {
|
||||
char *amplifia;
|
||||
int32_t *taxonids;
|
||||
uint32_t seqidcount;
|
||||
uint32_t seqidindex;
|
||||
} ampseqset_t, *pampseqset_t;
|
||||
|
||||
typedef struct {
|
||||
int32_t taxonid;
|
||||
char **amplifia;
|
||||
uint32_t amplifiacount;
|
||||
uint32_t amplifiaindex;
|
||||
} taxampset_t, *ptaxampset_t;
|
||||
|
||||
typedef struct {
|
||||
pprimer_t p1;
|
||||
bool_t asdirect1;
|
||||
pprimer_t p2;
|
||||
bool_t asdirect2;
|
||||
|
||||
amplifiacount_t pcr;
|
||||
|
||||
uint32_t inexample; //< example sequence count
|
||||
uint32_t outexample; //< counterexample sequence count
|
||||
uint32_t intaxa; //< example taxa count
|
||||
uint32_t outtaxa; //< counterexample taxa count
|
||||
uint32_t notwellidentifiedtaxa;
|
||||
|
||||
int *wellIdentifiedSeqs; //< an array having elements equal to total seqs
|
||||
// values are either 0 or 1, if seq is well identified
|
||||
// its 1 else 0
|
||||
int *coveredSeqs; //< an array having elements equal to total seqs, 1 if seq is covered else 0
|
||||
|
||||
// these statistics are relative to inexample sequences
|
||||
|
||||
uint32_t mind; //< minimum distance between primers
|
||||
uint32_t maxd; //< maximum distance between primers
|
||||
uint32_t sumd; //< distance sum
|
||||
uint32_t amplifiacount;
|
||||
float yule;
|
||||
float quorumin;
|
||||
float quorumout;
|
||||
float bs;
|
||||
float bc;
|
||||
int32_t refsequence;
|
||||
//
|
||||
// uint32_t taxsetcount;
|
||||
// uint32_t taxsetindex;
|
||||
// ptaxampset_t taxset;
|
||||
//
|
||||
// uint32_t oktaxoncount;
|
||||
uint32_t curseqid;
|
||||
float p1temp; //strict primer1 melting temperature
|
||||
float p1mintemp; //approx primer1 minimum melting temperature
|
||||
float p2temp; //strict primer2 melting temperature
|
||||
float p2mintemp; //approx primer2 minimum melting temperature
|
||||
} pair_t, *ppair_t;
|
||||
|
||||
/*TR: Added*/
|
||||
|
||||
typedef struct {
|
||||
size_t paircount;
|
||||
size_t pairslots;
|
||||
void* next;
|
||||
pair_t pairs[1];
|
||||
} pairlist_t, *ppairlist_t;
|
||||
|
||||
typedef struct {
|
||||
ppairlist_t first;
|
||||
ppairlist_t last;
|
||||
void *tree;
|
||||
int32_t count;
|
||||
} pairtree_t, *ppairtree_t;
|
||||
|
||||
typedef struct {
|
||||
pword_t words;
|
||||
uint32_t *count;
|
||||
uint32_t push;
|
||||
uint32_t pop;
|
||||
uint32_t size;
|
||||
bool_t empty;
|
||||
bool_t full;
|
||||
} queue_t, *pqueue_t;
|
||||
|
||||
typedef struct {
|
||||
pword_t words;
|
||||
uint32_t *count;
|
||||
uint32_t write;
|
||||
uint32_t read1;
|
||||
uint32_t read2;
|
||||
uint32_t size;
|
||||
} merge_t, *pmerge_t;
|
||||
|
||||
typedef struct {
|
||||
const char *amplifia;
|
||||
bool_t strand;
|
||||
int32_t length;
|
||||
int32_t taxoncount;
|
||||
void *taxontree;
|
||||
}amptotaxon_t, *pamptotaxon_t;
|
||||
|
||||
typedef struct {
|
||||
int32_t taxid;
|
||||
void *amptree;
|
||||
}taxontoamp_t, *ptaxontoamp_t;
|
||||
|
||||
typedef struct {
|
||||
bool_t printAC;
|
||||
bool_t statistics;
|
||||
bool_t filtering;
|
||||
uint32_t lmin; //**< Amplifia minimal length
|
||||
uint32_t lmax; //**< Amplifia maximal length
|
||||
uint32_t error_max; //**< maximum error count in fuzzy search
|
||||
uint32_t primer_length; //**< minimal length of the primers
|
||||
int32_t *restricted_taxid; //**< limit amplification below these taxid
|
||||
int32_t *ignored_taxid; //**< no amplification below these taxid
|
||||
int32_t *exception_taxid;
|
||||
char *prefix;
|
||||
char *reference;
|
||||
pecoseq_t refseq;
|
||||
uint32_t refseqid;
|
||||
uint32_t circular;
|
||||
uint32_t doublestrand;
|
||||
float strict_quorum;
|
||||
float strict_exclude_quorum;
|
||||
float sensitivity_quorum;
|
||||
float false_positive_quorum;
|
||||
uint32_t strict_three_prime;
|
||||
int32_t r; //**< count of restricted taxa (restricted_taxid array size)
|
||||
int32_t g; //**< count of ignored taxa (ignored_taxid array size)
|
||||
int32_t e; //**< count of exception taxa (exception_taxid array size)
|
||||
bool_t no_multi_match;
|
||||
char taxonrank[20]; //TR to count ranks against a pair
|
||||
int32_t taxonrankidx; //TR to count ranks against a pair
|
||||
|
||||
// Some statistics useful for options filters
|
||||
|
||||
int32_t dbsize;
|
||||
int32_t insamples;
|
||||
int32_t outsamples;
|
||||
int32_t intaxa;
|
||||
int32_t outtaxa;
|
||||
int saltmethod;
|
||||
float salt;
|
||||
PNNParams pnparm;
|
||||
bool_t print_sets_of_primers;
|
||||
float specificity_threshold;
|
||||
int links_cnt;
|
||||
float max_links_percent;
|
||||
bool_t filter_on_links;
|
||||
} options_t, *poptions_t;
|
||||
|
||||
typedef ecoseq_t **pecodnadb_t;
|
||||
|
||||
void sortword(pword_t table,uint32_t N);
|
||||
|
||||
|
||||
pecodnadb_t readdnadb(const char *name, ecotaxonomy_t *taxonomy, uint32_t *size,poptions_t options);
|
||||
|
||||
int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
|
||||
int isExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
|
||||
int isCounterExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
|
||||
|
||||
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq);
|
||||
pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size,int32_t *neededWords,uint32_t neededWordCount,
|
||||
int32_t quorum);
|
||||
uint32_t ecoCompactHashSequence(pword_t dest,uint32_t size);
|
||||
const char* ecoUnhashWord(word_t word,uint32_t size);
|
||||
word_t ecoComplementWord(word_t word,uint32_t size);
|
||||
uint32_t ecoFindWord(pwordcount_t table,word_t word);
|
||||
|
||||
|
||||
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum);
|
||||
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount);
|
||||
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount);
|
||||
|
||||
pqueue_t newQueue(pqueue_t queue, uint32_t size);
|
||||
pqueue_t resizeQueue(pqueue_t queue, uint32_t size);
|
||||
|
||||
void pop(pqueue_t queue);
|
||||
void push(pqueue_t queue, word_t word, uint32_t count);
|
||||
|
||||
pqueue_t cleanQueue(pqueue_t queue);
|
||||
|
||||
pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
|
||||
uint32_t exampleCount, poptions_t options);
|
||||
uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers);
|
||||
|
||||
void encodeSequence(ecoseq_t *seq);
|
||||
ppattern_t buildPatternFromWord(word_t word, uint32_t patlen);
|
||||
|
||||
pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||
pwordcount_t words,poptions_t options);
|
||||
|
||||
void sortmatch(pprimermatch_t table,uint32_t N);
|
||||
|
||||
ppairtree_t initpairtree(ppairtree_t tree);
|
||||
ppair_t pairintree (pair_t key,ppairtree_t pairlist);
|
||||
ppair_t insertpair(pair_t key,ppairtree_t list);
|
||||
|
||||
|
||||
/*TR: Added*/
|
||||
ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options);
|
||||
|
||||
int32_t counttaxon(int32_t taxid);
|
||||
int32_t getrankdbstats(pecodnadb_t seqdb,
|
||||
uint32_t seqdbsize,
|
||||
ecotaxonomy_t *taxonomy,
|
||||
poptions_t options);
|
||||
float taxonomycoverage(ppair_t pair, poptions_t options, pecodnadb_t seqdb,uint32_t seqdbsize);
|
||||
char ecoComplementChar(char base);
|
||||
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize);
|
||||
|
||||
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
|
||||
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum);
|
||||
|
||||
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize);
|
||||
|
||||
#endif /* EPSORT_H_ */
|
14
src/libecoprimer/ecotype.h
Normal file
14
src/libecoprimer/ecotype.h
Normal file
@ -0,0 +1,14 @@
|
||||
/*
|
||||
* ecotype.h
|
||||
*
|
||||
* Created on: 24 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#ifndef ECOTYPE_H_
|
||||
#define ECOTYPE_H_
|
||||
|
||||
typedef enum { FALSE=0,TRUE=1} bool_t, *pbool_t;
|
||||
|
||||
|
||||
#endif /* ECOTYPE_H_ */
|
188
src/libecoprimer/filtering.c
Normal file
188
src/libecoprimer/filtering.c
Normal file
@ -0,0 +1,188 @@
|
||||
/*
|
||||
* filtering.c
|
||||
*
|
||||
* Created on: 12 mai 2009
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "hashencoder.h"
|
||||
|
||||
static int32_t *ecoFilteringHashSequence(int32_t *dest,
|
||||
uint32_t circular,
|
||||
uint32_t doublestrand,
|
||||
ecoseq_t *seq,
|
||||
uint32_t *size);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static int32_t *ecoFilteringHashSequence(int32_t *dest,
|
||||
uint32_t circular,
|
||||
uint32_t doublestrand,
|
||||
ecoseq_t *seq,
|
||||
uint32_t *size)
|
||||
{
|
||||
static char *in_last_seq=NULL;
|
||||
uint32_t i=0;
|
||||
uint32_t j;
|
||||
char *base;
|
||||
int8_t code;
|
||||
int32_t error=0;
|
||||
word_t word=0;
|
||||
word_t antiword=0;
|
||||
uint32_t goodword;
|
||||
uint32_t lmax=0;
|
||||
|
||||
// run on the first call;
|
||||
|
||||
|
||||
if (dest==(void*)-1)
|
||||
{
|
||||
if (in_last_seq) ECOFREE(in_last_seq,"Free in last seq table");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
*size = pow(4,FWORDSIZE);
|
||||
|
||||
if (!in_last_seq)
|
||||
in_last_seq = ECOMALLOC(*size*sizeof(char),
|
||||
"Cannot allocate filtering hash table");
|
||||
|
||||
memset(in_last_seq,0,*size*sizeof(char));
|
||||
|
||||
|
||||
if (!dest)
|
||||
{
|
||||
dest = ECOMALLOC(*size*sizeof(int32_t),
|
||||
"Cannot allocate filtering hash table");
|
||||
memset(dest,0,*size*sizeof(int32_t));
|
||||
}
|
||||
|
||||
lmax = seq->SQ_length;
|
||||
if (!circular)
|
||||
lmax-= FWORDSIZE-1;
|
||||
|
||||
|
||||
|
||||
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i));
|
||||
|
||||
for (i=0, base = seq->SQ; i < FWORDSIZE && i < lmax; i++,base++)
|
||||
{
|
||||
error<<= 1;
|
||||
error&=ERRORMASK(FWORDSIZE);
|
||||
|
||||
code = encoder[(*base) - 'A'];
|
||||
if (code <0)
|
||||
{
|
||||
code = 0;
|
||||
error|= 1;
|
||||
}
|
||||
|
||||
|
||||
word=RAPPENDBASE(word,FWORDSIZE,code);
|
||||
if (doublestrand)
|
||||
antiword=LAPPENDBASE(antiword,FWORDSIZE,code);
|
||||
}
|
||||
|
||||
if (!error && i==FWORDSIZE)
|
||||
{
|
||||
|
||||
goodword=(uint32_t)((doublestrand) ? MINI(word,antiword):word);
|
||||
|
||||
if (!in_last_seq[goodword])
|
||||
{
|
||||
in_last_seq[goodword]=1;
|
||||
dest[goodword]++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (j=1; j < lmax; j++,i++,base++)
|
||||
{
|
||||
|
||||
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
|
||||
|
||||
/* roll over the sequence for circular ones */
|
||||
if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
|
||||
|
||||
error<<= 1;
|
||||
error&=ERRORMASK(FWORDSIZE);
|
||||
|
||||
//code = -1;
|
||||
//if((*base) >= 'A' && (*base) <= 'Z')
|
||||
code = encoder[(*base) - 'A'];
|
||||
if (code <0)
|
||||
{
|
||||
code = 0;
|
||||
error|= 1;
|
||||
}
|
||||
|
||||
word=RAPPENDBASE(word,FWORDSIZE,code);
|
||||
if (doublestrand)
|
||||
antiword=LAPPENDBASE(antiword,FWORDSIZE,code);
|
||||
|
||||
if (!error)
|
||||
{
|
||||
if (doublestrand)
|
||||
goodword=(uint32_t)MINI(word,antiword);
|
||||
else
|
||||
goodword=word;
|
||||
if (!in_last_seq[goodword])
|
||||
{
|
||||
in_last_seq[goodword]=1;
|
||||
dest[goodword]++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return dest;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
 * Build the table counting, for every word of FWORDSIZE bases, the number
 * of example sequences containing it.
 *
 * Only sequences flagged isexample and longer than options->primer_length
 * are hashed.  *size receives the number of table entries; it is 0, and
 * the returned table NULL, when no sequence was hashed.  sequenceQuorum is
 * only used to report how many words reach that count.
 *
 * Returns the count table (owned by the caller), or NULL.
 */
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
                      uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum)
{
    int32_t *wordscount=NULL;
    int32_t keep=0;
    uint32_t i,j=0;

    /* keep *size defined even if the loop below never hashes a sequence */
    *size = 0;

    for (i=0;i<seqdbsize;i++)
    {
        if (database[i]->isexample && database[i]->SQ_length > options->primer_length)
        {
            j++;
            wordscount=ecoFilteringHashSequence(wordscount,
                                                options->circular,
                                                options->doublestrand,
                                                database[i],
                                                size);
        }
        fprintf(stderr,"  Filtered sequences %5u/%5u          \r",j,exampleCount);
    }

    fprintf(stderr,"\n");

    for (i=0;i<*size;i++)
        if (wordscount[i] >= sequenceQuorum)
            keep++;

    /* release the hashing function's internal table */
    (void)ecoFilteringHashSequence((int32_t*)-1,
                                   options->circular,
                                   options->doublestrand,
                                   NULL,
                                   NULL);

    fprintf(stderr,"ok\n  Considered word of size %d for filtering : %d\n",FWORDSIZE,keep);
    return wordscount;
}
|
64
src/libecoprimer/goodtaxon.c
Normal file
64
src/libecoprimer/goodtaxon.c
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* goodtaxon.c
|
||||
*
|
||||
* Created on: 7 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
/*
 * Tell whether a taxon is usable: it must fall under one of the restricted
 * taxa (when options->r is non-zero) and under none of the exception taxa
 * (when options->e is non-zero).
 *
 * taxon is an index into taxonomy->taxons->taxon.  Returns 1 or 0.
 */
int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options)
{
    /* outside of the restriction list */
    if ((options->r != 0) && !(eco_is_taxid_included(taxonomy,
                                                     options->restricted_taxid,
                                                     options->r,
                                                     taxonomy->taxons->taxon[taxon].taxid)))
        return 0;

    /* explicitly excepted */
    if ((options->e != 0) && (eco_is_taxid_included(taxonomy,
                                                    options->exception_taxid,
                                                    options->e,
                                                    taxonomy->taxons->taxon[taxon].taxid)))
        return 0;

    return 1;
}
|
||||
|
||||
/*
 * Tell whether a taxon is an example taxon: included in the restricted
 * taxa when such a list exists (options->r != 0) and not included in the
 * exception taxa when such a list exists (options->e != 0).
 *
 * taxon is an index into taxonomy->taxons->taxon.  Returns 1 or 0.
 */
int isExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options)
{
    int inrestricted = 1;
    int inexception  = 0;

    if (options->r != 0)
        inrestricted = (eco_is_taxid_included(taxonomy,
                                              options->restricted_taxid,
                                              options->r,
                                              taxonomy->taxons->taxon[taxon].taxid)) ? 1 : 0;

    /* the exception list is only consulted for taxa passing the restriction */
    if (inrestricted && (options->e != 0))
        inexception = (eco_is_taxid_included(taxonomy,
                                             options->exception_taxid,
                                             options->e,
                                             taxonomy->taxons->taxon[taxon].taxid)) ? 1 : 0;

    return inrestricted && !inexception;
}
|
||||
|
||||
|
||||
/*
 * Tell whether a taxon is a counterexample taxon: included in the ignored
 * taxa (options->g != 0) or in the exception taxa (options->e != 0).
 *
 * taxon is an index into taxonomy->taxons->taxon.  Returns 1 or 0.
 */
int isCounterExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options)
{
    if ((options->g != 0) && (eco_is_taxid_included(taxonomy,
                                                    options->ignored_taxid,
                                                    options->g,
                                                    taxonomy->taxons->taxon[taxon].taxid)))
        return 1;

    if ((options->e != 0) && (eco_is_taxid_included(taxonomy,
                                                    options->exception_taxid,
                                                    options->e,
                                                    taxonomy->taxons->taxon[taxon].taxid)))
        return 1;

    return 0;
}
|
21
src/libecoprimer/hashencoder.h
Normal file
21
src/libecoprimer/hashencoder.h
Normal file
@ -0,0 +1,21 @@
|
||||
/*
|
||||
* hashencoder.h
|
||||
*
|
||||
* Created on: 12 mai 2009
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#ifndef HASHENCODER_H_
|
||||
#define HASHENCODER_H_
|
||||
|
||||
/* 2-bit code of each uppercase letter, indexed by (letter - 'A'):     */
/* A -> 0, C -> 1, G -> 2, T and U -> 3, any other letter -> -1.       */
static int8_t encoder[] = {
     0, -1,  1, -1, -1, -1,  2,    /* A B C D E F G */
    -1, -1, -1, -1, -1, -1,        /* H I J K L M   */
    -1, -1, -1, -1, -1, -1,        /* N O P Q R S   */
     3,  3,                        /* T U           */
    -1, -1, -1, -1, -1             /* V W X Y Z     */
};
|
||||
|
||||
|
||||
#endif /* HASHENCODER_H_ */
|
243
src/libecoprimer/hashsequence.c
Normal file
243
src/libecoprimer/hashsequence.c
Normal file
@ -0,0 +1,243 @@
|
||||
/*
|
||||
* hashsequence.c
|
||||
*
|
||||
* Created on: 7 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
static int cmpword(const void *x,const void *y);
|
||||
|
||||
#include "hashencoder.h"
|
||||
|
||||
/*
 * Number of word start positions in a sequence.
 *
 * A circular sequence has one word per base.  A linear sequence has
 * SQ_length - wordsize + 1 words, and none when it is shorter than the
 * word (the former code wrapped around to a huge unsigned value).
 */
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
{
    uint32_t wordcount;

    wordcount = seq->SQ_length;

    if (!circular)
        wordcount = (wordcount >= wordsize) ? wordcount - (wordsize-1) : 0;

    return wordcount;
}
|
||||
|
||||
pword_t ecoHashSequence(pword_t dest,
|
||||
uint32_t wordsize,
|
||||
uint32_t circular,
|
||||
uint32_t doublestrand,
|
||||
ecoseq_t *seq,
|
||||
uint32_t *size,
|
||||
int32_t *neededWords,
|
||||
uint32_t neededWordCount,
|
||||
int32_t quorum)
|
||||
{
|
||||
uint32_t i=0;
|
||||
uint32_t j;
|
||||
char *base;
|
||||
int8_t code;
|
||||
int32_t error=0;
|
||||
word_t word=0;
|
||||
word_t antiword=0;
|
||||
word_t goodword;
|
||||
uint32_t lmax=0;
|
||||
|
||||
(*size)=0;
|
||||
|
||||
lmax = seq->SQ_length;
|
||||
if (!circular)
|
||||
lmax-= wordsize-1;
|
||||
|
||||
if (!dest)
|
||||
dest = ECOMALLOC(lmax*sizeof(word_t),
|
||||
"I cannot allocate memory for sequence hashing"
|
||||
);
|
||||
|
||||
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i));
|
||||
|
||||
for (i=0, base = seq->SQ; i < wordsize && i < lmax; i++,base++)
|
||||
{
|
||||
|
||||
error<<= 1;
|
||||
error&=ERRORMASK(wordsize);
|
||||
|
||||
code = encoder[(*base) - 'A'];
|
||||
if (code <0)
|
||||
{
|
||||
code = 0;
|
||||
error|= 1;
|
||||
}
|
||||
|
||||
|
||||
word=RAPPENDBASE(word,wordsize,code);
|
||||
|
||||
if (doublestrand)
|
||||
antiword=LAPPENDBASE(antiword,wordsize,code);
|
||||
|
||||
if (neededWordCount && i>=(FWORDSIZE-1))
|
||||
{
|
||||
|
||||
goodword = (doublestrand) ? MINI(FILTERWORD(word),CFILTERWORD(antiword,wordsize)):FILTERWORD(word);
|
||||
if (neededWords[(uint32_t)goodword]<quorum)
|
||||
error|= (1 << (FWORDSIZE-1));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (!error && i==wordsize)
|
||||
{
|
||||
dest[*size]=(doublestrand) ? MINI(word,antiword):word;
|
||||
(*size)++;
|
||||
}
|
||||
|
||||
|
||||
for (j=1; j < lmax; j++,i++,base++)
|
||||
{
|
||||
|
||||
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
|
||||
|
||||
/* roll over the sequence for circular ones */
|
||||
|
||||
if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
|
||||
|
||||
error<<= 1;
|
||||
error&=ERRORMASK(wordsize);
|
||||
|
||||
code = encoder[(*base) - 'A'];
|
||||
if (code <0)
|
||||
{
|
||||
code = 0;
|
||||
error|= 1;
|
||||
}
|
||||
|
||||
word=RAPPENDBASE(word,wordsize,code);
|
||||
if (doublestrand)
|
||||
antiword=LAPPENDBASE(antiword,wordsize,code);
|
||||
|
||||
if (neededWordCount)
|
||||
{
|
||||
goodword = (doublestrand) ? MINI(FILTERWORD(word),CFILTERWORD(antiword,wordsize)):FILTERWORD(word);
|
||||
if (neededWords[(uint32_t)goodword]<quorum)
|
||||
error|= (1 << (FWORDSIZE-1));
|
||||
// else
|
||||
// DEBUG_LOG("%s goodword = %p %d/%d (pos:%d error:%d)",seq->AC,goodword,neededWords[(uint32_t)goodword],quorum,i,error);
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (!error)
|
||||
{
|
||||
dest[*size]=(doublestrand) ? MINI(word,antiword):word;
|
||||
(*size)++;
|
||||
}
|
||||
|
||||
}
|
||||
// DEBUG_LOG("%s goodword = %d",seq->AC,*size);
|
||||
return dest;
|
||||
|
||||
}
|
||||
|
||||
/*
 * Sort a table of words and remove duplicates in place.
 *
 *   table : words to compact (modified in place)
 *   size  : number of entries in table
 *
 * Returns the number of distinct words, stored in table[0..result).
 * A word met more than once is kept once and tagged with SETMULTIWORD.
 *
 * The tag is applied to the slot that holds the word (i - 1). The former
 * code tagged table[i] after i had been incremented, i.e. the next, stale
 * slot, which was overwritten by the following distinct word, so the
 * multi-occurrence information was lost.
 */
uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
{
	uint32_t i,j;
	word_t current;

	sortword(table,size);

	current = 0;
	current=SETMULTIWORD(current);   /* build impossible word for the first loop cycle */

	for (i=0,j=0; j < size;j++)
	{
		if (WORD(table[j])!=current)
		{
			/* new distinct word: compare on the bare word, store the entry */
			current  = WORD(table[j]);
			table[i] = table[j];
			i++;
		}
		else
		{
			/* duplicate of the word just stored in slot i-1 (i >= 1 here,
			 * the first cycle always takes the other branch) */
			table[i-1]=SETMULTIWORD(table[i-1]);
		}
	}

	return i;
}
|
||||
|
||||
/*
 * Decode a 2-bit-per-base word into its nucleotide string.
 *
 *   word : encoded word; the first base is held in the highest used bits
 *   size : number of bases to decode
 *
 * Returns a pointer to a static buffer which is overwritten by the next
 * call (the function is therefore not reentrant).
 *
 * At most 32 bases are decoded (presumably the capacity of a 64-bit
 * word_t); the buffer holds those 32 characters plus the terminating NUL.
 * The former 32-byte buffer was overrun for size >= 32.
 */
const char* ecoUnhashWord(word_t word,uint32_t size)
{
	static char buffer[33];
	static const char decode[]="ACGT";

	uint32_t i;

	if (size > sizeof(buffer) - 1)
		size = sizeof(buffer) - 1;

	for (i=0; i < size; i++)
	{
		buffer[i]=decode[(word >> (2 * (size - 1 -i))) & 3];
	}

	buffer[size]=0;

	return buffer;
}
|
||||
|
||||
/*
 * Build the word obtained by bitwise-complementing `word` over `size`
 * bases and re-appending its 2-bit codes, lowest pair first, with
 * RAPPENDBASE.
 *
 *   word : encoded word
 *   size : number of bases in the word
 *
 * Returns the resulting word.
 */
word_t ecoComplementWord(word_t word,uint32_t size)
{
	word_t   result = 0;
	uint32_t left;

	/* complement every base and drop the bits above the word */
	word = (~word) & WORDMASK(size);

	for (left = size; left > 0; left--)
	{
		result = RAPPENDBASE(result,size,word & 3LLU);
		word >>= 2;
	}

	return result;
}
|
||||
|
||||
static int cmpword(const void *x,const void *y)
|
||||
{
|
||||
word_t w1 = *(pword_t)x;
|
||||
word_t w2 = *(pword_t)y;
|
||||
|
||||
w1 = WORD(w1);
|
||||
w2 = WORD(w2);
|
||||
|
||||
if (w1 < w2)
|
||||
return -1;
|
||||
if (w1 > w2)
|
||||
return +1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Binary-search a word in table->words (table->size entries, compared
 * with cmpword).
 *
 * Returns the index of the matching entry, or ~0 when the word is absent.
 */
uint32_t ecoFindWord(pwordcount_t table,word_t word)
{
	pword_t hit = (pword_t)bsearch((const void*)&word,
	                               (const void*)table->words,
	                               table->size,
	                               sizeof(word_t),
	                               cmpword);

	if (!hit)
		return ~0;

	return hit - table->words;
}
|
||||
|
||||
/*
 * Complement a single encoded base.
 *
 *   base : 2-bit base code (0..3, i.e. A, C, G, T as in ecoUnhashWord)
 *
 * Returns the complementary code (0<->3, 1<->2), or 4 for any value that
 * is not a valid base code.
 *
 * The complement is the bitwise NOT restricted to two bits. The former
 * code used the logical NOT (!base), which mapped 0 to 1 and every other
 * base to 0.
 */
char ecoComplementChar(char base)
{
	return (base >= 0 && base < 4) ? (char)(~base & 3) : 4;
}
|
||||
|
379
src/libecoprimer/libstki.c
Normal file
379
src/libecoprimer/libstki.c
Normal file
@ -0,0 +1,379 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Mar. 92 */
|
||||
/* File: libstki.c */
|
||||
/* Purpose: A library to deal with 'stacks' of */
|
||||
/* integers */
|
||||
/* Note: 'stacks' are dynamic (i.e. size is */
|
||||
/* automatically readjusted when needed) */
|
||||
/* History: */
|
||||
/* 00/03/92 : <Gloup> first draft */
|
||||
/* 15/08/93 : <Gloup> revised version */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* ==================================================== */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libstki.h"
|
||||
#include "ecoprimer.h"
|
||||
|
||||
|
||||
/* ============================ */
|
||||
/* Constantes et Macros locales */
|
||||
/* ============================ */
|
||||
|
||||
#define ExpandStack(stkh) ResizeStacki((stkh), (*stkh)->size << 1)
|
||||
|
||||
#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1)
|
||||
|
||||
|
||||
static int16_t sStkiLastError = kStkiNoErr;
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* gestion des erreurs */
|
||||
/* get/reset erreur flag */
|
||||
/* */
|
||||
/* @function: StkiError */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Return the last error recorded by the stack library.
 *
 *   reset : when true, the error flag is cleared (set to kStkiNoErr)
 *           after being read
 */
int16_t StkiError(bool_t reset)
{
	const int16_t previous = sStkiLastError;

	if (reset)
		sStkiLastError = kStkiNoErr;

	return previous;

} /* end of StkiError */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* creation d'un stack */
|
||||
/* */
|
||||
/* @function: NewStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
StackiPtr NewStacki(int32_t size)
|
||||
{
|
||||
StackiPtr stki;
|
||||
|
||||
if (! (stki = NEW(Stacki)))
|
||||
return NULL;
|
||||
|
||||
stki->size = size;
|
||||
stki->top = 0;
|
||||
stki->cursor = 0;
|
||||
|
||||
if ( ! (stki->val = NEWN(int32_t, size))) {
|
||||
sStkiLastError = kStkiMemErr;
|
||||
return FreeStacki(stki);
|
||||
}
|
||||
|
||||
return stki;
|
||||
|
||||
} /* end of NewStacki */
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* liberation d'un stack */
|
||||
/* */
|
||||
/* @function: FreeStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Release a stack and its value array. A NULL stack is accepted.
 *
 * Always returns NULL.
 */
StackiPtr FreeStacki(StackiPtr stki)
{
	if (stki == NULL)
		return NULL;

	if (stki->val != NULL)
		ECOFREE(stki->val,"Free stack values");

	ECOFREE(stki,"Free stack");

	return NULL;

} /* end of FreeStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* creation d'un vecteur de stacks */
|
||||
/* */
|
||||
/* @function: NewStackiVector */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
StackiHdle NewStackiVector(int32_t vectSize, int32_t stackSize)
|
||||
{
|
||||
int32_t i;
|
||||
StackiHdle stkh;
|
||||
|
||||
if (! (stkh = NEWN(StackiPtr, vectSize))) {
|
||||
sStkiLastError = kStkiMemErr;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < vectSize ; i++)
|
||||
if (! (stkh[i] = NewStacki(stackSize)))
|
||||
return FreeStackiVector(stkh, i);
|
||||
|
||||
return stkh;
|
||||
|
||||
} /* end of NewStackiVector */
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* liberation d'un vecteur de stacks */
|
||||
/* */
|
||||
/* @function: FreeStackiVector */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Release the first `vectSize` stacks of a vector, then the vector
 * itself. A NULL vector is accepted.
 *
 * Always returns NULL.
 */
StackiHdle FreeStackiVector(StackiHdle stkh, int32_t vectSize)
{
	int32_t idx = 0;

	if (stkh == NULL)
		return NULL;

	while (idx < vectSize) {
		(void) FreeStacki(stkh[idx]);
		idx++;
	}

	ECOFREE(stkh,"Free stack vector");

	return NULL;

} /* end of FreeStackiVector */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* resize d'un stack */
|
||||
/* */
|
||||
/* @function: ResizeStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Change the capacity of a stack.
 *
 *   stkh : handle on the stack
 *   size : new capacity, in values; must be > 0
 *
 * Returns the new capacity, or 0 on failure, in which case kStkiMemErr
 * is recorded and the stack is left untouched.
 *
 * A non-positive size is rejected before calling the allocator: a
 * zero-sized realloc may free the buffer and return NULL, which would
 * leave (*stkh)->val dangling, and a negative size would be converted to
 * a huge unsigned request.
 */
int32_t ResizeStacki(StackiHdle stkh, int32_t size)
{
	int32_t resize = 0;             /* assume error */
	int32_t *val;

	if ((size > 0)
	    && (val = ECOREALLOC((*stkh)->val, size * sizeof(int32_t),"Cannot reallocate stack values"))) {
		(*stkh)->size = resize = size;
		(*stkh)->val  = val;
	}

	if (! resize)
		sStkiLastError = kStkiMemErr;

	return resize;

} /* end of ResizeStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* empilage(/lement) */
|
||||
/* */
|
||||
/* @function: PushiIn */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Push a value on top of a stack, growing the stack when it is full.
 *
 *   stkh : handle on the stack
 *   val  : value to push
 *
 * Returns TRUE on success, FALSE when the stack could not be enlarged.
 *
 * The capacity is doubled when full. A stack whose capacity is 0 is
 * grown to kMinStackiSize instead: doubling 0 gives 0, so such a stack
 * could never accept a value.
 */
bool_t PushiIn(StackiHdle stkh, int32_t val)
{
	if ((*stkh)->top >= (*stkh)->size) {
		int32_t newsize = ((*stkh)->size > 0) ? ((*stkh)->size << 1)
		                                      : kMinStackiSize;

		if (! ResizeStacki(stkh, newsize))
			return FALSE;
	}

	(*stkh)->val[((*stkh)->top)++] = val;

	return TRUE;

} /* end of PushiIn */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* depilage(/lement) */
|
||||
/* */
|
||||
/* @function: PopiOut */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Pop the top value of a stack.
 *
 *   stkh : handle on the stack
 *   val  : out, receives the popped value
 *
 * Returns FALSE when the stack is empty, TRUE otherwise. After a pop,
 * the capacity is halved when fewer than half of the slots are used and
 * more than kMinStackiSize values remain; the outcome of that shrink is
 * ignored.
 */
bool_t PopiOut(StackiHdle stkh, int32_t *val)
{
	StackiPtr stack = *stkh;

	if (stack->top <= 0)
		return FALSE;

	stack->top--;
	*val = stack->val[stack->top];

	if ((stack->top > kMinStackiSize) && (stack->top < (stack->size >> 1)))
		(void) ShrinkStack(stkh);

	return TRUE;

} /* end of PopiOut */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* lecture descendante */
|
||||
/* */
|
||||
/* @function: ReadiDown */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Move the cursor one position down and read the value found there.
 *
 *   stki : stack to read
 *   val  : out, receives the value
 *
 * Returns FALSE (nothing read) when the cursor is already at the bottom.
 */
bool_t ReadiDown(StackiPtr stki, int32_t *val)
{
	if (stki->cursor <= 0)
		return FALSE;

	stki->cursor -= 1;
	*val = stki->val[stki->cursor];

	return TRUE;

} /* end of ReadiDown */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* lecture ascendante */
|
||||
/* */
|
||||
/* @function: ReadiUp */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Read the value under the cursor, then move the cursor one position up.
 *
 *   stki : stack to read
 *   val  : out, receives the value
 *
 * Returns FALSE (nothing read) when the cursor has reached the top.
 */
bool_t ReadiUp(StackiPtr stki, int32_t *val)
{
	if (stki->cursor >= stki->top)
		return FALSE;

	*val = stki->val[stki->cursor];
	stki->cursor += 1;

	return TRUE;

} /* end of ReadiUp */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* remontee/descente du curseur */
|
||||
/* */
|
||||
/* @function: CursiToTop */
|
||||
/* @function: CursiToBottom */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/* Place the cursor at the top of the stack (cursor = top). */
void CursiToTop(StackiPtr stki)
{
	const int32_t top = stki->top;

	stki->cursor = top;

} /* end of CursiToTop */
|
||||
|
||||
void CursiToBottom(stki)
|
||||
StackiPtr stki;
|
||||
{
|
||||
stki->cursor = 0;
|
||||
|
||||
} /* end of CursiToBottom */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* echange des valeurs cursor <-> (top - 1) */
|
||||
/* */
|
||||
/* @function: CursiSwap */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Exchange the value under the cursor with the value at the top of the
 * stack (index top - 1).
 *
 * Nothing is done when the stack is empty or when the cursor does not
 * designate a stored value. The upper bound matters: after CursiToTop()
 * the cursor equals `top`, one past the last stored value, and the swap
 * would read and write outside the live part of the stack.
 */
void CursiSwap(StackiPtr stki)
{
	int32_t tmp;

	if ((stki->top <= 0) || (stki->cursor < 0) || (stki->cursor >= stki->top))
		return;

	tmp = stki->val[stki->cursor];
	stki->val[stki->cursor]  = stki->val[stki->top - 1];
	stki->val[stki->top - 1] = tmp;

} /* end of CursiSwap */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Recherche d'une valeur en stack a partir du */
|
||||
/* curseur courant en descendant. */
|
||||
/* on laisse le curseur a l'endroit trouve */
|
||||
/* */
|
||||
/* @function: SearchDownStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Search a value downwards, starting from the current cursor, by
 * repeated calls to ReadiDown().
 *
 * Returns TRUE when `sval` is found (the cursor is left on the matching
 * position), FALSE when the bottom is reached without a match.
 */
bool_t SearchDownStacki(StackiPtr stki, int32_t sval)
{
	int32_t current;
	bool_t  more;

	for (;;) {
		more = ReadiDown(stki, &current);

		if (! more)
			return more;

		if (current == sval)
			return more;
	}

} /* end of SearchDownStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* Recherche dichotomique d'une valeur en stack */
|
||||
/* le stack est suppose trie par valeurs */
|
||||
/* croissantes. */
|
||||
/* on place le curseur a l'endroit trouve */
|
||||
/* */
|
||||
/* @function: BinSearchStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Binary search of a value in a stack whose values are expected to be
 * sorted in increasing order.
 *
 * Returns TRUE and sets the cursor on the matching position when `sval`
 * is found, FALSE otherwise (cursor unchanged).
 *
 * Values are compared directly rather than through their difference:
 * `val - sval` can overflow for operands of opposite signs and send the
 * search to the wrong half. The midpoint is likewise computed without
 * adding the two bounds.
 */
bool_t BinSearchStacki(StackiPtr stki, int32_t sval)
{
	int32_t midd, low, high, probe;

	low  = 0;
	high = stki->top - 1;

	while (high >= low) {

		midd  = low + (high - low) / 2;
		probe = stki->val[midd];

		if (probe == sval) {
			stki->cursor = midd;
			return TRUE;
		}

		if (probe > sval)
			high = midd - 1;
		else
			low  = midd + 1;
	}

	return FALSE;

} /* end of BinSearchStacki */
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* teste l'egalite *physique* de deux stacks */
|
||||
/* */
|
||||
/* @function: SameStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Test whether two stacks hold the same values: same `top` and identical
 * bytes in the first `top` entries of their value arrays.
 *
 * Returns TRUE or FALSE.
 */
bool_t SameStacki(StackiPtr stki1, StackiPtr stki2)
{
	if (stki1->top != stki2->top)
		return FALSE;

	if (memcmp(stki1->val, stki2->val, stki1->top * sizeof(int32_t)) != 0)
		return FALSE;

	return TRUE;

} /* end of SameStacki */
|
||||
|
||||
|
||||
/* -------------------------------------------- */
|
||||
/* inverse l'ordre des elements dans un stack */
|
||||
/* */
|
||||
/* @function: ReverseStacki */
|
||||
/* -------------------------------------------- */
|
||||
|
||||
/*
 * Reverse, in place, the order of the values stored in a stack.
 *
 * Returns FALSE when the stack is empty, TRUE otherwise.
 */
bool_t ReverseStacki(StackiPtr stki)
{
	int32_t lo, hi, tmp;

	if (stki->top <= 0)
		return FALSE;

	/* swap symmetric entries, walking towards the middle */
	for (lo = 0, hi = stki->top - 1; lo < hi; lo++, hi--) {
		tmp           = stki->val[hi];
		stki->val[hi] = stki->val[lo];
		stki->val[lo] = tmp;
	}

	return TRUE;

} /* end of ReverseStacki */
|
||||
|
89
src/libecoprimer/libstki.h
Normal file
89
src/libecoprimer/libstki.h
Normal file
@ -0,0 +1,89 @@
|
||||
/* ==================================================== */
|
||||
/* Copyright (c) Atelier de BioInformatique */
|
||||
/* Mar. 92 */
|
||||
/* File: libstki.h */
|
||||
/* Purpose: library of dynamic stacks holding */
|
||||
/* integer values */
|
||||
/* History: */
|
||||
/* 00/03/92 : <Gloup> first draft */
|
||||
/* 07/07/93 : <Gloup> complete revision */
|
||||
/* 10/03/94 : <Gloup> added xxxVector funcs */
|
||||
/* 14/05/99 : <Gloup> last revision */
|
||||
/* ==================================================== */
|
||||
|
||||
#ifndef _H_libstki
|
||||
#define _H_libstki
|
||||
|
||||
|
||||
#include "ecotype.h"
|
||||
|
||||
/* ==================================================== */
|
||||
/* Constantes de dimensionnement */
|
||||
/* ==================================================== */
|
||||
|
||||
#ifndef kMinStackiSize
|
||||
#define kMinStackiSize 2 /* taille mini stack */
|
||||
#endif
|
||||
|
||||
|
||||
#define kStkiNoErr 0 /* ok */
|
||||
#define kStkiMemErr 1 /* not enough memory */
|
||||
|
||||
#define kStkiReset TRUE
|
||||
#define kStkiGet FALSE
|
||||
|
||||
/* ==================================================== */
|
||||
/* Macros standards */
|
||||
/* ==================================================== */
|
||||
|
||||
#ifndef NEW
|
||||
#define NEW(typ) (typ*)malloc(sizeof(typ))
|
||||
#define NEWN(typ, dim) (typ*)malloc((uint32_t)(dim) * sizeof(typ))
|
||||
#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (uint32_t)(dim) * sizeof(typ))
|
||||
#define FREE(ptr) free((Ptr) ptr)
|
||||
#endif
|
||||
|
||||
|
||||
/* ==================================================== */
|
||||
/* Types & Structures de donnees */
|
||||
/* ==================================================== */
|
||||
|
||||
/* -------------------- */
|
||||
/* structure : pile */
|
||||
/* -------------------- */
|
||||
/* Dynamic stack of 32-bit integers. */
struct Stacki {
	int32_t  size;      /* stack size (capacity, in values)        */
	int32_t  top;       /* current free position (next push slot)  */
	int32_t  cursor;    /* current cursor (used by the Readi* API) */
	int32_t *val;       /* values                                  */
};

typedef struct Stacki   Stacki;         /* the stack itself      */
typedef struct Stacki  *StackiPtr;      /* pointer to a stack    */
typedef struct Stacki **StackiHdle;     /* handle / stack vector */
|
||||
|
||||
|
||||
|
||||
/* ==================================================== */
|
||||
/* Prototypes (generated by mproto) */
|
||||
/* ==================================================== */
|
||||
|
||||
/* libstki.c */
|
||||
|
||||
int16_t StkiError (bool_t reset );
|
||||
StackiPtr NewStacki (int32_t size );
|
||||
StackiPtr FreeStacki (StackiPtr stki );
|
||||
StackiHdle NewStackiVector (int32_t vectSize, int32_t stackSize );
|
||||
StackiHdle FreeStackiVector (StackiHdle stkh, int32_t vectSize );
|
||||
int32_t ResizeStacki (StackiHdle stkh , int32_t size );
|
||||
bool_t PushiIn (StackiHdle stkh , int32_t val );
|
||||
bool_t PopiOut (StackiHdle stkh , int32_t *val );
|
||||
bool_t ReadiDown (StackiPtr stki , int32_t *val );
|
||||
bool_t ReadiUp (StackiPtr stki , int32_t *val );
|
||||
void CursiToTop (StackiPtr stki );
|
||||
void CursiToBottom (StackiPtr stki );
|
||||
void CursiSwap (StackiPtr stki );
|
||||
bool_t SearchDownStacki (StackiPtr stki , int32_t sval );
|
||||
bool_t BinSearchStacki (StackiPtr stki , int32_t sval );
|
||||
bool_t SameStacki (StackiPtr stki1 , StackiPtr stki2 );
|
||||
bool_t ReverseStacki (StackiPtr stki );
|
||||
|
||||
#endif /* _H_libstki */
|
7
src/libecoprimer/mapping.c
Normal file
7
src/libecoprimer/mapping.c
Normal file
@ -0,0 +1,7 @@
|
||||
/*
|
||||
* mapping.c
|
||||
*
|
||||
* Created on: 25 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
152
src/libecoprimer/merge.c
Normal file
152
src/libecoprimer/merge.c
Normal file
@ -0,0 +1,152 @@
|
||||
/*
|
||||
* merge.c
|
||||
*
|
||||
* Created on: 11 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2);
|
||||
|
||||
|
||||
/*
 * Set up a merge descriptor over the tables of `data`.
 *
 *   merge : descriptor to fill
 *   data  : provides the word table (words) and its counts (strictcount)
 *   s1    : number of entries of the first run
 *   s2    : number of entries of the second run
 *
 * The first run is read from index 0, the second from index s1; writing
 * starts at index 0 and the total size is s1 + s2. Returns `merge`.
 */
static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data,uint32_t s1,uint32_t s2)
{
	/* tables shared with the caller */
	merge->words = data->words;
	merge->count = data->strictcount;

	/* cursors */
	merge->read1 = 0;
	merge->read2 = s1;
	merge->write = 0;

	merge->size  = s1+s2;

	return merge;
}
|
||||
|
||||
|
||||
typedef enum {S1=1,S2=2,STACK=3} source_t;
|
||||
|
||||
/*
 * Merge, in place, the two consecutive runs of data->words
 * (and of the parallel data->strictcount table).
 *
 *   data         : word table; the first run is [0, s1), the second
 *                  [s1, s1+s2). data->size receives the merged size.
 *   s1, s2       : sizes of the two runs (presumably each sorted on
 *                  WORD() - confirm against callers)
 *   remainingSeq : added to a word count when testing the quorum
 *   seqQuorum    : a merged word is kept only when
 *                  remainingSeq + count >= seqQuorum
 *
 * Entries of the first run that would be overwritten by the write cursor
 * are saved in a queue and consumed from there. Equal words coming from
 * both runs are fused: their counts are added and the result is tagged
 * with SETMULTIWORD when either side was already tagged.
 *
 * Fix: `same` is only evaluated while the second run still has entries.
 * It used to read merged.words[merged.read2] with read2 == size, one
 * entry past the data, and could fuse the current word with that stale
 * entry.
 *
 * NOTE(review): with the `||` loop condition the loop only ends when
 * both runs are exhausted, so the `read2 < size` tail below looks
 * unreachable, and once read1 has reached s1 with an empty queue the S1
 * branch reads merged.words[merged.read1] outside the first run. Left
 * unchanged here - to be confirmed against the intended algorithm.
 */
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum)
{
	merge_t  merged;
	source_t source;
	word_t   currentword,tmpword;
	uint32_t currentcount,tmpcount;
	int      same;
	queue_t  queue;
	int      nsame=0;               /* only useful to debug traces */
	uint32_t maxcount=0;            /* only useful to debug traces */
	bool_t   writed=TRUE;

	(void)mergeInit(&merged,data,s1,s2);
	(void)newQueue(&queue,MINI(s1,s2));

	while (merged.read1 < s1 || merged.read2 < merged.size)
	{
		/* candidate from the first run: queued copy first, else in place */
		if (! queue.empty)
		{
			currentword  = queue.words[queue.pop];
			currentcount = queue.count[queue.pop];
			source=STACK;
		}
		else
		{
			currentword  = merged.words[merged.read1];
			currentcount = merged.count[merged.read1];
			source=S1;
		}

		/* the second run wins when its head is strictly smaller */
		if (merged.read2 < merged.size &&
		    WORD(currentword) > WORD(merged.words[merged.read2]))
		{
			currentword  = merged.words[merged.read2];
			currentcount = merged.count[merged.read2];
			source = S2;
		}

		/* same word at the head of both runs ? (only if run 2 has a head) */
		same = (source != S2)
		    && (merged.read2 < merged.size)
		    && (WORD(currentword) == WORD(merged.words[merged.read2]));
		nsame+=same;

		/* save what the write cursor is about to overwrite */
		tmpword = merged.words[merged.write];
		tmpcount= merged.count[merged.write];

		merged.words[merged.write] = currentword;
		merged.count[merged.write] = currentcount;

		if (source != S2)
		{
			if (same)
			{
				merged.count[merged.write]+=merged.count[merged.read2];

				if (ISMULTIWORD(currentword) || ISMULTIWORD(merged.words[merged.read2]))
					merged.words[merged.write]=SETMULTIWORD(currentword);

				merged.read2++;
			}

			if (source==STACK)
				pop(&queue);
			merged.read1++;
		}
		else
			merged.read2++;

		/* the overwritten entry was a not yet consumed word of run 1 */
		if (writed && merged.read1 <= merged.write && merged.write < s1)
			push(&queue,tmpword,tmpcount);

		if (merged.count[merged.write] > maxcount)
			maxcount=merged.count[merged.write];

		/* keep the word only if it can still reach the quorum */
		writed = remainingSeq + merged.count[merged.write] >= seqQuorum;
		if (writed)
			merged.write++;

	} /* while loop */

	if (merged.read2 < merged.size)
	{
		/* copy what is left of the second run */
		for (;merged.read2 < merged.size;merged.read2++)
		{
			merged.words[merged.write]=merged.words[merged.read2];
			merged.count[merged.write]=merged.count[merged.read2];
			if (remainingSeq + merged.count[merged.write] >= seqQuorum)
				merged.write++;
		}
	}
	else {
		/* flush what is left in the queue */
		while (! queue.empty)
		{
			merged.words[merged.write]=queue.words[queue.pop];
			merged.count[merged.write]=queue.count[queue.pop];
			pop(&queue);
			if (remainingSeq + merged.count[merged.write] >= seqQuorum)
				merged.write++;
		}
	}

	data->size = merged.write;

	cleanQueue(&queue);
}
|
460
src/libecoprimer/pairs.c
Normal file
460
src/libecoprimer/pairs.c
Normal file
@ -0,0 +1,460 @@
|
||||
/*
|
||||
* pairs.c
|
||||
*
|
||||
* Created on: 15 déc. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "../libthermo/thermostats.h"
|
||||
|
||||
static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
||||
pecodnadb_t seqdb,
|
||||
pprimercount_t primers,
|
||||
ppairtree_t pairs,
|
||||
poptions_t options);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************
|
||||
*
|
||||
* pair collection management
|
||||
*
|
||||
*************************************/
|
||||
|
||||
#ifdef MASKEDCODE
|
||||
|
||||
/*
 * Record that amplicon `amplifia` was observed for taxon `taxid` in the
 * amplicon set of a primer pair.
 *
 *   pair     : pair whose ampset table is updated
 *   amplifia : amplicon sequence (NUL-terminated); the pointer itself is
 *              stored when the sequence is new
 *   taxid    : taxon to add to the amplicon's taxon list
 *
 * Returns the already stored string when an equal amplicon was present,
 * NULL when `amplifia` has just been inserted.
 *
 * Fixes over the previous version:
 *  - a new entry is detected by "not found" (i == ampsetindex) rather
 *    than by i == 0, which also matched a hit on slot 0 and then reset
 *    (and leaked) its taxon list;
 *  - a new entry is fully initialised here instead of relying on the
 *    slot being zero-filled, which does not hold after a reallocation;
 *  - the table is enlarged before the new slot is touched.
 */
char *addamplifiasetelem (ppair_t pair, char* amplifia, int32_t taxid)
{
	uint32_t i;
	uint32_t j;
	char *ampused = NULL;

	if(pair->ampsetcount == 0)
	{
		pair->ampsetcount = 500;
		pair->ampsetindex = 0;
		pair->ampset = ECOMALLOC(pair->ampsetcount * sizeof(ampseqset_t),"Cannot allocate amplifia set");
	}

	/* look for an already known, equal amplicon */
	for (i = 0; i < pair->ampsetindex; i++)
	{
		if (strcmp (pair->ampset[i].amplifia, amplifia) == 0)
		{
			ampused = pair->ampset[i].amplifia;
			break;
		}
	}

	if (i == pair->ampsetindex)
	{
		/* unknown amplicon: append a new, fully initialised entry */
		if (pair->ampsetindex == pair->ampsetcount)
		{
			pair->ampsetcount += 500;
			pair->ampset = ECOREALLOC(pair->ampset, pair->ampsetcount * sizeof(ampseqset_t), "Cannot allocate amplifia set");
		}

		pair->ampset[i].amplifia   = amplifia;
		pair->ampset[i].seqidcount = 100;
		pair->ampset[i].seqidindex = 0;
		pair->ampset[i].taxonids   = ECOMALLOC(pair->ampset[i].seqidcount * sizeof(int32_t),"Cannot allocate amplifia sequence table");
		pair->ampsetindex++;
	}

	/* make room for one more taxon */
	if (pair->ampset[i].seqidindex == pair->ampset[i].seqidcount)
	{
		pair->ampset[i].seqidcount += 100;
		pair->ampset[i].taxonids = ECOREALLOC(pair->ampset[i].taxonids, pair->ampset[i].seqidcount * sizeof(int32_t), "Cannot allocate amplifia sequence table");
	}

	/* add the taxon unless it is already listed */
	for (j = 0; j < pair->ampset[i].seqidindex; j++)
	{
		if (pair->ampset[i].taxonids[j] == taxid) break;
	}

	if (j == pair->ampset[i].seqidindex)
		pair->ampset[i].taxonids[pair->ampset[i].seqidindex++] = taxid;

	return ampused;
}
|
||||
|
||||
/*
 * Record that taxon `taxid` produced amplicon `amplifia` in the taxon
 * set of a primer pair.
 *
 *   pair     : pair whose taxset table is updated
 *   taxid    : taxon identifier
 *   amplifia : amplicon sequence (NUL-terminated); the pointer itself is
 *              stored when the taxon does not list an equal sequence yet
 *
 * Fixes over the previous version:
 *  - a new taxon entry is detected by "not found" (i == taxsetindex)
 *    rather than by i == 0, which also matched a hit on slot 0 and then
 *    reset (and leaked) its amplicon list;
 *  - a new entry is fully initialised here instead of relying on the
 *    slot being zero-filled (taxonid == 0), which does not hold after a
 *    reallocation;
 *  - the table is enlarged before the new slot is touched.
 */
void addtaxampsetelem (ppair_t pair, int32_t taxid, char *amplifia)
{
	uint32_t i;
	uint32_t j;

	if(pair->taxsetcount == 0)
	{
		pair->taxsetcount = 500;
		pair->taxsetindex = 0;
		pair->taxset = ECOMALLOC(pair->taxsetcount * sizeof(taxampset_t),"Cannot allocate taxon set");
	}

	/* look for an already known taxon */
	for (i = 0; i < pair->taxsetindex; i++)
	{
		if (pair->taxset[i].taxonid == taxid) break;
	}

	if (i == pair->taxsetindex)
	{
		/* unknown taxon: append a new, fully initialised entry */
		if (pair->taxsetindex == pair->taxsetcount)
		{
			pair->taxsetcount += 500;
			pair->taxset = ECOREALLOC(pair->taxset, pair->taxsetcount * sizeof(taxampset_t), "Cannot allocate taxon set");
		}

		pair->taxset[i].taxonid       = taxid;
		pair->taxset[i].amplifiacount = 100;
		pair->taxset[i].amplifiaindex = 0;
		pair->taxset[i].amplifia      = ECOMALLOC(pair->taxset[i].amplifiacount * sizeof(char *),"Cannot allocate amplifia table");
		pair->taxsetindex++;
	}

	/* make room for one more amplicon */
	if (pair->taxset[i].amplifiaindex == pair->taxset[i].amplifiacount)
	{
		pair->taxset[i].amplifiacount += 100;
		pair->taxset[i].amplifia = ECOREALLOC(pair->taxset[i].amplifia, pair->taxset[i].amplifiacount * sizeof(char *), "Cannot allocate amplifia table");
	}

	/* add the amplicon unless an equal one is already listed */
	for (j = 0; j < pair->taxset[i].amplifiaindex; j++)
	{
		if (strcmp(pair->taxset[i].amplifia[j], amplifia) == 0) break;
	}

	if (j == pair->taxset[i].amplifiaindex)
	{
		pair->taxset[i].amplifia[j] = amplifia;
		pair->taxset[i].amplifiaindex++;
	}
}
|
||||
|
||||
/*
 * Extract a copy of a region of a sequence.
 *
 *   seq   : source sequence (seq->SQ is read)
 *   start : index of the first base to copy
 *   len   : number of bases to copy
 *
 * Returns a newly allocated, NUL-terminated string of at most `len`
 * characters; the caller owns it. The request is traced on stderr.
 *
 * The terminator is written explicitly: strncpy() does not add one when
 * the source holds `len` characters or more. The trace uses %u, matching
 * the unsigned arguments.
 */
char *getamplifia (pecoseq_t seq, uint32_t start, uint32_t len)
{
	char *amplifia;

	fprintf(stderr,"start : %u length : %u\n",start,len);

	amplifia = ECOMALLOC((len + 1) * sizeof(char),"Cannot allocate amplifia");

	strncpy(amplifia, &seq->SQ[start], len);
	amplifia[len] = '\0';

	return amplifia;
}
|
||||
|
||||
#endif
|
||||
|
||||
/*TR: Added*/
|
||||
/*
 * Build the collection of primer pairs over a whole sequence database.
 *
 *   seqdb     : sequence database
 *   seqdbsize : number of sequences in seqdb
 *   primers   : primers and their matches
 *   options   : run options, passed through to the per-sequence step
 *
 * A pair tree is created with initpairtree(NULL) and
 * buildPrimerPairsForOneSeq() is called for every sequence index in
 * turn. Returns that tree.
 */
ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t primers,poptions_t options)
{
	ppairtree_t tree  = initpairtree(NULL);
	uint32_t    seqid = 0;

	while (seqid < seqdbsize)
	{
		buildPrimerPairsForOneSeq(seqid, seqdb, primers, tree, options);
		seqid++;
	}

	return tree;
}
|
||||
|
||||
#define DMAX (2000000000)
|
||||
|
||||
static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
||||
pecodnadb_t seqdb,
|
||||
pprimercount_t primers,
|
||||
ppairtree_t pairs,
|
||||
poptions_t options)
|
||||
{
|
||||
static uint32_t paircount=0;
|
||||
uint32_t i,j,k;
|
||||
uint32_t matchcount=0;
|
||||
pprimermatch_t matches = NULL;
|
||||
//primermatchcount_t seqmatchcount;
|
||||
ppair_t pcurrent;
|
||||
pair_t current;
|
||||
pprimer_t wswp;
|
||||
bool_t bswp;
|
||||
size_t distance;
|
||||
bool_t strand;
|
||||
//char prmr[50];
|
||||
//float mtemp;
|
||||
word_t w1, w1a, omask = (0x1L << (options->strict_three_prime*2)) -1;
|
||||
word_t w2, w2a;//, wtmp;
|
||||
uint32_t bp1,bp2;
|
||||
|
||||
//prmr[options->primer_length] = '\0';
|
||||
|
||||
for (i=0;i < primers->size; i++)
|
||||
{
|
||||
matchcount+=primers->primers[i].directCount[seqid];
|
||||
matchcount+=primers->primers[i].reverseCount[seqid];
|
||||
}
|
||||
|
||||
if (matchcount <= 0)
|
||||
return;
|
||||
|
||||
matches = ECOMALLOC(matchcount * sizeof(primermatch_t),"Cannot allocate primers match table");
|
||||
|
||||
for (i=0,j=0;i < primers->size; i++)
|
||||
{
|
||||
if (primers->primers[i].directCount[seqid])
|
||||
{
|
||||
if (primers->primers[i].directCount[seqid]==1)
|
||||
{
|
||||
matches[j].primer = primers->primers+i;
|
||||
matches[j].strand=TRUE;
|
||||
matches[j].position=primers->primers[i].directPos[seqid].value;
|
||||
j++;
|
||||
}
|
||||
else for (k=0; k < primers->primers[i].directCount[seqid]; k++,j++)
|
||||
{
|
||||
matches[j].primer = primers->primers+i;
|
||||
matches[j].strand=TRUE;
|
||||
matches[j].position=primers->primers[i].directPos[seqid].pointer[k];
|
||||
}
|
||||
}
|
||||
|
||||
if (primers->primers[i].reverseCount[seqid])
|
||||
{
|
||||
if (primers->primers[i].reverseCount[seqid]==1)
|
||||
{
|
||||
matches[j].primer = primers->primers+i;
|
||||
matches[j].strand=FALSE;
|
||||
matches[j].position=primers->primers[i].reversePos[seqid].value;
|
||||
j++;
|
||||
}
|
||||
else for (k=0; k < primers->primers[i].reverseCount[seqid]; k++,j++)
|
||||
{
|
||||
matches[j].primer = primers->primers+i;
|
||||
matches[j].strand=FALSE;
|
||||
matches[j].position=primers->primers[i].reversePos[seqid].pointer[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (matchcount>1)
|
||||
{
|
||||
// fprintf(stderr,"\n====================================\n");
|
||||
|
||||
sortmatch(matches,matchcount); // sort in ascending order by position
|
||||
|
||||
for (i=0; i < matchcount;i++)
|
||||
{
|
||||
// For all primers matching the sequence
|
||||
|
||||
/*for(j=i+1;
|
||||
(j<matchcount)
|
||||
&& ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
|
||||
j++
|
||||
)//*/
|
||||
for (j=i+1; j<matchcount; j++)
|
||||
{
|
||||
if (matches[j].position - matches[i].position <= options->primer_length) continue;
|
||||
distance = matches[j].position - matches[i].position - options->primer_length;
|
||||
if (distance >= options->lmax) break;
|
||||
|
||||
|
||||
// For all not too far primers
|
||||
|
||||
if ( (matches[i].primer->good || matches[j].primer->good)
|
||||
&& (distance > options->lmin)
|
||||
)
|
||||
{
|
||||
// If possible primer pair
|
||||
current.p1 = matches[i].primer;
|
||||
current.asdirect1=matches[i].strand;
|
||||
current.p2 = matches[j].primer;
|
||||
current.asdirect2= !matches[j].strand;
|
||||
current.maxd=DMAX;
|
||||
current.mind=DMAX;
|
||||
current.sumd=0;
|
||||
current.amplifiacount=0;
|
||||
current.inexample=0;
|
||||
current.outexample=0;
|
||||
current.curseqid = 0;
|
||||
current.refsequence=-1;
|
||||
//current.p1temp = 100;
|
||||
//current.p1mintemp = 100;
|
||||
//current.p2temp = 100;
|
||||
//current.p2mintemp = 100;
|
||||
|
||||
// Standardize the pair
|
||||
strand = current.p2->word > current.p1->word;
|
||||
if (!strand)
|
||||
{
|
||||
wswp = current.p1;
|
||||
current.p1=current.p2;
|
||||
current.p2=wswp;
|
||||
|
||||
bswp = current.asdirect1;
|
||||
current.asdirect1=current.asdirect2;
|
||||
current.asdirect2=bswp;
|
||||
}
|
||||
|
||||
|
||||
//Code to make sure that if -3 option is given then
|
||||
//3' end must match upto given number of base pairs
|
||||
if (options->strict_three_prime > 0)
|
||||
{
|
||||
w1 = current.p1->word;
|
||||
w2 = current.p2->word;
|
||||
if (!current.asdirect1) //make sure that word is from 5' to 3'
|
||||
w1=ecoComplementWord(w1,options->primer_length);
|
||||
|
||||
if (!current.asdirect2) //make sure that word is from 5' to 3'
|
||||
w2=ecoComplementWord(w2,options->primer_length);
|
||||
//now both w1 and w2 are from 5' to 3' end
|
||||
bp1 = matches[i].position;
|
||||
bp2 = matches[j].position;
|
||||
if (!strand)
|
||||
{
|
||||
bp1 = matches[j].position;
|
||||
bp2 = matches[i].position;
|
||||
}
|
||||
//get word of first approximate repeat
|
||||
w1a = extractSite(seqdb[seqid]->SQ,bp1,options->primer_length,strand);
|
||||
//get word of second approximate repeat
|
||||
w2a = extractSite(seqdb[seqid]->SQ,bp2,options->primer_length,!strand);
|
||||
|
||||
w1 = w1 & omask; //keep only strict_three_prime bases on the right (3') end
|
||||
w2 = w2 & omask; //keep only strict_three_prime bases on the right (3') end
|
||||
w1a = w1a & omask; //keep only strict_three_prime bases on the right (3') end
|
||||
w2a = w2a & omask; //keep only strict_three_prime bases on the right (3') end
|
||||
|
||||
//now check that both words and primers of amplifia have same bases on 3' end
|
||||
if ((w1 ^ w1a) != 0) continue;
|
||||
if ((w2 ^ w2a) != 0) continue;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Look for the new pair in already seen pairs
|
||||
|
||||
pcurrent = insertpair(current,pairs);
|
||||
|
||||
|
||||
if (seqdb[seqid]->isexample)
|
||||
|
||||
{
|
||||
//pcurrent->inexample++;
|
||||
pcurrent->sumd+=distance;
|
||||
pcurrent->amplifiacount++;
|
||||
|
||||
if ((pcurrent->maxd==DMAX) || (distance > pcurrent->maxd))
|
||||
pcurrent->maxd = distance;
|
||||
|
||||
if (distance < pcurrent->mind)
|
||||
pcurrent->mind = distance;
|
||||
}
|
||||
//else
|
||||
// pcurrent->outexample++;
|
||||
|
||||
//for each pair we save current sequence id in the pair
|
||||
//when we see this pair for the first time in current sequence
|
||||
//because we want to increment inexample & outexample count
|
||||
//only once for one sequence
|
||||
if (pcurrent->curseqid != (seqid+1))
|
||||
{
|
||||
if (seqdb[seqid]->isexample)
|
||||
pcurrent->inexample++;
|
||||
else
|
||||
pcurrent->outexample++;
|
||||
|
||||
if (pcurrent->curseqid != 0)
|
||||
pcurrent->curseqid = seqid+1;
|
||||
}
|
||||
|
||||
/*if ((pcurrent->outexample+pcurrent->inexample)==0)
|
||||
{
|
||||
fprintf(stderr,"pcurrent->outexample+pcurrent->inexample=0!\n");
|
||||
exit(0);
|
||||
}*/
|
||||
|
||||
if (pcurrent->curseqid == 0)//((pcurrent->outexample+pcurrent->inexample)==1)
|
||||
{
|
||||
pcurrent->curseqid = seqid+1;
|
||||
paircount++;
|
||||
pcurrent->pcr.ampslot=200;
|
||||
pcurrent->pcr.ampcount=0;
|
||||
pcurrent->pcr.amplifias = ECOMALLOC(sizeof(amplifia_t)*pcurrent->pcr.ampslot,
|
||||
"Cannot allocate amplifia table");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (pcurrent->pcr.ampslot==pcurrent->pcr.ampcount)
|
||||
{
|
||||
pcurrent->pcr.ampslot+=200;
|
||||
pcurrent->pcr.amplifias = ECOREALLOC(pcurrent->pcr.amplifias,
|
||||
sizeof(amplifia_t)*pcurrent->pcr.ampslot,
|
||||
"Cannot allocate amplifia table");
|
||||
}
|
||||
}
|
||||
|
||||
if (seqid==options->refseqid)
|
||||
pcurrent->refsequence=seqid;
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].length=distance;
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].sequence=seqdb[seqid];
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].strand=strand;
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].begin=matches[i].position + options->primer_length;
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].end= matches[j].position - 1;
|
||||
|
||||
if (strand)
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[i].position + options->primer_length;
|
||||
else
|
||||
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ;
|
||||
|
||||
|
||||
/*strncpy (prmr, seqdb[seqid]->SQ + matches[i].position, options->primer_length);
|
||||
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
|
||||
if (mtemp < pcurrent->p1mintemp)
|
||||
pcurrent->p1mintemp = mtemp;
|
||||
//fprintf (stderr, "prmr1: %s\n", seqdb[seqid]->SQ);
|
||||
strncpy (prmr, seqdb[seqid]->SQ + matches[j].position, options->primer_length);
|
||||
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
|
||||
if (mtemp < pcurrent->p2mintemp)
|
||||
pcurrent->p2mintemp = mtemp;
|
||||
//fprintf (stderr, "prmr2: %s\n", prmr);
|
||||
|
||||
if (pcurrent->p1temp == 100)
|
||||
pcurrent->p1temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p1->word, options->primer_length), 0) - 273.0;
|
||||
if (pcurrent->p2temp == 100)
|
||||
pcurrent->p2temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p2->word, options->primer_length), 0) - 273.0;
|
||||
*/
|
||||
pcurrent->pcr.ampcount++;
|
||||
// fprintf(stderr,"%c%c W1 : %s direct : %c",
|
||||
// "bG"[(int)pcurrent->p1->good],
|
||||
// "bG"[(int)pcurrent->p2->good],
|
||||
// ecoUnhashWord(pcurrent->p1->word, options->primer_length),
|
||||
// "><"[(int)pcurrent->asdirect1]
|
||||
// );
|
||||
//
|
||||
// fprintf(stderr," W2 : %s direct : %c distance : %d (min/max/avg : %d/%d/%f) in/out: %d/%d %c (%d pairs)\n",
|
||||
// ecoUnhashWord(pcurrent->p2->word, options->primer_length),
|
||||
// "><"[(int)pcurrent->asdirect2],
|
||||
// distance,
|
||||
// pcurrent->mind,pcurrent->maxd,
|
||||
// (pcurrent->inexample) ? (float)pcurrent->sumd/pcurrent->inexample:0.0,
|
||||
// pcurrent->inexample,pcurrent->outexample,
|
||||
// " N"[(pcurrent->outexample+pcurrent->inexample)==1],
|
||||
// paircount
|
||||
//
|
||||
// );
|
||||
//
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pairs->count=paircount;
|
||||
|
||||
}
|
136
src/libecoprimer/pairtree.c
Normal file
136
src/libecoprimer/pairtree.c
Normal file
@ -0,0 +1,136 @@
|
||||
/*
|
||||
* pairtree.c
|
||||
*
|
||||
* Created on: 7 mars 2009
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <search.h>
|
||||
|
||||
static void cleanpair(ppair_t pair);
|
||||
static void deletepairlist(ppairlist_t list);
|
||||
static int cmppair(const void* p1,const void*p2);
|
||||
|
||||
|
||||
/* Release the amplifia array owned by `list`, if one is attached. */
static void cleanamplifiatlist(pamplifiacount_t list)
{
	if (!list->amplifias)
		return;

	ECOFREE(list->amplifias, "Free amplifia list");
}
|
||||
|
||||
/* Release the resources held by one pair: currently only its amplifia table. */
static void cleanpair(ppair_t pair)
{
	pamplifiacount_t pcr = &(pair->pcr);

	cleanamplifiatlist(pcr);
}
|
||||
|
||||
/*
 * Allocate an empty pair-list chunk with room for `size` pairs.
 * When `parent` is given, the new chunk is linked as parent->next.
 * Returns the new chunk.
 */
static ppairlist_t newpairlist(ppairlist_t parent, size_t size)
{
	/* pairlist_t already embeds one pair_t, hence the (size-1) extra slots */
	ppairlist_t chunk = ECOMALLOC(sizeof(pairlist_t)+sizeof(pair_t)*(size-1),
	                              "Cannot allocate new pair list");

	chunk->next      = NULL;
	chunk->paircount = 0;
	chunk->pairslots = size;

	if (parent != NULL)
		parent->next = (void*)chunk;

	return chunk;
}
|
||||
|
||||
/*
 * Free a chain of pair-list chunks: the chunks following `list` are
 * released first, then every pair stored in `list`, then `list` itself.
 * A NULL `list` is accepted and ignored.
 */
static void deletepairlist(ppairlist_t list)
{
	size_t idx;

	if (!list)
		return;

	if (list->next)
	{
		deletepairlist(list->next);
		list->next = NULL;
	}

	for (idx = 0; idx < list->paircount; idx++)
		cleanpair(&(list->pairs[idx]));

	ECOFREE(list,"Delete pair list");
}
|
||||
|
||||
static int cmppair(const void* p1,const void*p2)
|
||||
{
|
||||
ppair_t pr1,pr2;
|
||||
|
||||
pr1=(ppair_t)p1;
|
||||
pr2=(ppair_t)p2;
|
||||
|
||||
if (pr1->p1 < pr2->p1) return -1;
|
||||
if (pr1->p1 > pr2->p1) return 1;
|
||||
|
||||
if (pr1->asdirect1 < pr2->asdirect1) return -1;
|
||||
if (pr1->asdirect1 > pr2->asdirect1) return 1;
|
||||
|
||||
if (pr1->p2 < pr2->p2) return -1;
|
||||
if (pr1->p2 > pr2->p2) return 1;
|
||||
|
||||
if (pr1->asdirect2 < pr2->asdirect2) return -1;
|
||||
if (pr1->asdirect2 > pr2->asdirect2) return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
ppair_t pairintree (pair_t key,
|
||||
ppairtree_t pairlist)
|
||||
{
|
||||
if (!pairlist->tree)
|
||||
return NULL;
|
||||
|
||||
return *((ppair_t*)tsearch((const void *)(&key),
|
||||
&(pairlist->tree),
|
||||
cmppair
|
||||
));
|
||||
}
|
||||
|
||||
ppair_t insertpair(pair_t key,
|
||||
ppairtree_t list)
|
||||
{
|
||||
ppair_t current;
|
||||
ppair_t found;
|
||||
|
||||
if (list->last->paircount==list->last->pairslots)
|
||||
{
|
||||
list->last->next=newpairlist(list->last,100);
|
||||
list->last=list->last->next;
|
||||
}
|
||||
|
||||
current = list->last->pairs + list->last->paircount;
|
||||
*current=key;
|
||||
|
||||
found = *((ppair_t*)tsearch((const void *)current,
|
||||
&(list->tree),
|
||||
cmppair));
|
||||
if (found==current)
|
||||
list->last->paircount++;
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
 * Initialise a pair tree.  When `tree` is NULL a new structure is
 * allocated.  The tree starts empty with a single 300-slot chunk.
 * Returns the initialised tree.
 */
ppairtree_t initpairtree(ppairtree_t tree)
{
	if (tree == NULL)
		tree = ECOMALLOC(sizeof(pairtree_t),"Cannot allocate pair tree");

	tree->tree  = NULL;
	tree->count = 0;

	tree->first = newpairlist(NULL,300);
	tree->last  = tree->first;

	return tree;
}
|
100
src/libecoprimer/queue.c
Normal file
100
src/libecoprimer/queue.c
Normal file
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* queue.c
|
||||
*
|
||||
* Created on: 14 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
|
||||
|
||||
/*
 * Initialise a queue able to hold `size` entries.  When `queue` is NULL
 * the structure itself is allocated.  Returns the queue.
 */
pqueue_t newQueue(pqueue_t queue, uint32_t size)
{
	pqueue_t q = queue;

	if (q == NULL)
		q = ECOMALLOC(sizeof(queue_t),"Cannot allocate queue structure");

	/* size 0 tells resizeQueue that the arrays are not allocated yet */
	q->size = 0;
	resizeQueue(q,size);

	return q;
}
|
||||
|
||||
/*
 * Reset the queue to the empty state and make sure its arrays can hold at
 * least `size` entries.  The arrays are allocated on first use (current
 * size is 0) and grown when `size` exceeds the current size; they are
 * never shrunk.  Returns the queue.
 */
pqueue_t resizeQueue(pqueue_t queue, uint32_t size)
{
	queue->pop   = 0;
	queue->push  = 0;
	queue->empty = TRUE;
	queue->full  = FALSE;

	if (queue->size == 0)
	{
		queue->count = ECOMALLOC(size * sizeof(uint32_t),
		                         "Cannot allocate count queue array"
		                        );
		queue->words = ECOMALLOC(size * sizeof(word_t),
		                         "Cannot allocate word queue array"
		                        );
		queue->size = size;
		return queue;
	}

	if (size > queue->size)
	{
		queue->count = ECOREALLOC(queue->count,
		                          size * sizeof(uint32_t),
		                          "Cannot allocate count queue array"
		                         );
		queue->words = ECOREALLOC(queue->words,
		                          size * sizeof(word_t),
		                          "Cannot allocate word queue array"
		                         );
		queue->size = size;
	}

	return queue;
}
|
||||
|
||||
/*
 * Free the arrays of a queue and mark it as unallocated (size 0).
 * The queue structure itself is not freed.  Returns the queue.
 */
pqueue_t cleanQueue(pqueue_t queue)
{
	const int allocated = (queue->size != 0);

	if (allocated && queue->count)
		ECOFREE(queue->count,"Free count queue");

	if (allocated && queue->words)
		ECOFREE(queue->words,"Free words queue");

	queue->size = 0;

	return queue;
}
|
||||
|
||||
/*
 * Append (word, count) at the write position of the circular queue.
 * Pushing onto a full queue is an assertion failure.
 */
void push(pqueue_t queue, word_t word, uint32_t count)
{
	ECO_ASSERT(!queue->full,"Queue is full");

	queue->words[queue->push] = word;
	queue->count[queue->push] = count;

	/* advance the write cursor, wrapping at the end of the arrays */
	++queue->push;
	if (queue->push == queue->size)
		queue->push = 0;

	queue->empty = FALSE;
	queue->full  = (queue->push == queue->pop);
}
|
||||
|
||||
/*
 * Drop the oldest entry of the circular queue.
 * Popping from an empty queue is an assertion failure.
 */
void pop(pqueue_t queue)
{
	ECO_ASSERT(!queue->empty,"Queue is empty");

	/* advance the read cursor, wrapping at the end of the arrays */
	++queue->pop;
	if (queue->pop == queue->size)
		queue->pop = 0;

	queue->full  = FALSE;
	queue->empty = (queue->push == queue->pop);
}
|
59
src/libecoprimer/readdnadb.c
Normal file
59
src/libecoprimer/readdnadb.c
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* readdnadb.c
|
||||
*
|
||||
* Created on: 7 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
/*
 * Load into memory every sequence of database `name` whose taxon is an
 * example or a counter-example taxon according to `options`.
 *
 * name     : database name handed to ecoseq_iterator()
 * taxonomy : taxonomy used to classify each sequence's taxid
 * size     : output, number of sequences kept
 * options  : selection options
 *
 * Returns an array of *size sequence pointers.  Sequences that are not
 * kept are deleted immediately.
 */
pecodnadb_t readdnadb(const char *name, ecotaxonomy_t *taxonomy, uint32_t *size,poptions_t options)
{
	ecoseq_t    *seq;
	uint32_t     buffsize=100;
	pecodnadb_t  db;

	db = ECOMALLOC(buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory");

	for(seq=ecoseq_iterator(name), *size=0;
	    seq;
	    seq=ecoseq_iterator(NULL)
	   )
	{
		if (isExampleTaxon(taxonomy,seq->taxid,options) ||
		    isCounterExampleTaxon(taxonomy,seq->taxid,options))
		{
			if (*size==buffsize)
			{
				/* table is full: double its capacity */
				buffsize*=2;
				db = ECOREALLOC(db,buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory");
			}
			db[*size]=seq;
			(*size)++;
		}
		else
		{
			delete_ecoseq(seq);
		}
	}

	/*
	 * Shrink the table to its final size.  Skipped when nothing was kept:
	 * a zero-size realloc is implementation-defined and may return NULL,
	 * which would look like an allocation failure.
	 */
	if (*size > 0)
		db = ECOREALLOC(db,(*size)*sizeof(ecoseq_t*),"I cannot allocate db memory");

	return db;
}
|
||||
|
||||
|
||||
/*
 * Debug helper: print the first 10 characters of every sequence of
 * `seqdb` on stderr, then terminate the program with exit(0).
 */
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize)
{
	uint32_t i;
	char ch[11];

	/* strncpy does not terminate the copy when the source is >= 10 chars */
	ch [10] = '\0';

	for (i=0; i < seqdbsize; i++)
	{
		strncpy (ch, seqdb[i]->SQ, 10);
		/* i is unsigned: print it with %u (the original used %d) */
		fprintf (stderr, "seq %u = %s\n", (unsigned)i, ch);
	}
	exit (0);
}
|
265
src/libecoprimer/smothsort.c
Normal file
265
src/libecoprimer/smothsort.c
Normal file
@ -0,0 +1,265 @@
|
||||
/*
|
||||
* This file is part of the Sofia-SIP package
|
||||
*
|
||||
* Copyright (C) 2005 Nokia Corporation.
|
||||
*
|
||||
* Contact: Pekka Pessi <pekka.pessi@nokia.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
* as published by the Free Software Foundation; either version 2.1 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
* 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
/**@file smoothsort.c
|
||||
* @brief Smoothsort implementation
|
||||
*
|
||||
* Smoothsort is a in-place sorting algorithm with performance of O(NlogN)
|
||||
* in worst case and O(n) in best case.
|
||||
*
|
||||
* @sa <a href="http://www.enterag.ch/hartwig/order/smoothsort.pdf">
|
||||
* "Smoothsort, an alternative for sorting in-situ", E.D. Dijkstra, EWD796a</a>,
|
||||
* <http://www.enterag.ch/hartwig/order/smoothsort.pdf>.
|
||||
*
|
||||
* @author Pekka Pessi <Pekka.Pessi@nokia.com>
|
||||
*/
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include <inttypes.h> /* <EC> add sto switch from size_t to uint32_t */
|
||||
|
||||
/** Description of current stretch */
typedef struct {
  uint32_t b, c;		/**< Leonardo numbers */
  unsigned long long p;		/**< Concatenation codification */
} stretch;

/** Description of array */
typedef struct
{
  void *m;
  int (*less)(void *m, uint32_t a, uint32_t b);
  void (*swap)(void *m, uint32_t a, uint32_t b);
} array;

/** Step the stretch up: (b, c) becomes (b + c + 1, b) and p is shifted
 *  right by one bit.  Returns the new b. */
static inline uint32_t stretch_up(stretch s[1])
{
  uint32_t const grown = s->b + s->c + 1;

  s->p >>= 1;
  s->c = s->b;
  s->b = grown;

  return grown;
}

/** Step the stretch down: (b, c) becomes (c, b - c - 1) and p is shifted
 *  left by one bit with @a bit or-ed into the freed position.
 *  Returns the new b. */
static inline uint32_t stretch_down(stretch s[1], unsigned bit)
{
  uint32_t const shrunk = s->c;

  s->p = (s->p << 1) | bit;
  s->c = s->b - s->c - 1;
  s->b = shrunk;

  return shrunk;
}
|
||||
|
||||
#if DEBUG_SMOOTHSORT
/* Debug build: DEBUG((fmt, ...)) forwards its parenthesised argument list
 * to printf.  Without this definition the file does not compile when
 * DEBUG_SMOOTHSORT is set, because DEBUG was only defined in the #else
 * branch. */
#define DEBUG(x) printf x

/* Return the binary representation of p, without leading zeros, in a
 * static buffer that is overwritten by the next call. */
static char const *binary(unsigned long long p)
{
  static char digits[65];
  int i;

  if (p == 0)
    return "0";

  digits[64] = 0;

  /* fill the buffer from its end, least significant bit first */
  for (i = 64; p; p >>= 1)
    digits[--i] = "01"[p & 1];

  return digits + i;
}
#else
#define DEBUG(x) ((void)0)
#endif
|
||||
|
||||
/**
|
||||
* Sift the root of the stretch.
|
||||
*
|
||||
* The low values are sifted up (towards index 0) from root.
|
||||
*
|
||||
* @param array description of array to sort
|
||||
* @param r root of the stretch
|
||||
* @param s description of current stretch
|
||||
*/
|
||||
static void sift(array const *array, uint32_t r, stretch s)
|
||||
{
|
||||
while (s.b >= 3) {
|
||||
uint32_t r2 = r - s.b + s.c;
|
||||
|
||||
if (!array->less(array->m, r - 1, r2)) {
|
||||
r2 = r - 1;
|
||||
stretch_down(&s, 0);
|
||||
}
|
||||
|
||||
if (array->less(array->m, r2, r))
|
||||
break;
|
||||
|
||||
DEBUG(("\tswap(%p @%zu <=> @%zu)\n", array, r, r2));
|
||||
|
||||
array->swap(array->m, r, r2); r = r2;
|
||||
|
||||
stretch_down(&s, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/** Trinkle the roots of the given stretches
|
||||
*
|
||||
* @param array description of array to sort
|
||||
* @param r root of the stretch
|
||||
* @param s description of stretches to concatenate
|
||||
*/
|
||||
static void trinkle(array const *array, uint32_t r, stretch s)
|
||||
{
|
||||
DEBUG(("trinkle(%p, %zu, (%u, %s))\n", array, r, s.b, binary(s.p)));
|
||||
|
||||
while (s.p != 0) {
|
||||
uint32_t r2, r3;
|
||||
|
||||
while ((s.p & 1) == 0)
|
||||
stretch_up(&s);
|
||||
|
||||
if (s.p == 1)
|
||||
break;
|
||||
|
||||
r3 = r - s.b;
|
||||
|
||||
if (array->less(array->m, r3, r))
|
||||
break;
|
||||
|
||||
s.p--;
|
||||
|
||||
if (s.b < 3) {
|
||||
DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r3, s.b));
|
||||
array->swap(array->m, r, r3); r = r3;
|
||||
continue;
|
||||
}
|
||||
|
||||
r2 = r - s.b + s.c;
|
||||
|
||||
if (array->less(array->m, r2, r - 1)) {
|
||||
r2 = r - 1;
|
||||
stretch_down(&s, 0);
|
||||
}
|
||||
|
||||
if (array->less(array->m, r2, r3)) {
|
||||
DEBUG(("swap(%p [%zu]=[%zu])\n", array, r, r3));
|
||||
array->swap(array->m, r, r3); r = r3;
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r2, s.b));
|
||||
array->swap(array->m, r, r2); r = r2;
|
||||
stretch_down(&s, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
sift(array, r, s);
|
||||
}
|
||||
|
||||
/** Trinkles the stretches when the adjacent stretches are already trusty.
|
||||
*
|
||||
* @param array description of array to sort
|
||||
* @param r root of the stretch
|
||||
* @param stretch description of stretches to trinkle
|
||||
*/
|
||||
static void semitrinkle(array const *array, uint32_t r, stretch s)
|
||||
{
|
||||
uint32_t r1 = r - s.c;
|
||||
|
||||
DEBUG(("semitrinkle(%p, %zu, (%u, %s))\n", array, r, s.b, binary(s.p)));
|
||||
|
||||
if (array->less(array->m, r, r1)) {
|
||||
DEBUG(("\tswap(%p @%zu <=> @%zu b=%u)\n", array, r, r1, s.b));
|
||||
array->swap(array->m, r, r1);
|
||||
trinkle(array, r1, s);
|
||||
}
|
||||
}
|
||||
|
||||
/** Sort array using smoothsort.
|
||||
*
|
||||
* Sort @a N elements from array @a base starting with index @a r with smoothsort.
|
||||
*
|
||||
* @param base pointer to array
|
||||
* @param r lowest index to sort
|
||||
* @param N number of elements to sort
|
||||
* @param less comparison function returning nonzero if m[a] < m[b]
|
||||
* @param swap swapper function exchanging elements m[a] and m[b]
|
||||
*/
|
||||
void su_smoothsort(void *base, uint32_t r, uint32_t N,
|
||||
int (*less)(void *m, uint32_t a, uint32_t b),
|
||||
void (*swap)(void *m, uint32_t a, uint32_t b))
|
||||
{
|
||||
stretch s = { 1, 1, 1 };
|
||||
uint32_t q;
|
||||
|
||||
array const array[1] = {{ base, less, swap }};
|
||||
|
||||
assert(less && swap);
|
||||
|
||||
if (base == NULL || N <= 1 || less == NULL || swap == NULL)
|
||||
return;
|
||||
|
||||
DEBUG(("\nsmoothsort(%p, %zu)\n", array, nmemb));
|
||||
|
||||
for (q = 1; q != N; q++, r++, s.p++) {
|
||||
DEBUG(("loop0 q=%zu, b=%u, p=%s \n", q, s.b, binary(s.p)));
|
||||
|
||||
if ((s.p & 7) == 3) {
|
||||
sift(array, r, s), stretch_up(&s), stretch_up(&s);
|
||||
}
|
||||
else /* if ((s.p & 3) == 1) */ { assert((s.p & 3) == 1);
|
||||
if (q + s.c < N)
|
||||
sift(array, r, s);
|
||||
else
|
||||
trinkle(array, r, s);
|
||||
|
||||
while (stretch_down(&s, 0) > 1)
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
trinkle(array, r, s);
|
||||
|
||||
for (; q > 1; q--) {
|
||||
s.p--;
|
||||
|
||||
DEBUG(("loop1 q=%zu: b=%u p=%s\n", q, s.b, binary(s.p)));
|
||||
|
||||
if (s.b <= 1) {
|
||||
while ((s.p & 1) == 0)
|
||||
stretch_up(&s);
|
||||
--r;
|
||||
}
|
||||
else /* if b >= 3 */ {
|
||||
if (s.p) semitrinkle(array, r - (s.b - s.c), s);
|
||||
stretch_down(&s, 1);
|
||||
semitrinkle(array, --r, s);
|
||||
stretch_down(&s, 1);
|
||||
}
|
||||
}
|
||||
}
|
51
src/libecoprimer/sortmatch.c
Normal file
51
src/libecoprimer/sortmatch.c
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* sortmatch.c
|
||||
*
|
||||
 *  Created on: 15 déc. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
/*
|
||||
* sortword.c
|
||||
*
|
||||
*
|
||||
* Created on: 6 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <math.h>
|
||||
|
||||
void su_smoothsort(void *base, uint32_t r, uint32_t N,
|
||||
int (*less)(void *m, uint32_t a, uint32_t b),
|
||||
void (*swap)(void *m, uint32_t a, uint32_t b));
|
||||
|
||||
static int less(void *m, uint32_t a, uint32_t b);
|
||||
static void swap(void *m, uint32_t a, uint32_t b);
|
||||
|
||||
|
||||
/* Sort the N primer matches of `table` in place with smoothsort, using
 * this file's position-based comparison. */
void sortmatch(pprimermatch_t table,uint32_t N)
{
	void *base = (void*)table;

	su_smoothsort(base,0,N,less,swap);
}
|
||||
|
||||
int less(void *m, uint32_t a, uint32_t b)
|
||||
{
|
||||
pprimermatch_t t;
|
||||
|
||||
t = (pprimermatch_t)m;
|
||||
|
||||
return t[a].position <= t[b].position;
|
||||
}
|
||||
|
||||
void swap(void *m, uint32_t a, uint32_t b)
|
||||
{
|
||||
primermatch_t tmp;
|
||||
pprimermatch_t t;
|
||||
|
||||
t = (pprimermatch_t)m;
|
||||
tmp = t[a];
|
||||
t[a]= t[b];
|
||||
t[b]= tmp;
|
||||
}
|
||||
|
44
src/libecoprimer/sortword.c
Normal file
44
src/libecoprimer/sortword.c
Normal file
@ -0,0 +1,44 @@
|
||||
/*
|
||||
* sortword.c
|
||||
*
|
||||
*
|
||||
* Created on: 6 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include "ecoprimer.h"
|
||||
#include <math.h>
|
||||
|
||||
void su_smoothsort(void *base, uint32_t r, uint32_t N,
|
||||
int (*less)(void *m, uint32_t a, uint32_t b),
|
||||
void (*swap)(void *m, uint32_t a, uint32_t b));
|
||||
|
||||
static int less(void *m, uint32_t a, uint32_t b);
|
||||
static void swap(void *m, uint32_t a, uint32_t b);
|
||||
|
||||
|
||||
/* Sort the N words of `table` in place with smoothsort, using this
 * file's WORD()-based comparison. */
void sortword(pword_t table,uint32_t N)
{
	void *base = (void*)table;

	su_smoothsort(base,0,N,less,swap);
}
|
||||
|
||||
int less(void *m, uint32_t a, uint32_t b)
|
||||
{
|
||||
pword_t t;
|
||||
|
||||
t = (pword_t)m;
|
||||
|
||||
return WORD(t[a]) <= WORD(t[b]);
|
||||
}
|
||||
|
||||
void swap(void *m, uint32_t a, uint32_t b)
|
||||
{
|
||||
word_t tmp;
|
||||
pword_t t;
|
||||
|
||||
t = (pword_t)m;
|
||||
tmp = t[a];
|
||||
t[a]= t[b];
|
||||
t[b]= tmp;
|
||||
}
|
||||
|
264
src/libecoprimer/strictprimers.c
Normal file
264
src/libecoprimer/strictprimers.c
Normal file
@ -0,0 +1,264 @@
|
||||
/*
|
||||
* strictprimers.c
|
||||
*
|
||||
* Created on: 7 nov. 2008
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include "ecoprimer.h"
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <sys/resource.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifndef RUSAGE_SELF
|
||||
#define RUSAGE_SELF 0
|
||||
#define RUSAGE_CHILDREN -1
|
||||
#endif
|
||||
|
||||
static double timeval_subtract (struct timeval *x, struct timeval *y);
|
||||
|
||||
|
||||
/* Subtract the `struct timeval' value Y from X.
   Return the elapsed seconds as a double.

   Neither argument is modified: the carry normalisation is done on a
   private copy of Y (the original version rewrote the caller's *Y).  */

double timeval_subtract (struct timeval *x, struct timeval *y)
{
  struct timeval sub = *y;
  struct timeval result;

  /* Perform the carry for the later subtraction by updating the copy. */
  if (x->tv_usec < sub.tv_usec) {
    int nsec = (sub.tv_usec - x->tv_usec) / 1000000 + 1;
    sub.tv_usec -= 1000000 * nsec;
    sub.tv_sec += nsec;
  }
  if (x->tv_usec - sub.tv_usec > 1000000) {
    int nsec = (x->tv_usec - sub.tv_usec) / 1000000;
    sub.tv_usec += 1000000 * nsec;
    sub.tv_sec -= nsec;
  }

  /* After the carry, the microsecond difference is not negative. */
  result.tv_sec = x->tv_sec - sub.tv_sec;
  result.tv_usec = x->tv_usec - sub.tv_usec;

  return (double)result.tv_sec + (double)result.tv_usec/1e6;
}
|
||||
|
||||
/*
 * Initialise a word count table.  When `table` is NULL the structure is
 * allocated.  All fields are reset; when `seq` is given the table is then
 * filled with the compacted hashed words of that sequence, each with a
 * strict count of 1, and inseqcount is set to 1.  Returns the table.
 */
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount)
{
	uint32_t slot;
	uint32_t hashed;

	if (table == NULL)
		table = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure");

	table->words       = NULL;
	table->size        = 0;
	table->outseqcount = 0;
	table->inseqcount  = 0;
	table->strictcount = 0;

	if (!seq)
		return table;

	table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&hashed,neededWords,neededWordCount,seqQuorum);
	table->size  = ecoCompactHashSequence(table->words,hashed);

	table->inseqcount  = 1;
	table->strictcount = ECOMALLOC((table->size*sizeof(uint32_t)),
	                               "Cannot allocate memory for word count table"
	                              );

	for (slot = 0; slot < table->size; slot++)
		table->strictcount[slot] = 1;

	return table;
}
|
||||
|
||||
/*
 * Add the words of sequence `seq` to an existing word count table.
 *
 * The word array is extended, the new sequence is hashed and compacted
 * directly behind the existing words, the new entries get a strict count
 * of 1, and ecomerge() finally merges the two parts in place.
 */
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount)
{
	uint32_t total;
	uint32_t added;
	uint32_t slot;
	pword_t  incoming;

	/* room for the existing words plus every word of the new sequence */
	total = table->size + ecoWordCount(wordsize,circular,seq);

	table->words = ECOREALLOC(table->words,total*sizeof(word_t),
			"\n\nCannot allocate memory to extend word table" );

	incoming = &(table->words[table->size]);

	(void)ecoHashSequence(incoming,wordsize,circular,doublestrand,seq,&added,neededWords,neededWordCount,seqQuorum);

	added = ecoCompactHashSequence(incoming,added);

	total = table->size + added;

	table->inseqcount += 1;

	// resize the count buffer
	table->strictcount = ECOREALLOC(table->strictcount,(total+5000)*sizeof(uint32_t),
							    "Cannot allocate memory to extend example word count table");

	slot = table->size;
	while (slot < total)
	{
		table->strictcount[slot] = 1;
		slot++;
	}

	// Now we have to merge in situ the two tables
	ecomerge(table,table->size,added,exampleCount - table->inseqcount,seqQuorum);
}
|
||||
|
||||
/*
 * Build the table of strict primer candidates from the example sequences.
 *
 * database     : sequences to index
 * seqdbsize    : number of sequences in `database`
 * exampleCount : number of example sequences
 * options      : run options (primer length, quorum, filtering, ...)
 *
 * Every example sequence longer than the primer length is hashed into the
 * word count table; every other sequence only increments outseqcount.
 * When options->statistics is set, a progress line is appended to
 * "ecoprimer_<pid>.log" after each indexed sequence.
 *
 * Returns the word count table, with its arrays trimmed to the final size.
 */
pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
								 uint32_t exampleCount,poptions_t options)
{
	struct rusage start;
	struct rusage usage;
	double        seconde;
	char          logfilename[64];
	FILE          *logfile;
	uint32_t i;
	bool_t first=TRUE;
	pwordcount_t strictprimers=NULL;
	uint64_t totallength=0;
	uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum);
	int32_t *neededWords;
	uint32_t neededWordCount;

	fprintf(stderr,"Filtering... ");

	if (options->filtering)
		neededWords = filteringSeq(database,seqdbsize,exampleCount,options,&neededWordCount,(int32_t)sequenceQuorum);
	else
	{
		neededWordCount=0;
		neededWords=NULL;
	}

	logfilename[0] = '\0';

	if (options->statistics)
	{
		/* The name is built once in a fixed buffer; the original called
		   asprintf() at every use and never freed the result. */
		snprintf(logfilename,sizeof(logfilename),"ecoprimer_%d.log",(int)getpid());
		logfile = fopen(logfilename,"w");
		if (logfile)
		{
			fprintf(logfile,"# seq\tlength\tsize\ttime\tspeed\n");
			fclose(logfile);
		}
		else
			fprintf(stderr,"Cannot open statistics file %s\n",logfilename);
	}

	fprintf(stderr,"  Primers should be at least present in %d/%d example sequences\n",(int)sequenceQuorum,(int)exampleCount);

	strictprimers = initCountTable(NULL,options->primer_length,
									options->circular,
									options->doublestrand,
									0,
									NULL,NULL,0);

	getrusage(RUSAGE_SELF,&start);

	for (i=0;i<seqdbsize;i++)
	{
		if (database[i]->isexample && database[i]->SQ_length > options->primer_length)
		{
			if (first)
			{
				/* first usable example: it seeds the table */
				strictprimers = initCountTable(strictprimers,options->primer_length,
												options->circular,
												options->doublestrand,
												sequenceQuorum,
												database[i],neededWords,neededWordCount);
				first=FALSE;
			}
			else
			{
				addSeqToWordCountTable(strictprimers,options->primer_length,
									   options->circular,
									   options->doublestrand,
									   exampleCount,
									   sequenceQuorum,
									   database[i],neededWords,neededWordCount);
			}

			totallength+=database[i]->SQ_length;
			getrusage(RUSAGE_SELF,&usage);

			if (options->statistics)
			{
				logfile = fopen(logfilename,"a");
				if (logfile)	/* statistics are best effort: skip the line on failure */
				{
					seconde = timeval_subtract(&(usage.ru_utime),&(start.ru_utime)) +
							  timeval_subtract(&(usage.ru_stime),&(start.ru_stime));
					fprintf(logfile,"%d\t%llu\t%lu\t%8.3f\t%8.3e\n",(int)i,
							(long long unsigned)totallength,
							(unsigned long)(strictprimers->size*(sizeof(int64_t)+sizeof(int32_t))),
							seconde,seconde/(double)totallength);
					fclose(logfile);
				}
			}
		}
		else
			strictprimers->outseqcount++;

		fprintf(stderr,"  Indexed sequences %5d/%5d  : considered words %-10llu     \r",
				(int32_t)i+1,(int32_t)seqdbsize,
				(long long unsigned)strictprimers->size);
	}

	/* trim both arrays to the number of words actually kept */
	strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
									sizeof(uint32_t)*strictprimers->size,
									"Cannot reallocate strict primer count table");
	strictprimers->words = ECOREALLOC(strictprimers->words,
									sizeof(word_t)*strictprimers->size,
									"Cannot reallocate strict primer table");

	if (neededWords)
		ECOFREE(neededWords,"Clean needed word table");

	return strictprimers;
}
|
||||
|
||||
/*
 * Remove from the table every word flagged by ISMULTIWORD(), compacting
 * the words and their strict counts in place, then trim both arrays.
 * Returns the number of words kept.
 */
uint32_t filterMultiStrictPrimer(pwordcount_t strictprimers)
{
	uint32_t src = 0;
	uint32_t dst = 0;

	while (src < strictprimers->size)
	{
		if (dst < src)
		{
			strictprimers->words[dst]       = strictprimers->words[src];
			strictprimers->strictcount[dst] = strictprimers->strictcount[src];
		}

		/* a kept word freezes its slot; a multi word gets overwritten next */
		if (! ISMULTIWORD(strictprimers->words[dst]))
			dst++;

		src++;
	}

	strictprimers->size = dst;

	strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
									sizeof(uint32_t)*strictprimers->size,
									"Cannot reallocate strict primer count table");
	strictprimers->words = ECOREALLOC(strictprimers->words,
									sizeof(word_t)*strictprimers->size,
									"Cannot reallocate strict primer table");

	return dst;
}
|
378
src/libecoprimer/taxstats.c
Normal file
378
src/libecoprimer/taxstats.c
Normal file
@ -0,0 +1,378 @@
|
||||
/*
|
||||
* taxstats.c
|
||||
*
|
||||
* Created on: 12 mars 2009
|
||||
* Author: coissac
|
||||
*/
|
||||
|
||||
#include <search.h>
|
||||
//void tdestroy (void *root, void (*free_node)(void *nodep));
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
static int cmptaxon(const void *t1, const void* t2);
|
||||
|
||||
void **tree_root = NULL;
|
||||
int delete_passes = 0;
|
||||
|
||||
void delete_twalkaction (const void *node, VISIT order, int level)
|
||||
{
|
||||
switch (order)
|
||||
{
|
||||
case preorder:
|
||||
delete_passes++;
|
||||
break;
|
||||
case postorder:
|
||||
delete_passes++;
|
||||
break;
|
||||
case endorder:
|
||||
delete_passes++;
|
||||
break;
|
||||
case leaf:
|
||||
if (tree_root)
|
||||
tdelete (node, tree_root,cmptaxon);
|
||||
delete_passes++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void free_tree_nodes (void *tree)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
delete_passes = 0;
|
||||
twalk (tree, delete_twalkaction);
|
||||
if (delete_passes <= 1) break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Ordering function for the taxid trees.
 *
 * Keys are not dereferenced: the pointer values themselves (taxids cast to
 * pointers by the callers) are compared as unsigned integers.
 *
 * @return -1, 0 or +1 when t1 is below, equal to or above t2
 */
static int cmptaxon(const void *t1, const void* t2)
{
	const size_t lhs = (size_t)t1;
	const size_t rhs = (size_t)t2;

	if (lhs == rhs)
		return 0;

	return (lhs < rhs) ? -1 : +1;
}
|
||||
|
||||
int32_t counttaxon(int32_t taxid)
|
||||
{
|
||||
static void* taxontree=NULL;
|
||||
static int32_t taxoncount=0;
|
||||
|
||||
// fprintf(stderr,"counted taxid : %d taxontree %p\n",taxid,taxontree);
|
||||
|
||||
if (taxid==-1)
|
||||
{
|
||||
if (taxontree)
|
||||
{
|
||||
tree_root = (void **)&taxontree;
|
||||
//free_tree_nodes (taxontree);
|
||||
ECOFREE(taxontree,"Free taxon tree");
|
||||
tree_root = NULL;
|
||||
}
|
||||
taxontree=NULL;
|
||||
taxoncount=0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
if ((taxid > 0) && ((!taxontree) || (!tfind((void*)((size_t)taxid),&taxontree,cmptaxon))))
|
||||
{
|
||||
tsearch((void*)((size_t)taxid),&taxontree,cmptaxon);
|
||||
taxoncount++;
|
||||
}
|
||||
return taxoncount;
|
||||
}
|
||||
|
||||
int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxonomy,
|
||||
poptions_t options)
|
||||
{
|
||||
|
||||
uint32_t i;
|
||||
ecotx_t *taxon;
|
||||
ecotx_t *tmptaxon;
|
||||
|
||||
counttaxon(-1);
|
||||
options->intaxa = 0;
|
||||
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
taxon = &(taxonomy->taxons->taxon[seqdb[i]->taxid]);
|
||||
seqdb[i]->isexample=isExampleTaxon(taxonomy,seqdb[i]->taxid,options);
|
||||
|
||||
tmptaxon = eco_findtaxonatrank(taxon,
|
||||
options->taxonrankidx);
|
||||
|
||||
// fprintf(stderr,"Taxid : %d %p\n",taxon->taxid,tmptaxon);
|
||||
|
||||
if (tmptaxon)
|
||||
{
|
||||
// fprintf(stderr,"orig : %d trans : %d\n",taxon->taxid,
|
||||
// tmptaxon->taxid);
|
||||
|
||||
seqdb[i]->ranktaxonid=tmptaxon->taxid;
|
||||
if (seqdb[i]->isexample)
|
||||
options->intaxa = counttaxon(tmptaxon->taxid);
|
||||
}
|
||||
else
|
||||
seqdb[i]->ranktaxonid=-1;
|
||||
}
|
||||
|
||||
counttaxon(-1);
|
||||
options->outtaxa = 0;
|
||||
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
if (seqdb[i]->ranktaxonid>=0 && !seqdb[i]->isexample)
|
||||
options->outtaxa = counttaxon(seqdb[i]->ranktaxonid);
|
||||
}
|
||||
|
||||
return options->outtaxa + options->intaxa;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Compute the taxonomic coverage of a primer pair.
 *
 * Walks the amplicons of `pair` and counts, with the shared counttaxon()
 * counter, the distinct rank-level taxa amplified among example sequences
 * (-> pair->intaxa) and among non-example sequences (-> pair->outtaxa).
 * pair->coveredSeqs[j] is set to 1 for every database sequence j that is an
 * example with a positive ranktaxonid and is amplified by the pair.
 *
 * @param pair       primer pair; intaxa, outtaxa, bc, coveredSeqs are written
 * @param options    options->intaxa is the denominator of the ratio
 * @param seqdb      sequence database
 * @param seqdbsize  number of sequences in seqdb
 * @return           pair->bc = pair->intaxa / options->intaxa
 *
 * NOTE(review): no guard against options->intaxa == 0.
 */
float taxonomycoverage(ppair_t pair, poptions_t options, pecodnadb_t seqdb,uint32_t seqdbsize)
{
	int32_t  seqcount;
	int32_t  i;
	int32_t  incount=0;
	int32_t  outcount=0;
	uint32_t j;

	memset (pair->coveredSeqs, 0, seqdbsize*sizeof (int));
	seqcount=pair->pcr.ampcount;

	/* Example sequences. */
	counttaxon(-1);
	for (i=0; i < seqcount; i++)
		if (pair->pcr.amplifias[i].sequence->isexample
			&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0 )
		{
			incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);

			/* Flag the matching database entry as covered. */
			for (j=0; j<seqdbsize; j++)
				if (pair->pcr.amplifias[i].sequence == seqdb[j])
					{pair->coveredSeqs[j] = 1; break;}
		}

	/* Non-example sequences. */
	counttaxon(-1);
	for (i=0; i < seqcount; i++)
		/*
		 * Must be "> 0", not a truthiness test: ranktaxonid is -1 when
		 * no taxon exists at the requested rank, and counttaxon(-1)
		 * would reset the counter in the middle of the count.
		 */
		if (!pair->pcr.amplifias[i].sequence->isexample
			&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0)
			outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);

	pair->intaxa=incount;
	pair->outtaxa=outcount;
	pair->bc=(float)incount/options->intaxa;
	return pair->bc;
}
|
||||
|
||||
/*
|
||||
static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
{
|
||||
int i;
|
||||
int j = 0;
|
||||
int incr = 1;
|
||||
char cd1;
|
||||
char cd2;
|
||||
int chd = 0;
|
||||
int len = 0;
|
||||
|
||||
pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
|
||||
pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;
|
||||
|
||||
|
||||
if (pampf1->strand != pampf2->strand)
|
||||
{
|
||||
incr = -1;
|
||||
j = pampf1->length - 1;
|
||||
|
||||
if (pampf2->strand)
|
||||
{
|
||||
pampf1 = (pamptotaxon_t) ampf2;
|
||||
pampf2 = (pamptotaxon_t) ampf1;
|
||||
chd = 1;
|
||||
}
|
||||
//j = pampf2->length - 1; should have been here and pampf2 instead of pampf1?
|
||||
}
|
||||
|
||||
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
|
||||
|
||||
for (i = 0; i < len; i++, j += incr)
|
||||
{
|
||||
cd1 = pampf1->amplifia[i];
|
||||
if (incr == -1)
|
||||
cd2 = ecoComplementChar(pampf2->amplifia[j]);
|
||||
else
|
||||
cd2 = pampf2->amplifia[j];
|
||||
|
||||
if (cd1 < cd2) return chd ? 1: -1;
|
||||
if (cd2 < cd1) return chd ? -1: 1;
|
||||
}
|
||||
|
||||
if (pampf1->length > pampf2->length) return chd ? -1: 1;
|
||||
if (pampf2->length > pampf1->length) return chd ? 1: -1;
|
||||
|
||||
return 0;
|
||||
}*/
|
||||
|
||||
|
||||
static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
{
|
||||
int i;
|
||||
char cd1;
|
||||
char cd2;
|
||||
int len = 0;
|
||||
char *ch1;
|
||||
char *ch2;
|
||||
int incr1;
|
||||
int incr2;
|
||||
|
||||
pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
|
||||
pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;
|
||||
|
||||
ch1 = pampf1->amplifia;
|
||||
ch2 = pampf2->amplifia;
|
||||
|
||||
incr1 = 1;
|
||||
incr2 = 1;
|
||||
|
||||
if (!pampf1->strand)
|
||||
incr1 = -1;
|
||||
if (!pampf2->strand)
|
||||
incr2 = -1;
|
||||
|
||||
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
cd1 = *ch1;
|
||||
if (incr1 == -1)
|
||||
cd1 = ecoComplementChar(*ch1);
|
||||
|
||||
cd2 = *ch2;
|
||||
if (incr2 == -1)
|
||||
cd2 = ecoComplementChar(*ch2);
|
||||
|
||||
if (cd1 < cd2) return -1;
|
||||
if (cd2 < cd1) return 1;
|
||||
|
||||
ch1 += incr1;
|
||||
ch2 += incr2;
|
||||
}
|
||||
|
||||
if (pampf1->length > pampf2->length) return 1;
|
||||
if (pampf2->length > pampf1->length) return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * twalk() callback: read an int32_t at the node address and register it
 * with counttaxon().
 *
 * @param node   address of the visited tree node
 * @param order  kind of visit (unused)
 * @param level  depth of the node (unused)
 */
void twalkaction (const void *node, VISIT order, int level)
{
	int32_t *slot = (int32_t*)node;

	(void)order;
	(void)level;

	counttaxon(*slot);
}
|
||||
|
||||
/* Last value captured by twalkaction2(); callers preset it to -1. */
int32_t gtxid;

/**
 * twalk() callback: store the int32_t found at the node address into the
 * global `gtxid`. Each visit overwrites the previous value.
 *
 * @param node   address of the visited tree node
 * @param order  kind of visit (unused)
 * @param level  depth of the node (unused)
 */
void twalkaction2 (const void *node, VISIT order, int level)
{
	(void)order;
	(void)level;

	gtxid = *((int32_t *) node);
}
|
||||
|
||||
/**
 * Compute the taxonomic specificity of a primer pair.
 *
 * 1. Builds a tree of the distinct amplicons (ordered by cmpamp) produced
 *    on example sequences; each distinct amplicon carries its own tree of
 *    rank-level taxids and the number of distinct taxids in it.
 * 2. An amplicon seen in exactly one taxon is "well identified": every
 *    example sequence of that taxon matched by both primers (direct or
 *    reverse count > 0) gets pair->wellIdentifiedSeqs[j] = 1.
 * 3. Counts the distinct taxa of the flagged sequences with counttaxon()
 *    and stores
 *        pair->notwellidentifiedtaxa = pair->intaxa - wellidentified
 *        pair->bs                    = wellidentified / pair->intaxa
 *
 * Reads pair->intaxa, so it relies on that field having been filled first.
 *
 * NOTE(review): no guard against pair->intaxa == 0.
 *
 * @param pair       primer pair to evaluate (fields above are written)
 * @param seqdb      sequence database
 * @param seqdbsize  number of sequences in seqdb
 */
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
{
	uint32_t i, j;
	uint32_t ampfindex = 0;
	int32_t  taxid;
	uint32_t wellidentifiedcount;

	void *ampftree = NULL;
	pamptotaxon_t pcurrentampf;
	pamptotaxon_t *ptmp;

	pamptotaxon_t ampfwithtaxtree = ECOMALLOC(sizeof(amptotaxon_t) * pair->pcr.ampcount,"Cannot allocate amplifia tree");

	for (i = 0; i < pair->pcr.ampcount; i++)
	{
		/* populate the taxon id tree of each unique amplifia,
		   i.e. the set of taxon ids for each amplifia */
		if (pair->pcr.amplifias[i].sequence->isexample)
		{
			/*
			 * Slot `ampfindex` is a scratch candidate until it is
			 * inserted in the tree, so it is safe to (re)initialise
			 * it completely here - including the taxon tree and
			 * counter, which must not rely on the allocator
			 * returning zeroed memory.
			 */
			ampfwithtaxtree[ampfindex].amplifia   = pair->pcr.amplifias[i].amplifia;
			ampfwithtaxtree[ampfindex].strand     = pair->pcr.amplifias[i].strand;
			ampfwithtaxtree[ampfindex].length     = pair->pcr.amplifias[i].length;
			ampfwithtaxtree[ampfindex].taxontree  = NULL;
			ampfwithtaxtree[ampfindex].taxoncount = 0;

			pcurrentampf = &ampfwithtaxtree[ampfindex];
			taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;

			ptmp = tfind((const void*)pcurrentampf, &ampftree, cmpamp);
			if (ptmp == NULL)
			{
				/* New amplicon: the candidate slot becomes permanent. */
				tsearch((void*)pcurrentampf,&ampftree,cmpamp);
				ampfindex++;
			}
			else
				pcurrentampf = *ptmp;

			if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL)
			{
				pcurrentampf->taxoncount++;
				tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
			}
		}
	}

	memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int));

	for (i = 0; i < ampfindex; i++)
	{
		//TR 5/9/10 - added code for well identified seqs
		if (ampfwithtaxtree[i].taxoncount == 1) /*well identified*/
		{
			/* Single-node tree: the walk captures its only taxid. */
			gtxid = -1;
			twalk(ampfwithtaxtree[i].taxontree, twalkaction2);

			if (gtxid != -1)
			{
				for (j = 0; j < seqdbsize; j++)
					if (seqdb[j]->ranktaxonid == gtxid
						&& seqdb[j]->isexample
						&&(pair->p1->directCount[j] > 0
							|| pair->p1->reverseCount[j] > 0)
						&& (pair->p2->directCount[j] > 0
							|| pair->p2->reverseCount[j] > 0))
					{
						pair->wellIdentifiedSeqs[j] = 1;
					}
			}
		}
	}

	counttaxon(-1);
	wellidentifiedcount = 0;
	for (j = 0; j < seqdbsize; j++)
		if (pair->wellIdentifiedSeqs[j] == 1)
			counttaxon(seqdb[j]->ranktaxonid);
	wellidentifiedcount = counttaxon(-2);	/* -2: query only, nothing inserted */

	pair->notwellidentifiedtaxa = (pair->intaxa-wellidentifiedcount);
	pair->bs = ((float)wellidentifiedcount) / (float)pair->intaxa;

	/*
	 * Release the search trees. A node pointer can be cast to
	 * pointer-to-pointer-to-element to reach its key, so deleting the
	 * root's key until the tree is empty frees every node. The amplicon
	 * tree must go before the table, because cmpamp dereferences the keys,
	 * which live in the table.
	 */
	for (i = 0; i < ampfindex; i++)
		while (ampfwithtaxtree[i].taxontree != NULL)
			tdelete(*(void **)ampfwithtaxtree[i].taxontree,
			        &(ampfwithtaxtree[i].taxontree), cmptaxon);

	while (ampftree != NULL)
		tdelete(*(void **)ampftree, &ampftree, cmpamp);

	ECOFREE (ampfwithtaxtree, "Free amplifia table");
}
|
23
src/libthermo/Makefile
Normal file
23
src/libthermo/Makefile
Normal file
@ -0,0 +1,23 @@
|
||||
|
||||
# Builds the static library libthermo.a from the thermodynamics sources.
# Compilation rules and flags are expected to come from ../global.mk.

SOURCES = nnparams.c \
          thermostats.c

SRCS=$(SOURCES)

OBJECTS= $(patsubst %.c,%.o,$(SOURCES))

LIBFILE= libthermo.a
RANLIB= ranlib


include ../global.mk

# `all` and `clean` are commands, not files.
.PHONY: all clean

all: $(LIBFILE)

clean:
	rm -rf $(OBJECTS) $(LIBFILE)

# $? = only the objects newer than the archive.
$(LIBFILE): $(OBJECTS)
	ar -cr $@ $?
	$(RANLIB) $@
|
600
src/libthermo/nnparams.c
Normal file
600
src/libthermo/nnparams.c
Normal file
@ -0,0 +1,600 @@
|
||||
/*
|
||||
* nnparams.cpp
|
||||
* PHunterLib
|
||||
*
|
||||
* Nearest Neighbor Model / Parameters
|
||||
*
|
||||
* Created by Tiayyba Riaz on 7/2/09.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <memory.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include"nnparams.h"
|
||||
|
||||
|
||||
double forbidden_entropy;
|
||||
|
||||
|
||||
/**
 * Starting entropy of a duplex: the constant -5.9 plus the
 * concentration-dependent term nparm->rlogc set by nparam_InitParams().
 *
 * @param nparm  initialised parameter set
 * @return       initial entropy value
 */
double nparam_GetInitialEntropy(PNNParams nparm)
{
	return nparm->rlogc + (-5.9f);
}
|
||||
|
||||
|
||||
//Retrieve Enthalpy for given NN-Pair from parameter table
|
||||
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
|
||||
{
|
||||
return ndH(x0,x1,y0,y1); //xx, yx are already numbers
|
||||
}
|
||||
|
||||
|
||||
//Retrieve Entropy for given NN-Pair from parameter table
|
||||
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
|
||||
{
|
||||
//xx and yx are already numbers
|
||||
char nx0=x0;//nparam_convertNum(x0);
|
||||
char nx1=x1;//nparam_convertNum(x1);
|
||||
char ny0=y0;//nparam_convertNum(y0);
|
||||
char ny1=y1;//nparam_convertNum(y1);
|
||||
double answer = ndS(nx0,nx1,ny0,ny1);
|
||||
/*Salt correction Santalucia*/
|
||||
if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) {
|
||||
if(nx0!=5 && 1<= nx1 && nx1<=4) {
|
||||
answer += 0.5*nparm->kfac;
|
||||
}
|
||||
if(ny1!=5 && 1<= ny0 && ny0<=4) {
|
||||
answer += 0.5*nparm->kfac;
|
||||
}
|
||||
}
|
||||
/*Salt correction Owczarzy*/
|
||||
if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
|
||||
double logk = log(nparm->kplus);
|
||||
answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
|
||||
/* PURPOSE: Return melting temperature TM for given entropy and enthalpy
|
||||
* Assuming a one-state transition and using the formula
|
||||
* TM = dH / (dS + R ln(Ct/4))
|
||||
* entropy = dS + R ln Ct/4 (must already be included!)
|
||||
* enthaklpy = dH
|
||||
* where
|
||||
* dH = enthalpy
|
||||
* dS = entropy
|
||||
* R = Boltzmann factor
|
||||
* Ct = Strand Concentration
|
||||
*
|
||||
* PARAMETERS:
|
||||
* entrypy and enthalpy
|
||||
*
|
||||
* RETURN VALUE:
|
||||
* temperature
|
||||
*/
|
||||
|
||||
double nparam_CalcTM(double entropy,double enthalpy)
|
||||
{
|
||||
double tm = 0; // absolute zero - return if model fails!
|
||||
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
|
||||
return 0;
|
||||
if (entropy<0) // avoid division by zero and model errors!
|
||||
{
|
||||
tm = enthalpy/entropy;// - kfac; //LKFEB
|
||||
if (tm<0)
|
||||
return 0;
|
||||
}
|
||||
return tm;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Initialise a nearest-neighbour parameter set.
 *
 * Stores the strand concentrations, the salt concentration and the salt
 * correction method, derives nparm->rlogc (R * log of the concentration
 * factor) and nparm->kfac (0.368 * log(kplus)), then fills the dH / dS
 * tables accessed through the ndH() / ndS() macros.
 *
 * Base codes used as table indices (per the comments below):
 *   1..4 = A, C, G, T     0 = gap ('-')     5 = terminal ('$')
 *
 * Also sets the global forbidden_entropy to nparm->rlogc.
 *
 * @param nparm  parameter set to initialise
 * @param c1     concentration of the first strand
 * @param c2     concentration of the second strand
 * @param kp     salt concentration (stored in kplus)
 * @param sm     salt correction method (a SALT_METHOD_* value)
 */
void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm)
{
	nparm->Ct1 = c1;
	nparm->Ct2 = c2;
	nparm->kplus = kp;
	int maxCT = 1;
	if(nparm->Ct2 > nparm->Ct1)
	{
		maxCT = 2;
	}
	double ctFactor;
	if(nparm->Ct1 == nparm->Ct2)
	{
		ctFactor = nparm->Ct1/2;
	}
	else if (maxCT == 1)
	{
		ctFactor = nparm->Ct1-nparm->Ct2/2;
	}
	else
	{
		ctFactor = nparm->Ct2-nparm->Ct1/2;
	}
	nparm->rlogc = R * log(ctFactor);
	forbidden_entropy = nparm->rlogc;
	nparm->kfac = 0.368 * log (nparm->kplus);
	nparm->saltMethod = sm;
	int x,y,a,b;  // variables used as counters...

	// Set all parameters to zero!
	memset(nparm->dH,0,sizeof(nparm->dH));
	memset(nparm->dS,0,sizeof(nparm->dS));

	// Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small!
	for (x=1;x<=4;x++)
	{
		for (y=1;y<=4;y++)
		{
			ndH(0,x,y,0)=forbidden_enthalpy;
			ndS(0,x,y,0)=forbidden_entropy;
			ndH(x,0,0,y)=forbidden_enthalpy;
			ndS(x,0,0,y)=forbidden_entropy;
			ndH(x,0,y,0)=forbidden_enthalpy;
			ndS(x,0,y,0)=forbidden_entropy;
			// forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap!
			ndH(x,5,y,0)=forbidden_enthalpy;
			ndS(x,5,y,0)=forbidden_entropy;
			ndH(x,0,y,5)=forbidden_enthalpy;
			ndS(x,0,y,5)=forbidden_entropy;
			ndH(5,x,0,y)=forbidden_enthalpy;
			ndS(5,x,0,y)=forbidden_entropy;
			ndH(0,x,5,y)=forbidden_enthalpy;
			ndS(0,x,5,y)=forbidden_entropy;
			// forbid X$/-Y etc.
			ndH(x,5,0,y)=forbidden_enthalpy;
			ndS(x,5,0,y)=forbidden_entropy;
			ndH(x,0,5,y)=forbidden_enthalpy;
			ndS(x,0,5,y)=forbidden_entropy;
			ndH(5,x,y,0)=forbidden_enthalpy;
			ndS(5,x,y,0)=forbidden_entropy;
			ndH(0,x,y,5)=forbidden_enthalpy;
			ndS(0,x,y,5)=forbidden_entropy;

		}
		// also, forbid x-/-- and --/x-, i.e. no two inner gaps paired
		ndH(x,0,0,0)=forbidden_enthalpy;
		ndS(x,0,0,0)=forbidden_entropy;
		ndH(0,0,x,0)=forbidden_enthalpy;
		ndS(0,0,x,0)=forbidden_entropy;
		// x-/-$
		ndH(x,0,0,5)=forbidden_enthalpy;
		ndS(x,0,0,5)=forbidden_entropy;
		ndH(5,0,0,x)=forbidden_enthalpy;
		ndS(5,0,0,x)=forbidden_entropy;
		ndH(0,5,x,0)=forbidden_enthalpy;
		// The entropy must use the same indices as the enthalpy just above
		// (the original assigned ndS(x,0,0,5) a second time here).
		ndS(0,5,x,0)=forbidden_entropy;
		ndH(0,x,5,0)=forbidden_enthalpy;
		ndS(0,x,5,0)=forbidden_entropy;
	}
	// forbid --/--
	ndH(0,0,0,0)=forbidden_enthalpy;
	ndS(0,0,0,0)=forbidden_entropy;

	ndH(5,0,0,0)=forbidden_enthalpy;
	ndS(5,0,0,0)=forbidden_entropy;
	ndH(0,0,5,0)=forbidden_enthalpy;
	ndS(0,0,5,0)=forbidden_entropy;
	ndH(0,5,5,0)=forbidden_enthalpy;
	ndS(0,5,5,0)=forbidden_entropy;

	// Interior loops (double Mismatches)
#define iloop_entropy -0.97f
#define iloop_enthalpy 0.0f
	for (x=1; x<=4; x++)
		for (y=1; y<=4; y++)
			for (a=1; a<=4; a++)
				for (b=1; b<=4; b++)
					// AT and CG pair, and as A=1, C=2, G=3, T=4 this means
					// we have Watson-Crick pairs if (x+a==5) and (y+b)==5.
					if (!((x+a==5)||(y+b==5)))
					{
						// No watson-crick-pair, i.e. double mismatch!
						// set enthalpy/entropy to loop expansion!
						ndH(x,y,a,b) = iloop_enthalpy;
						ndS(x,y,a,b) = iloop_entropy;
					}

	// xy/-- and --/xy (Bulge Loops of size > 1)
#define bloop_entropy -1.3f
#define bloop_enthalpy 0.0f
	for (x=1; x<=4; x++)
		for (y=1; y<=4; y++)
		{
			ndH(x,y,0,0) = bloop_enthalpy;
			ndS(x,y,0,0) = bloop_entropy;
			ndH(0,0,x,y) = bloop_enthalpy;
			ndS(0,0,x,y) = bloop_entropy;
		}

	// x-/ya and xa/y- as well as -x/ay and ax/-y
	// bulge opening and closing parameters with
	// adjacent matches / mismatches
	// obulge_mism and cbulge_mism chosen so high to avoid
	//     AAAAAAAAA
	//     T--G----T
	// being better than
	//     AAAAAAAAA
	//     TG------T
#define obulge_match_H (-2.66f * 1000)
#define obulge_match_S -14.22f
#define cbulge_match_H (-2.66f * 1000)
#define cbulge_match_S -14.22f
#define obulge_mism_H (0.0f * 1000)
#define obulge_mism_S -6.45f
#define cbulge_mism_H 0.0f
#define cbulge_mism_S -6.45f
	for (x=1; x<=4; x++)
		for (y=1; y<=4; y++)
			for (a=1; a<=4; a++)
			{
				if (x+y==5) // other base pair matches!
				{
					ndH(x,0,y,a)=obulge_match_H; // bulge opening
					ndS(x,0,y,a)=obulge_match_S;
					ndH(x,a,y,0)=obulge_match_H;
					ndS(x,a,y,0)=obulge_match_S;
					ndH(0,x,a,y)=cbulge_match_H; // bulge closing
					ndS(0,x,a,y)=cbulge_match_S;
					ndH(a,x,0,y)=cbulge_match_H;
					ndS(a,x,0,y)=cbulge_match_S;
				}
				else
				{ // mismatch in other base pair!
					ndH(x,0,y,a)=obulge_mism_H; // bulge opening
					ndS(x,0,y,a)=obulge_mism_S;
					ndH(x,a,y,0)=obulge_mism_H;
					ndS(x,a,y,0)=obulge_mism_S;
					ndH(0,x,a,y)=cbulge_mism_H; // bulge closing
					ndS(0,x,a,y)=cbulge_mism_S;
					ndH(a,x,0,y)=cbulge_mism_H;
					ndS(a,x,0,y)=cbulge_mism_S;
				}
			}

	// Watson-Crick pairs (note that only ten are unique, as obviously
	// 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc.
	ndH(1,1,4,4)=-7.6f*1000;  ndS(1,1,4,4)=-21.3f; // AA/TT 04
	ndH(1,2,4,3)=-8.4f*1000;  ndS(1,2,4,3)=-22.4f; // AC/TG adapted GT/CA
	ndH(1,3,4,2)=-7.8f*1000;  ndS(1,3,4,2)=-21.0f; // AG/TC adapted CT/GA
	ndH(1,4,4,1)=-7.2f*1000;  ndS(1,4,4,1)=-20.4f; // AT/TA 04
	ndH(2,1,3,4)=-8.5f*1000;  ndS(2,1,3,4)=-22.7f; // CA/GT 04
	ndH(2,2,3,3)=-8.0f*1000;  ndS(2,2,3,3)=-19.9f; // CC/GG adapted GG/CC
	ndH(2,3,3,2)=-10.6f*1000; ndS(2,3,3,2)=-27.2f; // CG/GC 04
	ndH(2,4,3,1)=-7.8f*1000;  ndS(2,4,3,1)=-21.0f; // CT/GA 04
	ndH(3,1,2,4)=-8.2f*1000;  ndS(3,1,2,4)=-22.2f; // GA/CT 04
	ndH(3,2,2,3)=-9.8f*1000;  ndS(3,2,2,3)=-24.4f; // GC/CG 04
	ndH(3,3,2,2)=-8.0f*1000;  ndS(3,3,2,2)=-19.9f; // GG/CC 04
	ndH(3,4,2,1)=-8.4f*1000;  ndS(3,4,2,1)=-22.4f; // GT/CA 04
	ndH(4,1,1,4)=-7.2f*1000;  ndS(4,1,1,4)=-21.3f; // TA/AT 04
	ndH(4,2,1,3)=-8.2f*1000;  ndS(4,2,1,3)=-22.2f; // TC/AG adapted GA/CT
	ndH(4,3,1,2)=-8.5f*1000;  ndS(4,3,1,2)=-22.7f; // TG/AC adapted CA/GT
	ndH(4,4,1,1)=-7.6f*1000;  ndS(4,4,1,1)=-21.3f; // TT/AA adapted AA/TT

	// A-C Mismatches (Values for pH 7.0)
	ndH(1,1,2,4)=7.6f*1000;  ndS(1,1,2,4)=20.2f; // AA/CT
	ndH(1,1,4,2)=2.3f*1000;  ndS(1,1,4,2)=4.6f;  // AA/TC
	ndH(1,2,2,3)=-0.7f*1000; ndS(1,2,2,3)=-3.8f; // AC/CG
	ndH(1,2,4,1)=5.3f*1000;  ndS(1,2,4,1)=14.6f; // AC/TA
	ndH(1,3,2,2)=0.6f*1000;  ndS(1,3,2,2)=-0.6f; // AG/CC
	ndH(1,4,2,1)=5.3f*1000;  ndS(1,4,2,1)=14.6f; // AT/CA
	ndH(2,1,1,4)=3.4f*1000;  ndS(2,1,1,4)=8.0f;  // CA/AT
	ndH(2,1,3,2)=1.9f*1000;  ndS(2,1,3,2)=3.7f;  // CA/GC
	ndH(2,2,1,3)=5.2f*1000;  ndS(2,2,1,3)=14.2f; // CC/AG
	ndH(2,2,3,1)=0.6f*1000;  ndS(2,2,3,1)=-0.6f; // CC/GA
	ndH(2,3,1,2)=1.9f*1000;  ndS(2,3,1,2)=3.7f;  // CG/AC
	ndH(2,4,1,1)=2.3f*1000;  ndS(2,4,1,1)=4.6f;  // CT/AA
	ndH(3,1,2,2)=5.2f*1000;  ndS(3,1,2,2)=14.2f; // GA/CC
	ndH(3,2,2,1)=-0.7f*1000; ndS(3,2,2,1)=-3.8f; // GC/CA
	ndH(4,1,1,2)=3.4f*1000;  ndS(4,1,1,2)=8.0f;  // TA/AC
	ndH(4,2,1,1)=7.6f*1000;  ndS(4,2,1,1)=20.2f; // TC/AA

	// C-T Mismatches
	ndH(1,2,4,4)=0.7f*1000;  ndS(1,2,4,4)=0.2f;  // AC/TT
	ndH(1,4,4,2)=-1.2f*1000; ndS(1,4,4,2)=-6.2f; // AT/TC
	ndH(2,1,4,4)=1.0f*1000;  ndS(2,1,4,4)=0.7f;  // CA/TT
	ndH(2,2,3,4)=-0.8f*1000; ndS(2,2,3,4)=-4.5f; // CC/GT
	ndH(2,2,4,3)=5.2f*1000;  ndS(2,2,4,3)=13.5f; // CC/TG
	ndH(2,3,4,2)=-1.5f*1000; ndS(2,3,4,2)=-6.1f; // CG/TC
	ndH(2,4,3,2)=-1.5f*1000; ndS(2,4,3,2)=-6.1f; // CT/GC
	ndH(2,4,4,1)=-1.2f*1000; ndS(2,4,4,1)=-6.2f; // CT/TA
	ndH(3,2,2,4)=2.3f*1000;  ndS(3,2,2,4)=5.4f;  // GC/CT
	ndH(3,4,2,2)=5.2f*1000;  ndS(3,4,2,2)=13.5f; // GT/CC
	ndH(4,1,2,4)=1.2f*1000;  ndS(4,1,2,4)=0.7f;  // TA/CT
	ndH(4,2,2,3)=2.3f*1000;  ndS(4,2,2,3)=5.4f;  // TC/CG
	ndH(4,2,1,4)=1.2f*1000;  ndS(4,2,1,4)=0.7f;  // TC/AT
	ndH(4,3,2,2)=-0.8f*1000; ndS(4,3,2,2)=-4.5f; // TG/CC
	ndH(4,4,2,1)=0.7f*1000;  ndS(4,4,2,1)=0.2f;  // TT/CA
	ndH(4,4,1,2)=1.0f*1000;  ndS(4,4,1,2)=0.7f;  // TT/AC

	// G-A Mismatches
	ndH(1,1,3,4)=3.0f*1000;  ndS(1,1,3,4)=7.4f;   // AA/GT
	ndH(1,1,4,3)=-0.6f*1000; ndS(1,1,4,3)=-2.3f;  // AA/TG
	ndH(1,2,3,3)=0.5f*1000;  ndS(1,2,3,3)=3.2f;   // AC/GG
	ndH(1,3,3,2)=-4.0f*1000; ndS(1,3,3,2)=-13.2f; // AG/GC
	ndH(1,3,4,1)=-0.7f*1000; ndS(1,3,4,1)=-2.3f;  // AG/TA
	ndH(1,4,3,1)=-0.7f*1000; ndS(1,4,3,1)=-2.3f;  // AT/GA
	ndH(2,1,3,3)=-0.7f*1000; ndS(2,1,3,3)=-2.3f;  // CA/GG
	ndH(2,3,3,1)=-4.0f*1000; ndS(2,3,3,1)=-13.2f; // CG/GA
	ndH(3,1,1,4)=0.7f*1000;  ndS(3,1,1,4)=0.7f;   // GA/AT
	ndH(3,1,2,3)=-0.6f*1000; ndS(3,1,2,3)=-1.0f;  // GA/CG
	ndH(3,2,1,3)=-0.6f*1000; ndS(3,2,1,3)=-1.0f;  // GC/AG
	ndH(3,3,1,2)=-0.7f*1000; ndS(3,3,1,2)=-2.3f;  // GG/AC
	ndH(3,3,2,1)=0.5f*1000;  ndS(3,3,2,1)=3.2f;   // GG/CA
	ndH(3,4,1,1)=-0.6f*1000; ndS(3,4,1,1)=-2.3f;  // GT/AA
	ndH(4,1,1,3)=0.7f*1000;  ndS(4,1,1,3)=0.7f;   // TA/AG
	ndH(4,3,1,1)=3.0f*1000;  ndS(4,3,1,1)=7.4f;   // TG/AA

	// G-T Mismatches
	ndH(1,3,4,4)=1.0f*1000;  ndS(1,3,4,4)=0.9f;   // AG/TT
	ndH(1,4,4,3)=-2.5f*1000; ndS(1,4,4,3)=-8.3f;  // AT/TG
	ndH(2,3,3,4)=-4.1f*1000; ndS(2,3,3,4)=-11.7f; // CG/GT
	ndH(2,4,3,3)=-2.8f*1000; ndS(2,4,3,3)=-8.0f;  // CT/GG
	ndH(3,1,4,4)=-1.3f*1000; ndS(3,1,4,4)=-5.3f;  // GA/TT
	ndH(3,2,4,3)=-4.4f*1000; ndS(3,2,4,3)=-12.3f; // GC/TG
	ndH(3,3,2,4)=3.3f*1000;  ndS(3,3,2,4)=10.4f;  // GG/CT
	ndH(3,3,4,2)=-2.8f*1000; ndS(3,3,4,2)=-8.0f;  // GG/TC
	// ndH(3,3,4,4)=5.8f*1000; ndS(3,3,4,4)=16.3f; // GG/TT
	ndH(3,4,2,3)=-4.4f*1000; ndS(3,4,2,3)=-12.3f; // GT/CG
	ndH(3,4,4,1)=-2.5f*1000; ndS(3,4,4,1)=-8.3f;  // GT/TA
	// ndH(3,4,4,3)=4.1f*1000; ndS(3,4,4,3)=9.5f; // GT/TG
	ndH(4,1,3,4)=-0.1f*1000; ndS(4,1,3,4)=-1.7f;  // TA/GT
	ndH(4,2,3,3)=3.3f*1000;  ndS(4,2,3,3)=10.4f;  // TC/GG
	ndH(4,3,1,4)=-0.1f*1000; ndS(4,3,1,4)=-1.7f;  // TG/AT
	ndH(4,3,3,2)=-4.1f*1000; ndS(4,3,3,2)=-11.7f; // TG/GC
	// ndH(4,3,3,4)=-1.4f*1000; ndS(4,3,3,4)=-6.2f; // TG/GT
	ndH(4,4,1,3)=-1.3f*1000; ndS(4,4,1,3)=-5.3f;  // TT/AG
	ndH(4,4,3,1)=1.0f*1000;  ndS(4,4,3,1)=0.9f;   // TT/GA
	// ndH(4,4,3,3)=5.8f*1000; ndS(4,4,3,3)=16.3f; // TT/GG

	// A-A Mismatches
	ndH(1,1,1,4)=4.7f*1000;  ndS(1,1,1,4)=12.9f; // AA/AT
	ndH(1,1,4,1)=1.2f*1000;  ndS(1,1,4,1)=1.7f;  // AA/TA
	ndH(1,2,1,3)=-2.9f*1000; ndS(1,2,1,3)=-9.8f; // AC/AG
	ndH(1,3,1,2)=-0.9f*1000; ndS(1,3,1,2)=-4.2f; // AG/AC
	ndH(1,4,1,1)=1.2f*1000;  ndS(1,4,1,1)=1.7f;  // AT/AA
	ndH(2,1,3,1)=-0.9f*1000; ndS(2,1,3,1)=-4.2f; // CA/GA
	ndH(3,1,2,1)=-2.9f*1000; ndS(3,1,2,1)=-9.8f; // GA/CA
	ndH(4,1,1,1)=4.7f*1000;  ndS(4,1,1,1)=12.9f; // TA/AA

	// C-C Mismatches
	ndH(1,2,4,2)=0.0f*1000;  ndS(1,2,4,2)=-4.4f; // AC/TC
	ndH(2,1,2,4)=6.1f*1000;  ndS(2,1,2,4)=16.4f; // CA/CT
	ndH(2,2,2,3)=3.6f*1000;  ndS(2,2,2,3)=8.9f;  // CC/CG
	ndH(2,2,3,2)=-1.5f*1000; ndS(2,2,3,2)=-7.2f; // CC/GC
	ndH(2,3,2,2)=-1.5f*1000; ndS(2,3,2,2)=-7.2f; // CG/CC
	ndH(2,4,2,1)=0.0f*1000;  ndS(2,4,2,1)=-4.4f; // CT/CA
	ndH(3,2,2,2)=3.6f*1000;  ndS(3,2,2,2)=8.9f;  // GC/CC
	ndH(4,2,1,2)=6.1f*1000;  ndS(4,2,1,2)=16.4f; // TC/AC

	// G-G Mismatches
	ndH(1,3,4,3)=-3.1f*1000; ndS(1,3,4,3)=-9.5f;  // AG/TG
	ndH(2,3,3,3)=-4.9f*1000; ndS(2,3,3,3)=-15.3f; // CG/GG
	ndH(3,1,3,4)=1.6f*1000;  ndS(3,1,3,4)=3.6f;   // GA/GT
	ndH(3,2,3,3)=-6.0f*1000; ndS(3,2,3,3)=-15.8f; // GC/GG
	ndH(3,3,2,3)=-6.0f*1000; ndS(3,3,2,3)=-15.8f; // GG/CG
	ndH(3,3,3,2)=-4.9f*1000; ndS(3,3,3,2)=-15.3f; // GG/GC
	ndH(3,4,3,1)=-3.1f*1000; ndS(3,4,3,1)=-9.5f;  // GT/GA
	ndH(4,3,1,3)=1.6f*1000;  ndS(4,3,1,3)=3.6f;   // TG/AG

	// T-T Mismatches
	ndH(1,4,4,4)=-2.7f*1000; ndS(1,4,4,4)=-10.8f; // AT/TT
	ndH(2,4,3,4)=-5.0f*1000; ndS(2,4,3,4)=-15.8f; // CT/GT
	ndH(3,4,2,4)=-2.2f*1000; ndS(3,4,2,4)=-8.4f;  // GT/CT
	ndH(4,1,4,4)=0.2f*1000;  ndS(4,1,4,4)=-1.5f;  // TA/TT
	ndH(4,2,4,3)=-2.2f*1000; ndS(4,2,4,3)=-8.4f;  // TC/TG
	ndH(4,3,4,2)=-5.0f*1000; ndS(4,3,4,2)=-15.8f; // TG/TC
	ndH(4,4,1,4)=0.2f*1000;  ndS(4,4,1,4)=-1.5f;  // TT/AT
	ndH(4,4,4,1)=-2.7f*1000; ndS(4,4,4,1)=-10.8f; // TT/TA

	// Dangling Ends
	ndH(5,1,1,4)=-0.7f*1000; ndS(5,1,1,4)=-0.8f;  // $A/AT
	ndH(5,1,2,4)=4.4f*1000;  ndS(5,1,2,4)=14.9f;  // $A/CT
	ndH(5,1,3,4)=-1.6f*1000; ndS(5,1,3,4)=-3.6f;  // $A/GT
	ndH(5,1,4,4)=2.9f*1000;  ndS(5,1,4,4)=10.4f;  // $A/TT
	ndH(5,2,1,3)=-2.1f*1000; ndS(5,2,1,3)=-3.9f;  // $C/AG
	ndH(5,2,2,3)=-0.2f*1000; ndS(5,2,2,3)=-0.1f;  // $C/CG
	ndH(5,2,3,3)=-3.9f*1000; ndS(5,2,3,3)=-11.2f; // $C/GG
	ndH(5,2,4,3)=-4.4f*1000; ndS(5,2,4,3)=-13.1f; // $C/TG
	ndH(5,3,1,2)=-5.9f*1000; ndS(5,3,1,2)=-16.5f; // $G/AC
	ndH(5,3,2,2)=-2.6f*1000; ndS(5,3,2,2)=-7.4f;  // $G/CC
	ndH(5,3,3,2)=-3.2f*1000; ndS(5,3,3,2)=-10.4f; // $G/GC
	ndH(5,3,4,2)=-5.2f*1000; ndS(5,3,4,2)=-15.0f; // $G/TC
	ndH(5,4,1,1)=-0.5f*1000; ndS(5,4,1,1)=-1.1f;  // $T/AA
	ndH(5,4,2,1)=4.7f*1000;  ndS(5,4,2,1)=14.2f;  // $T/CA
	ndH(5,4,3,1)=-4.1f*1000; ndS(5,4,3,1)=-13.1f; // $T/GA
	ndH(5,4,4,1)=-3.8f*1000; ndS(5,4,4,1)=-12.6f; // $T/TA
	ndH(1,5,4,1)=-2.9f*1000; ndS(1,5,4,1)=-7.6f;  // A$/TA
	ndH(1,5,4,2)=-4.1f*1000; ndS(1,5,4,2)=-13.0f; // A$/TC
	ndH(1,5,4,3)=-4.2f*1000; ndS(1,5,4,3)=-15.0f; // A$/TG
	ndH(1,5,4,4)=-0.2f*1000; ndS(1,5,4,4)=-0.5f;  // A$/TT
	ndH(1,1,5,4)=0.2f*1000;  ndS(1,1,5,4)=2.3f;   // AA/$T
	ndH(1,1,4,5)=-0.5f*1000; ndS(1,1,4,5)=-1.1f;  // AA/T$
	ndH(1,2,5,3)=-6.3f*1000; ndS(1,2,5,3)=-17.1f; // AC/$G
	ndH(1,2,4,5)=4.7f*1000;  ndS(1,2,4,5)=14.2f;  // AC/T$
	ndH(1,3,5,2)=-3.7f*1000; ndS(1,3,5,2)=-10.0f; // AG/$C
	ndH(1,3,4,5)=-4.1f*1000; ndS(1,3,4,5)=-13.1f; // AG/T$
	ndH(1,4,5,1)=-2.9f*1000; ndS(1,4,5,1)=-7.6f;  // AT/$A
	ndH(1,4,4,5)=-3.8f*1000; ndS(1,4,4,5)=-12.6f; // AT/T$
	ndH(2,5,3,1)=-3.7f*1000; ndS(2,5,3,1)=-10.0f; // C$/GA
	ndH(2,5,3,2)=-4.0f*1000; ndS(2,5,3,2)=-11.9f; // C$/GC
	ndH(2,5,3,3)=-3.9f*1000; ndS(2,5,3,3)=-10.9f; // C$/GG
	ndH(2,5,3,4)=-4.9f*1000; ndS(2,5,3,4)=-13.8f; // C$/GT
	ndH(2,1,5,4)=0.6f*1000;  ndS(2,1,5,4)=3.3f;   // CA/$T
	ndH(2,1,3,5)=-5.9f*1000; ndS(2,1,3,5)=-16.5f; // CA/G$
	ndH(2,2,5,3)=-4.4f*1000; ndS(2,2,5,3)=-12.6f; // CC/$G
	ndH(2,2,3,5)=-2.6f*1000; ndS(2,2,3,5)=-7.4f;  // CC/G$
	ndH(2,3,5,2)=-4.0f*1000; ndS(2,3,5,2)=-11.9f; // CG/$C
	ndH(2,3,3,5)=-3.2f*1000; ndS(2,3,3,5)=-10.4f; // CG/G$
	ndH(2,4,5,1)=-4.1f*1000; ndS(2,4,5,1)=-13.0f; // CT/$A
	ndH(2,4,3,5)=-5.2f*1000; ndS(2,4,3,5)=-15.0f; // CT/G$
	ndH(3,5,2,1)=-6.3f*1000; ndS(3,5,2,1)=-17.1f; // G$/CA
	ndH(3,5,2,2)=-4.4f*1000; ndS(3,5,2,2)=-12.6f; // G$/CC
	ndH(3,5,2,3)=-5.1f*1000; ndS(3,5,2,3)=-14.0f; // G$/CG
	ndH(3,5,2,4)=-4.0f*1000; ndS(3,5,2,4)=-10.9f; // G$/CT
	ndH(3,1,5,4)=-1.1f*1000; ndS(3,1,5,4)=-1.6f;  // GA/$T
	ndH(3,1,2,5)=-2.1f*1000; ndS(3,1,2,5)=-3.9f;  // GA/C$
	ndH(3,2,5,3)=-5.1f*1000; ndS(3,2,5,3)=-14.0f; // GC/$G
	ndH(3,2,2,5)=-0.2f*1000; ndS(3,2,2,5)=-0.1f;  // GC/C$
	ndH(3,3,5,2)=-3.9f*1000; ndS(3,3,5,2)=-10.9f; // GG/$C
	ndH(3,3,2,5)=-3.9f*1000; ndS(3,3,2,5)=-11.2f; // GG/C$
	ndH(3,4,5,1)=-4.2f*1000; ndS(3,4,5,1)=-15.0f; // GT/$A
	ndH(3,4,2,5)=-4.4f*1000; ndS(3,4,2,5)=-13.1f; // GT/C$
	ndH(4,5,1,1)=0.2f*1000;  ndS(4,5,1,1)=2.3f;   // T$/AA
	ndH(4,5,1,2)=0.6f*1000;  ndS(4,5,1,2)=3.3f;   // T$/AC
	ndH(4,5,1,3)=-1.1f*1000; ndS(4,5,1,3)=-1.6f;  // T$/AG
	ndH(4,5,1,4)=-6.9f*1000; ndS(4,5,1,4)=-20.0f; // T$/AT
	ndH(4,1,5,4)=-6.9f*1000; ndS(4,1,5,4)=-20.0f; // TA/$T
	ndH(4,1,1,5)=-0.7f*1000; ndS(4,1,1,5)=-0.7f;  // TA/A$
	ndH(4,2,5,3)=-4.0f*1000; ndS(4,2,5,3)=-10.9f; // TC/$G
	ndH(4,2,1,5)=4.4f*1000;  ndS(4,2,1,5)=14.9f;  // TC/A$
	ndH(4,3,5,2)=-4.9f*1000; ndS(4,3,5,2)=-13.8f; // TG/$C
	ndH(4,3,1,5)=-1.6f*1000; ndS(4,3,1,5)=-3.6f;  // TG/A$
	ndH(4,4,5,1)=-0.2f*1000; ndS(4,4,5,1)=-0.5f;  // TT/$A
	ndH(4,4,1,5)=2.9f*1000;  ndS(4,4,1,5)=10.4f;  // TT/A$

	return;
}
|
||||
|
||||
/**
 * Count the upper-case 'G' and 'C' characters of a NUL-terminated sequence.
 * Lower-case letters are not counted.
 *
 * @param seq  NUL-terminated sequence
 * @return     number of 'G' / 'C' characters
 */
int nparam_CountGCContent(char * seq ) {
	const int length = strlen(seq);
	int gc = 0;
	int pos;

	for (pos = 0; pos < length; pos++)
	{
		switch (seq[pos])
		{
			case 'G':
			case 'C':
				gc++;
				break;
			default:
				break;
		}
	}

	return gc;
}
|
||||
|
||||
/**
 * Normalise a nucleotide sequence to upper-case "ACGT" text.
 *
 * Accepted input symbols are the letters a/c/g/t in either case and the
 * byte values 0, 1, 2, 3 (mapped to A, C, G, T respectively). Any other
 * symbol aborts the conversion and leaves an empty string in outseq
 * (outseq[0] == '\0').
 *
 * @param inseq   input sequence
 * @param outseq  output buffer; must hold the converted length plus the
 *                terminating NUL (no bound is checked here)
 * @param len     number of symbols to convert; 0 means strlen(inseq)
 */
void nparam_CleanSeq (char* inseq, char* outseq, int len)
{
	int total = strlen (inseq);
	int pos = 0;
	int valid = 1;

	if (len != 0)
		total = len;

	while (valid && pos < total)
	{
		char base = 0;

		switch (inseq[pos])
		{
			case 'a': case 'A': case '\0': base = 'A'; break;
			case 'c': case 'C': case '\1': base = 'C'; break;
			case 'g': case 'G': case '\2': base = 'G'; break;
			case 't': case 'T': case '\3': base = 'T'; break;
			default:                                   break;
		}

		if (base)
			outseq[pos] = base;
		else
		{
			/* Unknown symbol: blank the result and stop. */
			outseq[0] = 0;
			valid = 0;
		}

		pos++;
	}

	outseq[pos] = '\0';
}
|
||||
|
||||
//Calculate TM for given sequence against its complement
|
||||
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
|
||||
{
|
||||
double thedH = 0;
|
||||
//double thedS = nparam_GetInitialEntropy(nparm);
|
||||
double thedS = -5.9f+nparm->rlogc;
|
||||
double mtemp;
|
||||
char c1;
|
||||
char c2;
|
||||
char c3;
|
||||
char c4;
|
||||
unsigned int i;
|
||||
char nseq[50];
|
||||
char *useq = seq;
|
||||
|
||||
nparam_CleanSeq (seq, nseq, len);
|
||||
useq = nseq;
|
||||
|
||||
for ( i=1;i<len;i++)
|
||||
{
|
||||
c1 = GETREVCODE(useq[i-1]); //nparam_getComplement(seq[i-1],1);
|
||||
c2 = GETREVCODE(useq[i]); //nparam_getComplement(seq[i],1);
|
||||
c3 = GETNUMCODE(useq[i-1]);
|
||||
c4 = GETNUMCODE(useq[i]);
|
||||
|
||||
|
||||
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
|
||||
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
|
||||
}
|
||||
//printf("------------------\n");
|
||||
mtemp = nparam_CalcTM(thedS,thedH);
|
||||
//fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s rloc=%f\n", thedH, thedS, useq, nparm->rlogc);
|
||||
//exit (0);
|
||||
return mtemp;
|
||||
}
|
||||
|
||||
/*
 * Calculate the TM of seq1 hybridised against the complement of seq2.
 *
 * Both sequences are normalised with nparam_CleanSeq(). For every pair of
 * adjacent positions the numeric codes come from seq1 and the complement
 * codes from seq2; the summed enthalpy and entropy are passed to
 * nparam_CalcTM().
 *
 * NOTE(review): both normalised copies live in 50-byte buffers, so len is
 * presumably expected to stay below 50 -- confirm against callers.
 */
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
{
    char first[50];
    char second[50];
    char rev0, rev1;          /* complement codes taken from seq2 */
    char fwd0, fwd1;          /* numeric codes taken from seq1    */
    double enthalpy = 0;
    double entropy = -5.9f+nparm->rlogc;
    unsigned int pos = 1;

    nparam_CleanSeq (seq1, first, len);
    nparam_CleanSeq (seq2, second, len);

    while (pos < len)
    {
        rev0 = GETREVCODE(second[pos-1]);
        rev1 = GETREVCODE(second[pos]);
        fwd0 = GETNUMCODE(first[pos-1]);
        fwd1 = GETNUMCODE(first[pos]);

        enthalpy += nparm->dH[fwd0][fwd1][rev0][rev1];
        entropy += nparam_GetEntropy(nparm, fwd0,fwd1,rev0,rev1);

        pos++;
    }

    return nparam_CalcTM(entropy,enthalpy);
}
|
||||
|
||||
/*
 * Estimate a melting temperature from GC count and length only:
 *     64.9 + 41 * (GC - 16.4) / length
 * where GC is the value returned by nparam_CountGCContent() and length is
 * strlen(seq).
 */
double calculateMeltingTemperatureBasic (char * seq) {
    int seqlen = strlen (seq);
    int gccount = nparam_CountGCContent (seq);

    return 64.9 + 41*(gccount - 16.4)/seqlen;
}
|
72
src/libthermo/nnparams.h
Normal file
72
src/libthermo/nnparams.h
Normal file
@ -0,0 +1,72 @@
|
||||
/*
 * nnparams.h
 * PHunterLib
 *
 * Nearest Neighbor Model Parameters
 *
 * Created by Tiayyba Riaz on 02/07/09.
 *
 */

#ifndef NNPARAMS_H_
#define NNPARAMS_H_

#include <math.h>
#include <string.h>
//#include "../libecoprimer/ecoprimer.h"

// following defines to simplify coding...
// (ndH/ndS expect a variable named `nparm` to be in scope)
#define ndH(a,b,c,d) nparm->dH[a][b][c][d]
#define ndS(a,b,c,d) nparm->dS[a][b][c][d]
#define forbidden_enthalpy 1000000000000000000.0f
#define R 1.987f
#define SALT_METHOD_SANTALUCIA 1
#define SALT_METHOD_OWCZARZY 2

#define DEF_CONC_PRIMERS 0.0000008
#define DEF_CONC_SEQUENCES 0
#define DEF_SALT 0.05

// Numeric code of an upper-case base letter (A=1, C=2, G=3, T=4, others 0).
// The argument and the whole expansion are parenthesized so the macros are
// safe inside larger expressions and with non-trivial arguments.
#define GETNUMCODE(a) (bpencoder[(a) - 'A'])
// Code of the complementary base: 5 - code, i.e. A<->T and C<->G.
#define GETREVCODE(a) (5 - bpencoder[(a) - 'A'])


extern double forbidden_entropy;

// Lookup table indexed by (letter - 'A') for the 26 upper-case letters.
static char bpencoder[] = { 1,                       // A
                            0,                       // b
                            2,                       // C
                            0,0,0,                   // d, e, f
                            3,                       // G
                            0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
                            4,0,                     // T,U
                            0,0,0,0,0};              // v,w,x,y,z


typedef struct CNNParams_st
{
    double Ct1;
    double Ct2;
    double rlogc;
    double kplus;
    double kfac;
    int saltMethod;
    double gcContent;
    double new_TM;
    double dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
    double dS[6][6][6][6];
}CNNParams, * PNNParams;

void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm);
int nparam_CountGCContent(char * seq );
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_CalcTM(double entropy,double enthalpy);
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len);

double nparam_GetInitialEntropy(PNNParams nparm) ;
double calculateMeltingTemperatureBasic (char * seq);
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);

#endif
|
115
src/libthermo/thermostats.c
Normal file
115
src/libthermo/thermostats.c
Normal file
@ -0,0 +1,115 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#include "thermostats.h"
|
||||
|
||||
/*
 * Pack `length` bases of `sequence`, starting at offset `begin`, into a
 * word_t using two bits per base.
 *
 * strand != 0 : bases are read left to right and OR-ed in unchanged.
 * strand == 0 : bases are read right to left, starting from the last base
 *               of the window, and each one is bitwise-complemented and
 *               masked to two bits.
 *
 * NOTE(review): the forward branch applies no mask, so the sequence is
 * presumably already stored as 0..3 codes -- confirm against callers.
 */
word_t extractSite(char* sequence, size_t begin, size_t length, bool_t strand)
{
    word_t site = 0;
    char *cursor = sequence + begin;
    uint32_t done;

    if (!strand)
        cursor += length - 1;

    for (done = 0; done < length; done++)
    {
        if (strand)
        {
            site = (site << 2) | (*cursor);
            cursor += 1;
        }
        else
        {
            site = (site << 2) | ((~*cursor) & 3);
            cursor += -1;
        }
    }

    return site;
}
|
||||
|
||||
/*
 * Fill the melting-temperature fields of every primer pair.
 *
 * For each pair:
 *   - p1temp / p2temp    : nparam_CalcSelfTM() of each primer, minus 273.0;
 *   - p1mintemp / p2mintemp : lowest nparam_CalcTwoTM() value (minus 273.0)
 *     of each primer against its site on the amplifias whose sequence is
 *     flagged `isexample`; both start at 100.
 * When w2 < w1 the values of primer 1 and primer 2 are exchanged.
 *
 * NOTE(review): prmrd/prmrr are 50-byte buffers, so options->primer_length
 * is presumably expected to stay below 50 -- confirm.
 */
void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options)
{
    size_t i, j;
    uint32_t bp1,bp2;
    word_t w1;
    word_t w2;
    bool_t strand;

    char *sq;
    char prmrd[50];
    char prmrr[50];
    double mtemp;

    for (i = 0; i < count; i++)
    {
        w1 = pairs[i]->p1->word;
        w2 = pairs[i]->p2->word;

        /* Use the complemented word when the primer is not used as direct. */
        if (!pairs[i]->asdirect1)
            w1=ecoComplementWord(w1,options->primer_length);

        if (!pairs[i]->asdirect2)
            w2=ecoComplementWord(w2,options->primer_length);

        /* Copy the text of both primers out of ecoUnhashWord()'s result
         * before it is called again. */
        strncpy(prmrd,ecoUnhashWord(w1, options->primer_length),options->primer_length);
        strncpy(prmrr,ecoUnhashWord(w2, options->primer_length),options->primer_length);
        prmrd[options->primer_length]=0;
        prmrr[options->primer_length]=0;
        pairs[i]->p1temp = nparam_CalcSelfTM (options->pnparm, prmrd, options->primer_length) - 273.0;
        pairs[i]->p2temp = nparam_CalcSelfTM (options->pnparm, prmrr, options->primer_length) - 273.0;
        pairs[i]->p1mintemp = 100;
        pairs[i]->p2mintemp = 100;

        for (j = 0; j < pairs[i]->pcr.ampcount; j++)
            if (pairs[i]->pcr.amplifias[j].sequence->isexample)
            {

                sq = pairs[i]->pcr.amplifias[j].sequence->SQ;
                strand = pairs[i]->pcr.amplifias[j].strand;
                /* Site offsets: just before the amplifia and just after it. */
                bp1 = pairs[i]->pcr.amplifias[j].begin - options->primer_length;
                bp2 = pairs[i]->pcr.amplifias[j].end + 1;

                /* On the other strand the two sites exchange their roles. */
                if (!strand)
                {
                    uint32_t tmp;
                    tmp=bp1;
                    bp1=bp2;
                    bp2=tmp;
                }

                mtemp = nparam_CalcTwoTM(options->pnparm,
                                         prmrd,
                                         ecoUnhashWord(extractSite(sq,bp1,options->primer_length,strand),options->primer_length),
                                         options->primer_length) - 273.0;
                if (mtemp < pairs[i]->p1mintemp)
                    pairs[i]->p1mintemp = mtemp;

                mtemp = nparam_CalcTwoTM(options->pnparm,
                                         prmrr,
                                         ecoUnhashWord(extractSite(sq,bp2,options->primer_length,!strand),options->primer_length),
                                         options->primer_length) - 273.0;
                if (mtemp < pairs[i]->p2mintemp)
                    pairs[i]->p2mintemp = mtemp;
            }

        /* Exchange the results of primer 1 and primer 2 when w2 < w1. */
        if (w2 < w1)
        {
            mtemp = pairs[i]->p1temp;
            pairs[i]->p1temp = pairs[i]->p2temp;
            pairs[i]->p2temp = mtemp;

            mtemp = pairs[i]->p1mintemp;
            pairs[i]->p1mintemp = pairs[i]->p2mintemp;
            pairs[i]->p2mintemp = mtemp;
        }

    }
}
|
9
src/libthermo/thermostats.h
Normal file
9
src/libthermo/thermostats.h
Normal file
@ -0,0 +1,9 @@
|
||||
#ifndef THERMOSTATS_H_
|
||||
#define THERMOSTATS_H_
|
||||
|
||||
#include "../libecoprimer/ecoprimer.h"
|
||||
|
||||
void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
|
||||
word_t extractSite(char* sequence, size_t begin, size_t length, bool_t strand);
|
||||
|
||||
#endif
|
651
tools/ecoPCRFormat.py
Executable file
651
tools/ecoPCRFormat.py
Executable file
@ -0,0 +1,651 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import re
|
||||
import gzip
|
||||
import struct
|
||||
import sys
|
||||
import time
|
||||
import getopt
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
_dbenable=True
|
||||
except ImportError:
|
||||
_dbenable=False
|
||||
|
||||
#####
|
||||
#
|
||||
#
|
||||
# Generic file function
|
||||
#
|
||||
#
|
||||
#####
|
||||
|
||||
def universalOpen(file):
    """Return a readable object for *file*.

    A ``str`` is treated as a path: names ending in ``.gz`` are opened with
    ``gzip.open``, any other name with ``open``. Anything that is not a
    ``str`` is returned unchanged.
    """
    if not isinstance(file, str):
        return file
    if file.endswith('.gz'):
        return gzip.open(file)
    return open(file)
|
||||
|
||||
def universalTell(file):
    """Return the current position in *file*.

    For a ``gzip.GzipFile`` the position is taken from its ``myfileobj``
    attribute rather than from the GzipFile object itself.
    """
    target = file.myfileobj if isinstance(file, gzip.GzipFile) else file
    return target.tell()
|
||||
|
||||
def fileSize(file):
    """Return the size of *file* and leave its position untouched.

    The size is found by seeking to the end; the original position is then
    restored. For a ``gzip.GzipFile`` the measurement is done on its
    ``myfileobj`` attribute.
    """
    handle = file
    if isinstance(handle, gzip.GzipFile):
        handle = handle.myfileobj
    saved = handle.tell()
    handle.seek(0, 2)          # whence=2: relative to the end of the file
    size = handle.tell()
    handle.seek(saved, 0)      # whence=0: absolute position
    return size
|
||||
|
||||
def progressBar(pos,max,reset=False,delta=[]):
    """Write a one-line progress bar to ``sys.stderr``.

    pos   -- current position.
    max   -- position corresponding to 100 %.
    reset -- when true, restart the timer used for the remaining-time
             estimate.
    delta -- internal state; the mutable default is intentional, it keeps
             the start time and the last call time between calls.

    When no progress has been made yet (``pos`` or ``max`` is 0) the
    remaining time cannot be estimated and is shown as ``--:--:--`` instead
    of raising ZeroDivisionError.
    """
    if reset:
        del delta[:]
    if not delta:
        delta.append(time.time())
        delta.append(time.time())

    delta[1]=time.time()
    elapsed = delta[1]-delta[0]

    if max:
        percent = float(pos)/max * 100
    else:
        percent = 0.0

    if percent > 0:
        remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
    else:
        remain = '--:--:--'

    bar = '#' * int(percent/2)
    bar+= '|/-\\-'[pos % 5]     # spinner character, changes with pos
    bar+= ' ' * (50 - int(percent/2))
    sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
|
||||
|
||||
#####
|
||||
#
|
||||
#
|
||||
# NCBI Dump Taxonomy reader
|
||||
#
|
||||
#
|
||||
#####
|
||||
|
||||
def endLessIterator(endedlist):
    """Yield every item of *endedlist*, then its last item forever.

    *endedlist* must support ``[-1]`` indexing; with an empty sequence an
    IndexError is raised on the first request.
    """
    for item in endedlist:
        yield item
    while True:
        yield endedlist[-1]
|
||||
|
||||
class ColumnFile(object):
    """Iterate over the lines of a delimited text stream as lists of fields.

    stream -- a file name (``str``) or a line iterator.
    sep    -- delimiter handed to ``str.split`` (``None`` = any whitespace).
    strip  -- strip the whitespace around every field.
    types  -- optional sequence of converters applied to the fields in
              order; the last one is reused for any extra field and
              ``bool`` is replaced by :meth:`str2bool`. Fields are always
              stripped before conversion.

    Works as an iterator under both the Python 2 (``next``) and Python 3
    (``__next__``) protocols.
    """

    # Letters accepted by str2bool in addition to decimal digits.
    _BOOL_FLAGS = {'T':True,'V':True,'F':False}

    def __init__(self,stream,sep=None,strip=True,types=None):
        if isinstance(stream,str):
            self._stream = open(stream)
        elif hasattr(stream,'next') or hasattr(stream,'__next__'):
            self._stream = stream
        else:
            raise ValueError('stream must be string or an iterator')
        self._delimiter=sep
        self._strip=strip
        if types:
            self._types=[]
            for t in types:
                if t is bool:
                    t = ColumnFile.str2bool
                self._types.append(t)
        else:
            self._types=None

    def str2bool(x):
        """Convert a flag string to bool using its first non-blank character.

        'T' and 'V' (any case) are true, 'F' is false, a decimal digit is
        true unless it is '0'. Any other character raises ValueError. This
        replaces an ``eval`` of the character with an explicit lookup.
        """
        flag = x.strip()[0].upper()
        if flag in '0123456789':
            return flag != '0'
        try:
            return ColumnFile._BOOL_FLAGS[flag]
        except KeyError:
            raise ValueError('cannot convert %r to a boolean' % x)

    str2bool = staticmethod(str2bool)


    def __iter__(self):
        return self

    def next(self):
        """Return the fields of the next line, converted when types are set."""
        ligne = next(self._stream)
        data = ligne.split(self._delimiter)
        if self._strip or self._types:
            data = [x.strip() for x in data]
        if self._types:
            # Fields past the end of the type list reuse the last converter.
            last = len(self._types) - 1
            data = [self._types[min(i,last)](x) for i,x in enumerate(data)]
        return data

    __next__ = next
|
||||
|
||||
def taxonCmp(t1,t2):
    """Three-way comparison of two taxa on their first item.

    Returns -1, 0 or 1, as expected from a ``cmp``-style function.
    """
    left = t1[0]
    right = t2[0]
    return int(left > right) - int(left < right)
|
||||
|
||||
def bsearchTaxon(taxonomy,taxid):
    """Binary-search *taxonomy* for the entry whose first item is *taxid*.

    taxonomy -- sequence of entries sorted on their first item.
    taxid    -- identifier to look for.

    Returns the index of the matching entry, or ``None`` when there is none
    (including when *taxonomy* is empty). Integer division is used
    explicitly so the index never becomes a float.
    """
    low = 0
    high = len(taxonomy)
    while low < high:
        middle = (low + high) // 2
        current = taxonomy[middle][0]
        if current == taxid:
            return middle
        if current < taxid:
            low = middle + 1
        else:
            high = middle
    return None
|
||||
|
||||
|
||||
|
||||
def readNodeTable(file):
    """Load a '|'-delimited node table.

    Returns ``(taxonomy, ranks, index)``:

    taxonomy -- list of ``[taxid, rank number, parent index]`` sorted by
                taxid (built from columns 0, 2 and 1 of each line);
    ranks    -- dict mapping each rank name to its position in the sorted
                list of rank names;
    index    -- dict mapping each taxid to its position in *taxonomy*.
    """
    stream = universalOpen(file)
    column_types = (int,int,str,
                    str,str,bool,
                    int,bool,int,
                    bool,bool,bool,str)
    nodes = ColumnFile(stream, sep='|', types=column_types)

    sys.stderr.write("Reading taxonomy dump file...\n")
    taxonomy = []
    for n in nodes:
        taxonomy.append([n[0],n[2],n[1]])

    sys.stderr.write("List all taxonomy rank...\n")
    rank_names = sorted(set(x[1] for x in taxonomy))
    ranks = dict((name,position) for position,name in enumerate(rank_names))

    sys.stderr.write("Sorting taxons...\n")
    taxonomy.sort(key=lambda taxon: taxon[0])

    sys.stderr.write("Indexing taxonomy...\n")
    index = {}
    for taxon in taxonomy:
        index[taxon[0]] = bsearchTaxon(taxonomy, taxon[0])

    sys.stderr.write("Indexing parent and rank...\n")
    for taxon in taxonomy:
        # Replace the rank name and the parent taxid by their numbers.
        taxon[1] = ranks[taxon[1]]
        taxon[2] = index[taxon[2]]

    return taxonomy,ranks,index
|
||||
|
||||
def nameIterator(file):
    """Yield ``(taxid, name, classname)`` for each line of a names table.

    Lines are '|'-delimited and must split into exactly five fields; the
    third (unique name) and the fifth are dropped.
    """
    rows = ColumnFile(universalOpen(file),
                      sep='|',
                      types=(int,str,
                             str,str))
    for row in rows:
        taxid,name,unique,classname,white = row
        yield taxid,name,classname
|
||||
|
||||
def mergedNodeIterator(file):
    """Yield ``(taxid, current)`` for each line of a merged-nodes table.

    Lines are '|'-delimited and must split into exactly three fields; the
    last one is dropped.
    """
    rows = ColumnFile(universalOpen(file),
                      sep='|',
                      types=(int,int,str))
    for row in rows:
        taxid,current,white = row
        yield taxid,current
|
||||
|
||||
def deletedNodeIterator(file):
    """Yield the taxid found on each line of a deleted-nodes table.

    Lines are '|'-delimited and must split into exactly two fields; the
    second one is dropped.
    """
    rows = ColumnFile(universalOpen(file),
                      sep='|',
                      types=(int,str))
    for row in rows:
        taxid,white = row
        yield taxid
|
||||
|
||||
def readTaxonomyDump(taxdir):
    """Read a taxonomy from the dump files found in directory *taxdir*.

    Uses ``nodes.dmp``, ``names.dmp``, ``merged.dmp`` and ``delnodes.dmp``.
    Returns ``(taxonomy, ranks, alternativeName, index)`` where

    - each taxonomy entry gets its scientific name appended,
    - alternativeName is a list of ``(name, classname, taxon index)``,
    - index also maps merged taxids to the index of their current taxon
      and deleted taxids to ``None``.
    """
    taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)

    sys.stderr.write("Adding scientific name...\n")
    alternativeName = []
    for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
        position = index[taxid]
        alternativeName.append((name,classname,position))
        if classname == 'scientific name':
            taxonomy[position].append(name)

    sys.stderr.write("Adding taxid alias...\n")
    for old,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
        index[old] = index[current]

    sys.stderr.write("Adding deleted taxid...\n")
    for removed in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
        index[removed] = None

    return taxonomy,ranks,alternativeName,index
|
||||
|
||||
def readTaxonomyDB(dbname):
    """Read a taxonomy from the database *dbname* through psycopg2.

    Queries the ``ncbi_taxonomy`` tables ``taxon``, ``taxon_rank_class``,
    ``name`` and ``taxon_id_alias`` and returns
    ``(taxonomy, ranks, alternativeName, index)`` with the same layout as
    ``readTaxonomyDump``.
    """
    connection = psycopg2.connect(database=dbname)
    cursor = connection.cursor()

    cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
    taxonomy = [list(row) for row in cursor]

    cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
    rank_rows = cursor.fetchall()
    ranks = dict((row[0],position) for position,row in enumerate(rank_rows))

    sys.stderr.write("Sorting taxons...\n")
    taxonomy.sort(key=lambda taxon: taxon[0])

    sys.stderr.write("Indexing taxonomy...\n")
    index = {}
    for taxon in taxonomy:
        index[taxon[0]] = bsearchTaxon(taxonomy, taxon[0])

    sys.stderr.write("Indexing parent and rank...\n")
    for taxon in taxonomy:
        taxon[1] = ranks[taxon[1]]
        try:
            taxon[2] = index[taxon[2]]
        except KeyError:
            # Only taxid 1 may have no parent: it becomes its own parent.
            if taxon[2] is None and taxon[0]==1:
                taxon[2] = index[taxon[0]]
            else:
                raise

    cursor.execute("select taxid,name,category from ncbi_taxonomy.name")

    alternativeName = []
    for taxid,name,classname in cursor:
        position = index[taxid]
        alternativeName.append((name,classname,position))
        if classname == 'scientific name':
            taxonomy[position].append(name)

    cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")

    sys.stderr.write("Adding taxid alias...\n")
    for taxid,current in cursor:
        if current is None:
            index[taxid] = None
        else:
            index[taxid] = index[current]

    return taxonomy,ranks,alternativeName,index
|
||||
|
||||
#####
|
||||
#
|
||||
#
|
||||
# Genbank/EMBL sequence reader
|
||||
#
|
||||
#
|
||||
#####
|
||||
|
||||
def entryIterator(file):
    """Yield flat-file entries, each one ending with a ``//`` line.

    Lines are accumulated until a line equal to ``'//\\n'`` is met; the
    accumulated text, terminator included, is then yielded. Lines after
    the last terminator are not yielded.
    """
    stream = universalOpen(file)
    pending = []
    for ligne in stream:
        pending.append(ligne)
        if ligne != '//\n':
            continue
        yield ''.join(pending)
        pending = []
|
||||
|
||||
def fastaEntryIterator(file):
    """Yield the text of each fasta entry.

    A new entry starts at every line beginning with ``>``; the entry still
    being accumulated at the end of the input is yielded as well.
    """
    stream = universalOpen(file)
    pending = []
    for ligne in stream:
        starts_entry = ligne[0] == '>'
        if starts_entry and pending:
            yield ''.join(pending)
            pending = []
        pending.append(ligne)
    if pending:
        yield ''.join(pending)
|
||||
|
||||
# Runs of spaces, newlines and decimal digits.
_cleanSeq = re.compile('[ \n0-9]+')

def cleanSeq(seq):
    """Return *seq* with every space, newline and digit removed."""
    cleaned = _cleanSeq.sub('', seq)
    return cleaned
|
||||
|
||||
|
||||
# Identifier following "LOCUS" and seven spaces at the start of a line.
_gbParseID = re.compile(r'(?<=^LOCUS {7})[^ ]+(?= )',re.MULTILINE)
# Text following "DEFINITION" and two spaces, up to a line ending in '.'.
_gbParseDE = re.compile(r'(?<=^DEFINITION {2}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)
# Everything between "ORIGIN" and the closing "//" line.
_gbParseSQ = re.compile('(?<=^ORIGIN).+?(?=^//$)',re.MULTILINE+re.DOTALL)
# Digits of a /db_xref="taxon:..." qualifier.
_gbParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')

def genbankEntryParser(entry):
    """Parse the text of one GenBank entry.

    Returns a dict with the keys ``id``, ``taxid`` (``None`` when the entry
    has no taxon cross-reference), ``definition`` (whitespace collapsed to
    single spaces) and ``sequence`` (upper case, cleaned by ``cleanSeq``).
    Raises IndexError when the identifier, definition or sequence is
    missing.
    """
    identifier = _gbParseID.findall(entry)[0]
    definition = ' '.join(_gbParseDE.findall(entry)[0].split())
    sequence = cleanSeq(_gbParseSQ.findall(entry)[0].upper())

    taxons = _gbParseTX.findall(entry)
    if taxons:
        taxid = int(taxons[0])
    else:
        taxid = None

    return {'id':identifier,'taxid':taxid,'definition':definition,'sequence':sequence}
|
||||
|
||||
######################
|
||||
|
||||
# A line break, together with the "DE" tag opening the continuation line.
# The former character class '[\nDE]' also deleted every 'D' and 'E' found
# inside the definition text itself.
_cleanDef = re.compile(r'\n(?:DE\b)?')

def cleanDef(definition):
    """Remove the line breaks and ``DE`` line tags from an EMBL definition.

    The spaces that followed each tag are kept, so the words of successive
    lines stay separated; the letters of the definition are left intact.
    """
    return _cleanDef.sub('',definition)
|
||||
|
||||
# Identifier following "ID" and three spaces, up to the first ';'.
_emblParseID = re.compile('(?<=^ID {3})[^ ]+(?=;)',re.MULTILINE)
# Text following "DE" and three spaces, up to a line ending in '.'.
_emblParseDE = re.compile(r'(?<=^DE {3}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)
# Sequence lines (starting with a space) up to the closing "//" line.
_emblParseSQ = re.compile('(?<=^ ).+?(?=^//$)',re.MULTILINE+re.DOTALL)
# Digits of a /db_xref="taxon:..." qualifier.
_emblParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')

def emblEntryParser(entry):
    """Parse the text of one EMBL entry.

    Returns a dict with the keys ``id``, ``taxid`` (``None`` when the entry
    has no taxon cross-reference), ``definition`` (passed through
    ``cleanDef``, whitespace collapsed) and ``sequence`` (upper case,
    cleaned by ``cleanSeq``). Raises IndexError when the identifier,
    definition or sequence is missing.
    """
    identifier = _emblParseID.findall(entry)[0]
    raw_definition = cleanDef(_emblParseDE.findall(entry)[0])
    definition = ' '.join(raw_definition.split())
    sequence = cleanSeq(_emblParseSQ.findall(entry)[0].upper())

    taxons = _emblParseTX.findall(entry)
    if taxons:
        taxid = int(taxons[0])
    else:
        taxid = None

    return {'id':identifier,'taxid':taxid,'definition':definition,'sequence':sequence}
|
||||
|
||||
|
||||
######################
|
||||
|
||||
# Field separator of the title line: ';' and any non-word characters after it.
_fastaSplit=re.compile(r';\W*')

def parseFasta(seq):
    """Split the text of one fasta entry into its parts.

    Returns ``(id, sequence, definition, info)``:

    id         -- first word of the title line, without the leading '>';
    sequence   -- the remaining lines joined together, in upper case;
    definition -- the title fields that contain no '=', joined by spaces;
    info       -- dict built from the ``key=value`` title fields.
    """
    lines = seq.split('\n')
    header = lines[0].strip()[1:].split(None,1)
    identifier = header[0]

    fields = []
    if len(header) == 2:
        fields = _fastaSplit.split(header[1])

    info = {}
    free_text = []
    for field in fields:
        if '=' in field:
            key,value = field.split('=',1)
            info[key] = value
        else:
            free_text.append(field)

    definition = ' '.join(free_text)
    sequence = ''.join([line.strip() for line in lines[1:]]).upper()
    return identifier,sequence,definition,info
|
||||
|
||||
|
||||
def fastaEntryParser(entry):
    """Parse the text of one fasta entry.

    Returns a dict with the keys ``id``, ``taxid``, ``definition`` and
    ``sequence``. The taxid comes from a ``taxid=...`` field of the title
    line and is ``None`` when that field is absent.
    """
    identifier,sequence,definition,info = parseFasta(entry)
    taxid = info.get('taxid',None)
    if taxid is not None:
        taxid = int(taxid)
    return {'id':identifier,'taxid':taxid,'definition':definition,'sequence':sequence}
|
||||
|
||||
|
||||
def sequenceIteratorFactory(entryParser,entryIterator):
    """Combine an entry iterator and an entry parser.

    Returns a generator function taking a file: it yields
    ``entryParser(entry)`` for each entry produced by
    ``entryIterator(file)``.
    """
    def sequenceIterator(file):
        for raw_entry in entryIterator(file):
            parsed = entryParser(raw_entry)
            yield parsed
    return sequenceIterator
|
||||
|
||||
|
||||
def taxonomyInfo(entry,connection):
    """Add taxonomic information to *entry* from a database connection.

    Runs one query for ``entry['taxid']`` and stores the eight columns of
    the first row under the keys ``current_taxid``, ``species``, ``genus``,
    ``family``, ``scientific_name``, ``species_sn``, ``genus_sn`` and
    ``family_sn``. The updated *entry* is returned.
    """
    query = """
select taxid,species,genus,family,
taxonomy.scientificName(taxid) as sn,
taxonomy.scientificName(species) as species_sn,
taxonomy.scientificName(genus) as genus_sn,
taxonomy.scientificName(family) as family_sn
from
(
select alias as taxid,
taxonomy.getSpecies(alias) as species,
taxonomy.getGenus(alias) as genus,
taxonomy.getFamily(alias) as family
from taxonomy.aliases
where id=%d ) as tax
""" % entry['taxid']

    curseur = connection.cursor()
    curseur.execute(query)
    row = curseur.fetchone()

    # Keys in the order of the selected columns.
    keys = ('current_taxid','species','genus','family',
            'scientific_name','species_sn','genus_sn','family_sn')
    for position in range(len(keys)):
        entry[keys[position]] = row[position]
    return entry
|
||||
|
||||
#####
|
||||
#
|
||||
#
|
||||
# Binary writer
|
||||
#
|
||||
#
|
||||
#####
|
||||
|
||||
def ecoSeqPacker(sq):
    """Pack one sequence record into its big-endian binary form.

    *sq* is a dict with the keys ``taxid``, ``id``, ``definition`` and
    ``sequence``. Layout: record size (excluding this field), taxid, id in
    a 20-byte field, definition length, sequence length, compressed
    sequence length, definition, sequence compressed with zlib level 9.
    """
    compressed = gzip.zlib.compress(sq['sequence'],9)
    compressed_size = len(compressed)
    definition_size = len(sq['definition'])

    # 4 (taxid) + 20 (id) + 3 * 4 (lengths) + the two variable parts.
    totalSize = 4 + 20 + 4 + 4 + 4 + compressed_size + definition_size

    layout = '> I I 20s I I I %ds %ds' % (definition_size,compressed_size)
    fields = (totalSize,
              sq['taxid'],
              sq['id'],
              definition_size,
              len(sq['sequence']),
              compressed_size,
              sq['definition'],
              compressed)
    packed = struct.pack(layout,*fields)

    assert len(packed) == totalSize+4, "error in sequence packing"

    return packed
|
||||
|
||||
def ecoTaxPacker(tx):
    """Pack one taxon into its big-endian binary form.

    Layout: record size (excluding this field), ``tx[0]``, ``tx[1]``,
    ``tx[2]``, name length, then the name ``tx[3]``.
    """
    name = tx[3]
    name_size = len(name)
    totalSize = 4 + 4 + 4 + 4 + name_size

    layout = '> I I I I I %ds' % name_size
    return struct.pack(layout,
                       totalSize,
                       tx[0],
                       tx[1],
                       tx[2],
                       name_size,
                       name)
|
||||
|
||||
def ecoRankPacker(rank):
    """Pack a rank name as a big-endian length followed by the name."""
    size = len(rank)
    layout = '> I %ds' % size
    return struct.pack(layout, size, rank)
|
||||
|
||||
def ecoNamePacker(name):
    """Pack one ``(name, classname, taxon index)`` tuple, big-endian.

    Layout: record size (excluding this field), a flag equal to 1 when the
    class name is ``'scientific name'``, name length, class name length,
    taxon index, name, class name.
    """
    text,classname,taxon = name[0],name[1],name[2]
    text_size = len(text)
    class_size = len(classname)
    totalSize = text_size + class_size + 4 + 4 + 4 + 4

    layout = '> I I I I I %ds %ds' % (text_size,class_size)
    is_scientific = int(classname=='scientific name')
    return struct.pack(layout,
                       totalSize,
                       is_scientific,
                       text_size,
                       class_size,
                       taxon,
                       text,
                       classname)
|
||||
|
||||
def ecoSeqWriter(file,input,taxindex,parser):
    """Write the sequences read from *input* to the binary file *file*.

    file     -- name of the output file.
    input    -- name of the input file, or an already open one.
    taxindex -- mapping from taxid to taxon index; entries whose taxid is
                missing from it, or mapped to ``None``, are skipped.
    parser   -- callable turning the opened input into an iterable of
                entry dicts.

    The file starts with the number of written sequences; a placeholder is
    written first and overwritten once the count is known. Returns the
    list of skipped entry ids. The output file is closed even when reading
    or packing fails.
    """
    output = open(file,'wb')
    try:
        input = universalOpen(input)
        inputsize = fileSize(input)
        entries = parser(input)
        seqcount=0
        skipped = []

        # Placeholder for the sequence count.
        output.write(struct.pack('> I',seqcount))

        progressBar(1, inputsize,reset=True)
        for entry in entries:
            if entry['taxid'] is None:
                skipped.append(entry['id'])
                continue

            try:
                entry['taxid']=taxindex[entry['taxid']]
            except KeyError:
                entry['taxid']=None

            if entry['taxid'] is not None:
                seqcount+=1
                output.write(ecoSeqPacker(entry))
            else:
                skipped.append(entry['id'])

            where = universalTell(input)
            progressBar(where, inputsize)
            sys.stderr.write(" Readed sequences : %d " % seqcount)

        sys.stderr.write("\n")
        # Go back and store the real sequence count.
        output.seek(0,0)
        output.write(struct.pack('> I',seqcount))
    finally:
        output.close()

    return skipped
|
||||
|
||||
|
||||
def ecoTaxWriter(file,taxonomy):
    """Write *taxonomy* to the binary file *file*.

    The file holds the number of taxa followed by each taxon packed with
    ``ecoTaxPacker``. The file is closed even when packing fails.
    """
    output = open(file,'wb')
    try:
        output.write(struct.pack('> I',len(taxonomy)))

        for tx in taxonomy:
            output.write(ecoTaxPacker(tx))
    finally:
        output.close()
|
||||
|
||||
def ecoRankWriter(file,ranks):
    """Write the rank names of *ranks* to the binary file *file*.

    The file holds the number of ranks followed by each rank name, in
    sorted order, packed with ``ecoRankPacker``. ``sorted`` is used instead
    of sorting the result of ``keys()`` in place, which only works when
    ``keys()`` returns a list. The file is closed even when packing fails.
    """
    output = open(file,'wb')
    try:
        output.write(struct.pack('> I',len(ranks)))

        for rank in sorted(ranks):
            output.write(ecoRankPacker(rank))
    finally:
        output.close()
|
||||
|
||||
def nameCmp(n1,n2):
    """Three-way, case-insensitive comparison of two names on their first item.

    Returns -1, 0 or 1, as expected from a ``cmp``-style function.
    """
    left = n1[0].upper()
    right = n2[0].upper()
    return int(left > right) - int(left < right)
|
||||
|
||||
|
||||
def ecoNameWriter(file,names):
    """Write *names* to the binary file *file*.

    The file holds the number of names followed by each name packed with
    ``ecoNamePacker``. *names* is sorted in place, case-insensitively on
    the name text, before being written. The file is closed even when
    sorting or packing fails.
    """
    output = open(file,'wb')
    try:
        output.write(struct.pack('> I',len(names)))

        # Same order as a nameCmp-based sort; list.sort is stable either way.
        names.sort(key=lambda name: name[0].upper())

        for name in names:
            output.write(ecoNamePacker(name))
    finally:
        output.close()
|
||||
|
||||
def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
    """Write a complete database whose file names start with *prefix*.

    taxonomy     -- ``(taxa, ranks, names, index)`` tuple.
    seqFileNames -- sequence files; file number N is written to
                    ``<prefix>_NNN.sdx``.
    parser       -- sequence parser handed to ``ecoSeqWriter``.

    Ranks go to ``<prefix>.rdx``, taxa to ``<prefix>.tdx`` and names to
    ``<prefix>.ndx``. The ids skipped in each sequence file are reported
    on stderr.
    """
    taxa,ranks,names,index = taxonomy[0],taxonomy[1],taxonomy[2],taxonomy[3]

    ecoRankWriter('%s.rdx' % prefix, ranks)
    ecoTaxWriter('%s.tdx' % prefix, taxa)
    ecoNameWriter('%s.ndx' % prefix, names)

    for position,filename in enumerate(seqFileNames):
        target = '%s_%03d.sdx' % (prefix,position + 1)
        sk = ecoSeqWriter(target, filename, index, parser)
        if sk:
            sys.stderr.write("Skipped entry :\n")
            sys.stderr.write("%s\n" % (sk,))
|
||||
|
||||
def ecoParseOptions(arguments):
    """Parse the command-line *arguments* (without the program name).

    Returns ``(opt, filenames)`` where *opt* is a dict with the keys

    prefix -- database name prefix (default ``'ecodb'``);
    taxmod -- ``'dump'`` or ``'db'`` (default ``'dump'``, so that running
              without ``-t``/``-T`` uses the default ``taxdir`` instead of
              failing later on a missing key);
    taxdir -- taxonomy dump directory (default ``'taxdump'``);
    taxdb  -- taxonomy database name, only set by ``-T``;
    parser -- sequence iterator factory result (GenBank by default).

    ``-h`` prints the help and exits. Raises ``getopt.GetoptError`` on an
    invalid command line.
    """
    opt = {
        'prefix' : 'ecodb',
        'taxmod' : 'dump',
        'taxdir' : 'taxdump',
        'parser' : sequenceIteratorFactory(genbankEntryParser,
                                           entryIterator)
    }

    o,filenames = getopt.getopt(arguments,
                                'ht:T:n:gfe',
                                ['help',
                                 'taxonomy=',
                                 'taxonomy_db=',
                                 'name=',
                                 'genbank',
                                 'fasta',
                                 'embl'])

    for name,value in o:
        if name in ('-h','--help'):
            printHelp()
            sys.exit()
        elif name in ('-t','--taxonomy'):
            opt['taxmod']='dump'
            opt['taxdir']=value
        elif name in ('-T','--taxonomy_db'):
            opt['taxmod']='db'
            opt['taxdb']=value
        elif name in ('-n','--name'):
            opt['prefix']=value
        elif name in ('-g','--genbank'):
            opt['parser']=sequenceIteratorFactory(genbankEntryParser,
                                                  entryIterator)

        elif name in ('-f','--fasta'):
            opt['parser']=sequenceIteratorFactory(fastaEntryParser,
                                                  fastaEntryIterator)

        elif name in ('-e','--embl'):
            opt['parser']=sequenceIteratorFactory(emblEntryParser,
                                                  entryIterator)
        else:
            raise ValueError('Unknown option %s' % name)

    return opt,filenames
|
||||
|
||||
def printHelp():
    """Print the command-line usage on standard output.

    Every option accepted by ``ecoParseOptions`` is listed, including
    ``-T/--taxonomy_db``.
    """
    lines = (
        "-----------------------------------",
        " ecoPCRFormat.py",
        "-----------------------------------",
        "ecoPCRFormat.py [option] <argument>",
        "-----------------------------------",
        "-e --embl :[E]mbl format",
        "-f --fasta :[F]asta format",
        "-g --genbank :[G]enbank format",
        "-h --help :[H]elp - print this help",
        "-n --name :[N]ame of the new database created",
        "-t --taxonomy :[T]axonomy - path to the taxonomy database",
        " :bcp-like dump from GenBank taxonomy database.",
        "-T --taxonomy_db :[T]axonomy - name of the database to read",
        " :the taxonomy from (requires psycopg2).",
        "-----------------------------------",
    )
    for line in lines:
        # Single-argument form: prints the same text under Python 2 and 3.
        print(line)
|
||||
|
||||
if __name__ == '__main__':

    opt,filenames = ecoParseOptions(sys.argv[1:])

    # Without -t or -T, read the dump found in the default 'taxdir' rather
    # than failing on a missing 'taxmod' key.
    if opt.get('taxmod','dump')=='db':
        taxonomy = readTaxonomyDB(opt['taxdb'])
    else:
        taxonomy = readTaxonomyDump(opt['taxdir'])

    ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])
|
||||
|
Reference in New Issue
Block a user