50 Commits

Author SHA1 Message Date
40f2972bda Actualiser src/libecoPCR/ecorank.c 2025-06-12 13:35:06 +00:00
7db34d77a4 Actualiser src/libecoPCR/ecotax.c 2025-06-12 13:34:30 +00:00
3503f66520 Update ecopcr.c to comment a debug message 2020-04-27 14:10:30 +02:00
9deb30a8c4 Patch a bug inducing that sequences were considered as circular whatever
the -c option was present or not.
2020-04-27 12:07:28 +02:00
c4321036be Fixed length computation bug introduced in last version 2019-01-22 15:17:59 +01:00
a92a7fa070 Fixed version + some git ignore things 2018-07-30 14:45:48 +02:00
bd1db764d4 Fixed a bug when primers would touch or overlap, producing amplicons of
size <= 0
2018-07-26 19:03:59 +02:00
f0cca648ea Version 0.8.0 2017-02-06 14:34:57 +01:00
573bd5bad7 Removed deprecated code in ecoPCRFormat 2017-02-06 11:24:32 +01:00
17387dae8d Deleted OS X code specificities 2017-02-03 13:56:32 +01:00
18583c4d2e Changed compilation flags and fixed typo in ecoPCR help 2017-02-03 11:36:36 +01:00
2dd4a217f1 Merge branch 'master' of https://git.metabarcoding.org/obitools/ecopcr 2017-02-01 17:12:18 +01:00
9a0d77535f Fixing versioning problem with new version 0.6.0 2017-02-01 16:50:07 +01:00
b09bd1455b Add license 2017-01-11 13:01:41 +01:00
194ec811f4 Fixes #2 by adding the missing -D option in the help 2015-05-19 13:52:35 +02:00
126bf80670 Convert svn:ignore properties to .gitignore. 2015-05-16 17:51:46 +02:00
46d086b215 unsufficient space allocated for handling strings in arguments
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@595 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2013-03-07 09:25:16 +00:00
0a62ff49cc MOD: updated the help to include the -P option
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@424 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-16 11:56:36 +00:00
2452df90de MOD: Added a [P]ath option displaying for each taxon its full path
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@423 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-16 11:43:33 +00:00
745d50cfa4 MOD: Corrected condition in getSon to handle the root of the taxonomy
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@422 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-16 07:52:28 +00:00
e2fe83fcb7 remove extra obitools directory from ecoPCR subversion archive
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@418 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-07 09:31:08 +00:00
2a118eedda MOD: In the printRepeat function, corrected rdelta and ldelta management
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@417 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-07 09:14:09 +00:00
93c530e090 removed the "without temperature" option
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@416 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-04 14:53:33 +00:00
4e5d8893e5 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@390 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2011-12-05 12:59:34 +00:00
957a59eb5d Add management of local taxa from the new extension of the OBI Taxonomy library
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@315 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-07-19 06:44:47 +00:00
9adf426abf delta bug
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@310 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-06-30 15:26:36 +00:00
ae528e48f4 delta bug
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@309 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-06-30 15:12:38 +00:00
f53cc6d500 patch on printing
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@299 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-06-15 21:54:33 +00:00
8313c67a9b syntax debug on ecopcr.c
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@297 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-05-25 16:35:30 +00:00
01173d22cf add an hidden -D option
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@296 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-05-25 15:41:22 +00:00
87c2496447 patch a bug in Tm computation
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@260 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-05-23 12:30:16 +00:00
9867859237 patch for 64bits constants
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@244 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-02-24 15:10:11 +00:00
f6f39f58fe patch Tm calculation to return NaN if one of the two sequence is composed with letters different than ACGT
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@243 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-02-24 15:00:11 +00:00
7331dd5612 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@242 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2010-01-22 09:21:04 +00:00
0214be011e git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@241 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2010-01-22 09:20:09 +00:00
1c30a8604f git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@240 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2010-01-22 09:19:36 +00:00
985d067bd8 New version 0.2 of ecoPCR, with Tm computation.
Take care file format have change. You must use corresponding version of ecogrep
You can use -t option to go back to the old format without tm computation

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@239 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-01-22 09:19:19 +00:00
f7e25b2082 New version 0.2 of ecoPCR, with Tm computation.
Take care file format have change. You must use corresponding version of ecogrep
You can use -t option to go back to the old format without tm computation

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@238 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-01-22 09:16:53 +00:00
ad6f493d0f Accept to deal with sequence in lower case
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@217 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-05-13 13:21:10 +00:00
1428cd7499 patch online help in ecoPCR software
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@168 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-05-16 08:53:11 +00:00
708b7c387e Sequence circularity, bug correction
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@167 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-04-28 16:46:18 +00:00
d863b7e48e Manage sequence circularity
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@166 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-04-28 15:49:12 +00:00
4b74056af8 change reference column for taxonomy filtering in ecogrep.c
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@165 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-03-12 21:26:08 +00:00
f68f4af244 Add option to ecoPCRFormat to deal with an obischema db as taxonomy source
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@158 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-02-08 10:27:42 +00:00
a1141a77b5 --
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@119 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2007-09-24 10:10:03 +00:00
6835206344 Add version file
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@118 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2007-09-24 10:07:39 +00:00
45c85a9f32 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@117 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-09-24 10:06:00 +00:00
471bf72bf9 Add rule for ecogrep build in makefile
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@116 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2007-09-24 09:58:48 +00:00
22ecb4b842 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@115 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-09-24 09:48:19 +00:00
bc4c7656c6 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@114 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-09-24 09:48:08 +00:00
50 changed files with 4648 additions and 890 deletions

122
.cproject Normal file
View File

@ -0,0 +1,122 @@
<?xml version="1.0" encoding="UTF-8"?>
<?fileVersion 4.0.0?>
<cproject>
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="ecoPCR" buildProperties="" id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg">
<folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067.141857048" name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.1673936174" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.584997877" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/>
<builder id="cdt.managedbuild.target.gnu.builder.macosx.base.328283627" managedBuildOn="false" name="Gnu Make Builder.MacOSX GCC" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.627652869" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.815782479" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.536333148" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.202459766" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.1142106025" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.845498516" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base"/>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="makefileGenerator">
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="ecoPCR.null.1320766773" name="ecoPCR"/>
</storageModule>
</cproject>

21
.gitignore vendored Normal file
View File

@ -0,0 +1,21 @@
/.gitignore
/.cproject
/.project
# /src/
/src/ecoPCR
/src/ecofind
/src/*.P
/src/*.o
/src/ecogrep
# /src/libapat/
/src/libapat/libapat.a
/src/libapat/*.P
# /src/libecoPCR/
/src/libecoPCR/*.P
# /src/libthermo/
/src/libthermo/*.P

83
.project Normal file
View File

@ -0,0 +1,83 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>ecoPCR</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.python.pydev.PyDevBuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>?name?</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildCommand</key>
<value>make</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value>
</dictionary>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.python.pydev.pythonNature</nature>
</natures>
</projectDescription>

7
.pydevproject Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.4</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>

View File

@ -1,62 +0,0 @@
r -1acghtgd -2gctcagctagcta /Users/coissac/encours/ecoPCR/tools/ecodb
print put
print out
f ecoPCR
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
bt
print label1
up
print label1
print (char*)label1
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
bt
up 3
l
b 38
r
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
print label
print ranks
print *ranks
print ranks->label +1
print *ranks->label +1
print *(ranks->label +1)
print *(ranks->label +2)
b ecotax.c:147
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
print current_taxon ;
print current_taxon
n
print current_taxon ;
print current_taxon
print *current_taxon
print taxonomy->taxons[11]
print taxonomy->taxons[2952]
print taxonomy->taxons->taxon[2952]
print taxonomy->ranks->rank[11]
print taxonomy->ranks->label[11]
print taxonomy->ranks->label[3]
b ecotax.c:147
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
print current_taxon
print *current_taxon
n
print *current_taxon
b ecotax.c:147
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
n
print *current_taxon
r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
n
print *current_taxon
b ecodna.c:70
r -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
*sb
print *sb
print sb
print str
b 52
r -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
print str
r -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
bt

View File

@ -1,46 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
-include ../makefile.init
RM := rm -rf
# All of the sources participating in the build are defined here
-include sources.mk
-include subdir.mk
-include src/libecoPCR/subdir.mk
-include src/libapat/subdir.mk
-include src/subdir.mk
-include objects.mk
ifneq ($(MAKECMDGOALS),clean)
ifneq ($(strip $(C_DEPS)),)
-include $(C_DEPS)
endif
endif
-include ../makefile.defs
# Add inputs and outputs from these tool invocations to the build variables
# All Target
all: ecoPCR
# Tool invocations
ecoPCR: $(OBJS) $(USER_OBJS)
@echo 'Building target: $@'
@echo 'Invoking: MacOS X C Linker'
gcc -o "ecoPCR" $(OBJS) $(USER_OBJS) $(LIBS)
@echo 'Finished building target: $@'
@echo ' '
# Other Targets
clean:
-$(RM) $(OBJS)$(EXECUTABLES)$(C_DEPS) ecoPCR
-@echo ' '
.PHONY: all clean dependents
.SECONDARY:
-include ../makefile.targets

View File

@ -1,7 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
LIBS := -lz
USER_OBJS :=

View File

@ -1,19 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
C_SRCS :=
O_SRCS :=
ASM_SRCS :=
S_SRCS :=
OBJ_SRCS :=
OBJS :=
EXECUTABLES :=
C_DEPS :=
# Every subdirectory with source files must be described here
SUBDIRS := \
src/libecoPCR \
src/libapat \
src \

View File

@ -1,24 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libapat/CODES/gendual.c
OBJS += \
./src/libapat/CODES/gendual.o
C_DEPS += \
./src/libapat/CODES/gendual.d
# Each subdirectory must supply rules for building sources it contributes
src/libapat/CODES/%.o: ../src/libapat/CODES/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,30 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libapat/apat_parse.c \
../src/libapat/apat_search.c \
../src/libapat/libstki.c
OBJS += \
./src/libapat/apat_parse.o \
./src/libapat/apat_search.o \
./src/libapat/libstki.o
C_DEPS += \
./src/libapat/apat_parse.d \
./src/libapat/apat_search.d \
./src/libapat/libstki.d
# Each subdirectory must supply rules for building sources it contributes
src/libapat/%.o: ../src/libapat/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,45 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libecoPCR/ecoError.c \
../src/libecoPCR/ecoIOUtils.c \
../src/libecoPCR/ecoMalloc.c \
../src/libecoPCR/ecoapat.c \
../src/libecoPCR/ecodna.c \
../src/libecoPCR/ecorank.c \
../src/libecoPCR/ecoseq.c \
../src/libecoPCR/ecotax.c
OBJS += \
./src/libecoPCR/ecoError.o \
./src/libecoPCR/ecoIOUtils.o \
./src/libecoPCR/ecoMalloc.o \
./src/libecoPCR/ecoapat.o \
./src/libecoPCR/ecodna.o \
./src/libecoPCR/ecorank.o \
./src/libecoPCR/ecoseq.o \
./src/libecoPCR/ecotax.o
C_DEPS += \
./src/libecoPCR/ecoError.d \
./src/libecoPCR/ecoIOUtils.d \
./src/libecoPCR/ecoMalloc.d \
./src/libecoPCR/ecoapat.d \
./src/libecoPCR/ecodna.d \
./src/libecoPCR/ecorank.d \
./src/libecoPCR/ecoseq.d \
./src/libecoPCR/ecotax.d
# Each subdirectory must supply rules for building sources it contributes
src/libecoPCR/%.o: ../src/libecoPCR/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,24 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/ecopcr.c
OBJS += \
./src/ecopcr.o
C_DEPS += \
./src/ecopcr.d
# Each subdirectory must supply rules for building sources it contributes
src/%.o: ../src/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

549
LICENSE Normal file
View File

@ -0,0 +1,549 @@
CONTRAT DE LICENCE DE LOGICIEL LIBRE CeCILL
Version 2.1 du 2013-06-21
Avertissement
Ce contrat est une licence de logiciel libre issue d'une concertation
entre ses auteurs afin que le respect de deux grands principes préside à
sa rédaction:
* d'une part, le respect des principes de diffusion des logiciels
libres: accès au code source, droits étendus conférés aux utilisateurs,
* d'autre part, la désignation d'un droit applicable, le droit
français, auquel elle est conforme, tant au regard du droit de la
responsabilité civile que du droit de la propriété intellectuelle et
de la protection qu'il offre aux auteurs et titulaires des droits
patrimoniaux sur un logiciel.
Les auteurs de la licence CeCILL (Ce[a] C[nrs] I[nria] L[ogiciel] L[ibre])
sont:
Commissariat à l'énergie atomique et aux énergies alternatives - CEA,
établissement public de recherche à caractère scientifique, technique et
industriel, dont le siège est situé 25 rue Leblanc, immeuble Le Ponant
D, 75015 Paris.
Centre National de la Recherche Scientifique - CNRS, établissement
public à caractère scientifique et technologique, dont le siège est
situé 3 rue Michel-Ange, 75794 Paris cedex 16.
Institut National de Recherche en Informatique et en Automatique -
Inria, établissement public à caractère scientifique et technologique,
dont le siège est situé Domaine de Voluceau, Rocquencourt, BP 105, 78153
Le Chesnay cedex.
Préambule
Ce contrat est une licence de logiciel libre dont l'objectif est de
conférer aux utilisateurs la liberté de modification et de
redistribution du logiciel régi par cette licence dans le cadre d'un
modèle de diffusion en logiciel libre.
L'exercice de ces libertés est assorti de certains devoirs à la charge
des utilisateurs afin de préserver ce statut au cours des
redistributions ultérieures.
L'accessibilité au code source et les droits de copie, de modification
et de redistribution qui en découlent ont pour contrepartie de n'offrir
aux utilisateurs qu'une garantie limitée et de ne faire peser sur
l'auteur du logiciel, le titulaire des droits patrimoniaux et les
concédants successifs qu'une responsabilité restreinte.
A cet égard l'attention de l'utilisateur est attirée sur les risques
associés au chargement, à l'utilisation, à la modification et/ou au
développement et à la reproduction du logiciel par l'utilisateur étant
donné sa spécificité de logiciel libre, qui peut le rendre complexe à
manipuler et qui le réserve donc à des développeurs ou des
professionnels avertis possédant des connaissances informatiques
approfondies. Les utilisateurs sont donc invités à charger et tester
l'adéquation du logiciel à leurs besoins dans des conditions permettant
d'assurer la sécurité de leurs systèmes et/ou de leurs données et, plus
généralement, à l'utiliser et l'exploiter dans les mêmes conditions de
sécurité. Ce contrat peut être reproduit et diffusé librement, sous
réserve de le conserver en l'état, sans ajout ni suppression de clauses.
Ce contrat est susceptible de s'appliquer à tout logiciel dont le
titulaire des droits patrimoniaux décide de soumettre l'exploitation aux
dispositions qu'il contient.
Une liste de questions fréquemment posées se trouve sur le site web
officiel de la famille des licences CeCILL
(http://www.cecill.info/index.fr.html) pour toute clarification qui
serait nécessaire.
Article 1 - DEFINITIONS
Dans ce contrat, les termes suivants, lorsqu'ils seront écrits avec une
lettre capitale, auront la signification suivante:
Contrat: désigne le présent contrat de licence, ses éventuelles versions
postérieures et annexes.
Logiciel: désigne le logiciel sous sa forme de Code Objet et/ou de Code
Source et le cas échéant sa documentation, dans leur état au moment de
l'acceptation du Contrat par le Licencié.
Logiciel Initial: désigne le Logiciel sous sa forme de Code Source et
éventuellement de Code Objet et le cas échéant sa documentation, dans
leur état au moment de leur première diffusion sous les termes du Contrat.
Logiciel Modifié: désigne le Logiciel modifié par au moins une
Contribution.
Code Source: désigne l'ensemble des instructions et des lignes de
programme du Logiciel et auquel l'accès est nécessaire en vue de
modifier le Logiciel.
Code Objet: désigne les fichiers binaires issus de la compilation du
Code Source.
Titulaire: désigne le ou les détenteurs des droits patrimoniaux d'auteur
sur le Logiciel Initial.
Licencié: désigne le ou les utilisateurs du Logiciel ayant accepté le
Contrat.
Contributeur: désigne le Licencié auteur d'au moins une Contribution.
Concédant: désigne le Titulaire ou toute personne physique ou morale
distribuant le Logiciel sous le Contrat.
Contribution: désigne l'ensemble des modifications, corrections,
traductions, adaptations et/ou nouvelles fonctionnalités intégrées dans
le Logiciel par tout Contributeur, ainsi que tout Module Interne.
Module: désigne un ensemble de fichiers sources y compris leur
documentation qui permet de réaliser des fonctionnalités ou services
supplémentaires à ceux fournis par le Logiciel.
Module Externe: désigne tout Module, non dérivé du Logiciel, tel que ce
Module et le Logiciel s'exécutent dans des espaces d'adressage
différents, l'un appelant l'autre au moment de leur exécution.
Module Interne: désigne tout Module lié au Logiciel de telle sorte
qu'ils s'exécutent dans le même espace d'adressage.
GNU GPL: désigne la GNU General Public License dans sa version 2 ou
toute version ultérieure, telle que publiée par Free Software Foundation
Inc.
GNU Affero GPL: désigne la GNU Affero General Public License dans sa
version 3 ou toute version ultérieure, telle que publiée par Free
Software Foundation Inc.
EUPL: désigne la Licence Publique de l'Union européenne dans sa version
1.1 ou toute version ultérieure, telle que publiée par la Commission
Européenne.
Parties: désigne collectivement le Licencié et le Concédant.
Ces termes s'entendent au singulier comme au pluriel.
Article 2 - OBJET
Le Contrat a pour objet la concession par le Concédant au Licencié d'une
licence non exclusive, cessible et mondiale du Logiciel telle que
définie ci-après à l'article 5 <#etendue> pour toute la durée de
protection des droits portant sur ce Logiciel.
Article 3 - ACCEPTATION
3.1 L'acceptation par le Licencié des termes du Contrat est réputée
acquise du fait du premier des faits suivants:
* (i) le chargement du Logiciel par tout moyen notamment par
téléchargement à partir d'un serveur distant ou par chargement à
partir d'un support physique;
* (ii) le premier exercice par le Licencié de l'un quelconque des
droits concédés par le Contrat.
3.2 Un exemplaire du Contrat, contenant notamment un avertissement
relatif aux spécificités du Logiciel, à la restriction de garantie et à
la limitation à un usage par des utilisateurs expérimentés a été mis à
disposition du Licencié préalablement à son acceptation telle que
définie à l'article 3.1 <#acceptation-acquise> ci dessus et le Licencié
reconnaît en avoir pris connaissance.
Article 4 - ENTREE EN VIGUEUR ET DUREE
4.1 ENTREE EN VIGUEUR
Le Contrat entre en vigueur à la date de son acceptation par le Licencié
telle que définie en 3.1 <#acceptation-acquise>.
4.2 DUREE
Le Contrat produira ses effets pendant toute la durée légale de
protection des droits patrimoniaux portant sur le Logiciel.
Article 5 - ETENDUE DES DROITS CONCEDES
Le Concédant concède au Licencié, qui accepte, les droits suivants sur
le Logiciel pour toutes destinations et pour la durée du Contrat dans
les conditions ci-après détaillées.
Par ailleurs, si le Concédant détient ou venait à détenir un ou
plusieurs brevets d'invention protégeant tout ou partie des
fonctionnalités du Logiciel ou de ses composants, il s'engage à ne pas
opposer les éventuels droits conférés par ces brevets aux Licenciés
successifs qui utiliseraient, exploiteraient ou modifieraient le
Logiciel. En cas de cession de ces brevets, le Concédant s'engage à
faire reprendre les obligations du présent alinéa aux cessionnaires.
5.1 DROIT D'UTILISATION
Le Licencié est autorisé à utiliser le Logiciel, sans restriction quant
aux domaines d'application, étant ci-après précisé que cela comporte:
1.
la reproduction permanente ou provisoire du Logiciel en tout ou
partie par tout moyen et sous toute forme.
2.
le chargement, l'affichage, l'exécution, ou le stockage du Logiciel
sur tout support.
3.
la possibilité d'en observer, d'en étudier, ou d'en tester le
fonctionnement afin de déterminer les idées et principes qui sont à
la base de n'importe quel élément de ce Logiciel; et ceci, lorsque
le Licencié effectue toute opération de chargement, d'affichage,
d'exécution, de transmission ou de stockage du Logiciel qu'il est en
droit d'effectuer en vertu du Contrat.
5.2 DROIT D'APPORTER DES CONTRIBUTIONS
Le droit d'apporter des Contributions comporte le droit de traduire,
d'adapter, d'arranger ou d'apporter toute autre modification au Logiciel
et le droit de reproduire le logiciel en résultant.
Le Licencié est autorisé à apporter toute Contribution au Logiciel sous
réserve de mentionner, de façon explicite, son nom en tant qu'auteur de
cette Contribution et la date de création de celle-ci.
5.3 DROIT DE DISTRIBUTION
Le droit de distribution comporte notamment le droit de diffuser, de
transmettre et de communiquer le Logiciel au public sur tout support et
par tout moyen ainsi que le droit de mettre sur le marché à titre
onéreux ou gratuit, un ou des exemplaires du Logiciel par tout procédé.
Le Licencié est autorisé à distribuer des copies du Logiciel, modifié ou
non, à des tiers dans les conditions ci-après détaillées.
5.3.1 DISTRIBUTION DU LOGICIEL SANS MODIFICATION
Le Licencié est autorisé à distribuer des copies conformes du Logiciel,
sous forme de Code Source ou de Code Objet, à condition que cette
distribution respecte les dispositions du Contrat dans leur totalité et
soit accompagnée:
1.
d'un exemplaire du Contrat,
2.
d'un avertissement relatif à la restriction de garantie et de
responsabilité du Concédant telle que prévue aux articles 8
<#responsabilite> et 9 <#garantie>,
et que, dans le cas où seul le Code Objet du Logiciel est redistribué,
le Licencié permette un accès effectif au Code Source complet du
Logiciel pour une durée d'au moins 3 ans à compter de la distribution du
logiciel, étant entendu que le coût additionnel d'acquisition du Code
Source ne devra pas excéder le simple coût de transfert des données.
5.3.2 DISTRIBUTION DU LOGICIEL MODIFIE
Lorsque le Licencié apporte une Contribution au Logiciel, les conditions
de distribution du Logiciel Modifié en résultant sont alors soumises à
l'intégralité des dispositions du Contrat.
Le Licencié est autorisé à distribuer le Logiciel Modifié, sous forme de
code source ou de code objet, à condition que cette distribution
respecte les dispositions du Contrat dans leur totalité et soit
accompagnée:
1.
d'un exemplaire du Contrat,
2.
d'un avertissement relatif à la restriction de garantie et de
responsabilité du Concédant telle que prévue aux articles 8
<#responsabilite> et 9 <#garantie>,
et, dans le cas où seul le code objet du Logiciel Modifié est redistribué,
3.
d'une note précisant les conditions d'accès effectif au code source
complet du Logiciel Modifié, pendant une période d'au moins 3 ans à
compter de la distribution du Logiciel Modifié, étant entendu que le
coût additionnel d'acquisition du code source ne devra pas excéder
le simple coût de transfert des données.
5.3.3 DISTRIBUTION DES MODULES EXTERNES
Lorsque le Licencié a développé un Module Externe les conditions du
Contrat ne s'appliquent pas à ce Module Externe, qui peut être distribué
sous un contrat de licence différent.
5.3.4 COMPATIBILITE AVEC D'AUTRES LICENCES
Le Licencié peut inclure un code soumis aux dispositions d'une des
versions de la licence GNU GPL, GNU Affero GPL et/ou EUPL dans le
Logiciel modifié ou non et distribuer l'ensemble sous les conditions de
la même version de la licence GNU GPL, GNU Affero GPL et/ou EUPL.
Le Licencié peut inclure le Logiciel modifié ou non dans un code soumis
aux dispositions d'une des versions de la licence GNU GPL, GNU Affero
GPL et/ou EUPL et distribuer l'ensemble sous les conditions de la même
version de la licence GNU GPL, GNU Affero GPL et/ou EUPL.
Article 6 - PROPRIETE INTELLECTUELLE
6.1 SUR LE LOGICIEL INITIAL
Le Titulaire est détenteur des droits patrimoniaux sur le Logiciel
Initial. Toute utilisation du Logiciel Initial est soumise au respect
des conditions dans lesquelles le Titulaire a choisi de diffuser son
oeuvre et nul autre n'a la faculté de modifier les conditions de
diffusion de ce Logiciel Initial.
Le Titulaire s'engage à ce que le Logiciel Initial reste au moins régi
par le Contrat et ce, pour la durée visée à l'article 4.2 <#duree>.
6.2 SUR LES CONTRIBUTIONS
Le Licencié qui a développé une Contribution est titulaire sur celle-ci
des droits de propriété intellectuelle dans les conditions définies par
la législation applicable.
6.3 SUR LES MODULES EXTERNES
Le Licencié qui a développé un Module Externe est titulaire sur celui-ci
des droits de propriété intellectuelle dans les conditions définies par
la législation applicable et reste libre du choix du contrat régissant
sa diffusion.
6.4 DISPOSITIONS COMMUNES
Le Licencié s'engage expressément:
1.
à ne pas supprimer ou modifier de quelque manière que ce soit les
mentions de propriété intellectuelle apposées sur le Logiciel;
2.
à reproduire à l'identique lesdites mentions de propriété
intellectuelle sur les copies du Logiciel modifié ou non.
Le Licencié s'engage à ne pas porter atteinte, directement ou
indirectement, aux droits de propriété intellectuelle du Titulaire et/ou
des Contributeurs sur le Logiciel et à prendre, le cas échéant, à
l'égard de son personnel toutes les mesures nécessaires pour assurer le
respect des dits droits de propriété intellectuelle du Titulaire et/ou
des Contributeurs.
Article 7 - SERVICES ASSOCIES
7.1 Le Contrat n'oblige en aucun cas le Concédant à la réalisation de
prestations d'assistance technique ou de maintenance du Logiciel.
Cependant le Concédant reste libre de proposer ce type de services. Les
termes et conditions d'une telle assistance technique et/ou d'une telle
maintenance seront alors déterminés dans un acte séparé. Ces actes de
maintenance et/ou assistance technique n'engageront que la seule
responsabilité du Concédant qui les propose.
7.2 De même, tout Concédant est libre de proposer, sous sa seule
responsabilité, à ses licenciés une garantie, qui n'engagera que lui,
lors de la redistribution du Logiciel et/ou du Logiciel Modifié et ce,
dans les conditions qu'il souhaite. Cette garantie et les modalités
financières de son application feront l'objet d'un acte séparé entre le
Concédant et le Licencié.
Article 8 - RESPONSABILITE
8.1 Sous réserve des dispositions de l'article 8.2
<#limite-responsabilite>, le Licencié a la faculté, sous réserve de
prouver la faute du Concédant concerné, de solliciter la réparation du
préjudice direct qu'il subirait du fait du Logiciel et dont il apportera
la preuve.
8.2 La responsabilité du Concédant est limitée aux engagements pris en
application du Contrat et ne saurait être engagée en raison notamment:
(i) des dommages dus à l'inexécution, totale ou partielle, de ses
obligations par le Licencié, (ii) des dommages directs ou indirects
découlant de l'utilisation ou des performances du Logiciel subis par le
Licencié et (iii) plus généralement d'un quelconque dommage indirect. En
particulier, les Parties conviennent expressément que tout préjudice
financier ou commercial (par exemple perte de données, perte de
bénéfices, perte d'exploitation, perte de clientèle ou de commandes,
manque à gagner, trouble commercial quelconque) ou toute action dirigée
contre le Licencié par un tiers, constitue un dommage indirect et
n'ouvre pas droit à réparation par le Concédant.
Article 9 - GARANTIE
9.1 Le Licencié reconnaît que l'état actuel des connaissances
scientifiques et techniques au moment de la mise en circulation du
Logiciel ne permet pas d'en tester et d'en vérifier toutes les
utilisations ni de détecter l'existence d'éventuels défauts. L'attention
du Licencié a été attirée sur ce point sur les risques associés au
chargement, à l'utilisation, la modification et/ou au développement et à
la reproduction du Logiciel qui sont réservés à des utilisateurs avertis.
Il relève de la responsabilité du Licencié de contrôler, par tous
moyens, l'adéquation du produit à ses besoins, son bon fonctionnement et
de s'assurer qu'il ne causera pas de dommages aux personnes et aux biens.
9.2 Le Concédant déclare de bonne foi être en droit de concéder
l'ensemble des droits attachés au Logiciel (comprenant notamment les
droits visés à l'article 5 <#etendue>).
9.3 Le Licencié reconnaît que le Logiciel est fourni "en l'état" par le
Concédant sans autre garantie, expresse ou tacite, que celle prévue à
l'article 9.2 <#bonne-foi> et notamment sans aucune garantie sur sa
valeur commerciale, son caractère sécurisé, innovant ou pertinent.
En particulier, le Concédant ne garantit pas que le Logiciel est exempt
d'erreur, qu'il fonctionnera sans interruption, qu'il sera compatible
avec l'équipement du Licencié et sa configuration logicielle ni qu'il
remplira les besoins du Licencié.
9.4 Le Concédant ne garantit pas, de manière expresse ou tacite, que le
Logiciel ne porte pas atteinte à un quelconque droit de propriété
intellectuelle d'un tiers portant sur un brevet, un logiciel ou sur tout
autre droit de propriété. Ainsi, le Concédant exclut toute garantie au
profit du Licencié contre les actions en contrefaçon qui pourraient être
diligentées au titre de l'utilisation, de la modification, et de la
redistribution du Logiciel. Néanmoins, si de telles actions sont
exercées contre le Licencié, le Concédant lui apportera son expertise
technique et juridique pour sa défense. Cette expertise technique et
juridique est déterminée au cas par cas entre le Concédant concerné et
le Licencié dans le cadre d'un protocole d'accord. Le Concédant dégage
toute responsabilité quant à l'utilisation de la dénomination du
Logiciel par le Licencié. Aucune garantie n'est apportée quant à
l'existence de droits antérieurs sur le nom du Logiciel et sur
l'existence d'une marque.
Article 10 - RESILIATION
10.1 En cas de manquement par le Licencié aux obligations mises à sa
charge par le Contrat, le Concédant pourra résilier de plein droit le
Contrat trente (30) jours après notification adressée au Licencié et
restée sans effet.
10.2 Le Licencié dont le Contrat est résilié n'est plus autorisé à
utiliser, modifier ou distribuer le Logiciel. Cependant, toutes les
licences qu'il aura concédées antérieurement à la résiliation du Contrat
resteront valides sous réserve qu'elles aient été effectuées en
conformité avec le Contrat.
Article 11 - DISPOSITIONS DIVERSES
11.1 CAUSE EXTERIEURE
Aucune des Parties ne sera responsable d'un retard ou d'une défaillance
d'exécution du Contrat qui serait dû à un cas de force majeure, un cas
fortuit ou une cause extérieure, telle que, notamment, le mauvais
fonctionnement ou les interruptions du réseau électrique ou de
télécommunication, la paralysie du réseau liée à une attaque
informatique, l'intervention des autorités gouvernementales, les
catastrophes naturelles, les dégâts des eaux, les tremblements de terre,
le feu, les explosions, les grèves et les conflits sociaux, l'état de
guerre...
11.2 Le fait, par l'une ou l'autre des Parties, d'omettre en une ou
plusieurs occasions de se prévaloir d'une ou plusieurs dispositions du
Contrat, ne pourra en aucun cas impliquer renonciation par la Partie
intéressée à s'en prévaloir ultérieurement.
11.3 Le Contrat annule et remplace toute convention antérieure, écrite
ou orale, entre les Parties sur le même objet et constitue l'accord
entier entre les Parties sur cet objet. Aucune addition ou modification
aux termes du Contrat n'aura d'effet à l'égard des Parties à moins
d'être faite par écrit et signée par leurs représentants dûment habilités.
11.4 Dans l'hypothèse où une ou plusieurs des dispositions du Contrat
s'avèrerait contraire à une loi ou à un texte applicable, existants ou
futurs, cette loi ou ce texte prévaudrait, et les Parties feraient les
amendements nécessaires pour se conformer à cette loi ou à ce texte.
Toutes les autres dispositions resteront en vigueur. De même, la
nullité, pour quelque raison que ce soit, d'une des dispositions du
Contrat ne saurait entraîner la nullité de l'ensemble du Contrat.
11.5 LANGUE
Le Contrat est rédigé en langue française et en langue anglaise, ces
deux versions faisant également foi.
Article 12 - NOUVELLES VERSIONS DU CONTRAT
12.1 Toute personne est autorisée à copier et distribuer des copies de
ce Contrat.
12.2 Afin d'en préserver la cohérence, le texte du Contrat est protégé
et ne peut être modifié que par les auteurs de la licence, lesquels se
réservent le droit de publier périodiquement des mises à jour ou de
nouvelles versions du Contrat, qui posséderont chacune un numéro
distinct. Ces versions ultérieures seront susceptibles de prendre en
compte de nouvelles problématiques rencontrées par les logiciels libres.
12.3 Tout Logiciel diffusé sous une version donnée du Contrat ne pourra
faire l'objet d'une diffusion ultérieure que sous la même version du
Contrat ou une version postérieure, sous réserve des dispositions de
l'article 5.3.4 <#compatibilite>.
Article 13 - LOI APPLICABLE ET COMPETENCE TERRITORIALE
13.1 Le Contrat est régi par la loi française. Les Parties conviennent
de tenter de régler à l'amiable les différends ou litiges qui
viendraient à se produire par suite ou à l'occasion du Contrat.
13.2 A défaut d'accord amiable dans un délai de deux (2) mois à compter
de leur survenance et sauf situation relevant d'une procédure d'urgence,
les différends ou litiges seront portés par la Partie la plus diligente
devant les Tribunaux compétents de Paris.

View File

@ -1,46 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
-include ../makefile.init
RM := rm -rf
# All of the sources participating in the build are defined here
-include sources.mk
-include subdir.mk
-include src/libecoPCR/subdir.mk
-include src/libapat/subdir.mk
-include src/subdir.mk
-include objects.mk
ifneq ($(MAKECMDGOALS),clean)
ifneq ($(strip $(C_DEPS)),)
-include $(C_DEPS)
endif
endif
-include ../makefile.defs
# Add inputs and outputs from these tool invocations to the build variables
# All Target
all: ecoPCR
# Tool invocations
ecoPCR: $(OBJS) $(USER_OBJS)
@echo 'Building target: $@'
@echo 'Invoking: MacOS X C Linker'
gcc -o "ecoPCR" $(OBJS) $(USER_OBJS) $(LIBS)
@echo 'Finished building target: $@'
@echo ' '
# Other Targets
clean:
-$(RM) $(OBJS)$(EXECUTABLES)$(C_DEPS) ecoPCR
-@echo ' '
.PHONY: all clean dependents
.SECONDARY:
-include ../makefile.targets

View File

@ -1,7 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
LIBS := -lz
USER_OBJS :=

View File

@ -1,19 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
C_SRCS :=
O_SRCS :=
ASM_SRCS :=
S_SRCS :=
OBJ_SRCS :=
OBJS :=
EXECUTABLES :=
C_DEPS :=
# Every subdirectory with source files must be described here
SUBDIRS := \
src/libecoPCR \
src/libapat \
src \

View File

@ -1,30 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libapat/apat_parse.c \
../src/libapat/apat_search.c \
../src/libapat/libstki.c
OBJS += \
./src/libapat/apat_parse.o \
./src/libapat/apat_search.o \
./src/libapat/libstki.o
C_DEPS += \
./src/libapat/apat_parse.d \
./src/libapat/apat_search.d \
./src/libapat/libstki.d
# Each subdirectory must supply rules for building sources it contributes
src/libapat/%.o: ../src/libapat/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,45 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/libecoPCR/ecoError.c \
../src/libecoPCR/ecoIOUtils.c \
../src/libecoPCR/ecoMalloc.c \
../src/libecoPCR/ecoapat.c \
../src/libecoPCR/ecodna.c \
../src/libecoPCR/ecorank.c \
../src/libecoPCR/ecoseq.c \
../src/libecoPCR/ecotax.c
OBJS += \
./src/libecoPCR/ecoError.o \
./src/libecoPCR/ecoIOUtils.o \
./src/libecoPCR/ecoMalloc.o \
./src/libecoPCR/ecoapat.o \
./src/libecoPCR/ecodna.o \
./src/libecoPCR/ecorank.o \
./src/libecoPCR/ecoseq.o \
./src/libecoPCR/ecotax.o
C_DEPS += \
./src/libecoPCR/ecoError.d \
./src/libecoPCR/ecoIOUtils.d \
./src/libecoPCR/ecoMalloc.d \
./src/libecoPCR/ecoapat.d \
./src/libecoPCR/ecodna.d \
./src/libecoPCR/ecorank.d \
./src/libecoPCR/ecoseq.d \
./src/libecoPCR/ecotax.d
# Each subdirectory must supply rules for building sources it contributes
src/libecoPCR/%.o: ../src/libecoPCR/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

View File

@ -1,24 +0,0 @@
################################################################################
# Automatically-generated file. Do not edit!
################################################################################
# Add inputs and outputs from these tool invocations to the build variables
C_SRCS += \
../src/ecopcr.c
OBJS += \
./src/ecopcr.o
C_DEPS += \
./src/ecopcr.d
# Each subdirectory must supply rules for building sources it contributes
src/%.o: ../src/%.c
@echo 'Building file: $<'
@echo 'Invoking: GCC C Compiler'
gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
@echo 'Finished building: $<'
@echo ' '

1
VERSION Normal file
View File

@ -0,0 +1 @@
1.0.0

103
src/Makefile Normal file
View File

@ -0,0 +1,103 @@
EXEC=ecoPCR ecofind ecogrep
PCR_SRC= ecopcr.c
PCR_OBJ= $(patsubst %.c,%.o,$(PCR_SRC))
FIND_SRC= ecofind.c
FIND_OBJ= $(patsubst %.c,%.o,$(FIND_SRC))
GREP_SRC= ecogrep.c
GREP_OBJ= $(patsubst %.c,%.o,$(GREP_SRC))
IUT_SRC= ecoisundertaxon.c
IUT_OBJ= $(patsubst %.c,%.o,$(IUT_SRC))
SRCS= $(PCR_SRC) $(FIND_SRC) $(IUT_SRC)
LIB= -lecoPCR -lthermo -lapat -lz -lm
LIBFILE= libapat/libapat.a \
libecoPCR/libecoPCR.a \
libthermo/libthermo.a
include global.mk
all: $(EXEC)
########
#
# ecoPCR compilation
#
########
# executable compilation and link
ecoPCR: $(PCR_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
#
# ecofind compilation
#
########
# executable compilation and link
ecofind: $(FIND_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
#
# ecogrep compilation
#
########
# executable compilation and link
ecogrep: $(GREP_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
#
# IsUnderTaxon compilation
#
########
# executable compilation and link
ecoisundertaxon: $(IUT_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
#
# library compilation
#
########
libapat/libapat.a:
$(MAKE) -C libapat
libecoPCR/libecoPCR.a:
$(MAKE) -C libecoPCR
libthermo/libthermo.a:
$(MAKE) -C libthermo
########
#
# project management
#
########
clean:
rm -f *.o
rm -f $(EXEC)
$(MAKE) -C libapat clean
$(MAKE) -C libecoPCR clean
$(MAKE) -C libthermo clean

BIN
src/ecoPCR.gz Executable file

Binary file not shown.

373
src/ecofind.c Normal file
View File

@ -0,0 +1,373 @@
#include "libecoPCR/ecoPCR.h"
#include <regex.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#include <stdio.h>
#define VERSION "0.1"
/**
* display the result
**/
void displayPath(ecotx_t *taxon, ecotaxonomy_t *taxonomy){
if (taxon != taxon->parent){
displayPath(taxon->parent,taxonomy);
printf(";");
}
if (rank_index("no rank",taxonomy->ranks) != taxon->rank)
printf("%s:", taxonomy->ranks->label[taxon->rank]);
printf("%s", taxon->name);
}
static void printresult(ecotx_t *taxon,econame_t* name,ecotaxonomy_t *taxonomy, int32_t pathDisplay)
{
char* rankname;
char* classname;
char* matchedname=taxon->name;
classname="scientific name";
if (name)
{
classname=name->classname;
matchedname=name->name;
}
rankname= taxonomy->ranks->label[taxon->rank];
printf("%10d \t| %15s \t|\t %-50s \t|\t %15s \t|\t %s",
taxon->taxid,
rankname,
matchedname,
classname,
taxon->name);
if (pathDisplay) {
printf("\t|\t");
displayPath(taxon, taxonomy);
}
printf("\n");
}
/**
* display header before printing any result
**/
static void printheader(int32_t pathDisplay)
{
printf("# %12s \t| %15s \t|\t %-50s \t|\t %-15s \t|\t %s%s\n#\n",
"taxonomy id",
"taxonomy rank",
"name",
"class name",
"scientific name",
pathDisplay ? "\t|\t path":"");
}
/**
* display son's list for given taxon
**/
static void get_son(ecotaxonomy_t *taxonomy, ecotx_t *taxon, int32_t *count, char *rankname, int32_t pathDisplay)
{
int32_t i;
ecotx_t *current_taxon;
for ( i = 0, current_taxon = taxonomy->taxons->taxon;
i < taxonomy->taxons->count;
i++, current_taxon++)
{
if (taxon != current_taxon && taxon->taxid == current_taxon->parent->taxid)
{
if (rankname == NULL || !strcmp(rankname,taxonomy->ranks->label[current_taxon->rank]))
{
printresult(current_taxon, NULL, taxonomy, pathDisplay);
(*count)++;
}
get_son(taxonomy,current_taxon,count,rankname, pathDisplay);
}
}
}
/**
* display list of rank filter option (-l option)
**/
static void listfilteroptions(ecorankidx_t *ranks)
{
int32_t i;
printf("#\n");
for ( i=0;
i < ranks->count;
i++)
{
printf("# %s \n",ranks->label[i]);
}
printf("#\n");
}
/* ---------------------------------------- */
/* get back on given taxid taxonomic parent */
/* and display it */
/* ---------------------------------------- */
void gettaxidparents(int32_t taxid, ecotaxonomy_t *taxonomy, char *rankname, int32_t pathDisplay)
{
ecotx_t *next_parent;
int32_t c = 0;
next_parent = eco_findtaxonbytaxid(taxonomy, taxid);
printheader(pathDisplay);
printresult(next_parent, NULL,taxonomy, pathDisplay);
while ( strcmp(next_parent->name, "root") )
{
next_parent = next_parent->parent;
if (rankname == NULL || !strcmp(rankname,taxonomy->ranks->label[next_parent->rank]))
{
printresult(next_parent, NULL,taxonomy, pathDisplay);
c++;
}
}
printf("# %d parent(s) found\n#\n",c);
}
/**
* printout usage and exit
**/
#define PP fprintf(stderr,
static void ExitUsage(stat)
int stat;
{
PP "usage: ecofind [-d database] [-h] [-l] [-P] [-r taxonomic rank] [-p taxid] [-s taxid] <taxon name pattern> ... \n");
PP "type \"ecofind -h\" for help\n");
if (stat)
exit(stat);
}
#undef PP
/**
* printout help
**/
#define PP fprintf(stdout,
static void PrintHelp()
{
PP "------------------------------------------\n");
PP " ecofind Version %s\n", VERSION);
PP "------------------------------------------\n");
PP "synopsis : searching for taxonomic and rank and\n");
PP " taxonomy id for given regular expression patterns\n\n");
PP "usage: ecofind [options] <patterns>\n");
PP "------------------------------------------\n");
PP "options:\n");
PP "-a : [A]ll enable the search on all alternative names and not only scientific names.\n\n");
PP "-d : [D]atabase containing the taxonomy.\n");
PP " To match the expected format, the database\n");
PP " has to be formated first by the ecoPCRFormat.py\n");
PP " program located in the tools directory.\n");
PP " Write the database radical without any extension.\n\n");
PP "-h : [H]elp - print <this> help\n\n");
PP "-l : [L]ist all taxonomic rank available for -r option\n\n");
PP "-P : [P]ath : add a column containing the full path for each displayed taxon\n\n");
PP "-p : [P]arents : specifiying this option displays all parental tree's information for the given taxid.\n\n");
PP "-r : [R]estrict to given taxonomic rank\n\n");
PP "-s : [S]ons: specifiying this option displays all subtree's information for the given taxid.\n\n");
PP "-P : Display taxonomic [P]ath as suplementary column in output\n\n");
PP "arguments:\n");
PP "<taxon> name pattern bearing regular expressions\n\n");
PP "------------------------------------------\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
PP "------------------------------------------\n\n");
}
/* ----------------------------------------------- */
#define PATTERN_NUMBER 10
#define PATTERN_LENGHT 40
#define RESULT_LENGTH 100
int main(int argc, char **argv)
{
int32_t carg = 0;
int32_t nummatch = 0;
int32_t k,j = 0;
int32_t errflag = 0;
int32_t tax_count = 0;
int32_t alternative = 0;
char *prefix = NULL;
ecotaxonomy_t *taxonomy;
econame_t *name;
int32_t name_count;
int re_error;
int re_match;
regex_t re_preg;
int32_t uptree = 0;
int32_t subtree = 0;
char *rankname = NULL;
int32_t rankfilter = 1;
int32_t list = 0;
int32_t path = 0;
ecotx_t *subtree_parent;
int32_t count_son = 0;
while ((carg = getopt(argc, argv, "had:p:s:r:lP")) != -1) {
switch (carg) {
case 's': /* path to the database */
sscanf(optarg,"%d",&subtree);
break;
case 'r': /* rank filter */
rankname = ECOMALLOC(strlen(optarg)+1,"allocation rankname");
strcpy(rankname,optarg);
rankfilter = 0;
break;
case 'd': /* path to the database */
prefix = ECOMALLOC(strlen(optarg)+1,"allocation prefix");
strcpy(prefix,optarg);
break;
case 'l': /* list rank filter options */
list = 1;
break;
case 'P': /* Path output option */
path=1;
break;
case 'a': /* allow alternative names */
alternative = 1;
break;
case 'h': /* display help */
PrintHelp();
exit(0);
break;
case 'p': /* taxid */
sscanf(optarg,"%d",&uptree);
break;
case '?': /* bad option */
errflag++;
}
}
if ((argc - optind) < 1)
errflag++;
if (prefix == NULL)
{
prefix = getenv("ECOPCRDB");
if (prefix == NULL)
errflag++;
}
if (errflag && !uptree && !rankname && !subtree && !list)
ExitUsage(errflag);
/**
* load taxonomy using libecoPCR functions
**/
printf("# \n# opening %s database\n",prefix);
taxonomy = read_taxonomy(prefix,1);
tax_count = taxonomy->taxons->count;
name_count = taxonomy->names->count;
/* ---------------------------------------- */
/* list -r option possibility */
/* ---------------------------------------- */
if (list)
{
listfilteroptions(taxonomy->ranks);
return 0;
}
/* ---------------------------------------- */
/* display taxid parent if -t option */
/* specified in command line */
/* ---------------------------------------- */
if (uptree)
{
gettaxidparents(uptree,taxonomy,rankname, path);
return 0;
}
/* ---------------------------------------- */
/* display taxid sons if -s option */
/* specified in command line */
/* ---------------------------------------- */
if (subtree)
{
printheader(path);
subtree_parent = eco_findtaxonbytaxid(taxonomy,subtree);
printresult(subtree_parent, NULL,taxonomy, path);
get_son(taxonomy, subtree_parent,&count_son,rankname, path);
printf("# %d son(s) found\n#\n",count_son);
return 0;
}
printf("# %d taxons\n", tax_count);
/**
* parse taxonomy
**/
for (k=optind;k<argc;k++)
{
printf("#\n# searching for '%s' pattern\n",argv[k]);
re_error = regcomp (&re_preg, argv[k], REG_NOSUB | REG_EXTENDED | REG_ICASE);
if (re_error)
{
fprintf(stderr,"# misformed pattern '%s'\n",argv[k]);
exit(1);
}
nummatch=0;
printheader(path);
for (j=0,name=taxonomy->names->names;
j < name_count;
name++,j++)
{
if(rankname)
rankfilter = !(strcmp(rankname,taxonomy->ranks->label[name->taxon->rank]));
re_match = regexec (&re_preg, name->name, 0, NULL, 0);
if (!re_match && (alternative || name->is_scientificname) && rankfilter)
{
printresult(name->taxon,name,taxonomy, path);
nummatch++;
}
}
printf("# %d records found \n",nummatch);
regfree(&re_preg);
}
return 0;
}

BIN
src/ecofind.gz Executable file

Binary file not shown.

403
src/ecogrep.c Normal file
View File

@ -0,0 +1,403 @@
#include "libecoPCR/ecoPCR.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#include <stdlib.h>
#include <sys/stat.h>
#define VERSION "0.1"
void getLineContent(char *stream, ecoseq_t *seq, ecoseq_t *oligoseq_1, ecoseq_t *oligoseq_2){
int i;
char *buffer;
for( i=0, buffer = strtok(stream,"|");
buffer != NULL;
i++, buffer = strtok(NULL,"|"))
{
switch (i) {
case 0:
seq->AC = strdup(buffer);
break;
case 2:
sscanf(buffer,"%d",&seq->taxid);
break;
case 13:
oligoseq_1->SQ = strdup(buffer);
oligoseq_1->SQ_length = strlen(buffer);
break;
case 16:
oligoseq_2->SQ = strdup(buffer);
oligoseq_2->SQ_length = strlen(buffer);
break;
case 20:
seq->SQ = strdup(buffer);
seq->SQ_length = strlen(buffer);
break;
default:
break;
}
}
}
void freememory(char **tab, int32_t num){
int32_t i;
for (i=0;i<num-1;i++){
ECOFREE(tab[i],"Error in freememory function");
}
}
/**
* Check if pattern match a string using mamberall libapat function
* @param line array containing sequence information
* @param pattern array containing patterns to test on the sequence
* @param numpattern number of pattern in pattern array
* @param error_max error rate allowed by the user
*
* @return int 1 if a pattern match, else 0
**/
int ispatternmatching(ecoseq_t *seq, PatternPtr pattern){
if (pattern != NULL)
{
SeqPtr apatseq = NULL;
apatseq=ecoseq2apatseq(seq,apatseq,0);
return ManberAll(apatseq,pattern,0,0,apatseq->seqlen) > 0;
}
else return 0;
}
/* ----------------------------------------------- */
/* printout help */
/* ----------------------------------------------- */
#define PP fprintf(stdout,
static void PrintHelp()
{
PP "\n------------------------------------------\n");
PP " ecogrep Version %s\n", VERSION);
PP "------------------------------------------\n");
PP " synopsis : filtering ecoPCR result based on\n");
PP " taxonomic id filter and regular expression pattern\n");
PP " usage: ecogrep [options] filename\n");
PP "------------------------------------------\n");
PP " options:\n");
PP " -1 : [FIRST] : compare the given pattern with direct strand oligonucleotide\n\n");
PP " -2 : [SECOND] : compare the given pattern with reverse strand oligonucleotide\n\n");
PP " -d : [D]atabase containing taxonomic information\n\n");
PP " -e : [E]rrors : max error allowed in pattern match (option-1, -2 and -p) (0 by default)\n\n");
PP " -p : [P]attern : oligonucleotide pattern\n\n");
PP " -h : [H]elp : print <this> help\n\n");
PP " -i : [I]gnore subtree under given taxonomic id\n\n");
PP " -r : [R]estrict search to subtree under given taxomic id\n\n");
PP " -v : in[V]ert the sense of matching, to select non-matching lines.\n\n");
PP " argument:\n");
PP " ecoPCR ouput file name\n");
PP "------------------------------------------\n\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
PP "------------------------------------------\n\n");
}
#undef PP
/* ----------------------------------------------- */
/* printout usage and exit */
/* ----------------------------------------------- */
#define PP fprintf(stderr,
static void ExitUsage(stat)
int stat;
{
PP "usage: ecogrep [-d database] [-p pattern] [-i taxid] [-r taxid] [-v] [-h] <file name>\n");
PP "type \"ecogrep -h\" for help\n");
if (stat)
exit(stat);
}
#undef PP
/* ----------------------------------------------- */
/* MAIN */
/* ----------------------------------------------- */
#define LINE_BUFF_SIZE 10000
int main(int argc, char **argv){
int32_t carg = 0;
int32_t r = 0; // number of restricted taxid
int32_t i = 0; // number of ignored taxid
int32_t v = 0; // stores if -v mode is active
int32_t k = 0; // file counter
int32_t errflag = 0;
int32_t error_max = 0; // stores the error rate allowed by the user
int32_t matchingresult = 0; // stores number of matching result
ecotaxonomy_t *taxonomy; // stores the taxonomy
ecoseq_t *seq = NULL; // stores sequence info
ecoseq_t *oligoseq_1 = NULL; // stores the oligo_1 info
ecoseq_t *oligoseq_2 = NULL; // stores the oligo_2 info
char *database = NULL; // stores the database path (for taxonomy)
char *p = NULL; // pattern for sequence
PatternPtr pattern = NULL; // stores the build pattern for sequence
char *o1 = NULL; // pattern for direct strand oligo
PatternPtr oligo_1 = NULL; // stores the build pattern for direct strand oligo
char *o2 = NULL; // pattern for reverse strand oligo
PatternPtr oligo_2 = NULL; // stores the build pattern for reverse strand oligo
int32_t *restricted_taxid = NULL; // stores the restricted taxid
int32_t *ignored_taxid = NULL; // stores the ignored taxid
FILE *file = NULL; // stores the data stream, stdin by default
char *stream = ECOMALLOC(sizeof(char *)*LINE_BUFF_SIZE,"error stream buffer allocation");
char *orig = ECOMALLOC(sizeof(char *)*LINE_BUFF_SIZE,"error orig buffer allocation");
int is_ignored = 0;
int is_included = 0;
int is_matching = 0;
int match_o1 = 0;
int match_o2 = 0;
int good = 0;
seq = new_ecoseq();
oligoseq_1 = new_ecoseq();
oligoseq_2 = new_ecoseq();
/**
* Parse commande line options
**/
while ((carg = getopt(argc, argv, "1:2:p:d:i:r:e:vh")) != -1) {
switch (carg) {
case '1':
o1 = ECOMALLOC(strlen(optarg)+1,
"Error on o1 allocation");
strcpy(o1,optarg);
break;
case '2':
o2 = ECOMALLOC(strlen(optarg)+1,
"Error on o2 allocation");
strcpy(o2,optarg);
break;
case 'd':
database = ECOMALLOC(strlen(optarg)+1,
"Error on datafile allocation");
strcpy(database,optarg);
break;
case 'i':
ignored_taxid = ECOREALLOC( ignored_taxid,
sizeof(int32_t)*(i+1),
"Error on ignored_taxid reallocation");
sscanf(optarg,"%d",&ignored_taxid[i]);
i++;
break;
case 'r':
restricted_taxid = ECOREALLOC( restricted_taxid,
sizeof(int32_t)*(r+1),
"Error on restricted_taxid reallocation");
sscanf(optarg,"%d",&restricted_taxid[r]);
r++;
break;
case 'v':
v = 1;
break;
case 'h':
PrintHelp();
exit(0);
break;
case 'e':
sscanf(optarg,"%d",&error_max);
break;
case 'p':
p = ECOMALLOC(strlen(optarg)+1,
"Error on pattern allocation");
strcpy(p,optarg);
break;
case '?':
errflag++;
}
}
/**
* Check sequence pattern length and build it in PatternPtr format
**/
if(p)
{
if (strlen(p) > 32){
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",p);
exit(EXIT_FAILURE);
}
else if ( (pattern = buildPattern(p,error_max)) == NULL)
exit(EXIT_FAILURE);
}
/**
* Check o1 pattern length and build it in PatternPtr format
**/
if(o1)
{
if (strlen(o1) > 32){
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",o1);
exit(EXIT_FAILURE);
}
else if ( (oligo_1 = buildPattern(o1,error_max)) == NULL)
exit(EXIT_FAILURE);
}
/**
* Check o2 pattern length and build it in PatternPtr format
**/
if(o2)
{
if (strlen(o2) > 32){
printf("# Sorry, ecogrep doesn't handle pattern longer than 32 characters.\
\n# Please check it out : %s\n",o2);
exit(EXIT_FAILURE);
}
else if ( (oligo_2 = buildPattern(o2,error_max)) == NULL)
exit(EXIT_FAILURE);
}
/**
* try to get the database name from environment variable
* if no database name specified in the -d option
**/
if (database == NULL)
{
database = getenv("ECOPCRDB");
if (database == NULL)
errflag++;
}
/**
* check at leat one processing is asked
* either patterns or taxid filters
**/
if ( !p && !o1 && !o2 && restricted_taxid == NULL && ignored_taxid == NULL )
{
errflag++;
}
if (errflag)
ExitUsage(errflag);
/**
* Get the taxonomy back
**/
taxonomy = read_taxonomy(database,0);
/**
* Parse the stream
**/
for (k=0 ; argc >= optind ; optind++, k++){
matchingresult = 0;
if ( (file = fopen(argv[optind], "r")) == NULL)
{
if (isatty(fileno(stdin)) == 0)
{
file = stdin;
printf("# Processing standard input...\n");
}
else
break;
}
else
printf("# Processing %s...\n",argv[optind]);
while( fgets(stream, LINE_BUFF_SIZE, file) != NULL ){
if (stream[0]!= '#')
{
stream[LINE_BUFF_SIZE-1]=0;
strcpy(orig,stream);
getLineContent(stream,seq,oligoseq_1,oligoseq_2);
/* -----------------------------------------------*/
/* is ignored if at least one option -i */
/* AND */
/* if current sequence is son of taxid */
/* -----------------------------------------------*/
is_ignored = ( (i > 0) && (eco_is_taxid_included( taxonomy,
ignored_taxid,
i,
seq->taxid))
);
/* -----------------------------------------------*/
/* is included if no -r option */
/* OR */
/* if current sequence is son of taxid */
/* -----------------------------------------------*/
is_included = ( (r == 0) || (eco_is_taxid_included( taxonomy,
restricted_taxid,
r,
seq->taxid))
);
/* ----------------------------------------------------------- */
/* match if no pattern or if pattern match current sequence */
/* ----------------------------------------------------------- */
is_matching = ( !p || (ispatternmatching(seq,pattern)));
/* ---------------------------------------------------------------------------- */
/* match if no direct oligo pattern or if pattern match current direct oligo */
/* ---------------------------------------------------------------------------- */
match_o1 = (!o1 || (ispatternmatching(oligoseq_1,oligo_1)));
/* ------------------------------------------------------------------------------- */
/* match if no revesrse oligo pattern or if pattern match current reverse oligo */
/* ------------------------------------------------------------------------------- */
match_o2 = (!o2 || (ispatternmatching(oligoseq_2,oligo_2)));
good = (is_included && is_matching && !is_ignored && match_o1 && match_o2);
if (v)
good=!good;
if ( good )
{
printf("%s",orig);
matchingresult++;
}
}
}
if ( file != stdin )
fclose(file);
printf("# %d matching result(s)\n#\n",matchingresult);
}
/**
* clean and free before leaving
**/
ECOFREE(orig,"Error in free orig");
ECOFREE(stream,"Error in free stream");
ECOFREE(ignored_taxid,"Error in free stream");
ECOFREE(restricted_taxid,"Error in free stream");
return 0;
}

BIN
src/ecogrep.gz Executable file

Binary file not shown.

123
src/ecoisundertaxon.c Normal file
View File

@ -0,0 +1,123 @@
#include "libecoPCR/ecoPCR.h"
#include <getopt.h>
#include <stdlib.h>
#define VERSION "0.1"
/* ----------------------------------------------- */
/* printout verbose mode */
/* ----------------------------------------------- */
static void printTaxon(ecotx_t *taxon){
printf("# taxid : %d | rank : %d | name : %s \n\n",taxon->taxid, taxon->rank, taxon->name);
}
/* ----------------------------------------------- */
/* printout help */
/* ----------------------------------------------- */
#define PP fprintf(stdout,
static void PrintHelp()
{
PP "\n------------------------------------------\n");
PP " ecoisundertaxon Version %s\n", VERSION);
PP "------------------------------------------\n");
PP " synopsis : searching relationship in taxonomy\n");
PP " usage: ecoisundertaxon [options] database\n");
PP "------------------------------------------\n");
PP " options:\n");
PP " -1 : [FIRST] taxomic id of the hypothetical son\n\n");
PP " -2 : [SECOND] taxonomic id of the hypothetical parent\n\n");
PP " -h : [H]elp - print <this> help\n\n");
PP " -v : [V]erbose mode. Display taxonomic information for both\n");
PP " : taxonomic id.\n\n");
PP "------------------------------------------\n");
PP " database : to match the expected format, the database\n");
PP " has to be formated first by the ecoPCRFormat.py program located.\n");
PP " in the tools directory. Type the radical only, leaving out the extension\n");
PP "------------------------------------------\n\n");
PP " https://www.grenoble.prabi.fr/trac/ecoPCR/wiki");
PP "------------------------------------------\n\n");
}
#undef PP
/* ----------------------------------------------- */
/* printout usage and exit */
/* ----------------------------------------------- */
#define PP fprintf(stderr,
static void ExitUsage(stat)
int stat;
{
PP "usage: ecoisundertaxon [-1 taxid] [-2 taxid] [-v] [-h] datafile\n");
PP "type \"ecoisundertaxon -h\" for help\n");
if (stat)
exit(stat);
}
#undef PP
/* ----------------------------------------------- */
/* MAIN */
/* ----------------------------------------------- */
int main(int argc, char **argv){
int32_t carg = 0;
int32_t taxid_1 = 0;
int32_t taxid_2 = 0;
int32_t verbose = 0;
int32_t errflag = 0;
ecotaxonomy_t *taxonomy = NULL;
ecotx_t *son = NULL;
ecotx_t *parent = NULL;
while ((carg = getopt(argc, argv, "1:2:vh")) != -1) {
switch (carg) {
case '1':
sscanf(optarg,"%d",&taxid_1);
break;
case '2':
sscanf(optarg,"%d",&taxid_2);
break;
case 'v':
verbose = 1;
break;
case 'h':
PrintHelp();
exit(0);
break;
case '?':
errflag++;
}
}
if ((argc -= optind) != 1)
errflag++;
if (errflag)
ExitUsage(errflag);
taxonomy = read_taxonomy(argv[optind],0);
son = eco_findtaxonbytaxid(taxonomy, taxid_1);
if (verbose){
parent = eco_findtaxonbytaxid(taxonomy, taxid_2);
printTaxon(son);
printTaxon(parent);
}
if (eco_isundertaxon(son, taxid_2))
printf("# taxid_1 (%d) is son of taxid_2 (%d)\n",taxid_1, taxid_2);
else
printf("# taxid_1 (%d) is NOT son of taxid_2 (%d)\n",taxid_1, taxid_2);
return 0;
}

View File

@ -1,111 +1,87 @@
#include "libecoPCR/ecoPCR.h" #include "libecoPCR/ecoPCR.h"
#include "libthermo/nnparams.h"
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include <getopt.h> #include <getopt.h>
#define VERSION "0.1" #define VERSION "1.0.1"
/* ----------------------------------------------- */ /* ----------------------------------------------- */
/* printout help */ /* ----------------------------------------------- */ /* printout help */
/* ----------------------------------------------- */
#define PP fprintf(stdout, #define PP fprintf(stdout,
static void PrintHelp() static void PrintHelp()
{ {
PP "------------------------------------------\n"); PP "------------------------------------------\n");
PP " Apat Version %s\n", VERSION); PP " ecoPCR Version %s\n", VERSION);
PP "------------------------------------------\n"); PP "------------------------------------------\n");
PP "synopsis : pattern(s) searching program\n"); PP "synopsis : searching for sequence and taxonomy hybriding with given primers\n");
PP "usage: apat [options] patfile datafile\n"); PP "usage: ecoPCR [options] <nucleotidic patterns>\n");
PP "------------------------------------------\n"); PP "------------------------------------------\n");
PP "options:\n"); PP "options:\n");
PP "-a code : [A]lphabet encoding for pattern\n"); PP "-a : Salt concentration in M for Tm computation (default 0.05 M)\n\n");
PP " code is one of : \n"); PP "-c : Consider that the database sequences are [c]ircular\n\n");
PP " dna: use IUPAC equivalences for dna/rna\n"); PP "-d : [D]atabase : to match the expected format, the database\n");
PP " prot: use IUPAC equivalences for proteins\n"); PP " has to be formatted first by the ecoPCRFormat.py program located.\n");
PP " alpha: no equivalences, just treat plain symbols\n"); PP " in the tools directory.\n");
PP " note: the equivalences are used in pattern only\n"); PP " ecoPCRFormat.py creates three file types :\n");
PP " *not* in sequence(s) (see note (4) below)\n"); PP " .sdx : contains the sequences\n");
PP " dft: alpha\n"); PP " .tdx : contains information concerning the taxonomy\n");
PP "-c : [C]ooccurences\n"); PP " .rdx : contains the taxonomy rank\n\n");
PP " print patterns cooccurence matrix \n"); PP " ecoPCR needs all the file type. As a result, you have to write the\n");
PP " dft: off\n"); PP " database radical without any extension. For example /ecoPCRDB/gbmam\n\n");
PP "-h : [H]elp - print <this> help\n"); PP "-D : Keeps the specified number of nucleotides on each side of the in silico \n");
PP "-m : [M]ultiple occurences\n"); PP " amplified sequences (including the amplified DNA fragment plus the two target \n");
PP " see -q option \n"); PP " sequences of the primers).\n\n");
PP " dft: off\n"); PP "-e : [E]rror : max errors allowed by oligonucleotide (0 by default)\n\n");
PP "-o file : [O]utput sequences\n"); PP "-h : [H]elp - print <this> help\n\n");
PP " additionaly output sequence(s) that match into\n"); PP "-i : [I]gnore the given taxonomy id.\n");
PP " 'file' in fasta format\n"); PP " Taxonomy id are available using the ecofind program.\n");
PP " dft: off\n"); PP " see its help typing ecofind -h for more information.\n\n");
PP "-p : no [Print] - don't printout hits\n"); PP "-k : [K]ingdom mode : set the kingdom mode\n");
PP " when just counts are needed\n"); PP " super kingdom mode by default.\n\n");
PP " dft: off\n"); PP "-l : minimum [L]ength : define the minimum amplication length. \n\n");
PP "-q nn : [Quorum]\n"); PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n");
PP " printout result if at least nn\n"); PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1\n");
PP " different patterns are found on the sequence\n"); PP " or OWCZARZY:2, default=1)\n\n");
PP " (with -m : at least nn different <hits>)\n"); PP "-r : [R]estricts the search to the given taxonomic id.\n");
PP " dft: # of patterns read\n"); PP " Taxonomy id are available using the ecofind program.\n");
PP "-s : no [Sort] - don't sort hits before printing\n"); PP " see its help typing ecofind -h for more information.\n\n");
PP " usually hits are printed by increasing position\n");
PP " this option will list them by pattern\n");
PP " dft: off\n");
PP "-t : [T]est sequence\n");
PP " additionnaly check if sequences are uppercase\n");
PP " this is mostly used for testing\n");
PP " dft: off\n");
PP "-u : [U]pper\n");
PP " force lower->upper sequence conversion\n");
PP " without this option lowercase symbols in sequence\n");
PP " will not be considered to as matches\n");
PP " dft: off\n");
PP "-v : [V]erbose\n");
PP " just display a kind of progress clock on stderr\n");
PP " (this is only useful if you redirect stdout)\n");
PP "\n");
PP "patfile : pattern file (see below)\n");
PP "datafile : database file (see below)\n");
PP "------------------------------------------\n");
PP "pattern file format :\n");
PP " one pattern/line\n");
PP " format : <pattern> <space> #errors\n");
PP " <pattern> := pattern<symbol>\n");
PP " or !pattern<symbol>\n");
PP " or pattern<symbol>#\n");
PP " or !pattern<symbol>#\n");
PP " <symbol> := <letter>\n");
PP " or [<letter>....<letter>]\n");
PP " <letter> := uppercase letter (A-Z)\n");
PP " <number> := a positive number indicates max number of mismatches\n");
PP " a negative number indicates max number of mismatches or indels\n");
PP " # means that no error is allowed at this position\n");
PP " ! complement the <symbol>\n");
PP " [...] means that all symbols within [] are allowed\n");
PP " in addition IUPAC equivalences may be used as symbols\n");
PP " with the -a option\n");
PP "\n");
PP "example: G[DE]S#[GIV]!HP![DE]# 1\n");
PP "\n"); PP "\n");
PP "------------------------------------------\n"); PP "------------------------------------------\n");
PP "datafile contains one or more sequences in\n"); PP "first argument : oligonucleotide for direct strand\n\n");
PP "Fasta format, with *uppercase* symbols \n"); PP "second argument : oligonucleotide for reverse strand\n\n");
PP "\n");
PP "------------------------------------------\n"); PP "------------------------------------------\n");
PP "note (1): the maximum number of patterns is %d\n", MAX_PATTERN); PP "Table result description : \n");
PP "\n"); PP "column 1 : accession number\n");
PP "note (2): the maximum length for one pattern is %d\n", MAX_PAT_LEN); PP "column 2 : sequence length\n");
PP "\n"); PP "column 3 : taxonomic id\n");
PP "note (3): indels are still experimental and are :\n"); PP "column 4 : rank\n");
PP " not handled gracefully with the # syntax\n"); PP "column 5 : species taxonomic id\n");
PP " and hits are not printed very nicely\n"); PP "column 6 : scientific name\n");
PP "\n"); PP "column 7 : genus taxonomic id\n");
PP "note (4): the IUPAC equivalences (-a option) are used\n"); PP "column 8 : genus name\n");
PP " in pattern only *not* in sequence(s).\n"); PP "column 9 : family taxonomic id\n");
PP " for instance GATN (with option -a dna) is equivalent to GAT[ACGT]\n"); PP "column 10 : family name\n");
PP " and will match GATA/GATC/GATG/GATC but will not match GATN\n"); PP "column 11 : super kingdom taxonomic id\n");
PP " (nor NNNN) in sequence.\n"); PP "column 12 : super kingdom name\n");
PP "column 13 : strand (direct or reverse)\n");
PP "column 14 : first oligonucleotide\n");
PP "column 15 : number of errors for the first strand\n");
PP "column 16 : Tm for hybridization of primer 1 at this site\n");
PP "column 17 : second oligonucleotide\n");
PP "column 18 : number of errors for the second strand\n");
PP "column 19 : Tm for hybridization of primer 1 at this site\n");
PP "column 20 : amplification length\n");
PP "column 21 : sequence\n");
PP "column 22 : definition\n");
PP "------------------------------------------\n");
PP " https://git.metabarcoding.org/obitools/ecopcr/wikis/home\n");
PP "------------------------------------------\n\n");
PP "\n"); PP "\n");
} }
@ -121,10 +97,8 @@ static void PrintHelp()
static void ExitUsage(stat) static void ExitUsage(stat)
int stat; int stat;
{ {
PP "usage: apat [-a dna|prot] [-c] [-h] [-m] [-o file] [-p]\n"); PP "usage: ecoPCR [-d database] [-l value] [-L value] [-e value] [-r taxid] [-i taxid] [-k] oligo1 oligo2\n");
PP " [-q nn] [-t] [-u] [-v]\n"); PP "type \"ecoPCR -h\" for help\n");
PP " patfile datafile\n");
PP "type \"apat -h\" for help\n");
if (stat) if (stat)
exit(stat); exit(stat);
@ -133,12 +107,15 @@ static void ExitUsage(stat)
#undef PP #undef PP
void printRepeat(ecoseq_t *seq, void printRepeat(ecoseq_t *seq,
char* primer1, char* primer2,
PNNParams tparm,
PatternPtr o1, PatternPtr o2, PatternPtr o1, PatternPtr o2,
char strand, char strand,
char kingdom, char kingdom,
int32_t pos1, int32_t pos2, int32_t pos1, int32_t pos2,
int32_t err1, int32_t err2, int32_t err1, int32_t err2,
ecotaxonomy_t *taxonomy) ecotaxonomy_t *taxonomy,
int32_t delta)
{ {
char *AC; char *AC;
int32_t seqlength; int32_t seqlength;
@ -161,12 +138,18 @@ void printRepeat(ecoseq_t *seq,
int32_t error1; int32_t error1;
int32_t error2; int32_t error2;
int32_t ldelta,rdelta;
char *amplifia = NULL; char *amplifia = NULL;
int32_t amplength; int32_t amplength;
double tm1,tm2;
double tm=0;
int32_t i;
AC = seq->AC; AC = seq->AC;
seqlength = seq->SQ_length; seqlength = seq->SQ_length;
main_taxon = &taxonomy->taxons->taxon[seq->taxid]; main_taxon = &taxonomy->taxons->taxon[seq->taxid];
taxid = main_taxon->taxid; taxid = main_taxon->taxid;
@ -221,44 +204,81 @@ void printRepeat(ecoseq_t *seq,
superkingdom_name = "###"; superkingdom_name = "###";
} }
amplength = pos2-pos1;
amplifia = getSubSequence(seq->SQ,pos1,pos2); ldelta=(pos1 <= delta)?pos1:delta;
/*rdelta=((pos2+delta)>=seqlength)?seqlength-pos2-1:delta; */
rdelta=((pos2+delta)>=seqlength)?seqlength-pos2:delta;
amplifia = getSubSequence(seq->SQ,pos1-ldelta,pos2+rdelta);
amplength= strlen(amplifia)-rdelta-ldelta;
if (strand=='R') if (strand=='R')
{ {
ecoComplementSequence(amplifia);
strncpy(oligo1,amplifia,o2->patlen); ecoComplementSequence(amplifia);
strncpy(oligo1,amplifia + rdelta ,o2->patlen);
oligo1[o2->patlen]=0; oligo1[o2->patlen]=0;
error1=err2; error1=err2;
strncpy(oligo2,amplifia + amplength - o1->patlen,o1->patlen); strncpy(oligo2, amplifia + rdelta + amplength - o1->patlen,o1->patlen);
oligo2[o1->patlen]=0; oligo2[o1->patlen]=0;
error2=err1; error2=err1;
amplifia+=o2->patlen; if (delta==0)
amplifia+=o2->patlen;
else
{
delta=ldelta;
ldelta=rdelta+o2->patlen;
rdelta=delta+o1->patlen;
}
} }
else else /* strand == 'D' */
{ {
strncpy(oligo1,amplifia,o1->patlen); strncpy(oligo1,amplifia+ldelta,o1->patlen);
oligo1[o1->patlen]=0; oligo1[o1->patlen]=0;
error1=err1; error1=err1;
strncpy(oligo2,amplifia + amplength - o2->patlen,o2->patlen); strncpy(oligo2,amplifia + ldelta + amplength - o2->patlen,o2->patlen);
oligo2[o2->patlen]=0; oligo2[o2->patlen]=0;
error2=err2; error2=err2;
amplifia+=o1->patlen; if (delta==0)
amplifia+=o1->patlen;
else
{
ldelta+=o1->patlen;
rdelta+=o2->patlen;
}
} }
ecoComplementSequence(oligo2); ecoComplementSequence(oligo2);
amplifia[amplength - o2->patlen - o1->patlen]=0; if(delta==0)
amplifia[amplength - o2->patlen - o1->patlen]=0;
else
{
delta=ldelta+rdelta+amplength-o1->patlen-o2->patlen;
for (i=0;i<ldelta;i++)
amplifia[i]|=32;
for (i=1;i<=rdelta;i++)
amplifia[delta-i]|=32;
amplifia[delta]=0;
}
printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %-32s | %2d | %5d | %s | %s\n", tm1=nparam_CalcTwoTM(tparm,oligo1,primer1,o1->patlen) - 273.15;
tm2=nparam_CalcTwoTM(tparm,oligo2,primer2,o2->patlen) - 273.15;
tm = (tm1 < tm2) ? tm1:tm2;
printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %5.2f | %-32s | %2d | %5.2f | %5d | %s | %s\n",
AC, AC,
seqlength, seqlength,
taxid, taxid,
@ -274,12 +294,15 @@ void printRepeat(ecoseq_t *seq,
strand, strand,
oligo1, oligo1,
error1, error1,
tm1,
oligo2, oligo2,
error2, error2,
amplength, tm2,
amplength - o1->patlen - o2->patlen,
amplifia, amplifia,
seq->DE seq->DE
); );
} }
int main(int argc, char **argv) int main(int argc, char **argv)
@ -300,13 +323,14 @@ int main(int argc, char **argv)
PatternPtr o1c; PatternPtr o1c;
PatternPtr o2c; PatternPtr o2c;
int32_t delta=0;
int32_t lmin=0; int32_t lmin=0;
int32_t lmax=0; int32_t lmax=0;
int32_t error_max=0; int32_t error_max=0;
int32_t errflag=0; int32_t errflag=0;
char kingdom_mode=0; char kingdom_mode=0;
char *prefix; char *prefix = NULL;
int32_t checkedSequence = 0; int32_t checkedSequence = 0;
int32_t positiveSequence= 0; int32_t positiveSequence= 0;
@ -330,43 +354,49 @@ int main(int argc, char **argv)
int32_t erri; int32_t erri;
int32_t errj; int32_t errj;
int32_t *restricted_taxid = NULL;
int32_t *ignored_taxid = NULL;
int32_t r=0;
int32_t g=0;
int32_t circular=0;
while ((carg = getopt(argc, argv, "h1:2:l:L:e:k")) != -1) { int32_t saltmethod=SALT_METHOD_SANTALUCIA;
double salt=0.05;
CNNParams tparm;
while ((carg = getopt(argc, argv, "hcd:l:L:e:i:r:km:a:tD:")) != -1) {
switch (carg) { switch (carg) {
/* -------------------- */ /* -------------------- */
case '1': /* prenier oligo */ case 'd': /* database name */
/* -------------------- */ /* -------------------- */
oligo1 = ECOMALLOC(strlen(optarg)+1, prefix = ECOMALLOC(strlen(optarg)+1,
"Error on oligo 1 allocation"); "Error on prefix allocation");
strcpy(oligo1,optarg); strcpy(prefix,optarg);
break;
/* -------------------- */
case '2': /* coocurence option */
/* -------------------- */
oligo2 = ECOMALLOC(strlen(optarg)+1,
"Error on oligo 1 allocation");
strcpy(oligo2,optarg);
break; break;
/* -------------------- */ /* -------------------- */
case 'h': /* help */ case 'h': /* help */
/* -------------------- */ /* -------------------- */
PrintHelp(); PrintHelp();
exit(0); exit(0);
break; break;
/* -------------------- */ /* ------------------------- */
case 'l': /* lmin amplification */ case 'D': /* min amplification lenght */
/* -------------------- */ /* ------------------------- */
sscanf(optarg,"%d",&delta);
break;
/* ------------------------- */
case 'l': /* min amplification lenght */
/* ------------------------- */
sscanf(optarg,"%d",&lmin); sscanf(optarg,"%d",&lmin);
break; break;
/* -------------------- */ /* -------------------------- */
case 'L': /* lmax amplification */ case 'L': /* max amplification lenght */
/* -------------------- */ /* -------------------------- */
sscanf(optarg,"%d",&lmax); sscanf(optarg,"%d",&lmax);
break; break;
@ -374,46 +404,121 @@ int main(int argc, char **argv)
case 'e': /* error max */ case 'e': /* error max */
/* -------------------- */ /* -------------------- */
sscanf(optarg,"%d",&error_max); sscanf(optarg,"%d",&error_max);
break;
/* -------------------- */
case 'k': /* set the kingdom mode */
kingdom_mode = 1; /* -------------------- */
break;
/* ------------------------------------------ */
case 'r': /* stores the restricting search taxonomic id */
/* ------------------------------------------ */
restricted_taxid = ECOREALLOC(restricted_taxid,sizeof(int32_t)*(r+1),
"Error on restricted_taxid reallocation");
sscanf(optarg,"%d",&restricted_taxid[r]);
r++;
break; break;
case 'k': /* error max */ /* --------------------------------- */
/* -------------------- */ case 'i': /* stores the taxonomic id to ignore */
kingdom_mode = 1; /* --------------------------------- */
ignored_taxid = ECOREALLOC(ignored_taxid,sizeof(int32_t)*(g+1),
"Error on excluded_taxid reallocation");
sscanf(optarg,"%d",&ignored_taxid[g]);
g++;
break; break;
/* -------------------- */ /* -------------------- */
case '?': /* bad option */ case 'c': /* stores the taxonomic id to ignore */
/* --------------------------------- */
circular = 1;
break;
/* --------------------------------- */
case 'm': /* set salt method */
/* --------------------------------- */
sscanf(optarg,"%d",&(saltmethod));
break;
/* --------------------------------- */
case 'a': /* set salt */
/* --------------------------------- */
sscanf(optarg,"%lf",&(salt));
break;
case '?': /* bad option */
/* -------------------- */ /* -------------------- */
errflag++; errflag++;
} }
} }
if ((argc -= optind) != 1) /**
errflag++; * check the path to the database is given as last argument
*/
if ((argc -= optind) == 2)
{
oligo1 = ECOMALLOC(strlen(argv[optind])+1,
"Error on oligo1 allocation");
strcpy(oligo1,argv[optind]);
optind++;
oligo2 = ECOMALLOC(strlen(argv[optind])+1,
"Error on oligo1 allocation");
strcpy(oligo2,argv[optind]);
if (circular)
{
circular = strlen(oligo1);
if (strlen(oligo2)>(size_t)circular)
circular = strlen(oligo2);
}
}
else
errflag++;
if (prefix == NULL)
{
prefix = getenv("ECOPCRDB");
if (prefix == NULL)
errflag++;
}
nparam_InitParams(&tparm,DEF_CONC_PRIMERS,
DEF_CONC_PRIMERS,
salt,
saltmethod);
if (!oligo1 || !oligo2) if (!oligo1 || !oligo2)
errflag++; errflag++;
if (errflag) if (errflag)
ExitUsage(errflag); ExitUsage(errflag);
prefix = argv[optind];
o1 = buildPattern(oligo1,error_max); o1 = buildPattern(oligo1,error_max);
o2 = buildPattern(oligo2,error_max); o2 = buildPattern(oligo2,error_max);
o1c = complementPattern(o1); o1c = complementPattern(o1);
o2c = complementPattern(o2); o2c = complementPattern(o2);
printf("#@ecopcr-v2\n");
printf("#\n"); printf("#\n");
printf("# ecoPCR version %s\n",VERSION); printf("# ecoPCR version %s\n",VERSION);
printf("# direct strand oligo1 : %-32s ; oligo2c : %32s\n", o1->cpat,o2c->cpat); printf("# direct strand oligo1 : %-32s ; oligo2c : %32s\n", o1->cpat,o2c->cpat);
printf("# reverse strand oligo2 : %-32s ; oligo1c : %32s\n", o2->cpat,o1c->cpat); printf("# reverse strand oligo2 : %-32s ; oligo1c : %32s\n", o2->cpat,o1c->cpat);
printf("# max error count by oligonucleotide : %d\n",error_max); printf("# max error count by oligonucleotide : %d\n",error_max);
double tm,tm1,tm2;
tm1=nparam_CalcSelfTM(&tparm,o1->cpat,o1->patlen) - 273.15;
tm2=nparam_CalcSelfTM(&tparm,o2->cpat,o2->patlen) - 273.15;
tm = (tm1 < tm2) ? tm1:tm2;
printf("# optimal Tm for primers 1 : %5.2f\n",tm1);
printf("# optimal Tm for primers 2 : %5.2f\n",tm2);
printf("# database : %s\n",prefix); printf("# database : %s\n",prefix);
if (lmin && lmax) if (lmin && lmax)
printf("# amplifiat length between [%d,%d] bp\n",lmin,lmax); printf("# amplifiat length between [%d,%d] bp\n",lmin,lmax);
@ -425,104 +530,173 @@ int main(int argc, char **argv)
printf("# output in kingdom mode\n"); printf("# output in kingdom mode\n");
else else
printf("# output in superkingdom mode\n"); printf("# output in superkingdom mode\n");
if (circular)
printf("# DB sequences are considered as circular\n");
else
printf("# DB sequences are considered as linear\n");
printf("#\n"); printf("#\n");
taxonomy = read_taxonomy(prefix); taxonomy = read_taxonomy(prefix,0);
seq = ecoseq_iterator(prefix); seq = ecoseq_iterator(prefix);
checkedSequence = 0; checkedSequence = 0;
positiveSequence= 0; positiveSequence= 0;
amplifiatCount = 0; amplifiatCount = 0;
while(seq) while(seq)
{ {
checkedSequence++; checkedSequence++;
/**
scname = taxonomy->taxons->taxon[seq->taxid].name; * check if current sequence should be included
strncpy(head,seq->SQ,10); **/
head[10]=0; if ( (r == 0) ||
strncpy(tail,seq->SQ+seq->SQ_length-10,10); (eco_is_taxid_included(taxonomy,
tail[10]=0; restricted_taxid,
r,
taxonomy->taxons->taxon[seq->taxid].taxid)
)
)
if ((g == 0) ||
!(eco_is_taxid_included(taxonomy,
ignored_taxid,
g,
taxonomy->taxons->taxon[seq->taxid].taxid)
)
)
{
apatseq=ecoseq2apatseq(seq,apatseq); //scname = taxonomy->taxons->taxon[seq->taxid].name;
//strncpy(head,seq->SQ,10);
//head[10]=0;
//strncpy(tail,seq->SQ+seq->SQ_length-10,10);
//tail[10]=0;
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen); apatseq=ecoseq2apatseq(seq,apatseq,circular);
o2cHits= 0;
if (o1Hits)
{
stktmp = apatseq->hitpos[0];
begin = stktmp->val[0] + o1->patlen;
if (lmax)
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
else
length= apatseq->seqlen - begin;
o2cHits = ManberAll(apatseq,o2c,1,begin,length); o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen+apatseq->circular);
o2cHits= 0;
if (o2cHits)
for (i=0; i < o1Hits;i++)
{
posi = apatseq->hitpos[0]->val[i];
erri = apatseq->hiterr[0]->val[i];
for (j=0; j < o2cHits; j++)
{
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
errj =apatseq->hiterr[1]->val[j];
length=posj - posi + 1 - o1->patlen - o2->patlen;
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
}
}
}
o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
o1cHits= 0;
if (o2Hits)
{
stktmp = apatseq->hitpos[2];
begin = stktmp->val[0] + o2->patlen;
if (lmax)
length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
else
length= apatseq->seqlen - begin;
o1cHits = ManberAll(apatseq,o1c,3,begin,length); if (o1Hits)
if (o1cHits)
for (i=0; i < o2Hits;i++)
{ {
posi = apatseq->hitpos[2]->val[i]; stktmp = apatseq->hitpos[0];
erri = apatseq->hiterr[2]->val[i]; begin = stktmp->val[0] + o1->patlen;
for (j=0; j < o1cHits; j++)
if (lmax)
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
else
length= apatseq->seqlen - begin;
if (circular)
{ {
posj=apatseq->hitpos[3]->val[j] + o1c->patlen; begin = 0;
errj=apatseq->hiterr[3]->val[j]; length=apatseq->seqlen+circular;
length=posj - posi + 1 - o1->patlen - o2->patlen; }
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax))) if (o2cHits)
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy); for (i=0; i < o1Hits;i++)
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname); {
} posi = apatseq->hitpos[0]->val[i];
if (posi < apatseq->seqlen)
{
erri = apatseq->hiterr[0]->val[i];
for (j=0; j < o2cHits; j++)
{
posj =apatseq->hitpos[1]->val[j];
if (posj < apatseq->seqlen)
{
posj+=o2c->patlen;
// printf("coucou %d %d %d\n",posi,posj,apatseq->seqlen);
errj = apatseq->hiterr[1]->val[j];
length = 0;
if (posj > posi)
length = posj - posi - o1->patlen - o2->patlen;
else {
if (circular > 0)
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
}
if ((length>0) && // For when primers touch or overlap
(!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
{
printRepeat(seq,oligo1,oligo2,&tparm,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
}
}
}
}
}
} }
}
o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
o1cHits= 0;
if (o2Hits)
{
stktmp = apatseq->hitpos[2];
begin = stktmp->val[0] + o2->patlen;
if (lmax)
length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
else
length= apatseq->seqlen - begin;
if (circular)
{
begin = 0;
length=apatseq->seqlen+circular;
}
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
// printf("circular= %d\n",circular);
if (o1cHits)
for (i=0; i < o2Hits;i++)
{
posi = apatseq->hitpos[2]->val[i];
if (posi < apatseq->seqlen)
{
erri = apatseq->hiterr[2]->val[i];
for (j=0; j < o1cHits; j++)
{
posj=apatseq->hitpos[3]->val[j];
if (posj < apatseq->seqlen)
{
posj+=o1c->patlen;
errj=apatseq->hiterr[3]->val[j];
length = 0;
if (posj > posi)
length = posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : deleted by <EC> (prior to the OBITools3) */
else {
if (circular > 0)
length = posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
}
if ((length>0) && // For when primers touch or overlap
(!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
{
printRepeat(seq,oligo1,oligo2,&tparm,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
}
}
}
}
}
}
} /* End of taxonomic selection */
delete_ecoseq(seq); delete_ecoseq(seq);
seq = ecoseq_iterator(NULL); seq = ecoseq_iterator(NULL);
} }
ECOFREE(restricted_taxid, "Error: could not free restricted_taxid\n");
ECOFREE(ignored_taxid, "Error: could not free excluded_taxid\n");
return 0; return 0;
} }

17
src/global.mk Normal file
View File

@ -0,0 +1,17 @@
LIBPATH= -Llibapat -LlibecoPCR -Llibthermo
MAKEDEPEND = gcc -M $(CPPFLAGS) -o $*.d $<
CC=gcc
CFLAGS= -O3 -w
default: all
%.o: %.c
$(CC) $(CFLAGS) -c -o $@ $<
%.P : %.c
$(MAKEDEPEND)
@sed 's/\($*\)\.o[ :]*/\1.o $@ : /g' < $*.d > $@; \
rm -f $*.d; [ -s $@ ] || rm -f $@
include $(SRCS:.c=.P)

View File

@ -27,13 +27,9 @@
#define PROTO 1 /* prototypes flag */ #define PROTO 1 /* prototypes flag */
#endif #endif
#ifdef MAC_OS_C
#define Vrai true /* TC boolean values */
#define Faux false /* */
#else
#define Vrai 0x1 /* bool values = TRUE */ #define Vrai 0x1 /* bool values = TRUE */
#define Faux 0x0 /* = FALSE */ #define Faux 0x0 /* = FALSE */
#endif
#define Nil NULL /* nil pointer */ #define Nil NULL /* nil pointer */
@ -42,28 +38,7 @@
#define kBigUInt16 0xffff /* plus grand 16 bits ~signe */ #define kBigUInt16 0xffff /* plus grand 16 bits ~signe */
#define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */ #define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */
#ifdef MAC_OS_C
/* ==================================================== */
/* Types (for Macintosh ThinK C || MWerks) */
/* ==================================================== */
/* --- specific sizes --------- */
typedef long Int32; /* Int32 = 32 bits signe */
typedef unsigned long UInt32; /* UInt32 = 32 bits ~signe */
typedef short Int16; /* Int16 = 16 bits signe */
typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */
typedef char Int8; /* Int8 = 8 bits signe */
typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */
/* --- default types ---------- */
typedef Boolean Bool; /* booleen */
typedef long Int; /* 'natural' int (>= 32 bits) */
typedef void *Ptr; /* pointeur */
#elif ((defined SUN) || (defined SGI) || (defined UNIX))
/* ==================================================== */ /* ==================================================== */
/* Types (for Sun & Iris - 32 bits machines) */ /* Types (for Sun & Iris - 32 bits machines) */
/* ==================================================== */ /* ==================================================== */
@ -84,14 +59,7 @@ typedef int Int; /* 'natural' int (>= 32 bits) */
typedef void *Ptr; /* pointeur */ typedef void *Ptr; /* pointeur */
#else
/* ==================================================== */
/* Types (for undefined machines) */
/* ==================================================== */
#error undefined MACHINE <please edit Gmach.h>
#endif
/* ==================================================== */ /* ==================================================== */
/* special macro for prototypes */ /* special macro for prototypes */

24
src/libapat/Makefile Normal file
View File

@ -0,0 +1,24 @@
SOURCES = apat_parse.c \
apat_search.c \
libstki.c
SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libapat.a
RANLIB=ranlib
include ../global.mk
all: $(LIBFILE)
clean:
rm -rf $(OBJECTS) $(LIBFILE)
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?
$(RANLIB) $@

View File

@ -103,6 +103,7 @@ typedef struct { /* sequence */
Int32 seqlen; /* sequence length */ Int32 seqlen; /* sequence length */
Int32 seqsiz; /* sequence buffer size */ Int32 seqsiz; /* sequence buffer size */
Int32 datsiz; /* data buffer size */ Int32 datsiz; /* data buffer size */
Int32 circular;
UInt8 *data; /* data buffer */ UInt8 *data; /* data buffer */
char *cseq; /* sequence buffer */ char *cseq; /* sequence buffer */
StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */ StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */

View File

@ -80,14 +80,14 @@ int CreateS(Pattern *ppat, Int32 lalpha)
/* -------------------------------------------- */ /* -------------------------------------------- */
Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{ {
Int32 pos; UInt32 pos;
UInt32 smask, r; UInt32 smask, r;
UInt8 *data; UInt8 *data;
StackiPtr *stkpos, *stkerr; StackiPtr *stkpos, *stkerr;
UInt32 end; UInt32 end;
end = begin + length; end = begin + length;
end = (end <= pseq->seqlen) ? end:pseq->seqlen; end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
/* create local masks */ /* create local masks */
@ -127,7 +127,7 @@ Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{ {
int e, emax, found; int e, emax, found;
Int32 pos; UInt32 pos;
UInt32 smask, cmask, sindx; UInt32 smask, cmask, sindx;
UInt32 *pr, r[2 * MAX_PAT_ERR + 2]; UInt32 *pr, r[2 * MAX_PAT_ERR + 2];
UInt8 *data; UInt8 *data;
@ -135,7 +135,7 @@ Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
UInt32 end; UInt32 end;
end = begin + length; end = begin + length;
end = (end <= pseq->seqlen) ? end:pseq->seqlen; end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
/* create local masks */ /* create local masks */
emax = ppat->maxerr; emax = ppat->maxerr;
@ -193,7 +193,7 @@ Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{ {
int e, emax, found; int e, emax, found;
Int32 pos; UInt32 pos;
UInt32 smask, cmask, sindx; UInt32 smask, cmask, sindx;
UInt32 *pr, r[2 * MAX_PAT_ERR + 2]; UInt32 *pr, r[2 * MAX_PAT_ERR + 2];
UInt8 *data; UInt8 *data;
@ -201,7 +201,7 @@ Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
UInt32 end; UInt32 end;
end = begin + length; end = begin + length;
end = (end <= pseq->seqlen) ? end:pseq->seqlen; end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
/* create local masks */ /* create local masks */
emax = ppat->maxerr; emax = ppat->maxerr;

31
src/libecoPCR/Makefile Normal file
View File

@ -0,0 +1,31 @@
SOURCES = ecoapat.c \
ecodna.c \
ecoError.c \
ecoIOUtils.c \
ecoMalloc.c \
ecorank.c \
ecoseq.c \
ecotax.c \
ecofilter.c \
econame.c
SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libecoPCR.a
RANLIB= ranlib
include ../global.mk
all: $(LIBFILE)
clean:
rm -rf $(OBJECTS) $(LIBFILE)
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?
$(RANLIB) $@

View File

@ -2,7 +2,15 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
/*
* print the message given as argument and exit the program
* @param error error number
* @param message the text explaining what's going on
* @param filename the file source where the program failed
* @param linenumber the line where it has failed
* filename and linenumber are written at pre-processing
* time by a macro
*/
void ecoError(int32_t error, void ecoError(int32_t error,
const char* message, const char* message,
const char * filename, const char * filename,

View File

@ -16,20 +16,19 @@ int32_t is_big_endian()
int32_t swap_int32_t(int32_t i) int32_t swap_int32_t(int32_t i)
{ {
return SWAPINT32(i); return SWAPINT32(i);
} }
/**
* Read part of the file
* @param *f the database
* @param recordSize the size to be read
*
* @return buffer
*/
void *read_ecorecord(FILE *f,int32_t *recordSize) void *read_ecorecord(FILE *f,int32_t *recordSize)
{ {
static void *buffer =NULL; static void *buffer =NULL;
@ -79,10 +78,14 @@ void *read_ecorecord(FILE *f,int32_t *recordSize)
/**
* Open the database and check it's readable
* @param filename name of the database (.sdx, .rdx, .tbx)
* @param sequencecount buffer - pointer to variable storing the number of occurence
* @param abort_on_open_error boolean to define the behaviour in case of error
* while opening the database
* @return FILE type
**/
FILE *open_ecorecorddb(const char *filename, FILE *open_ecorecorddb(const char *filename,
int32_t *sequencecount, int32_t *sequencecount,
int32_t abort_on_open_error) int32_t abort_on_open_error)

View File

@ -56,11 +56,11 @@ typedef struct {
} ecotxformat_t; } ecotxformat_t;
typedef struct { typedef struct ecotxnode {
int32_t taxid; int32_t taxid;
int32_t rank; int32_t rank;
int32_t parent; struct ecotxnode *parent;
char *name; char *name;
} ecotx_t; } ecotx_t;
typedef struct { typedef struct {
@ -80,12 +80,42 @@ typedef struct {
char* label[1]; char* label[1];
} ecorankidx_t; } ecorankidx_t;
/*
*
* Taxonomy name types
*
*/
typedef struct { typedef struct {
int32_t is_scientificname;
int32_t namelength;
int32_t classlength;
int32_t taxid;
char names[1];
} econameformat_t;
typedef struct {
char *name;
char *classname;
int32_t is_scientificname;
struct ecotxnode *taxon;
} econame_t;
typedef struct {
int32_t count;
econame_t names[1];
} econameidx_t;
typedef struct {
ecorankidx_t *ranks; ecorankidx_t *ranks;
econameidx_t *names;
ecotxidx_t *taxons; ecotxidx_t *taxons;
} ecotaxonomy_t; } ecotaxonomy_t;
/***************************************************** /*****************************************************
* *
* Function declarations * Function declarations
@ -175,6 +205,9 @@ ecoseq_t *readnext_ecoseq(FILE *);
ecorankidx_t *read_rankidx(const char *filename); ecorankidx_t *read_rankidx(const char *filename);
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
/** /**
* Read taxonomy data as formated by the ecoPCRFormat.py script. * Read taxonomy data as formated by the ecoPCRFormat.py script.
@ -187,10 +220,13 @@ ecorankidx_t *read_rankidx(const char *filename);
* @return pointer to a taxonomy index structure * @return pointer to a taxonomy index structure
*/ */
ecotxidx_t *read_taxonomyidx(const char *filename); ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2);
ecotaxonomy_t *read_taxonomy(const char *prefix); ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
ecoseq_t *ecoseq_iterator(const char *prefix); ecoseq_t *ecoseq_iterator(const char *prefix);
@ -215,7 +251,7 @@ int32_t delete_apatseq(SeqPtr pseq);
PatternPtr buildPattern(const char *pat, int32_t error_max); PatternPtr buildPattern(const char *pat, int32_t error_max);
PatternPtr complementPattern(PatternPtr pat); PatternPtr complementPattern(PatternPtr pat);
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out); SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
char *ecoComplementPattern(char *nucAcSeq); char *ecoComplementPattern(char *nucAcSeq);
char *ecoComplementSequence(char *nucAcSeq); char *ecoComplementSequence(char *nucAcSeq);
@ -227,4 +263,7 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
#endif /*ECOPCR_H_*/ #endif /*ECOPCR_H_*/

View File

@ -50,10 +50,13 @@ void EncodeSequence(SeqPtr seq)
while (*cseq) { while (*cseq) {
*data++ = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0); *data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
data++;
cseq++; cseq++;
} }
for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++)
*data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
for (i = 0 ; i < MAX_PATTERN ; i++) for (i = 0 ; i < MAX_PATTERN ; i++)
seq->hitpos[i]->top = seq->hiterr[i]->top = 0; seq->hitpos[i]->top = seq->hiterr[i]->top = 0;
@ -63,7 +66,7 @@ void EncodeSequence(SeqPtr seq)
#undef IS_UPPER #undef IS_UPPER
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out) SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular)
{ {
int i; int i;
@ -83,20 +86,22 @@ SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out)
} }
} }
out->name = in->AC; out->name = in->AC;
out->seqsiz = out->seqlen = in->SQ_length; out->seqsiz = out->seqlen = in->SQ_length;
out->circular = circular;
if (!out->data) if (!out->data)
{ {
out->data = ECOMALLOC(out->seqlen *sizeof(UInt8), out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8),
"Error in Allocation of a new Seq data member"); "Error in Allocation of a new Seq data member");
out->datsiz= out->seqlen; out->datsiz= out->seqlen+circular;
} }
else if (out->seqlen >= out->datsiz) else if ((out->seqlen +circular) >= out->datsiz)
{ {
out->data = ECOREALLOC(out->data,out->seqlen, out->data = ECOREALLOC(out->data,(out->seqlen+circular),
"Error during Seq data buffer realloc"); "Error during Seq data buffer realloc");
out->datsiz= out->seqlen; out->datsiz= out->seqlen+circular;
} }
out->cseq = in->SQ; out->cseq = in->SQ;
@ -191,4 +196,4 @@ PatternPtr complementPattern(PatternPtr pat)
return pattern; return pattern;
} }

View File

@ -104,27 +104,52 @@ char *ecoComplementSequence(char *nucAcSeq)
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end) char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
/*
extract subsequence from nucAcSeq [begin,end[
*/
{ {
static char *buffer = NULL; static char *buffer = NULL;
static int32_t buffSize= 0; static int32_t buffSize= 0;
int32_t length; int32_t length;
length = end - begin; if (begin < end)
if (length >= buffSize)
{ {
buffSize = length+1; length = end - begin;
if (buffer)
buffer=ECOREALLOC(buffer,buffSize, if (length >= buffSize)
"Error in reallocating sub sequence buffer"); {
else buffSize = length+1;
buffer=ECOMALLOC(buffSize, if (buffer)
"Error in allocating sub sequence buffer"); buffer=ECOREALLOC(buffer,buffSize,
"Error in reallocating sub sequence buffer");
else
buffer=ECOMALLOC(buffSize,
"Error in allocating sub sequence buffer");
}
strncpy(buffer,nucAcSeq + begin,length);
buffer[length]=0;
}
else
{
length = end + strlen(nucAcSeq) - begin;
if (length >= buffSize)
{
buffSize = length+1;
if (buffer)
buffer=ECOREALLOC(buffer,buffSize,
"Error in reallocating sub sequence buffer");
else
buffer=ECOMALLOC(buffSize,
"Error in allocating sub sequence buffer");
}
strncpy(buffer,nucAcSeq+begin,length - end);
strncpy(buffer+(length-end),nucAcSeq ,end);
buffer[length]=0;
} }
strncpy(buffer,nucAcSeq + begin,length);
buffer[length]=0;
return buffer; return buffer;
} }

20
src/libecoPCR/ecofilter.c Normal file
View File

@ -0,0 +1,20 @@
#include "ecoPCR.h"
int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
int32_t *restricted_taxid,
int32_t tab_len,
int32_t taxid)
{
int i;
ecotx_t *taxon;
taxon = eco_findtaxonbytaxid(taxonomy, taxid);
if (taxon)
for (i=0; i < tab_len; i++)
if ( (taxon->taxid == restricted_taxid[i]) ||
(eco_isundertaxon(taxon, restricted_taxid[i])) )
return 1;
return 0;
}

61
src/libecoPCR/econame.c Normal file
View File

@ -0,0 +1,61 @@
#include "ecoPCR.h"
#include <string.h>
#include <stdlib.h>
static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
{
int32_t count;
FILE *f;
econameidx_t *indexname;
int32_t i;
f = open_ecorecorddb(filename,&count,1);
indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
indexname->count=count;
for (i=0; i < count; i++){
readnext_econame(f,(indexname->names)+i,taxonomy);
}
return indexname;
}
econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
{
econameformat_t *raw;
int32_t rs;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
if (is_big_endian())
{
raw->is_scientificname = swap_int32_t(raw->is_scientificname);
raw->namelength = swap_int32_t(raw->namelength);
raw->classlength = swap_int32_t(raw->classlength);
raw->taxid = swap_int32_t(raw->taxid);
}
name->is_scientificname=raw->is_scientificname;
name->name = ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name");
strncpy(name->name,raw->names,raw->namelength);
name->name[raw->namelength]=0;
name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname");
strncpy(name->classname,(raw->names+raw->namelength),raw->classlength);
name->classname[raw->classlength]=0;
name->taxon = taxonomy->taxons->taxon + raw->taxid;
return name;
}

View File

@ -12,14 +12,14 @@ ecorankidx_t *read_rankidx(const char *filename)
int32_t i; int32_t i;
int32_t rs; int32_t rs;
char *buffer; char *buffer;
f = open_ecorecorddb(filename,&count,1); f = open_ecorecorddb(filename,&count,1);
index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1), index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
"Allocate rank index"); "Allocate rank index");
index->count=count; index->count=count;
for (i=0; i < count; i++) for (i=0; i < count; i++)
{ {
buffer = read_ecorecord(f,&rs); buffer = read_ecorecord(f,&rs);
@ -27,21 +27,18 @@ ecorankidx_t *read_rankidx(const char *filename)
"Allocate rank label"); "Allocate rank label");
strncpy(index->label[i],buffer,rs); strncpy(index->label[i],buffer,rs);
} }
return index; return index;
} }
int32_t rank_index(const char* label,ecorankidx_t* ranks) int32_t rank_index(const char* label,ecorankidx_t* ranks)
{ {
char **rep; char **rep;
fprintf(stderr,"Looking for rank -%s-... ",label);
rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel); rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
if (rep) if (rep)
return rep-ranks->label; return rep-ranks->label;
else
ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
return -1; return -1;
} }

View File

@ -4,6 +4,7 @@
#include <zlib.h> #include <zlib.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <ctype.h>
static FILE *open_seqfile(const char *prefix,int32_t index); static FILE *open_seqfile(const char *prefix,int32_t index);
@ -11,32 +12,32 @@ static FILE *open_seqfile(const char *prefix,int32_t index);
ecoseq_t *new_ecoseq() ecoseq_t *new_ecoseq()
{ {
void *tmp; void *tmp;
tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure"); tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
return tmp; return tmp;
} }
int32_t delete_ecoseq(ecoseq_t * seq) int32_t delete_ecoseq(ecoseq_t * seq)
{ {
if (seq) if (seq)
{ {
if (seq->AC) if (seq->AC)
ECOFREE(seq->AC,"Free sequence AC"); ECOFREE(seq->AC,"Free sequence AC");
if (seq->DE) if (seq->DE)
ECOFREE(seq->DE,"Free sequence DE"); ECOFREE(seq->DE,"Free sequence DE");
if (seq->SQ) if (seq->SQ)
ECOFREE(seq->SQ,"Free sequence SQ"); ECOFREE(seq->SQ,"Free sequence SQ");
ECOFREE(seq,"Free sequence structure"); ECOFREE(seq,"Free sequence structure");
return 0; return 0;
} }
return 1; return 1;
} }
@ -49,9 +50,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
ecoseq_t *tmp; ecoseq_t *tmp;
int32_t lstr; int32_t lstr;
tmp = new_ecoseq(); tmp = new_ecoseq();
tmp->taxid=taxid_idx; tmp->taxid=taxid_idx;
if (AC) if (AC)
{ {
lstr =strlen(AC); lstr =strlen(AC);
@ -79,7 +80,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
} }
/**
* ?? used ??
**/
FILE *open_ecoseqdb(const char *filename, FILE *open_ecoseqdb(const char *filename,
int32_t *sequencecount) int32_t *sequencecount)
{ {
@ -95,12 +98,14 @@ ecoseq_t *readnext_ecoseq(FILE *f)
int32_t comp_status; int32_t comp_status;
unsigned long int seqlength; unsigned long int seqlength;
int32_t rs; int32_t rs;
char *c;
int32_t i;
raw = read_ecorecord(f,&rs); raw = read_ecorecord(f,&rs);
if (!raw) if (!raw)
return NULL; return NULL;
if (is_big_endian()) if (is_big_endian())
{ {
raw->CSQ_length = swap_int32_t(raw->CSQ_length); raw->CSQ_length = swap_int32_t(raw->CSQ_length);
@ -108,38 +113,49 @@ ecoseq_t *readnext_ecoseq(FILE *f)
raw->SQ_length = swap_int32_t(raw->SQ_length); raw->SQ_length = swap_int32_t(raw->SQ_length);
raw->taxid = swap_int32_t(raw->taxid); raw->taxid = swap_int32_t(raw->taxid);
} }
seq = new_ecoseq(); seq = new_ecoseq();
seq->taxid = raw->taxid; seq->taxid = raw->taxid;
seq->AC = ECOMALLOC(strlen(raw->AC) +1, seq->AC = ECOMALLOC(strlen(raw->AC) +1,
"Allocate Sequence Accesion number"); "Allocate Sequence Accesion number");
strncpy(seq->AC,raw->AC,strlen(raw->AC)); strncpy(seq->AC,raw->AC,strlen(raw->AC));
seq->DE = ECOMALLOC(raw->DE_length+1, seq->DE = ECOMALLOC(raw->DE_length+1,
"Allocate Sequence definition"); "Allocate Sequence definition");
strncpy(seq->DE,raw->data,raw->DE_length); strncpy(seq->DE,raw->data,raw->DE_length);
seqlength = seq->SQ_length = raw->SQ_length; seqlength = seq->SQ_length = raw->SQ_length;
compressed = raw->data + raw->DE_length; compressed = raw->data + raw->DE_length;
seq->SQ = ECOMALLOC(seqlength+1, seq->SQ = ECOMALLOC(seqlength+1,
"Allocate sequence buffer"); "Allocate sequence buffer");
comp_status = uncompress((unsigned char*)seq->SQ, comp_status = uncompress((unsigned char*)seq->SQ,
&seqlength, &seqlength,
(unsigned char*)compressed, (unsigned char*)compressed,
raw->CSQ_length); raw->CSQ_length);
if (comp_status != Z_OK) if (comp_status != Z_OK)
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data"); ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
for (c=seq->SQ,i=0;i<seqlength;c++,i++)
*c=toupper(*c);
return seq; return seq;
} }
/**
* Open the sequences database (.sdx file)
* @param prefix name of the database (radical without extension)
* @param index integer
*
* @return file object
*/
FILE *open_seqfile(const char *prefix,int32_t index) FILE *open_seqfile(const char *prefix,int32_t index)
{ {
char filename_buffer[1024]; char filename_buffer[1024];
@ -152,22 +168,22 @@ FILE *open_seqfile(const char *prefix,int32_t index)
"%s_%03d.sdx", "%s_%03d.sdx",
prefix, prefix,
index); index);
fprintf(stderr,"Coucou %s\n",filename_buffer); // fprintf(stderr,"# Coucou %s\n",filename_buffer);
if (filename_length >= 1024) if (filename_length >= 1024)
ECOERROR(ECO_ASSERT_ERROR,"file name is too long"); ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
filename_buffer[filename_length]=0; filename_buffer[filename_length]=0;
input=open_ecorecorddb(filename_buffer,&seqcount,0); input=open_ecorecorddb(filename_buffer,&seqcount,0);
if (input) if (input)
fprintf(stderr,"Reading file %s containing %d sequences...\n", fprintf(stderr,"# Reading file %s containing %d sequences...\n",
filename_buffer, filename_buffer,
seqcount); seqcount);
return input; return input;
} }
@ -177,38 +193,38 @@ ecoseq_t *ecoseq_iterator(const char *prefix)
static int32_t current_file_idx = 1; static int32_t current_file_idx = 1;
static char current_prefix[1024]; static char current_prefix[1024];
ecoseq_t *seq; ecoseq_t *seq;
if (prefix) if (prefix)
{ {
current_file_idx = 1; current_file_idx = 1;
if (current_seq_file) if (current_seq_file)
fclose(current_seq_file); fclose(current_seq_file);
strncpy(current_prefix,prefix,1023); strncpy(current_prefix,prefix,1023);
current_prefix[1024]=0; current_prefix[1024]=0;
current_seq_file = open_seqfile(current_prefix, current_seq_file = open_seqfile(current_prefix,
current_file_idx); current_file_idx);
if (!current_seq_file) if (!current_seq_file)
return NULL; return NULL;
} }
seq = readnext_ecoseq(current_seq_file); seq = readnext_ecoseq(current_seq_file);
if (!seq && feof(current_seq_file)) if (!seq && feof(current_seq_file))
{ {
current_file_idx++; current_file_idx++;
fclose(current_seq_file); fclose(current_seq_file);
current_seq_file = open_seqfile(current_prefix, current_seq_file = open_seqfile(current_prefix,
current_file_idx); current_file_idx);
if (current_seq_file) if (current_seq_file)
seq = readnext_ecoseq(current_seq_file); seq = readnext_ecoseq(current_seq_file);
} }
return seq; return seq;
} }

View File

@ -5,42 +5,65 @@
static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon); static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
/**
ecotxidx_t *read_taxonomyidx(const char *filename) * Open the taxonomy database
* @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2)
{ {
int32_t count; int32_t count;
int32_t count2;
FILE *f; FILE *f;
FILE *f2;
ecotxidx_t *index; ecotxidx_t *index;
int32_t i; int32_t i;
f = open_ecorecorddb(filename,&count,1); f = open_ecorecorddb(filename,&count,1);
f2 = open_ecorecorddb(filename2,&count2,0);
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1),
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
"Allocate taxonomy"); "Allocate taxonomy");
index->count=count; index->count=count+count2;
for (i=0; i < count; i++) fprintf(stderr,"Reading %d taxa...\n",count);
for (i=0; i < count; i++){
readnext_ecotaxon(f,&(index->taxon[i])); readnext_ecotaxon(f,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (int32_t)index->taxon[i].parent;
}
if (count2>0)
fprintf(stderr,"Reading %d local taxa...\n",count2);
else
fprintf(stderr,"No local taxon\n");
for (i=0; i < count2; i++){
readnext_ecotaxon(f2,&(index->taxon[count+i]));
index->taxon[count+i].parent=index->taxon + (int32_t)index->taxon[count+i].parent;
}
return index; return index;
} }
int32_t delete_taxonomy(ecotxidx_t *index) int32_t delete_taxonomy(ecotxidx_t *index)
{ {
int32_t i; int32_t i;
if (index) if (index)
{ {
for (i=0; i< index->count; i++) for (i=0; i< index->count; i++)
if (index->taxon[i].name) if (index->taxon[i].name)
ECOFREE(index->taxon[i].name,"Free scientific name"); ECOFREE(index->taxon[i].name,"Free scientific name");
ECOFREE(index,"Free Taxonomy"); ECOFREE(index,"Free Taxonomy");
return 0; return 0;
} }
return 1; return 1;
} }
@ -52,23 +75,32 @@ int32_t delete_taxon(ecotx_t *taxon)
{ {
if (taxon->name) if (taxon->name)
ECOFREE(taxon->name,"Free scientific name"); ECOFREE(taxon->name,"Free scientific name");
ECOFREE(taxon,"Free Taxon"); ECOFREE(taxon,"Free Taxon");
return 0; return 0;
} }
return 1; return 1;
} }
/**
* Read the database for a given taxon a save the data
* into the taxon structure(if any found)
* @param *f pointer to FILE type returned by fopen
* @param *taxon pointer to the structure
*
* @return a ecotx_t structure if any taxon found else NULL
*/
ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon) ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
{ {
ecotxformat_t *raw; ecotxformat_t *raw;
int32_t rs; int32_t rs;
raw = read_ecorecord(f,&rs); raw = read_ecorecord(f,&rs);
if (!raw) if (!raw)
return NULL; return NULL;
@ -77,105 +109,172 @@ ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
raw->namelength = swap_int32_t(raw->namelength); raw->namelength = swap_int32_t(raw->namelength);
raw->parent = swap_int32_t(raw->parent); raw->parent = swap_int32_t(raw->parent);
raw->rank = swap_int32_t(raw->rank); raw->rank = swap_int32_t(raw->rank);
raw->taxid = swap_int32_t(raw->taxid); raw->taxid = swap_int32_t(raw->taxid);
} }
taxon->parent = raw->parent; taxon->parent = (ecotx_t*)raw->parent;
taxon->taxid = raw->taxid; taxon->taxid = raw->taxid;
taxon->rank = raw->rank; taxon->rank = raw->rank;
taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char), taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char),
"Allocate taxon scientific name"); "Allocate taxon scientific name");
strncpy(taxon->name,raw->name,raw->namelength); strncpy(taxon->name,raw->name,raw->namelength);
return taxon; return taxon;
} }
ecotaxonomy_t *read_taxonomy(const char *prefix) ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
{ {
ecotaxonomy_t *tax; ecotaxonomy_t *tax;
char *filename; char *filename;
char *filename2;
int buffsize; int buffsize;
tax = ECOMALLOC(sizeof(ecotaxonomy_t), tax = ECOMALLOC(sizeof(ecotaxonomy_t),
"Allocate taxonomy structure"); "Allocate taxonomy structure");
buffsize = strlen(prefix)+10; buffsize = strlen(prefix)+10;
filename = ECOMALLOC(buffsize, filename = ECOMALLOC(buffsize,
"Allocate filename"); "Allocate filename");
filename2= ECOMALLOC(buffsize,
"Allocate filename");
snprintf(filename,buffsize,"%s.rdx",prefix); snprintf(filename,buffsize,"%s.rdx",prefix);
tax->ranks = read_rankidx(filename); tax->ranks = read_rankidx(filename);
snprintf(filename,buffsize,"%s.tdx",prefix); snprintf(filename,buffsize,"%s.tdx",prefix);
snprintf(filename2,buffsize,"%s.ldx",prefix);
tax->taxons = read_taxonomyidx(filename);
tax->taxons = read_taxonomyidx(filename,filename2);
if (readAlternativeName)
{
snprintf(filename,buffsize,"%s.ndx",prefix);
tax->names=read_nameidx(filename,tax);
}
else
tax->names=NULL;
return tax; return tax;
} }
int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy) int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
{ {
if (taxonomy) if (taxonomy)
{ {
if (taxonomy->ranks) if (taxonomy->ranks)
ECOFREE(taxonomy->ranks,"Free rank index"); ECOFREE(taxonomy->ranks,"Free rank index");
if (taxonomy->taxons) if (taxonomy->taxons)
ECOFREE(taxonomy->taxons,"Free taxon index"); ECOFREE(taxonomy->taxons,"Free taxon index");
ECOFREE(taxonomy,"Free taxonomy structure"); ECOFREE(taxonomy,"Free taxonomy structure");
return 0; return 0;
} }
return 1; return 1;
} }
ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
int32_t rankidx, int32_t rankidx)
ecotaxonomy_t *taxonomy)
{ {
ecotx_t *current_taxon; ecotx_t *current_taxon;
ecotx_t *next_taxon; ecotx_t *next_taxon;
current_taxon = taxon; current_taxon = taxon;
next_taxon = &(taxonomy->taxons->taxon[current_taxon->parent]); next_taxon = current_taxon->parent;
while ((current_taxon!=next_taxon) && while ((current_taxon!=next_taxon) && // I' am the root node
(current_taxon->rank!=rankidx)) (current_taxon->rank!=rankidx))
{ {
current_taxon = next_taxon; current_taxon = next_taxon;
next_taxon = &(taxonomy->taxons->taxon[current_taxon->parent]); next_taxon = current_taxon->parent;
} }
if (current_taxon->rank==rankidx) if (current_taxon->rank==rankidx)
return current_taxon; return current_taxon;
else else
return NULL; return NULL;
} }
/**
* Get back information concerning a taxon from a taxonomic id
* @param *taxonomy the taxonomy database
* @param taxid the taxonomic id
*
* @result a ecotx_t structure containing the taxonimic information
**/
ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy,
int32_t taxid)
{
ecotx_t *current_taxon;
int32_t taxoncount;
int32_t i;
taxoncount=taxonomy->taxons->count;
for (current_taxon=taxonomy->taxons->taxon,
i=0;
i < taxoncount;
i++,
current_taxon++){
if (current_taxon->taxid==taxid){
return current_taxon;
}
}
return (ecotx_t*)NULL;
}
/**
* Find out if taxon is son of other taxon (identified by its taxid)
* @param *taxon son taxon
* @param parent_taxid taxonomic id of the other taxon
*
* @return 1 is the other taxid math a parent taxid, else 0
**/
int eco_isundertaxon(ecotx_t *taxon,
int other_taxid)
{
ecotx_t *next_parent;
next_parent = taxon->parent;
while ( (other_taxid != next_parent->taxid) &&
(strcmp(next_parent->name, "root")) )
{
next_parent = next_parent->parent;
}
if (other_taxid == next_parent->taxid)
return 1;
else
return 0;
}
ecotx_t *eco_getspecies(ecotx_t *taxon, ecotx_t *eco_getspecies(ecotx_t *taxon,
ecotaxonomy_t *taxonomy) ecotaxonomy_t *taxonomy)
{ {
static ecotaxonomy_t *tax=NULL; static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1; static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy) if (taxonomy && tax!=taxonomy)
{ {
rankindex = rank_index("species",taxonomy->ranks); rankindex = rank_index("species",taxonomy->ranks);
tax=taxonomy; tax=taxonomy;
} }
if (!tax || rankindex < 0) if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax); return eco_findtaxonatrank(taxon,rankindex);
} }
ecotx_t *eco_getgenus(ecotx_t *taxon, ecotx_t *eco_getgenus(ecotx_t *taxon,
@ -183,17 +282,17 @@ ecotx_t *eco_getgenus(ecotx_t *taxon,
{ {
static ecotaxonomy_t *tax=NULL; static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1; static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy) if (taxonomy && tax!=taxonomy)
{ {
rankindex = rank_index("genus",taxonomy->ranks); rankindex = rank_index("genus",taxonomy->ranks);
tax=taxonomy; tax=taxonomy;
} }
if (!tax || rankindex < 0) if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax); return eco_findtaxonatrank(taxon,rankindex);
} }
@ -202,17 +301,17 @@ ecotx_t *eco_getfamily(ecotx_t *taxon,
{ {
static ecotaxonomy_t *tax=NULL; static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1; static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy) if (taxonomy && tax!=taxonomy)
{ {
rankindex = rank_index("family",taxonomy->ranks); rankindex = rank_index("family",taxonomy->ranks);
tax=taxonomy; tax=taxonomy;
} }
if (!tax || rankindex < 0) if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax); return eco_findtaxonatrank(taxon,rankindex);
} }
ecotx_t *eco_getkingdom(ecotx_t *taxon, ecotx_t *eco_getkingdom(ecotx_t *taxon,
@ -220,17 +319,17 @@ ecotx_t *eco_getkingdom(ecotx_t *taxon,
{ {
static ecotaxonomy_t *tax=NULL; static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1; static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy) if (taxonomy && tax!=taxonomy)
{ {
rankindex = rank_index("kingdom",taxonomy->ranks); rankindex = rank_index("kingdom",taxonomy->ranks);
tax=taxonomy; tax=taxonomy;
} }
if (!tax || rankindex < 0) if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax); return eco_findtaxonatrank(taxon,rankindex);
} }
ecotx_t *eco_getsuperkingdom(ecotx_t *taxon, ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
@ -238,15 +337,18 @@ ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
{ {
static ecotaxonomy_t *tax=NULL; static ecotaxonomy_t *tax=NULL;
static int32_t rankindex=-1; static int32_t rankindex=-1;
if (taxonomy && tax!=taxonomy) if (taxonomy && tax!=taxonomy)
{ {
rankindex = rank_index("superkingdom",taxonomy->ranks); rankindex = rank_index("superkingdom",taxonomy->ranks);
if (rankindex < 0) {
rankindex = rank_index("domain",taxonomy->ranks);
}
tax=taxonomy; tax=taxonomy;
} }
if (!tax || rankindex < 0) if (!tax || rankindex < 0)
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex,tax); return eco_findtaxonatrank(taxon,rankindex);
} }

22
src/libthermo/Makefile Normal file
View File

@ -0,0 +1,22 @@
SOURCES = nnparams.c
SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libthermo.a
RANLIB= ranlib
include ../global.mk
all: $(LIBFILE)
clean:
rm -rf $(OBJECTS) $(LIBFILE)
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?
$(RANLIB) $@

619
src/libthermo/nnparams.c Normal file
View File

@ -0,0 +1,619 @@
/*
* nnparams.cpp
* PHunterLib
*
* Nearest Neighbor Model / Parameters
*
* Created by Tiayyba Riaz on 7/2/09.
*
*/
#include <memory.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include"nnparams.h"
static char bpencoder[] = { 1, // A
0, // b
2, // C
0,0,0, // d, e, f
3, // G
0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
4,0, // T,U
0,0,0,0,0}; // v,w,x,y,z
double forbidden_entropy;
double nparam_GetInitialEntropy(PNNParams nparm)
{
return -5.9f+nparm->rlogc;
}
//Retrieve Enthalpy for given NN-Pair from parameter table
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
return ndH(x0,x1,y0,y1); //xx, yx are already numbers
}
//Retrieve Entropy for given NN-Pair from parameter table
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
//xx and yx are already numbers
char nx0=x0;//nparam_convertNum(x0);
char nx1=x1;//nparam_convertNum(x1);
char ny0=y0;//nparam_convertNum(y0);
char ny1=y1;//nparam_convertNum(y1);
double answer = ndS(nx0,nx1,ny0,ny1);
/*Salt correction Santalucia*/
if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) {
if(nx0!=5 && 1<= nx1 && nx1<=4) {
answer += 0.5*nparm->kfac;
}
if(ny1!=5 && 1<= ny0 && ny0<=4) {
answer += 0.5*nparm->kfac;
}
}
/*Salt correction Owczarzy*/
if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
double logk = log(nparm->kplus);
answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
}
return answer;
}
/* PURPOSE: Return melting temperature TM for given entropy and enthalpy
* Assuming a one-state transition and using the formula
* TM = dH / (dS + R ln(Ct/4))
* entropy = dS + R ln Ct/4 (must already be included!)
* enthaklpy = dH
* where
* dH = enthalpy
* dS = entropy
* R = Boltzmann factor
* Ct = Strand Concentration
*
* PARAMETERS:
* entrypy and enthalpy
*
* RETURN VALUE:
* temperature
*/
double nparam_CalcTM(double entropy,double enthalpy)
{
double tm = 0; // absolute zero - return if model fails!
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
return 0;
if (entropy<0) // avoid division by zero and model errors!
{
tm = enthalpy/entropy;// - kfac; //LKFEB
if (tm<0)
return 0;
}
return tm;
}
void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm)
{
nparm->Ct1 = c1;
nparm->Ct2 = c2;
nparm->kplus = kp;
int maxCT = 1;
if(nparm->Ct2 > nparm->Ct1)
{
maxCT = 2;
}
double ctFactor;
if(nparm->Ct1 == nparm->Ct2)
{
ctFactor = nparm->Ct1/2;
}
else if (maxCT == 1)
{
ctFactor = nparm->Ct1-nparm->Ct2/2;
}
else
{
ctFactor = nparm->Ct2-nparm->Ct1/2;
}
nparm->rlogc = R * log(ctFactor);
forbidden_entropy = nparm->rlogc;
nparm->kfac = 0.368 * log (nparm->kplus);
nparm->saltMethod = sm;
int x,y,a,b; // variables used as counters...
// Set all parameters to zero!
memset(nparm->dH,0,sizeof(nparm->dH));
memset(nparm->dS,0,sizeof(nparm->dS));
// Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small!
for (x=1;x<=4;x++)
{
for (y=1;y<=4;y++)
{
ndH(0,x,y,0)=forbidden_enthalpy;
ndS(0,x,y,0)=forbidden_entropy;
ndH(x,0,0,y)=forbidden_enthalpy;
ndS(x,0,0,y)=forbidden_entropy;
ndH(x,0,y,0)=forbidden_enthalpy;
ndS(x,0,y,0)=forbidden_entropy;
// forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap!
ndH(x,5,y,0)=forbidden_enthalpy;
ndS(x,5,y,0)=forbidden_entropy;
ndH(x,0,y,5)=forbidden_enthalpy;
ndS(x,0,y,5)=forbidden_entropy;
ndH(5,x,0,y)=forbidden_enthalpy;
ndS(5,x,0,y)=forbidden_entropy;
ndH(0,x,5,y)=forbidden_enthalpy;
ndS(0,x,5,y)=forbidden_entropy;
// forbid X$/-Y etc.
ndH(x,5,0,y)=forbidden_enthalpy;
ndS(x,5,0,y)=forbidden_entropy;
ndH(x,0,5,y)=forbidden_enthalpy;
ndS(x,0,5,y)=forbidden_entropy;
ndH(5,x,y,0)=forbidden_enthalpy;
ndS(5,x,y,0)=forbidden_entropy;
ndH(0,x,y,5)=forbidden_enthalpy;
ndS(0,x,y,5)=forbidden_entropy;
}
// also, forbid x-/-- and --/x-, i.e. no two inner gaps paired
ndH(x,0,0,0)=forbidden_enthalpy;
ndS(x,0,0,0)=forbidden_entropy;
ndH(0,0,x,0)=forbidden_enthalpy;
ndS(0,0,x,0)=forbidden_entropy;
// x-/-$
ndH(x,0,0,5)=forbidden_enthalpy;
ndS(x,0,0,5)=forbidden_entropy;
ndH(5,0,0,x)=forbidden_enthalpy;
ndS(5,0,0,x)=forbidden_entropy;
ndH(0,5,x,0)=forbidden_enthalpy;
ndS(x,0,0,5)=forbidden_entropy;
ndH(0,x,5,0)=forbidden_enthalpy;
ndS(0,x,5,0)=forbidden_entropy;
}
// forbid --/--
ndH(0,0,0,0)=forbidden_enthalpy;
ndS(0,0,0,0)=forbidden_entropy;
ndH(5,0,0,0)=forbidden_enthalpy;
ndS(5,0,0,0)=forbidden_entropy;
ndH(0,0,5,0)=forbidden_enthalpy;
ndS(0,0,5,0)=forbidden_entropy;
ndH(0,5,5,0)=forbidden_enthalpy;
ndS(0,5,5,0)=forbidden_entropy;
// Interior loops (double Mismatches)
#define iloop_entropy -0.97f
#define iloop_enthalpy 0.0f
for (x=1; x<=4; x++)
for (y=1; y<=4; y++)
for (a=1; a<=4; a++)
for (b=1; b<=4; b++)
// AT and CG pair, and as A=1, C=2, G=3, T=4 this means
// we have Watson-Crick pairs if (x+a==5) and (y+b)==5.
if (!((x+a==5)||(y+b==5)))
{
// No watson-crick-pair, i.e. double mismatch!
// set enthalpy/entropy to loop expansion!
ndH(x,y,a,b) = iloop_enthalpy;
ndS(x,y,a,b) = iloop_entropy;
}
// xy/-- and --/xy (Bulge Loops of size > 1)
#define bloop_entropy -1.3f
#define bloop_enthalpy 0.0f
for (x=1; x<=4; x++)
for (y=1; y<=4; y++)
{
ndH(x,y,0,0) = bloop_enthalpy;
ndS(x,y,0,0) = bloop_entropy;
ndH(0,0,x,y) = bloop_enthalpy;
ndS(0,0,x,y) = bloop_entropy;
}
// x-/ya abd xa/y- as well as -x/ay and ax/-y
// bulge opening and closing parameters with
// adjacent matches / mismatches
// obulge_mism and cbulge_mism chosen so high to avoid
// AAAAAAAAA
// T--G----T
// being better than
// AAAAAAAAA
// TG------T
#define obulge_match_H (-2.66f * 1000)
#define obulge_match_S -14.22f
#define cbulge_match_H (-2.66f * 1000)
#define cbulge_match_S -14.22f
#define obulge_mism_H (0.0f * 1000)
#define obulge_mism_S -6.45f
#define cbulge_mism_H 0.0f
#define cbulge_mism_S -6.45f
for (x=1; x<=4; x++)
for (y=1; y<=4; y++)
for (a=1; a<=4; a++)
{
if (x+y==5) // other base pair matches!
{
ndH(x,0,y,a)=obulge_match_H; // bulge opening
ndS(x,0,y,a)=obulge_match_S;
ndH(x,a,y,0)=obulge_match_H;
ndS(x,a,y,0)=obulge_match_S;
ndH(0,x,a,y)=cbulge_match_H; // bulge closing
ndS(0,x,a,y)=cbulge_match_S;
ndH(a,x,0,y)=cbulge_match_H;
ndS(a,x,0,y)=cbulge_match_S;
}
else
{ // mismatch in other base pair!
ndH(x,0,y,a)=obulge_mism_H; // bulge opening
ndS(x,0,y,a)=obulge_mism_S;
ndH(x,a,y,0)=obulge_mism_H;
ndS(x,a,y,0)=obulge_mism_S;
ndH(0,x,a,y)=cbulge_mism_H; // bulge closing
ndS(0,x,a,y)=cbulge_mism_S;
ndH(a,x,0,y)=cbulge_mism_H;
ndS(a,x,0,y)=cbulge_mism_S;
}
}
// Watson-Crick pairs (note that only ten are unique, as obviously
// 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc.
ndH(1,1,4,4)=-7.6f*1000; ndS(1,1,4,4)=-21.3f; // AA/TT 04
ndH(1,2,4,3)=-8.4f*1000; ndS(1,2,4,3)=-22.4f; // AC/TG adapted GT/CA
ndH(1,3,4,2)=-7.8f*1000; ndS(1,3,4,2)=-21.0f; // AG/TC adapted CT/GA
ndH(1,4,4,1)=-7.2f*1000; ndS(1,4,4,1)=-20.4f; // AT/TA 04
ndH(2,1,3,4)=-8.5f*1000; ndS(2,1,3,4)=-22.7f; // CA/GT 04
ndH(2,2,3,3)=-8.0f*1000; ndS(2,2,3,3)=-19.9f; // CC/GG adapted GG/CC
ndH(2,3,3,2)=-10.6f*1000; ndS(2,3,3,2)=-27.2f; // CG/GC 04
ndH(2,4,3,1)=-7.8f*1000; ndS(2,4,3,1)=-21.0f; // CT/GA 04
ndH(3,1,2,4)=-8.2f*1000; ndS(3,1,2,4)=-22.2f; // GA/CT 04
ndH(3,2,2,3)=-9.8f*1000; ndS(3,2,2,3)=-24.4f; // GC/CG 04
ndH(3,3,2,2)=-8.0f*1000; ndS(3,3,2,2)=-19.9f; // GG/CC 04
ndH(3,4,2,1)=-8.4f*1000; ndS(3,4,2,1)=-22.4f; // GT/CA 04
ndH(4,1,1,4)=-7.2f*1000; ndS(4,1,1,4)=-21.3f; // TA/AT 04
ndH(4,2,1,3)=-8.2f*1000; ndS(4,2,1,3)=-22.2f; // TC/AG adapted GA/CT
ndH(4,3,1,2)=-8.5f*1000; ndS(4,3,1,2)=-22.7f; // TG/AC adapted CA/GT
ndH(4,4,1,1)=-7.6f*1000; ndS(4,4,1,1)=-21.3f; // TT/AA adapted AA/TT
// A-C Mismatches (Values for pH 7.0)
ndH(1,1,2,4)=7.6f*1000; ndS(1,1,2,4)=20.2f; // AA/CT
ndH(1,1,4,2)=2.3f*1000; ndS(1,1,4,2)=4.6f; // AA/TC
ndH(1,2,2,3)=-0.7f*1000; ndS(1,2,2,3)=-3.8f; // AC/CG
ndH(1,2,4,1)=5.3f*1000; ndS(1,2,4,1)=14.6f; // AC/TA
ndH(1,3,2,2)=0.6f*1000; ndS(1,3,2,2)=-0.6f; // AG/CC
ndH(1,4,2,1)=5.3f*1000; ndS(1,4,2,1)=14.6f; // AT/CA
ndH(2,1,1,4)=3.4f*1000; ndS(2,1,1,4)=8.0f; // CA/AT
ndH(2,1,3,2)=1.9f*1000; ndS(2,1,3,2)=3.7f; // CA/GC
ndH(2,2,1,3)=5.2f*1000; ndS(2,2,1,3)=14.2f; // CC/AG
ndH(2,2,3,1)=0.6f*1000; ndS(2,2,3,1)=-0.6f; // CC/GA
ndH(2,3,1,2)=1.9f*1000; ndS(2,3,1,2)=3.7f; // CG/AC
ndH(2,4,1,1)=2.3f*1000; ndS(2,4,1,1)=4.6f; // CT/AA
ndH(3,1,2,2)=5.2f*1000; ndS(3,1,2,2)=14.2f; // GA/CC
ndH(3,2,2,1)=-0.7f*1000; ndS(3,2,2,1)=-3.8f; // GC/CA
ndH(4,1,1,2)=3.4f*1000; ndS(4,1,1,2)=8.0f; // TA/AC
ndH(4,2,1,1)=7.6f*1000; ndS(4,2,1,1)=20.2f; // TC/AA
// C-T Mismatches
ndH(1,2,4,4)=0.7f*1000; ndS(1,2,4,4)=0.2f; // AC/TT
ndH(1,4,4,2)=-1.2f*1000; ndS(1,4,4,2)=-6.2f; // AT/TC
ndH(2,1,4,4)=1.0f*1000; ndS(2,1,4,4)=0.7f; // CA/TT
ndH(2,2,3,4)=-0.8f*1000; ndS(2,2,3,4)=-4.5f; // CC/GT
ndH(2,2,4,3)=5.2f*1000; ndS(2,2,4,3)=13.5f; // CC/TG
ndH(2,3,4,2)=-1.5f*1000; ndS(2,3,4,2)=-6.1f; // CG/TC
ndH(2,4,3,2)=-1.5f*1000; ndS(2,4,3,2)=-6.1f; // CT/GC
ndH(2,4,4,1)=-1.2f*1000; ndS(2,4,4,1)=-6.2f; // CT/TA
ndH(3,2,2,4)=2.3f*1000; ndS(3,2,2,4)=5.4f; // GC/CT
ndH(3,4,2,2)=5.2f*1000; ndS(3,4,2,2)=13.5f; // GT/CC
ndH(4,1,2,4)=1.2f*1000; ndS(4,1,2,4)=0.7f; // TA/CT
ndH(4,2,2,3)=2.3f*1000; ndS(4,2,2,3)=5.4f; // TC/CG
ndH(4,2,1,4)=1.2f*1000; ndS(4,2,1,4)=0.7f; // TC/AT
ndH(4,3,2,2)=-0.8f*1000; ndS(4,3,2,2)=-4.5f; // TG/CC
ndH(4,4,2,1)=0.7f*1000; ndS(4,4,2,1)=0.2f; // TT/CA
ndH(4,4,1,2)=1.0f*1000; ndS(4,4,1,2)=0.7f; // TT/AC
// G-A Mismatches
ndH(1,1,3,4)=3.0f*1000; ndS(1,1,3,4)=7.4f; // AA/GT
ndH(1,1,4,3)=-0.6f*1000; ndS(1,1,4,3)=-2.3f; // AA/TG
ndH(1,2,3,3)=0.5f*1000; ndS(1,2,3,3)=3.2f; // AC/GG
ndH(1,3,3,2)=-4.0f*1000; ndS(1,3,3,2)=-13.2f; // AG/GC
ndH(1,3,4,1)=-0.7f*1000; ndS(1,3,4,1)=-2.3f; // AG/TA
ndH(1,4,3,1)=-0.7f*1000; ndS(1,4,3,1)=-2.3f; // AT/GA
ndH(2,1,3,3)=-0.7f*1000; ndS(2,1,3,3)=-2.3f; // CA/GG
ndH(2,3,3,1)=-4.0f*1000; ndS(2,3,3,1)=-13.2f; // CG/GA
ndH(3,1,1,4)=0.7f*1000; ndS(3,1,1,4)=0.7f; // GA/AT
ndH(3,1,2,3)=-0.6f*1000; ndS(3,1,2,3)=-1.0f; // GA/CG
ndH(3,2,1,3)=-0.6f*1000; ndS(3,2,1,3)=-1.0f; // GC/AG
ndH(3,3,1,2)=-0.7f*1000; ndS(3,3,1,2)=-2.3f; // GG/AC
ndH(3,3,2,1)=0.5f*1000; ndS(3,3,2,1)=3.2f; // GG/CA
ndH(3,4,1,1)=-0.6f*1000; ndS(3,4,1,1)=-2.3f; // GT/AA
ndH(4,1,1,3)=0.7f*1000; ndS(4,1,1,3)=0.7f; // TA/AG
ndH(4,3,1,1)=3.0f*1000; ndS(4,3,1,1)=7.4f; // TG/AA
// G-T Mismatches
ndH(1,3,4,4)=1.0f*1000; ndS(1,3,4,4)=0.9f; // AG/TT
ndH(1,4,4,3)=-2.5f*1000; ndS(1,4,4,3)=-8.3f; // AT/TG
ndH(2,3,3,4)=-4.1f*1000; ndS(2,3,3,4)=-11.7f; // CG/GT
ndH(2,4,3,3)=-2.8f*1000; ndS(2,4,3,3)=-8.0f; // CT/GG
ndH(3,1,4,4)=-1.3f*1000; ndS(3,1,4,4)=-5.3f; // GA/TT
ndH(3,2,4,3)=-4.4f*1000; ndS(3,2,4,3)=-12.3f; // GC/TG
ndH(3,3,2,4)=3.3f*1000; ndS(3,3,2,4)=10.4f; // GG/CT
ndH(3,3,4,2)=-2.8f*1000; ndS(3,3,4,2)=-8.0f; // GG/TC
// ndH(3,3,4,4)=5.8f*1000; ndS(3,3,4,4)=16.3f; // GG/TT
ndH(3,4,2,3)=-4.4f*1000; ndS(3,4,2,3)=-12.3f; // GT/CG
ndH(3,4,4,1)=-2.5f*1000; ndS(3,4,4,1)=-8.3f; // GT/TA
// ndH(3,4,4,3)=4.1f*1000; ndS(3,4,4,3)=9.5f; // GT/TG
ndH(4,1,3,4)=-0.1f*1000; ndS(4,1,3,4)=-1.7f; // TA/GT
ndH(4,2,3,3)=3.3f*1000; ndS(4,2,3,3)=10.4f; // TC/GG
ndH(4,3,1,4)=-0.1f*1000; ndS(4,3,1,4)=-1.7f; // TG/AT
ndH(4,3,3,2)=-4.1f*1000; ndS(4,3,3,2)=-11.7f; // TG/GC
// ndH(4,3,3,4)=-1.4f*1000; ndS(4,3,3,4)=-6.2f; // TG/GT
ndH(4,4,1,3)=-1.3f*1000; ndS(4,4,1,3)=-5.3f; // TT/AG
ndH(4,4,3,1)=1.0f*1000; ndS(4,4,3,1)=0.9f; // TT/GA
// ndH(4,4,3,3)=5.8f*1000; ndS(4,4,3,3)=16.3f; // TT/GG
// A-A Mismatches
ndH(1,1,1,4)=4.7f*1000; ndS(1,1,1,4)=12.9f; // AA/AT
ndH(1,1,4,1)=1.2f*1000; ndS(1,1,4,1)=1.7f; // AA/TA
ndH(1,2,1,3)=-2.9f*1000; ndS(1,2,1,3)=-9.8f; // AC/AG
ndH(1,3,1,2)=-0.9f*1000; ndS(1,3,1,2)=-4.2f; // AG/AC
ndH(1,4,1,1)=1.2f*1000; ndS(1,4,1,1)=1.7f; // AT/AA
ndH(2,1,3,1)=-0.9f*1000; ndS(2,1,3,1)=-4.2f; // CA/GA
ndH(3,1,2,1)=-2.9f*1000; ndS(3,1,2,1)=-9.8f; // GA/CA
ndH(4,1,1,1)=4.7f*1000; ndS(4,1,1,1)=12.9f; // TA/AA
// C-C Mismatches
ndH(1,2,4,2)=0.0f*1000; ndS(1,2,4,2)=-4.4f; // AC/TC
ndH(2,1,2,4)=6.1f*1000; ndS(2,1,2,4)=16.4f; // CA/CT
ndH(2,2,2,3)=3.6f*1000; ndS(2,2,2,3)=8.9f; // CC/CG
ndH(2,2,3,2)=-1.5f*1000; ndS(2,2,3,2)=-7.2f; // CC/GC
ndH(2,3,2,2)=-1.5f*1000; ndS(2,3,2,2)=-7.2f; // CG/CC
ndH(2,4,2,1)=0.0f*1000; ndS(2,4,2,1)=-4.4f; // CT/CA
ndH(3,2,2,2)=3.6f*1000; ndS(3,2,2,2)=8.9f; // GC/CC
ndH(4,2,1,2)=6.1f*1000; ndS(4,2,1,2)=16.4f; // TC/AC
// G-G Mismatches
ndH(1,3,4,3)=-3.1f*1000; ndS(1,3,4,3)=-9.5f; // AG/TG
ndH(2,3,3,3)=-4.9f*1000; ndS(2,3,3,3)=-15.3f; // CG/GG
ndH(3,1,3,4)=1.6f*1000; ndS(3,1,3,4)=3.6f; // GA/GT
ndH(3,2,3,3)=-6.0f*1000; ndS(3,2,3,3)=-15.8f; // GC/GG
ndH(3,3,2,3)=-6.0f*1000; ndS(3,3,2,3)=-15.8f; // GG/CG
ndH(3,3,3,2)=-4.9f*1000; ndS(3,3,3,2)=-15.3f; // GG/GC
ndH(3,4,3,1)=-3.1f*1000; ndS(3,4,3,1)=-9.5f; // GT/GA
ndH(4,3,1,3)=1.6f*1000; ndS(4,3,1,3)=3.6f; // TG/AG
// T-T Mismatches
ndH(1,4,4,4)=-2.7f*1000; ndS(1,4,4,4)=-10.8f; // AT/TT
ndH(2,4,3,4)=-5.0f*1000; ndS(2,4,3,4)=-15.8f; // CT/GT
ndH(3,4,2,4)=-2.2f*1000; ndS(3,4,2,4)=-8.4f; // GT/CT
ndH(4,1,4,4)=0.2f*1000; ndS(4,1,4,4)=-1.5f; // TA/TT
ndH(4,2,4,3)=-2.2f*1000; ndS(4,2,4,3)=-8.4f; // TC/TG
ndH(4,3,4,2)=-5.0f*1000; ndS(4,3,4,2)=-15.8f; // TG/TC
ndH(4,4,1,4)=0.2f*1000; ndS(4,4,1,4)=-1.5f; // TT/AT
ndH(4,4,4,1)=-2.7f*1000; ndS(4,4,4,1)=-10.8f; // TT/TA
// Dangling Ends
ndH(5,1,1,4)=-0.7f*1000; ndS(5,1,1,4)=-0.8f; // $A/AT
ndH(5,1,2,4)=4.4f*1000; ndS(5,1,2,4)=14.9f; // $A/CT
ndH(5,1,3,4)=-1.6f*1000; ndS(5,1,3,4)=-3.6f; // $A/GT
ndH(5,1,4,4)=2.9f*1000; ndS(5,1,4,4)=10.4f; // $A/TT
ndH(5,2,1,3)=-2.1f*1000; ndS(5,2,1,3)=-3.9f; // $C/AG
ndH(5,2,2,3)=-0.2f*1000; ndS(5,2,2,3)=-0.1f; // $C/CG
ndH(5,2,3,3)=-3.9f*1000; ndS(5,2,3,3)=-11.2f; // $C/GG
ndH(5,2,4,3)=-4.4f*1000; ndS(5,2,4,3)=-13.1f; // $C/TG
ndH(5,3,1,2)=-5.9f*1000; ndS(5,3,1,2)=-16.5f; // $G/AC
ndH(5,3,2,2)=-2.6f*1000; ndS(5,3,2,2)=-7.4f; // $G/CC
ndH(5,3,3,2)=-3.2f*1000; ndS(5,3,3,2)=-10.4f; // $G/GC
ndH(5,3,4,2)=-5.2f*1000; ndS(5,3,4,2)=-15.0f; // $G/TC
ndH(5,4,1,1)=-0.5f*1000; ndS(5,4,1,1)=-1.1f; // $T/AA
ndH(5,4,2,1)=4.7f*1000; ndS(5,4,2,1)=14.2f; // $T/CA
ndH(5,4,3,1)=-4.1f*1000; ndS(5,4,3,1)=-13.1f; // $T/GA
ndH(5,4,4,1)=-3.8f*1000; ndS(5,4,4,1)=-12.6f; // $T/TA
ndH(1,5,4,1)=-2.9f*1000; ndS(1,5,4,1)=-7.6f; // A$/TA
ndH(1,5,4,2)=-4.1f*1000; ndS(1,5,4,2)=-13.0f; // A$/TC
ndH(1,5,4,3)=-4.2f*1000; ndS(1,5,4,3)=-15.0f; // A$/TG
ndH(1,5,4,4)=-0.2f*1000; ndS(1,5,4,4)=-0.5f; // A$/TT
ndH(1,1,5,4)=0.2f*1000; ndS(1,1,5,4)=2.3f; // AA/$T
ndH(1,1,4,5)=-0.5f*1000; ndS(1,1,4,5)=-1.1f; // AA/T$
ndH(1,2,5,3)=-6.3f*1000; ndS(1,2,5,3)=-17.1f; // AC/$G
ndH(1,2,4,5)=4.7f*1000; ndS(1,2,4,5)=14.2f; // AC/T$
ndH(1,3,5,2)=-3.7f*1000; ndS(1,3,5,2)=-10.0f; // AG/$C
ndH(1,3,4,5)=-4.1f*1000; ndS(1,3,4,5)=-13.1f; // AG/T$
ndH(1,4,5,1)=-2.9f*1000; ndS(1,4,5,1)=-7.6f; // AT/$A
ndH(1,4,4,5)=-3.8f*1000; ndS(1,4,4,5)=-12.6f; // AT/T$
ndH(2,5,3,1)=-3.7f*1000; ndS(2,5,3,1)=-10.0f; // C$/GA
ndH(2,5,3,2)=-4.0f*1000; ndS(2,5,3,2)=-11.9f; // C$/GC
ndH(2,5,3,3)=-3.9f*1000; ndS(2,5,3,3)=-10.9f; // C$/GG
ndH(2,5,3,4)=-4.9f*1000; ndS(2,5,3,4)=-13.8f; // C$/GT
ndH(2,1,5,4)=0.6f*1000; ndS(2,1,5,4)=3.3f; // CA/$T
ndH(2,1,3,5)=-5.9f*1000; ndS(2,1,3,5)=-16.5f; // CA/G$
ndH(2,2,5,3)=-4.4f*1000; ndS(2,2,5,3)=-12.6f; // CC/$G
ndH(2,2,3,5)=-2.6f*1000; ndS(2,2,3,5)=-7.4f; // CC/G$
ndH(2,3,5,2)=-4.0f*1000; ndS(2,3,5,2)=-11.9f; // CG/$C
ndH(2,3,3,5)=-3.2f*1000; ndS(2,3,3,5)=-10.4f; // CG/G$
ndH(2,4,5,1)=-4.1f*1000; ndS(2,4,5,1)=-13.0f; // CT/$A
ndH(2,4,3,5)=-5.2f*1000; ndS(2,4,3,5)=-15.0f; // CT/G$
ndH(3,5,2,1)=-6.3f*1000; ndS(3,5,2,1)=-17.1f; // G$/CA
ndH(3,5,2,2)=-4.4f*1000; ndS(3,5,2,2)=-12.6f; // G$/CC
ndH(3,5,2,3)=-5.1f*1000; ndS(3,5,2,3)=-14.0f; // G$/CG
ndH(3,5,2,4)=-4.0f*1000; ndS(3,5,2,4)=-10.9f; // G$/CT
ndH(3,1,5,4)=-1.1f*1000; ndS(3,1,5,4)=-1.6f; // GA/$T
ndH(3,1,2,5)=-2.1f*1000; ndS(3,1,2,5)=-3.9f; // GA/C$
ndH(3,2,5,3)=-5.1f*1000; ndS(3,2,5,3)=-14.0f; // GC/$G
ndH(3,2,2,5)=-0.2f*1000; ndS(3,2,2,5)=-0.1f; // GC/C$
ndH(3,3,5,2)=-3.9f*1000; ndS(3,3,5,2)=-10.9f; // GG/$C
ndH(3,3,2,5)=-3.9f*1000; ndS(3,3,2,5)=-11.2f; // GG/C$
ndH(3,4,5,1)=-4.2f*1000; ndS(3,4,5,1)=-15.0f; // GT/$A
ndH(3,4,2,5)=-4.4f*1000; ndS(3,4,2,5)=-13.1f; // GT/C$
ndH(4,5,1,1)=0.2f*1000; ndS(4,5,1,1)=2.3f; // T$/AA
ndH(4,5,1,2)=0.6f*1000; ndS(4,5,1,2)=3.3f; // T$/AC
ndH(4,5,1,3)=-1.1f*1000; ndS(4,5,1,3)=-1.6f; // T$/AG
ndH(4,5,1,4)=-6.9f*1000; ndS(4,5,1,4)=-20.0f; // T$/AT
ndH(4,1,5,4)=-6.9f*1000; ndS(4,1,5,4)=-20.0f; // TA/$T
ndH(4,1,1,5)=-0.7f*1000; ndS(4,1,1,5)=-0.7f; // TA/A$
ndH(4,2,5,3)=-4.0f*1000; ndS(4,2,5,3)=-10.9f; // TC/$G
ndH(4,2,1,5)=4.4f*1000; ndS(4,2,1,5)=14.9f; // TC/A$
ndH(4,3,5,2)=-4.9f*1000; ndS(4,3,5,2)=-13.8f; // TG/$C
ndH(4,3,1,5)=-1.6f*1000; ndS(4,3,1,5)=-3.6f; // TG/A$
ndH(4,4,5,1)=-0.2f*1000; ndS(4,4,5,1)=-0.5f; // TT/$A
ndH(4,4,1,5)=2.9f*1000; ndS(4,4,1,5)=10.4f; // TT/A$
return;
}
int nparam_CountGCContent(char * seq ) {
int lseq = strlen(seq);
int k;
double count = 0;
for( k=0;k<lseq;k++) {
if (seq[k] == 'G' || seq[k] == 'C' ) {
count+=1;
}
}
return count;
}
void nparam_CleanSeq (char* inseq, char* outseq, int len)
{
int seqlen = strlen (inseq);
int i, j;
if (len != 0)
seqlen = len;
outseq[0]='x';
for (i = 0, j = 0; i < seqlen && outseq[0]; i++,j++)
{
switch (inseq[i])
{
case 'a':
case '\0':
case 'A':
outseq[j] = 'A'; break;
case 'c':
case '\1':
case 'C':
outseq[j] = 'C'; break;
case 'g':
case '\2':
case 'G':
outseq[j] = 'G'; break;
case 't':
case '\3':
case 'T':
outseq[j] = 'T'; break;
default:
outseq[0]=0;
}
}
outseq[j] = '\0';
}
//Calculate TM for given sequence against its complement
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
{
const unsigned long long minus1 = 0xFFFFFFFFFFFFFFFFLLU;
const double NaN = *((double*)&minus1);
double thedH = 0;
//double thedS = nparam_GetInitialEntropy(nparm);
double thedS = -5.9f+nparm->rlogc;
double mtemp;
char c1;
char c2;
char c3;
char c4;
unsigned int i;
char nseq[50];
char *useq = seq;
nparam_CleanSeq (seq, nseq, len);
if (!nseq[0])
return NaN;
useq = nseq;
for ( i=1;i<len;i++)
{
c1 = GETREVCODE(useq[i-1]); //nparam_getComplement(seq[i-1],1);
c2 = GETREVCODE(useq[i]); //nparam_getComplement(seq[i],1);
c3 = GETNUMCODE(useq[i-1]);
c4 = GETNUMCODE(useq[i]);
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
//printf("------------------\n");
mtemp = nparam_CalcTM(thedS,thedH);
//fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s rloc=%f\n", thedH, thedS, useq, nparm->rlogc);
//exit (0);
return mtemp;
}
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
{
const unsigned long long minus1 = 0xFFFFFFFFFFFFFFFFLLU;
const double NaN = *((double*)&minus1);
double thedH = 0;
//double thedS = nparam_GetInitialEntropy(nparm);
double thedS = -5.9f+nparm->rlogc;
double mtemp;
char c1;
char c2;
char c3;
char c4;
unsigned int i;
char nseq1[50];
char nseq2[50];
char *useq1;
char *useq2;
nparam_CleanSeq (seq1, nseq1, len);
if (!nseq1[0])
return NaN;
useq1 = nseq1;
nparam_CleanSeq (seq2, nseq2, len);
if (!nseq2[0])
return NaN;
useq2 = nseq2;
//fprintf (stderr,"Primer : %s\n",useq);
for ( i=1;i<len;i++)
{
c1 = GETREVCODE(useq2[i-1]); //nparam_getComplement(seq[i-1],1);
c2 = GETREVCODE(useq2[i]); //nparam_getComplement(seq[i],1);
c3 = GETNUMCODE(useq1[i-1]);
c4 = GETNUMCODE(useq1[i]);
//fprintf (stderr,"Primer : %s %f %f %d %d, %d %d %f\n",useq,thedH,thedS,(int)c3,(int)c4,(int)c1,(int)c2,nparam_GetEnthalpy(nparm, c3,c4,c1,c2));
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
//fprintf(stderr,"------------------\n");
mtemp = nparam_CalcTM(thedS,thedH);
//if (mtemp == 0)
//{
// fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq);
//exit (0);
//}
return mtemp;
}
double calculateMeltingTemperatureBasic (char * seq) {
int gccount;
double temp;
int seqlen;
seqlen = strlen (seq);
gccount = nparam_CountGCContent (seq);
temp = 64.9 + 41*(gccount - 16.4)/seqlen;
return temp;
}

63
src/libthermo/nnparams.h Normal file
View File

@ -0,0 +1,63 @@
/*
* nnparams.h
* PHunterLib
*
* Nearest Neighbor Model Parameters
*
* Created by Tiayyba Riaz on 02/07/09.
*
*/
#ifndef NNPARAMS_H_
#define NNPARAMS_H_
#include <math.h>
#include <string.h>
//#include "../libecoprimer/ecoprimer.h"
// following defines to simplify coding...
#define ndH(a,b,c,d) nparm->dH[(int)a][(int)b][(int)c][(int)d]
#define ndS(a,b,c,d) nparm->dS[(int)a][(int)b][(int)c][(int)d]
#define forbidden_enthalpy 1000000000000000000.0f
#define R 1.987f
#define SALT_METHOD_SANTALUCIA 1
#define SALT_METHOD_OWCZARZY 2
#define DEF_CONC_PRIMERS 0.0000008
#define DEF_CONC_SEQUENCES 0
#define DEF_SALT 0.05
#define GETNUMCODE(a) bpencoder[a - 'A']
#define GETREVCODE(a) 5-bpencoder[a - 'A']
extern double forbidden_entropy;
typedef struct CNNParams_st
{
double Ct1;
double Ct2;
double rlogc;
double kplus;
double kfac;
int saltMethod;
double gcContent;
double new_TM;
double dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
double dS[6][6][6][6];
}CNNParams, * PNNParams;
void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm);
int nparam_CountGCContent(char * seq );
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_CalcTM(double entropy,double enthalpy);
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len);
double nparam_GetInitialEntropy(PNNParams nparm) ;
double calculateMeltingTemperatureBasic (char * seq);
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
#endif

303
tools/ecoPCRFilter.py Executable file
View File

@ -0,0 +1,303 @@
#!/usr/bin/env python
import struct
import sys
import os
import gzip
#####
#
# Generic file function
#
#####
class Filter(object):
def __init__(self,path):
self._path = path
self._taxonFile = "%s.tdx" % self._path
self._ranksFile = "%s.rdx" % self._path
self._namesFile = "%s.ndx" % self._path
self._taxonomy, self._index, self._ranks, self._name = self.__readNodeTable()
def __universalOpen(self,file):
if isinstance(file,str):
if file[-3:] == '.gz':
rep = gzip.open(file)
else:
rep = open(file)
else:
rep = file
return rep
def __universalTell(self,file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
return file.tell()
def __fileSize(self,file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
pos = file.tell()
file.seek(0,2)
length = file.tell()
file.seek(pos,0)
return length
def __progressBar(self,pos,max,reset=False,delta=[]):
if reset:
del delta[:]
if not delta:
delta.append(time.time())
delta.append(time.time())
delta[1]=time.time()
elapsed = delta[1]-delta[0]
percent = float(pos)/max * 100
remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
bar = '#' * int(percent/2)
bar+= '|/-\\-'[pos % 5]
bar+= ' ' * (50 - int(percent/2))
sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
#####
#
# Iterator functions
#
#####
def __ecoRecordIterator(self,file):
file = self.__universalOpen(file)
(recordCount,) = struct.unpack('> I',file.read(4))
for i in xrange(recordCount):
(recordSize,)=struct.unpack('>I',file.read(4))
record = file.read(recordSize)
yield record
def __ecoNameIterator(self):
for record in self.__ecoRecordIterator(self._namesFile):
lrecord = len(record)
lnames = lrecord - 16
(isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
name=names[:namelength]
classname=names[namelength:]
yield (name,classname,indextaxid)
def __ecoTaxonomicIterator(self):
for record in self.__ecoRecordIterator(self._taxonFile):
lrecord = len(record)
lnames = lrecord - 16
(taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
yield (taxid,rankid,parentidx,name)
def __ecoSequenceIterator(self,file):
for record in self.__ecoRecordIterator(file):
lrecord = len(record)
lnames = lrecord - (4*4+20)
(taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
de = string[:deflength]
seq = gzip.zlib.decompress(string[deflength:])
yield (taxid,seqid,deflength,seqlength,cptseqlength,de,seq)
def __ecoRankIterator(self):
for record in self.__ecoRecordIterator(self._ranksFile):
yield record
#####
#
# Indexes
#
#####
def __ecoNameIndex(self):
indexName = [x for x in self.__ecoNameIterator()]
return indexName
def __ecoRankIndex(self):
rank = [r for r in self.__ecoRankIterator()]
return rank
def __ecoTaxonomyIndex(self):
taxonomy = []
index = {}
i = 0;
for x in self.__ecoTaxonomicIterator():
taxonomy.append(x)
index[x[0]] = i
i = i + 1
return taxonomy, index
def __readNodeTable(self):
taxonomy, index = self.__ecoTaxonomyIndex()
ranks = self.__ecoRankIndex()
name = self.__ecoNameIndex()
return taxonomy,index,ranks,name
def findTaxonByTaxid(self,taxid):
return self._taxonomy[self._index[taxid]]
#####
#
# PUBLIC METHODS
#
#####
def subTreeIterator(self, taxid):
"return subtree for given taxonomic id "
idx = self._index[taxid]
yield self._taxonomy[idx]
for t in self._taxonomy:
if t[2] == idx:
for subt in self.subTreeIterator(t[0]):
yield subt
def parentalTreeIterator(self, taxid):
"""
return parental tree for given taxonomic id starting from
first ancester to the root.
"""
taxon=self.findTaxonByTaxid(taxid)
while taxon[2]!= 0:
yield taxon
taxon = self._taxonomy[taxon[2]]
yield self._taxonomy[0]
def ecoPCRResultIterator(self, file):
"iteration on ecoPCR result file"
file = self.__universalOpen(file)
data = ColumnFile(file,
sep='|',
types=(str,int,int,
str,int,str,
int,str,int,
str,int,str,
str,str,int,
str,int,int,
str,str),skip='#')
for ac, sq_len, taxid,\
rank, sp_taxid, species,\
ge_taxid, genus, fa_taxid,\
family, sk_taxid, s_kgdom,\
strand, oligo_1, error_1,\
oligo_2, error_2, amp_len,\
sq_des, definition in data:
yield {'ac':ac, 'sq_len':sq_len, 'taxid':taxid,
'rank':rank, 'sp_taxid':sp_taxid, 'species':species,
'ge_taxid':ge_taxid, 'genus':genus, 'fa_taxid':fa_taxid,
'family':family, 'sk_taxid':sk_taxid, 's_kgdom':s_kgdom,
'strand':strand, 'oligo_1':oligo_1, 'error_1':error_1,
'oligo_2':oligo_2, 'error_2':error_2, 'amp_len':amp_len,
'sq_des':sq_des, 'definition':definition}
def rankFilter(self,rankid,filter):
return self._ranks[rankid] == filter
def lastCommonTaxon(self,taxid_1, taxid_2):
t1 = [x[0] for x in self.parentalTreeIterator(taxid_1)]
t2 = [x[0] for x in self.parentalTreeIterator(taxid_2)]
t1.reverse()
t2.reverse()
count = t1 < t2 and len(t1) or len(t2)
for i in range(count):
if t1[i] != t2[i]:
return t1[i-1]
class ColumnFile(object):
def __init__(self,stream,sep=None,strip=True,types=None,skip=None):
if isinstance(stream,str):
self._stream = open(stream)
elif hasattr(stream,'next'):
self._stream = stream
else:
raise ValueError,'stream must be string or an iterator'
self._delimiter=sep
self._strip=strip
if types:
self._types=[x for x in types]
for i in xrange(len(self._types)):
if self._types[i] is bool:
self._types[i]=ColumnFile.str2bool
else:
self._types=None
self._skip = skip
def str2bool(x):
return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
str2bool = staticmethod(str2bool)
def __iter__(self):
return self
def next(self):
ligne = self._stream.next()
while ligne[0] == self._skip:
ligne = self._stream.next()
data = ligne.split(self._delimiter)
if self._strip or self._types:
data = [x.strip() for x in data]
if self._types:
it = self.endLessIterator(self._types)
data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
return data
def endLessIterator(self,endedlist):
for x in endedlist:
yield x
while(1):
yield endedlist[-1]
class Table(list):
def __init__(self, headers, types):
list.__init__(self)
self.headers = headers
self.types = types
self.lines = []
def printTable(self):
for h in self.headers:
print "\t%s\t|" % h,
print "\n"
for l in self.lines:
for c in l:
print "\t%s\t|" % c
print "\n"
def getColumn(self,n):
print "\t%s\n" % self.header[n]
for i in range(len(self.lines)):
print "\t%s\n" % i[n]

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python2.7
import re import re
import gzip import gzip
@ -7,6 +7,9 @@ import sys
import time import time
import getopt import getopt
_dbenable=False
##### #####
# #
# #
@ -68,8 +71,7 @@ def endLessIterator(endedlist):
yield x yield x
while(1): while(1):
yield endedlist[-1] yield endedlist[-1]
class ColumnFile(object): class ColumnFile(object):
def __init__(self,stream,sep=None,strip=True,types=None): def __init__(self,stream,sep=None,strip=True,types=None):
@ -135,7 +137,7 @@ def bsearchTaxon(taxonomy,taxid):
else: else:
return None return None
def readNodeTable(file): def readNodeTable(file):
@ -149,7 +151,6 @@ def readNodeTable(file):
bool,bool,bool,str)) bool,bool,bool,str))
print >>sys.stderr,"Reading taxonomy dump file..." print >>sys.stderr,"Reading taxonomy dump file..."
taxonomy=[[n[0],n[2],n[1]] for n in nodes] taxonomy=[[n[0],n[2],n[1]] for n in nodes]
print >>sys.stderr,"List all taxonomy rank..." print >>sys.stderr,"List all taxonomy rank..."
ranks =list(set(x[1] for x in taxonomy)) ranks =list(set(x[1] for x in taxonomy))
ranks.sort() ranks.sort()
@ -171,15 +172,14 @@ def readNodeTable(file):
return taxonomy,ranks,index return taxonomy,ranks,index
def scientificNameIterator(file): def nameIterator(file):
file = universalOpen(file) file = universalOpen(file)
names = ColumnFile(file, names = ColumnFile(file,
sep='|', sep='|',
types=(int,str, types=(int,str,
str,str)) str,str))
for taxid,name,unique,classname,white in names: for taxid,name,unique,classname,white in names:
if classname == 'scientific name': yield taxid,name,classname
yield taxid,name
def mergedNodeIterator(file): def mergedNodeIterator(file):
file = universalOpen(file) file = universalOpen(file)
@ -201,8 +201,12 @@ def readTaxonomyDump(taxdir):
taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir) taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
print >>sys.stderr,"Adding scientific name..." print >>sys.stderr,"Adding scientific name..."
for taxid,name in scientificNameIterator('%s/names.dmp' % taxdir):
taxonomy[index[taxid]].append(name) alternativeName=[]
for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
alternativeName.append((name,classname,index[taxid]))
if classname == 'scientific name':
taxonomy[index[taxid]].append(name)
print >>sys.stderr,"Adding taxid alias..." print >>sys.stderr,"Adding taxid alias..."
for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir): for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
@ -212,7 +216,7 @@ def readTaxonomyDump(taxdir):
for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir): for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
index[taxid]=None index[taxid]=None
return taxonomy,ranks,index return taxonomy,ranks,alternativeName,index
##### #####
@ -267,28 +271,55 @@ def genbankEntryParser(entry):
Tx = None Tx = None
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq} return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
_fastaParseID = re.compile('(?<=^>)[^ ]+') ######################
_fastaParseDE = re.compile('(?<=^>).+',)
_fastaParseSQ = re.compile('^[^>].+',re.MULTILINE+re.DOTALL) _cleanDef = re.compile('[\nDE]')
_fastaParseTX = re.compile('(?<=[[Tt]axon:) *[0-9]+ *(?=])')
def cleanDef(definition):
def fastaEntryParser(entry): return _cleanDef.sub('',definition)
Id = _fastaParseID.findall(entry)[0]
De = _fastaParseDE.findall(entry)[0].split(None,1)[1:] _emblParseID = re.compile('(?<=^ID {3})[^ ]+(?=;)',re.MULTILINE)
if not De: _emblParseDE = re.compile('(?<=^DE {3}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)
De='' _emblParseSQ = re.compile('(?<=^ ).+?(?=^//$)',re.MULTILINE+re.DOTALL)
else: _emblParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')
De=De[0]
Sq = cleanSeq(_fastaParseSQ.findall(entry)[0].upper()) def emblEntryParser(entry):
Id = _emblParseID.findall(entry)[0]
De = ' '.join(cleanDef(_emblParseDE.findall(entry)[0]).split())
Sq = cleanSeq(_emblParseSQ.findall(entry)[0].upper())
try: try:
Tx = int(_fastaParseTX.findall(entry)[0]) Tx = int(_emblParseTX.findall(entry)[0])
except IndexError: except IndexError:
Tx = None Tx = None
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq} return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
######################
_fastaSplit=re.compile(';\W*')
def parseFasta(seq):
seq=seq.split('\n')
title = seq[0].strip()[1:].split(None,1)
id=title[0]
if len(title) == 2:
field = _fastaSplit.split(title[1])
else:
field=[]
info = dict(x.split('=',1) for x in field if '=' in x)
definition = ' '.join([x for x in field if '=' not in x])
seq=(''.join([x.strip() for x in seq[1:]])).upper()
return id,seq,definition,info
def fastaEntryParser(entry):
id,seq,definition,info = parseFasta(entry)
Tx = info.get('taxid',None)
if Tx is not None:
Tx=int(Tx)
return {'id':id,'taxid':Tx,'definition':definition,'sequence':seq}
def sequenceIteratorFactory(entryParser,entryIterator): def sequenceIteratorFactory(entryParser,entryIterator):
def sequenceIterator(file): def sequenceIterator(file):
for entry in entryIterator(file): for entry in entryIterator(file):
@ -381,6 +412,22 @@ def ecoRankPacker(rank):
return packed return packed
def ecoNamePacker(name):
namelength = len(name[0])
classlength= len(name[1])
totalSize = namelength + classlength + 4 + 4 + 4 + 4
packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
totalSize,
int(name[1]=='scientific name'),
namelength,
classlength,
name[2],
name[0],
name[1])
return packed
def ecoSeqWriter(file,input,taxindex,parser): def ecoSeqWriter(file,input,taxindex,parser):
output = open(file,'wb') output = open(file,'wb')
@ -438,18 +485,40 @@ def ecoRankWriter(file,ranks):
output.write(ecoRankPacker(rank)) output.write(ecoRankPacker(rank))
output.close() output.close()
def nameCmp(n1,n2):
name1=n1[0].upper()
name2=n2[0].upper()
if name1 < name2:
return -1
elif name1 > name2:
return 1
return 0
def ecoNameWriter(file,names):
output = open(file,'wb')
output.write(struct.pack('> I',len(names)))
names.sort(nameCmp)
for name in names:
output.write(ecoNamePacker(name))
output.close()
def ecoDBWriter(prefix,taxonomy,seqFileNames,parser): def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
ecoRankWriter('%s.rdx' % prefix, taxonomy[1]) ecoRankWriter('%s.rdx' % prefix, taxonomy[1])
ecoTaxWriter('%s.tdx' % prefix, taxonomy[0]) ecoTaxWriter('%s.tdx' % prefix, taxonomy[0])
ecoNameWriter('%s.ndx' % prefix, taxonomy[2])
filecount = 0 filecount = 0
for filename in seqFileNames: for filename in seqFileNames:
filecount+=1 filecount+=1
sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount), sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount),
filename, filename,
taxonomy[2], taxonomy[3],
parser) parser)
if sk: if sk:
print >>sys.stderr,"Skipped entry :" print >>sys.stderr,"Skipped entry :"
@ -464,32 +533,55 @@ def ecoParseOptions(arguments):
} }
o,filenames = getopt.getopt(arguments, o,filenames = getopt.getopt(arguments,
'ht:n:gf', 'ht:n:gfe',
['help', ['help',
'taxonomy=', 'taxonomy=',
'name=', 'name=',
'genbank', 'genbank',
'fasta']) 'fasta',
'embl'])
for name,value in o: for name,value in o:
if name in ('-h','--help'): if name in ('-h','--help'):
pass printHelp()
exit()
elif name in ('-t','--taxonomy'): elif name in ('-t','--taxonomy'):
opt['taxmod']='dump'
opt['taxdir']=value opt['taxdir']=value
elif name in ('-n','--name'): elif name in ('-n','--name'):
opt['prefix']=value opt['prefix']=value
elif name in ('-g','--genbank'): elif name in ('-g','--genbank'):
opt['parser']=sequenceIteratorFactory(genbankEntryParser, opt['parser']=sequenceIteratorFactory(genbankEntryParser,
entryIterator entryIterator)
)
elif name in ('-f','--fasta'): elif name in ('-f','--fasta'):
opt['parser']=sequenceIteratorFactory(fastaEntryParser, opt['parser']=sequenceIteratorFactory(fastaEntryParser,
fastaEntryIterator) fastaEntryIterator)
elif name in ('-e','--embl'):
opt['parser']=sequenceIteratorFactory(emblEntryParser,
entryIterator)
else: else:
raise ValueError,'Unknown option %s' % name raise ValueError,'Unknown option %s' % name
return opt,filenames return opt,filenames
def printHelp():
print "-----------------------------------"
print " ecoPCRFormat.py"
print "-----------------------------------"
print "ecoPCRFormat.py [option] <argument>"
print "-----------------------------------"
print "-e --embl :[E]mbl format"
print "-f --fasta :[F]asta format"
print "-g --genbank :[G]enbank format"
print "-h --help :[H]elp - print this help"
print "-n --name :[N]ame of the new database created"
print "-t --taxonomy :[T]axonomy - path to the taxonomy database"
print " :bcp-like dump from GenBank taxonomy database."
print "-----------------------------------"
if __name__ == '__main__': if __name__ == '__main__':
opt,filenames = ecoParseOptions(sys.argv[1:]) opt,filenames = ecoParseOptions(sys.argv[1:])

811
tools/ecoSort.py Executable file
View File

@ -0,0 +1,811 @@
#!/usr/bin/env python
import struct
import sys
import os
import gzip
import re
import string
from reportlab.graphics.shapes import *
from reportlab.graphics.charts.barcharts import VerticalBarChart
from reportlab.graphics.charts.piecharts import Pie
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.lib import colors
from reportlab.platypus import *
#####
#
# Generic file function
#
#####
class Filter(object):
"""
This object provides different iterators and method :
* findTaxonByTaxid
* subTreeIterator
* parentalTreeIterator
* ecoPCRResultIterator
* rankFilter
* lastCommonTaxon
see each method for more informations
"""
def __init__(self,path):
self._path = path
self._taxonFile = "%s.tdx" % self._path
self._ranksFile = "%s.rdx" % self._path
self._namesFile = "%s.ndx" % self._path
self._taxonomy, self._index, self._ranks, self._name = self.__readNodeTable()
def __universalOpen(self,file):
if isinstance(file,str):
if file[-3:] == '.gz':
rep = gzip.open(file)
else:
rep = open(file)
else:
rep = file
return rep
def __universalTell(self,file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
return file.tell()
def __fileSize(self,file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
pos = file.tell()
file.seek(0,2)
length = file.tell()
file.seek(pos,0)
return length
def __progressBar(self,pos,max,reset=False,delta=[]):
if reset:
del delta[:]
if not delta:
delta.append(time.time())
delta.append(time.time())
delta[1]=time.time()
elapsed = delta[1]-delta[0]
percent = float(pos)/max * 100
remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
bar = '#' * int(percent/2)
bar+= '|/-\\-'[pos % 5]
bar+= ' ' * (50 - int(percent/2))
sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
#####
#
# Iterator functions
#
#####
def __ecoRecordIterator(self,file):
file = self.__universalOpen(file)
(recordCount,) = struct.unpack('> I',file.read(4))
for i in xrange(recordCount):
(recordSize,)=struct.unpack('>I',file.read(4))
record = file.read(recordSize)
yield record
def __ecoNameIterator(self):
for record in self.__ecoRecordIterator(self._namesFile):
lrecord = len(record)
lnames = lrecord - 16
(isScientificName,namelength,classLength,indextaxid,names)=struct.unpack('> I I I I %ds' % lnames, record)
name=names[:namelength]
classname=names[namelength:]
yield (name,classname,indextaxid)
def __ecoTaxonomicIterator(self):
for record in self.__ecoRecordIterator(self._taxonFile):
lrecord = len(record)
lnames = lrecord - 16
(taxid,rankid,parentidx,nameLength,name)=struct.unpack('> I I I I %ds' % lnames, record)
yield (taxid,rankid,parentidx,name)
def __ecoSequenceIterator(self,file):
for record in self.__ecoRecordIterator(file):
lrecord = len(record)
lnames = lrecord - (4*4+20)
(taxid,seqid,deflength,seqlength,cptseqlength,string)=struct.unpack('> I 20s I I I %ds' % lnames, record)
de = string[:deflength]
seq = gzip.zlib.decompress(string[deflength:])
yield (taxid,seqid,deflength,seqlength,cptseqlength,de,seq)
def __ecoRankIterator(self):
for record in self.__ecoRecordIterator(self._ranksFile):
yield record
#####
#
# Indexes
#
#####
def __ecoNameIndex(self):
indexName = [x for x in self.__ecoNameIterator()]
return indexName
def __ecoRankIndex(self):
rank = [r for r in self.__ecoRankIterator()]
return rank
def __ecoTaxonomyIndex(self):
taxonomy = []
index = {}
i = 0;
for x in self.__ecoTaxonomicIterator():
taxonomy.append(x)
index[x[0]] = i
i = i + 1
return taxonomy, index
def __readNodeTable(self):
taxonomy, index = self.__ecoTaxonomyIndex()
ranks = self.__ecoRankIndex()
name = self.__ecoNameIndex()
return taxonomy,index,ranks,name
#####
#
# PUBLIC METHODS
#
#####
def findTaxonByTaxid(self,taxid):
"""
Returns a list containing [taxid,rankid,parent_index,nameLength,name]
It takes one argument : a taxonomic id
"""
return self._taxonomy[self._index[taxid]]
def subTreeIterator(self, taxid):
"""
Returns subtree for given taxid from first child
to last child. It takes one argument : a taxonomic id
"""
idx = self._index[taxid]
yield self._taxonomy[idx]
for t in self._taxonomy:
if t[2] == idx:
for subt in self.subTreeIterator(t[0]):
yield subt
def parentalTreeIterator(self, taxid):
"""
return parental tree for given taxonomic id starting from
first ancester to the root.
"""
taxon=self.findTaxonByTaxid(taxid)
while taxon[2]!= 0:
yield taxon
taxon = self._taxonomy[taxon[2]]
yield self._taxonomy[0]
def ecoPCRResultIterator(self, file):
"""
iteration on ecoPCR result file"
It returns a dictionnary
"""
file = self.__universalOpen(file)
data = ColumnFile(file,
sep='|',
types=(str,int,int,
str,int,str,
int,str,int,
str,int,str,
str,str,int,
str,int,int,
str,str),skip='#')
for ac, sq_len, taxid,\
rank, sp_taxid, species,\
ge_taxid, genus, fa_taxid,\
family, sk_taxid, s_kgdom,\
strand, oligo_1, error_1,\
oligo_2, error_2, amp_len,\
sq_des, definition in data:
yield {'ac':ac, 'sq_len':sq_len, 'taxid':taxid,
'rank':rank, 'sp_taxid':sp_taxid, 'species':species,
'ge_taxid':ge_taxid, 'genus':genus, 'fa_taxid':fa_taxid,
'family':family, 'sk_taxid':sk_taxid, 's_kgdom':s_kgdom,
'strand':strand, 'oligo_1':oligo_1, 'error_1':error_1,
'oligo_2':oligo_2, 'error_2':error_2, 'amp_len':amp_len,
'sq_des':sq_des, 'definition':definition}
def rankFilter(self,rankid,filter):
"""
boolean telling whether rankid match filter
takes 2 arguments :
1- rankid
2- filter (i.e genus)
"""
return self._ranks[rankid] == filter
def lastCommonTaxon(self,taxid_1, taxid_2):
"""
returns a last common parent for two given taxon.
It starts from the root and goes down the tree
until their parents diverge.
It takes 2 arguments :
1- taxid 1
2- taxid 2
"""
t1 = [x[0] for x in self.parentalTreeIterator(taxid_1)]
t2 = [x[0] for x in self.parentalTreeIterator(taxid_2)]
t1.reverse()
t2.reverse()
count = t1 < t2 and len(t1) or len(t2)
for i in range(count):
if t1[i] != t2[i]:
return t1[i-1]
return t1[count-1]
class ColumnFile(object):
"""
cut an ecoPCR file into a list
"""
def __init__(self,stream,sep=None,strip=True,types=None,skip=None):
if isinstance(stream,str):
self._stream = open(stream)
elif hasattr(stream,'next'):
self._stream = stream
else:
raise ValueError,'stream must be string or an iterator'
self._delimiter=sep
self._strip=strip
if types:
self._types=[x for x in types]
for i in xrange(len(self._types)):
if self._types[i] is bool:
self._types[i]=ColumnFile.str2bool
else:
self._types=None
self._skip = skip
self._oligo = {}
def str2bool(x):
return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
str2bool = staticmethod(str2bool)
def __iter__(self):
return self
def next(self):
ligne = self._stream.next()
while ligne[0] == self._skip:
ligne = self._stream.next()
data = ligne.split(self._delimiter)
if self._strip or self._types:
data = [x.strip() for x in data]
if self._types:
it = self.endLessIterator(self._types)
data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
return data
def endLessIterator(self,endedlist):
for x in endedlist:
yield x
while(1):
yield endedlist[-1]
def getOligo(self,line):
if line[2:8] == 'direct':
r = re.compile('(?<=direct strand oligo1 : )[A-Z]+(?= *)')
self._oligo['o1'] = r.findall(line)
if line[2:9] == 'reverse':
r = re.compile('(?<=reverse strand oligo2 : )[A-Z]+(?= *)')
self._oligo['o2'] = r.findall(line)
return None
###########
#
# DATA STRUCTURE
#
###########
class ecoTable(list):
"""
Data object inheriting from list
"""
def __init__(self, headers, types):
list.__init__(self)
self.headers = headers
self.types = types
def __setitem__ (self,key,value):
"""
method overloaded to check data types
"""
for e in range(len(value)):
value[e] = self.types[e](value[e])
list.__setitem__(self,key,value)
def __getitem__(self,index):
newtable = ecoTable(self.headers,self.types)
if isinstance(index,slice):
newtable.extend(list.__getitem__(self,index))
else:
newtable.append(list.__getitem__(self,index))
return newtable
def getColumns(self,columnList):
newhead = [self.headers[x] for x in columnList]
newtype = [self.types[x] for x in columnList]
newtable = ecoTable(newhead,newtype)
for line in self:
newtable.append([line[x] for x in columnList])
return newtable
###########
#
# PARSE FUNCTIONS
#
###########
def _parseOligoResult(filter,file,strand):
seq = {}
if strand == 'direct':
key = 'oligo_1'
elif strand == 'reverse':
key = 'oligo_2'
for s in filter.ecoPCRResultIterator(file):
o = s[key]
taxid = s['taxid']
if not seq.has_key(o):
seq[o] = [1,taxid]
else:
seq[o][0] = seq[o][0] + 1
seq[o][1] = filter.lastCommonTaxon(seq[o][1],taxid)
return seq
def _parseTaxonomyResult(table):
tax = {}
for l in table:
taxid = l[2]
scName = l[3]
count = l[1]
if not tax.has_key(taxid):
tax[taxid] = [1,scName,count]
else:
tax[taxid][0] = tax[taxid][0] + 1
tax[taxid][2] = tax[taxid][2] + count
return tax
def _sortTable(e1,e2):
e1 = e1[1]
e2 = e2[1]
if e1 < e2:
return 1
if e1 > e2:
return -1
return 0
def _countOligoMismatch(o1,o2):
"""
define mismatch between two oligonucleotids
return number of mismatch
"""
mmatch = 0
if len(o1) < len(o2):
mmatch = int(len(o2) - len(o1))
for i in range(len(o1)):
try:
if o1[i] != o2[i]:
mmatch = mmatch + 1
except:
mmatch = mmatch + 1
return mmatch
###########
#
# TOOLS FUNCTIONS
#
###########
def customSort(table,x,y):
"""
"""
x = x-1
y = y-1
h = (table.headers[x],table.headers[y])
t = (table.types[x],table.types[y])
cTable = ecoTable(h,t)
tmp = {}
for l in table:
if tmp.has_key(l[x]):
tmp[l[x]] = tmp[l[x]] + l[y]
else:
tmp[l[x]] = l[y]
for k,v in tmp.items():
cTable.append([k,v])
return cTable
def countColumnOccurrence(table,x):
x = x-1
h = (table.headers[x],"count")
t = (table.types[x],int)
cTable = Table(h,t)
tmp = {}
for l in table:
if tmp.has_key(l[x]):
tmp[l[x]] = tmp[l[x]] + 1
else:
tmp[l[x]] = 1
for k,v in tmp.items():
cTable.append([k,v])
return cTable
def buildSpecificityTable(table):
header = ("mismatch","taxon","count")
type = (int,str,int)
speTable = ecoTable(header,type)
tmp = {}
for l in table:
if not tmp.has_key(l[5]):
tmp[l[5]] = {}
if not tmp[l[5]].has_key(l[3]):
tmp[l[5]][l[3]] = l[1]
else:
tmp[l[5]][l[3]] = tmp[l[5]][l[3]] + l[1]
for mismatch in tmp.items():
for taxon,count in mismatch[1].items():
speTable.append([mismatch[0],taxon,count])
return speTable
def buildOligoTable(table, file, filter, oligoRef, strand='direct'):
"""
Fills and sorts a Table object with ecoPCR result file
Takes 4 arguments
1- Table object
2- ecoPCR result file path
3- Filter Object
4- the oligo used in ecoPCR
5- the oligo type : direct or reverse
"""
seq = _parseOligoResult(filter, file, strand)
i = 0
for oligo, info in seq.items():
table.append(0)
count, lctTaxid = info[0], info[1]
scName = filter.findTaxonByTaxid(info[1])[3]
rank = filter._ranks[filter.findTaxonByTaxid(info[1])[1]]
mismatch = _countOligoMismatch(oligoRef,oligo)
table[i]=[oligo,count,lctTaxid,scName,rank,mismatch]
i = i + 1
table.sort(_sortTable)
def buildTaxonomicTable(table):
"""
Fill a Table object with a taxonomic synthesis
"""
taxHeaders = ("scName","numOfOligo","numOfAmpl","taxid")
taxTypes = (str, int, int, int)
taxTable = ecoTable(taxHeaders, taxTypes)
tax = _parseTaxonomyResult(table)
i = 0
for taxid, info in tax.items():
taxTable.append(0)
numOfOligo, scName, numOfAmpl = info[0], info[1], info[2]
taxTable[i]=[scName,numOfOligo,numOfAmpl,taxid]
i = i + 1
taxTable.sort(_sortTable)
return taxTable
def _parseSequenceResult(filter,file,id):
sequences = {}
idIndex = {}
if id == 'family':
key = 'fa_taxid'
elif id == 'genus':
key = 'ge_taxid'
else:
key = 'taxid'
for s in filter.ecoPCRResultIterator(file):
seq = s['sq_des']
id = s[key]
if not idIndex.has_key(id):
idIndex[id] = []
if not sequences.has_key(seq):
sequences[seq] = [id]
else:
sequences[seq].append(id)
return sequences, idIndex
def _sameValuesInList(array):
for i in range(len(array)-1):
if array[i] != array[i+1]:
return False
return True
def _sortSequences(file,filter):
sequences, idIndex = _parseSequenceResult(filter,file,'species')
for s,id in sequences.items():
if len(id) == 1 or _sameValuesInList(id):
idIndex[id[0]].append(1)
else:
for e in id:
idIndex[e].append(0)
for id,values in idIndex.items():
idIndex[id] = float(values.count(1)) / float(len(values)) * 100
identified = {}
non_identified = {}
ambiguous = {}
return sequences, idIndex
def getIntraSpeciesDiversity(table,file,filter):
intraDiv = {}
seq, idIndex = _sortSequences(file,filter)
for id,percent in idIndex.items():
if percent == 100:
intraDiv[id] = [0,[]]
for seq,idList in sequences.items():
if id in idList:
intraDiv[id][0] = intraDiv[id][0] + 1
intraDiv[id][1].append(seq)
for id, values in intraDiv.items():
table.append(id,values[0],values[1])
###########
#
# OUTPUT FUNCTIONS
#
###########
def printTable(table):
"""
Displays the content a of Table object
Take 1 arguments
1- Table object
"""
format = ("%20s | " * len(table.headers))[:-3]
print format % tuple([str(e) for e in table.headers ]) +"\n" + "-"*23*len(table.headers)
for l in table:
print format % tuple([str(e) for e in l ])
print "# %d results" % len(table)
def saveAsCSV(table,path):
"""
Creates a csv file from a Table object
Takes 2 arguments
1- Table object
2- path of the file-to-be
"""
file = open(path,'w')
file.write(','.join([str(e) for e in table.headers ]) + "\n")
for l in table:
file.write(','.join([str(e) for e in l ]) + "\n")
file.close()
def grepTable(table,col,pattern):
"""
Filters a Table object with regular expression
Takes 3 arguments :
1- Table object
2- number of column to match with
3- regular expression pattern
Returns a Table object
"""
col = col -1
p = re.compile(pattern, re.IGNORECASE)
out = ecoTable(table.headers,table.types)
for l in table:
if p.search(l[col]):
out.append(l)
return out
###########
#
# GRAPH FUNCTIONS
#
###########
class EcoGraph(object):
def __init__(self):
self._styles = getSampleStyleSheet()
self._element = []
self._element.append(self._box(Paragraph("EcoPCR report", self._styles['Title'])))
self._element.append(Spacer(0, 0.5 * cm))
def _box(self,flt, center=True):
box_style = [('BOX', (0, 0), (-1, -1), 0.5, colors.lightgrey)]
if center:
box_style += [('ALIGN', (0, 0), (-1, -1), 'CENTER')]
return Table([[flt]], style=box_style)
def _addChart(self,chart,title):
drawing = Drawing(300, 250)
drawing.add(chart)
self._element.append(self._box(Paragraph(title, self._styles['Normal'])))
self._element.append(self._box(drawing))
self._element.append(Spacer(0, 0.5 * cm))
def _formatData(self,table):
data, label = [],[]
for i in range(len(table)):
label.append(table[i][0])
data.append(table[i][1])
return data, label
def makePie(self, table, title):
data, label = self._formatData(table)
pie = Pie()
pie.x = 100
pie.y = 100
pie.data = data
pie.labels = label
self._addChart(pie, title)
def makeHistogram(self, table, title):
data, label = self._formatData(table)
data = [tuple(data)]
histo = VerticalBarChart()
histo.x = 10
histo.y = 70
histo.height = 150
histo.width = 300
histo.bars.strokeWidth = 1
histo.barSpacing = 1
histo.barLabels.dy = 5
histo.barLabelFormat = '%d'
histo.barLabels.fontSize = 9 - (len(data[0])/10)
histo.data = data
histo.categoryAxis.labels.boxAnchor = 'e'
histo.categoryAxis.labels.textAnchor = 'start'
histo.categoryAxis.labels.dx = -40
histo.categoryAxis.labels.dy = -50
histo.categoryAxis.labels.angle = 45
histo.categoryAxis.labels.width = 10
histo.categoryAxis.labels.height = 4
histo.categoryAxis.categoryNames = label
histo.categoryAxis.strokeWidth = 1
histo.categoryAxis.labels.fontSize = 8
histo.valueAxis.valueMin = min(data[0])*0.7
histo.valueAxis.valueMax = max(data[0])
step = (max(data[0]) - min(data[0])) / 10
histo.valueAxis.valueStep = step > 1 and step or 1
self._addChart(histo, title)
def makeReport(self,path):
doc = SimpleDocTemplate(path)
doc.build(self._element)
######################
def init():
file = "/Users/bessiere/Documents/workspace/ecoPCR/src/toto.tmp"
oligo = {'o1':'ATGTTTAAAA','o2':'ATGGGGGTATTG'}
filter = Filter("/ecoPCRDB/gbmam")
headers = ("oligo", "Num", "LCT Taxid", "Sc Name", "Rank", "distance")
types = (str, int, int, str, str, int)
o1Table = ecoTable(headers, types)
o2Table = ecoTable(headers, types)
buildOligoTable(o1Table, file, filter, oligo['o1'], 'direct')
buildOligoTable(o2Table, file, filter, oligo['o2'], 'reverse')
taxTable = buildTaxonomicTable(o1Table)
speTable = buildSpecificityTable(o1Table)
return o1Table, o2Table, taxTable
def start():
file = "/Users/bessiere/Documents/workspace/ecoPCR/src/toto.tmp"
filter = Filter("/ecoPCRDB/gbmam")
speHeaders = ("taxid","num of seq","list of seq")
speTypes = (int,int,list)
speTable = ecoTable(speHeaders,speTypes)
getIntraSpeciesDiversity(speTable, file, filter)