36 Commits

Author SHA1 Message Date
194ec811f4 Fixes #2 by adding the missing -D option in the help 2015-05-19 13:52:35 +02:00
126bf80670 Convert svn:ignore properties to .gitignore. 2015-05-16 17:51:46 +02:00
46d086b215 insufficient space allocated for handling strings in arguments
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@595 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2013-03-07 09:25:16 +00:00
0a62ff49cc MOD: updated the help to include the -P option
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@424 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-16 11:56:36 +00:00
2452df90de MOD: Added a [P]ath option displaying for each taxon its full path
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@423 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-16 11:43:33 +00:00
745d50cfa4 MOD: Corrected condition in getSon to handle the root of the taxonomy
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@422 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-16 07:52:28 +00:00
e2fe83fcb7 remove extra obitools directory from ecoPCR subversion archive
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@418 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-07 09:31:08 +00:00
2a118eedda MOD: In the printRepeat function, corrected rdelta and ldelta management
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@417 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-07 09:14:09 +00:00
93c530e090 removed the "without temperature" option
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@416 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2012-05-04 14:53:33 +00:00
4e5d8893e5 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@390 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2011-12-05 12:59:34 +00:00
957a59eb5d Add management of local taxa from the new extension of the OBI Taxonomy library
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@315 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-07-19 06:44:47 +00:00
9adf426abf delta bug
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@310 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-06-30 15:26:36 +00:00
ae528e48f4 delta bug
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@309 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-06-30 15:12:38 +00:00
f53cc6d500 patch on printing
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@299 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-06-15 21:54:33 +00:00
8313c67a9b syntax debug on ecopcr.c
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@297 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-05-25 16:35:30 +00:00
01173d22cf add a hidden -D option
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@296 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2011-05-25 15:41:22 +00:00
87c2496447 patch a bug in Tm computation
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@260 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-05-23 12:30:16 +00:00
9867859237 patch for 64bits constants
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@244 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-02-24 15:10:11 +00:00
f6f39f58fe patch Tm calculation to return NaN if one of the two sequences contains letters other than ACGT
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@243 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-02-24 15:00:11 +00:00
7331dd5612 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@242 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2010-01-22 09:21:04 +00:00
0214be011e git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@241 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2010-01-22 09:20:09 +00:00
1c30a8604f git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@240 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2010-01-22 09:19:36 +00:00
985d067bd8 New version 0.2 of ecoPCR, with Tm computation.
Take care: the file format has changed. You must use the corresponding version of ecogrep.
You can use the -t option to go back to the old format without Tm computation.

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@239 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-01-22 09:19:19 +00:00
f7e25b2082 New version 0.2 of ecoPCR, with Tm computation.
Take care: the file format has changed. You must use the corresponding version of ecogrep.
You can use the -t option to go back to the old format without Tm computation.

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@238 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2010-01-22 09:16:53 +00:00
ad6f493d0f Accept to deal with sequence in lower case
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@217 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-05-13 13:21:10 +00:00
1428cd7499 patch online help in ecoPCR software
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@168 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-05-16 08:53:11 +00:00
708b7c387e Sequence circularity, bug correction
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@167 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-04-28 16:46:18 +00:00
d863b7e48e Manage sequence circularity
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@166 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-04-28 15:49:12 +00:00
4b74056af8 change reference column for taxonomy filtering in ecogrep.c
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@165 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-03-12 21:26:08 +00:00
f68f4af244 Add option to ecoPCRFormat to deal with an obischema db as taxonomy source
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@158 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2008-02-08 10:27:42 +00:00
a1141a77b5 --
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@119 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2007-09-24 10:10:03 +00:00
6835206344 Add version file
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@118 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2007-09-24 10:07:39 +00:00
45c85a9f32 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@117 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-09-24 10:06:00 +00:00
471bf72bf9 Add rule for ecogrep build in makefile
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@116 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2007-09-24 09:58:48 +00:00
22ecb4b842 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@115 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-09-24 09:48:19 +00:00
bc4c7656c6 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@114 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2007-09-24 09:48:08 +00:00
28 changed files with 1443 additions and 178 deletions

122
.cproject Normal file
View File

@ -0,0 +1,122 @@
<?xml version="1.0" encoding="UTF-8"?>
<?fileVersion 4.0.0?>
<cproject>
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="ecoPCR" buildProperties="" id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg">
<folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.985481067.141857048" name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.1673936174" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.584997877" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/>
<builder id="cdt.managedbuild.target.gnu.builder.macosx.base.328283627" managedBuildOn="false" name="Gnu Make Builder.MacOSX GCC" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.627652869" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.815782479" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.536333148" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.202459766" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.1142106025" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.845498516" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base"/>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="makefileGenerator">
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
<buildOutputProvider>
<openAction enabled="false" filePath=""/>
<parser enabled="false"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="ecoPCR.null.1320766773" name="ecoPCR"/>
</storageModule>
</cproject>

16
.gitignore vendored Normal file
View File

@ -0,0 +1,16 @@
# /src/
/src/ecoPCR
/src/ecofind
/src/*.P
/src/ecogrep
# /src/libapat/
/src/libapat/libapat.a
/src/libapat/*.P
# /src/libecoPCR/
/src/libecoPCR/*.P
# /src/libthermo/
/src/libthermo/*.P

83
.project Normal file
View File

@ -0,0 +1,83 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>ecoPCR</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.python.pydev.PyDevBuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>?name?</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildCommand</key>
<value>make</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value>
</dictionary>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.python.pydev.pythonNature</nature>
</natures>
</projectDescription>

7
.pydevproject Normal file
View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.4</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
</pydev_project>

1
VERSION Normal file
View File

@ -0,0 +1 @@
0.5.0

View File

@ -1,4 +1,4 @@
EXEC=ecoPCR ecofind ecoisundertaxon
EXEC=ecoPCR ecofind ecogrep
PCR_SRC= ecopcr.c
PCR_OBJ= $(patsubst %.c,%.o,$(PCR_SRC))
@ -6,15 +6,19 @@ PCR_OBJ= $(patsubst %.c,%.o,$(PCR_SRC))
FIND_SRC= ecofind.c
FIND_OBJ= $(patsubst %.c,%.o,$(FIND_SRC))
GREP_SRC= ecogrep.c
GREP_OBJ= $(patsubst %.c,%.o,$(GREP_SRC))
IUT_SRC= ecoisundertaxon.c
IUT_OBJ= $(patsubst %.c,%.o,$(IUT_SRC))
SRCS= $(PCR_SRC) $(FIND_SRC) $(IUT_SRC)
LIB= -lecoPCR -lapat -lz -lm
LIB= -lecoPCR -lthermo -lapat -lz -lm
LIBFILE= libapat/libapat.a \
libecoPCR/libecoPCR.a
libecoPCR/libecoPCR.a \
libthermo/libthermo.a
include global.mk
@ -44,6 +48,17 @@ ecoPCR: $(PCR_OBJ) $(LIBFILE)
ecofind: $(FIND_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
#
# ecogrep compilation
#
########
# executable compilation and link
ecogrep: $(GREP_OBJ) $(LIBFILE)
$(CC) $(LDFLAGS) -o $@ $< $(LIBPATH) $(LIB)
########
#
# IsUnderTaxon compilation
@ -67,6 +82,9 @@ libapat/libapat.a:
libecoPCR/libecoPCR.a:
$(MAKE) -C libecoPCR
libthermo/libthermo.a:
$(MAKE) -C libthermo
########
#
@ -79,6 +97,7 @@ clean:
rm -f $(EXEC)
$(MAKE) -C libapat clean
$(MAKE) -C libecoPCR clean
$(MAKE) -C libthermo clean

BIN
src/ecoPCR.gz Executable file

Binary file not shown.

View File

@ -9,10 +9,23 @@
/**
* display the result
**/
static void printresult(ecotx_t *taxon,econame_t* name,ecotaxonomy_t *taxonomy)
/**
 * print the path leading to a taxon on stdout
 *
 * Ancestors are printed first, by recursing on taxon->parent, and each
 * element is separated from the following one by ';'. An element is
 * written as "<rank label>:<name>", or as "<name>" alone when the rank
 * of the taxon equals the index returned by rank_index("no rank", ...).
 *
 * The recursion stops on a taxon that is its own parent (presumably the
 * root of the taxonomy -- TODO confirm against the taxonomy loader) and,
 * defensively, on a taxon whose parent pointer is NULL.
 *
 * @param taxon     taxon whose path is printed; NULL prints nothing
 * @param taxonomy  taxonomy providing the rank labels (taxonomy->ranks)
 **/
void displayPath(ecotx_t *taxon, ecotaxonomy_t *taxonomy){
	/* nothing to print for a missing taxon (e.g. a failed lookup upstream) */
	if (taxon == NULL)
		return;

	/* print the ancestors first so the path reads from the top down */
	if (taxon->parent != NULL && taxon != taxon->parent){
		displayPath(taxon->parent,taxonomy);
		printf(";");
	}

	/* the rank label prefix is omitted for taxa carrying the "no rank" rank */
	if (rank_index("no rank",taxonomy->ranks) != taxon->rank)
		printf("%s:", taxonomy->ranks->label[taxon->rank]);

	printf("%s", taxon->name);
}
static void printresult(ecotx_t *taxon,econame_t* name,ecotaxonomy_t *taxonomy, int32_t pathDisplay)
{
char* rankname;
char* classname;
char* classname;
char* matchedname=taxon->name;
classname="scientific name";
@ -24,31 +37,38 @@ static void printresult(ecotx_t *taxon,econame_t* name,ecotaxonomy_t *taxonomy)
rankname= taxonomy->ranks->label[taxon->rank];
printf("%10d \t| %15s \t|\t %-50s \t|\t %15s \t|\t %s\n",
printf("%10d \t| %15s \t|\t %-50s \t|\t %15s \t|\t %s",
taxon->taxid,
rankname,
matchedname,
classname,
taxon->name);
taxon->name);
if (pathDisplay) {
printf("\t|\t");
displayPath(taxon, taxonomy);
}
printf("\n");
}
/**
* display header before printing any result
**/
static void printheader(void)
static void printheader(int32_t pathDisplay)
{
printf("# %12s \t| %15s \t|\t %-50s \t|\t %-15s \t|\t %s\n#\n",
printf("# %12s \t| %15s \t|\t %-50s \t|\t %-15s \t|\t %s%s\n#\n",
"taxonomy id",
"taxonomy rank",
"name",
"class name",
"scientific name");
"scientific name",
pathDisplay ? "\t|\t path":"");
}
/**
* display son's list for given taxon
**/
static void get_son(ecotaxonomy_t *taxonomy, ecotx_t *taxon, int32_t *count, char *rankname)
static void get_son(ecotaxonomy_t *taxonomy, ecotx_t *taxon, int32_t *count, char *rankname, int32_t pathDisplay)
{
int32_t i;
ecotx_t *current_taxon;
@ -57,14 +77,15 @@ static void get_son(ecotaxonomy_t *taxonomy, ecotx_t *taxon, int32_t *count, cha
i < taxonomy->taxons->count;
i++, current_taxon++)
{
if (taxon->taxid == current_taxon->parent->taxid)
if (taxon != current_taxon && taxon->taxid == current_taxon->parent->taxid)
{
if (rankname == NULL || !strcmp(rankname,taxonomy->ranks->label[current_taxon->rank]))
{
printresult(current_taxon, NULL, taxonomy);
printresult(current_taxon, NULL, taxonomy, pathDisplay);
(*count)++;
}
get_son(taxonomy,current_taxon,count,rankname);
get_son(taxonomy,current_taxon,count,rankname, pathDisplay);
}
}
}
@ -95,23 +116,23 @@ static void listfilteroptions(ecorankidx_t *ranks)
/* get back on given taxid taxonomic parent */
/* and display it */
/* ---------------------------------------- */
void gettaxidparents(int32_t taxid, ecotaxonomy_t *taxonomy, char *rankname)
void gettaxidparents(int32_t taxid, ecotaxonomy_t *taxonomy, char *rankname, int32_t pathDisplay)
{
ecotx_t *next_parent;
int32_t c = 0;
next_parent = eco_findtaxonbytaxid(taxonomy, taxid);
printheader();
printheader(pathDisplay);
printresult(next_parent, NULL,taxonomy);
printresult(next_parent, NULL,taxonomy, pathDisplay);
while ( strcmp(next_parent->name, "root") )
{
next_parent = next_parent->parent;
if (rankname == NULL || !strcmp(rankname,taxonomy->ranks->label[next_parent->rank]))
{
printresult(next_parent, NULL,taxonomy);
printresult(next_parent, NULL,taxonomy, pathDisplay);
c++;
}
}
@ -128,7 +149,7 @@ void gettaxidparents(int32_t taxid, ecotaxonomy_t *taxonomy, char *rankname)
static void ExitUsage(stat)
int stat;
{
PP "usage: ecofind [-d database] [-h] [-l] [-r taxonomic rank] [-p taxid] [-s taxid] <taxon name pattern> ... \n");
PP "usage: ecofind [-d database] [-h] [-l] [-P] [-r taxonomic rank] [-p taxid] [-s taxid] <taxon name pattern> ... \n");
PP "type \"ecofind -h\" for help\n");
if (stat)
exit(stat);
@ -159,9 +180,11 @@ static void PrintHelp()
PP " Write the database radical without any extension.\n\n");
PP "-h : [H]elp - print <this> help\n\n");
PP "-l : [L]ist all taxonomic rank available for -r option\n\n");
PP "-P : [P]ath : add a column containing the full path for each displayed taxon\n\n");
PP "-p : [P]arents : specifiying this option displays all parental tree's information for the given taxid.\n\n");
PP "-r : [R]estrict to given taxonomic rank\n\n");
PP "-s : [S]ons: specifiying this option displays all subtree's information for the given taxid.\n\n");
PP "-P : Display taxonomic [P]ath as suplementary column in output\n\n");
PP "arguments:\n");
PP "<taxon> name pattern bearing regular expressions\n\n");
PP "------------------------------------------\n");
@ -197,31 +220,36 @@ int main(int argc, char **argv)
char *rankname = NULL;
int32_t rankfilter = 1;
int32_t list = 0;
int32_t path = 0;
ecotx_t *subtree_parent;
int32_t count_son = 0;
while ((carg = getopt(argc, argv, "had:p:s:r:l")) != -1) {
while ((carg = getopt(argc, argv, "had:p:s:r:lP")) != -1) {
switch (carg) {
case 's': /* path to the database */
sscanf(optarg,"%d",&subtree);
break;
case 'r': /* rank filter */
rankname = ECOMALLOC(strlen(optarg),"allocation rankname");
rankname = ECOMALLOC(strlen(optarg)+1,"allocation rankname");
strcpy(rankname,optarg);
rankfilter = 0;
break;
case 'd': /* path to the database */
prefix = ECOMALLOC(strlen(optarg),"allocation prefix");
prefix = ECOMALLOC(strlen(optarg)+1,"allocation prefix");
strcpy(prefix,optarg);
break;
case 'l': /* list rank filter options */
list = 1;
break;
case 'P': /* Path output option */
path=1;
break;
case 'a': /* allow alternative names */
alternative = 1;
@ -279,7 +307,7 @@ int main(int argc, char **argv)
/* ---------------------------------------- */
if (uptree)
{
gettaxidparents(uptree,taxonomy,rankname);
gettaxidparents(uptree,taxonomy,rankname, path);
return 0;
}
@ -289,10 +317,10 @@ int main(int argc, char **argv)
/* ---------------------------------------- */
if (subtree)
{
printheader();
printheader(path);
subtree_parent = eco_findtaxonbytaxid(taxonomy,subtree);
printresult(subtree_parent, NULL,taxonomy);
get_son(taxonomy, subtree_parent,&count_son,rankname);
printresult(subtree_parent, NULL,taxonomy, path);
get_son(taxonomy, subtree_parent,&count_son,rankname, path);
printf("# %d son(s) found\n#\n",count_son);
return 0;
}
@ -315,7 +343,7 @@ int main(int argc, char **argv)
nummatch=0;
printheader();
printheader(path);
for (j=0,name=taxonomy->names->names;
j < name_count;
@ -329,7 +357,7 @@ int main(int argc, char **argv)
if (!re_match && (alternative || name->is_scientificname) && rankfilter)
{
printresult(name->taxon,name,taxonomy);
printresult(name->taxon,name,taxonomy, path);
nummatch++;
}

BIN
src/ecofind.gz Executable file

Binary file not shown.

View File

@ -22,18 +22,18 @@ void getLineContent(char *stream, ecoseq_t *seq, ecoseq_t *oligoseq_1, ecoseq_t
case 0:
seq->AC = strdup(buffer);
break;
case 4:
case 2:
sscanf(buffer,"%d",&seq->taxid);
break;
case 13:
oligoseq_1->SQ = strdup(buffer);
oligoseq_1->SQ_length = strlen(buffer);
break;
case 15:
case 16:
oligoseq_2->SQ = strdup(buffer);
oligoseq_2->SQ_length = strlen(buffer);
break;
case 18:
case 20:
seq->SQ = strdup(buffer);
seq->SQ_length = strlen(buffer);
break;
@ -64,7 +64,7 @@ int ispatternmatching(ecoseq_t *seq, PatternPtr pattern){
if (pattern != NULL)
{
SeqPtr apatseq = NULL;
apatseq=ecoseq2apatseq(seq,apatseq);
apatseq=ecoseq2apatseq(seq,apatseq,0);
return ManberAll(apatseq,pattern,0,0,apatseq->seqlen) > 0;
}
else return 0;
@ -400,4 +400,4 @@ int main(int argc, char **argv){
ECOFREE(restricted_taxid,"Error in free stream");
return 0;
}
}

BIN
src/ecogrep.gz Executable file

Binary file not shown.

BIN
src/ecoisundertaxon Executable file

Binary file not shown.

View File

@ -1,11 +1,13 @@
#include "libecoPCR/ecoPCR.h"
#include "libthermo/nnparams.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#define VERSION "0.1"
#define VERSION "0.2"
/* ----------------------------------------------- */
/* printout help */
@ -21,6 +23,8 @@ static void PrintHelp()
PP "usage: ecoPCR [options] <nucleotidic patterns>\n");
PP "------------------------------------------\n");
PP "options:\n");
PP "-a : Salt concentration in M for Tm computation (default 0.05 M)\n\n");
PP "-c : Consider that the database sequences are [c]ircular\n\n");
PP "-d : [D]atabase : to match the expected format, the database\n");
PP " has to be formated first by the ecoPCRFormat.py program located.\n");
PP " in the tools directory.\n");
@ -29,8 +33,11 @@ static void PrintHelp()
PP " .tdx : contains information concerning the taxonomy\n");
PP " .rdx : contains the taxonomy rank\n\n");
PP " ecoPCR needs all the file type. As a result, you have to write the\n");
PP " database radical without any extension. For example /ecoPCRDB/gbmam\n\n");
PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n");
PP " database radical without any extension. For example /ecoPCRDB/gbmam\n\n");
PP "-D : Keeps the specified number of nucleotides on each side of the in silico \n");
PP " amplified sequences (including the amplified DNA fragment plus the two target \n");
PP " sequences of the primers).\n\n");
PP "-e : [E]rror : max errors allowed by oligonucleotide (0 by default)\n\n");
PP "-h : [H]elp - print <this> help\n\n");
PP "-i : [I]gnore the given taxonomy id.\n");
PP " Taxonomy id are available using the ecofind program.\n");
@ -39,9 +46,11 @@ static void PrintHelp()
PP " super kingdom mode by default.\n\n");
PP "-l : minimum [L]ength : define the minimum amplication length. \n\n");
PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n");
PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1\n");
PP " or OWCZARZY:2, default=1)\n\n");
PP "-r : [R]estricts the search to the given taxonomic id.\n");
PP " Taxonomy id are available using the ecofind program.\n");
PP " see its help typing ecofind -h for more information.\n");
PP " see its help typing ecofind -h for more information.\n\n");
PP "\n");
PP "------------------------------------------\n");
PP "first argument : oligonucleotide for direct strand\n\n");
@ -63,11 +72,13 @@ static void PrintHelp()
PP "column 13 : strand (direct or reverse)\n");
PP "column 14 : first oligonucleotide\n");
PP "column 15 : number of errors for the first strand\n");
PP "column 16 : second oligonucleotide\n");
PP "column 17 : number of errors for the second strand\n");
PP "column 18 : amplification length\n");
PP "column 19 : sequence\n");
PP "column 20 : definition\n");
PP "column 16 : Tm for hybridization of primer 1 at this site\n");
PP "column 17 : second oligonucleotide\n");
PP "column 18 : number of errors for the second strand\n");
PP "column 19 : Tm for hybridization of primer 1 at this site\n");
PP "column 20 : amplification length\n");
PP "column 21 : sequence\n");
PP "column 22 : definition\n");
PP "------------------------------------------\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPCR/\n");
PP "------------------------------------------\n\n");
@ -96,12 +107,15 @@ static void ExitUsage(stat)
#undef PP
void printRepeat(ecoseq_t *seq,
char* primer1, char* primer2,
PNNParams tparm,
PatternPtr o1, PatternPtr o2,
char strand,
char kingdom,
int32_t pos1, int32_t pos2,
int32_t err1, int32_t err2,
ecotaxonomy_t *taxonomy)
ecotaxonomy_t *taxonomy,
int32_t delta)
{
char *AC;
int32_t seqlength;
@ -124,12 +138,18 @@ void printRepeat(ecoseq_t *seq,
int32_t error1;
int32_t error2;
int32_t ldelta,rdelta;
char *amplifia = NULL;
int32_t amplength;
double tm1,tm2;
double tm=0;
int32_t i;
AC = seq->AC;
seqlength = seq->SQ_length;
main_taxon = &taxonomy->taxons->taxon[seq->taxid];
taxid = main_taxon->taxid;
@ -184,44 +204,81 @@ void printRepeat(ecoseq_t *seq,
superkingdom_name = "###";
}
amplength = pos2-pos1;
amplifia = getSubSequence(seq->SQ,pos1,pos2);
ldelta=(pos1 <= delta)?pos1:delta;
/*rdelta=((pos2+delta)>=seqlength)?seqlength-pos2-1:delta; */
rdelta=((pos2+delta)>=seqlength)?seqlength-pos2:delta;
amplifia = getSubSequence(seq->SQ,pos1-ldelta,pos2+rdelta);
amplength= strlen(amplifia)-rdelta-ldelta;
if (strand=='R')
{
ecoComplementSequence(amplifia);
strncpy(oligo1,amplifia,o2->patlen);
ecoComplementSequence(amplifia);
strncpy(oligo1,amplifia + rdelta ,o2->patlen);
oligo1[o2->patlen]=0;
error1=err2;
strncpy(oligo2,amplifia + amplength - o1->patlen,o1->patlen);
strncpy(oligo2, amplifia + rdelta + amplength - o1->patlen,o1->patlen);
oligo2[o1->patlen]=0;
error2=err1;
amplifia+=o2->patlen;
if (delta==0)
amplifia+=o2->patlen;
else
{
delta=ldelta;
ldelta=rdelta+o2->patlen;
rdelta=delta+o1->patlen;
}
}
else
else /* strand == 'D' */
{
strncpy(oligo1,amplifia,o1->patlen);
strncpy(oligo1,amplifia+ldelta,o1->patlen);
oligo1[o1->patlen]=0;
error1=err1;
strncpy(oligo2,amplifia + amplength - o2->patlen,o2->patlen);
strncpy(oligo2,amplifia + ldelta + amplength - o2->patlen,o2->patlen);
oligo2[o2->patlen]=0;
error2=err2;
amplifia+=o1->patlen;
if (delta==0)
amplifia+=o1->patlen;
else
{
ldelta+=o1->patlen;
rdelta+=o2->patlen;
}
}
ecoComplementSequence(oligo2);
amplifia[amplength - o2->patlen - o1->patlen]=0;
if(delta==0)
amplifia[amplength - o2->patlen - o1->patlen]=0;
else
{
delta=ldelta+rdelta+amplength-o1->patlen-o2->patlen;
for (i=0;i<ldelta;i++)
amplifia[i]|=32;
for (i=1;i<=rdelta;i++)
amplifia[delta-i]|=32;
amplifia[delta]=0;
}
printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %-32s | %2d | %5d | %s | %s\n",
tm1=nparam_CalcTwoTM(tparm,oligo1,primer1,o1->patlen) - 273.15;
tm2=nparam_CalcTwoTM(tparm,oligo2,primer2,o2->patlen) - 273.15;
tm = (tm1 < tm2) ? tm1:tm2;
printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %5.2f | %-32s | %2d | %5.2f | %5d | %s | %s\n",
AC,
seqlength,
taxid,
@ -237,12 +294,15 @@ void printRepeat(ecoseq_t *seq,
strand,
oligo1,
error1,
tm1,
oligo2,
error2,
amplength,
tm2,
amplength - o1->patlen - o2->patlen,
amplifia,
seq->DE
);
}
int main(int argc, char **argv)
@ -263,6 +323,7 @@ int main(int argc, char **argv)
PatternPtr o1c;
PatternPtr o2c;
int32_t delta=0;
int32_t lmin=0;
int32_t lmax=0;
int32_t error_max=0;
@ -297,9 +358,13 @@ int main(int argc, char **argv)
int32_t *ignored_taxid = NULL;
int32_t r=0;
int32_t g=0;
int32_t circular=0;
int32_t saltmethod=SALT_METHOD_SANTALUCIA;
double salt=0.05;
CNNParams tparm;
while ((carg = getopt(argc, argv, "hd:l:L:e:i:r:k")) != -1) {
while ((carg = getopt(argc, argv, "hcd:l:L:e:i:r:km:a:tD:")) != -1) {
switch (carg) {
/* -------------------- */
@ -317,6 +382,12 @@ int main(int argc, char **argv)
exit(0);
break;
/* ------------------------- */
case 'D': /* min amplification lenght */
/* ------------------------- */
sscanf(optarg,"%d",&delta);
break;
/* ------------------------- */
case 'l': /* min amplification lenght */
/* ------------------------- */
@ -359,7 +430,25 @@ int main(int argc, char **argv)
break;
/* -------------------- */
case '?': /* bad option */
case 'c': /* stores the taxonomic id to ignore */
/* --------------------------------- */
circular = 1;
break;
/* --------------------------------- */
case 'm': /* set salt method */
/* --------------------------------- */
sscanf(optarg,"%d",&(saltmethod));
break;
/* --------------------------------- */
case 'a': /* set salt */
/* --------------------------------- */
sscanf(optarg,"%lf",&(salt));
break;
case '?': /* bad option */
/* -------------------- */
errflag++;
}
@ -379,6 +468,13 @@ int main(int argc, char **argv)
oligo2 = ECOMALLOC(strlen(argv[optind])+1,
"Error on oligo1 allocation");
strcpy(oligo2,argv[optind]);
if (circular)
{
circular = strlen(oligo1);
if (strlen(oligo2)>(size_t)circular)
circular = strlen(oligo2);
}
}
else
errflag++;
@ -390,6 +486,10 @@ int main(int argc, char **argv)
errflag++;
}
nparam_InitParams(&tparm,DEF_CONC_PRIMERS,
DEF_CONC_PRIMERS,
salt,
saltmethod);
if (!oligo1 || !oligo2)
errflag++;
@ -403,12 +503,22 @@ int main(int argc, char **argv)
o1c = complementPattern(o1);
o2c = complementPattern(o2);
printf("#@ecopcr-v2\n");
printf("#\n");
printf("# ecoPCR version %s\n",VERSION);
printf("# direct strand oligo1 : %-32s ; oligo2c : %32s\n", o1->cpat,o2c->cpat);
printf("# reverse strand oligo2 : %-32s ; oligo1c : %32s\n", o2->cpat,o1c->cpat);
printf("# max error count by oligonucleotide : %d\n",error_max);
double tm,tm1,tm2;
tm1=nparam_CalcSelfTM(&tparm,o1->cpat,o1->patlen) - 273.15;
tm2=nparam_CalcSelfTM(&tparm,o2->cpat,o2->patlen) - 273.15;
tm = (tm1 < tm2) ? tm1:tm2;
printf("# optimal Tm for primers 1 : %5.2f\n",tm1);
printf("# optimal Tm for primers 2 : %5.2f\n",tm2);
printf("# database : %s\n",prefix);
if (lmin && lmax)
printf("# amplifiat length between [%d,%d] bp\n",lmin,lmax);
@ -420,6 +530,10 @@ int main(int argc, char **argv)
printf("# output in kingdom mode\n");
else
printf("# output in superkingdom mode\n");
if (circular)
printf("# DB sequences are considered as circular\n");
else
printf("# DB sequences are considered as linear\n");
printf("#\n");
taxonomy = read_taxonomy(prefix,0);
@ -458,9 +572,9 @@ int main(int argc, char **argv)
strncpy(tail,seq->SQ+seq->SQ_length-10,10);
tail[10]=0;
apatseq=ecoseq2apatseq(seq,apatseq);
apatseq=ecoseq2apatseq(seq,apatseq,circular);
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen+apatseq->circular);
o2cHits= 0;
if (o1Hits)
@ -472,24 +586,44 @@ int main(int argc, char **argv)
length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
else
length= apatseq->seqlen - begin;
if (circular)
{
begin = 0;
length=apatseq->seqlen+circular;
}
o2cHits = ManberAll(apatseq,o2c,1,begin,length);
if (o2cHits)
for (i=0; i < o1Hits;i++)
{
posi = apatseq->hitpos[0]->val[i];
erri = apatseq->hiterr[0]->val[i];
for (j=0; j < o2cHits; j++)
if (posi < apatseq->seqlen)
{
posj =apatseq->hitpos[1]->val[j] + o2c->patlen;
errj =apatseq->hiterr[1]->val[j];
length=posj - posi + 1 - o1->patlen - o2->patlen;
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
erri = apatseq->hiterr[0]->val[i];
for (j=0; j < o2cHits; j++)
{
posj =apatseq->hitpos[1]->val[j];
if (posj < apatseq->seqlen)
{
posj+=o2c->patlen;
// printf("coucou %d %d %d\n",posi,posj,apatseq->seqlen);
errj =apatseq->hiterr[1]->val[j];
length = 0;
if (posj > posi)
length=posj - posi - o1->patlen - o2->patlen;
if (posj < posi)
length= posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
if (length &&
(!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,oligo1,oligo2,&tparm,o1,o2c,'D',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
}
}
}
}
}
@ -506,23 +640,43 @@ int main(int argc, char **argv)
else
length= apatseq->seqlen - begin;
if (circular)
{
begin = 0;
length=apatseq->seqlen+circular;
}
o1cHits = ManberAll(apatseq,o1c,3,begin,length);
if (o1cHits)
for (i=0; i < o2Hits;i++)
{
posi = apatseq->hitpos[2]->val[i];
erri = apatseq->hiterr[2]->val[i];
for (j=0; j < o1cHits; j++)
if (posi < apatseq->seqlen)
{
posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
errj=apatseq->hiterr[3]->val[j];
length=posj - posi + 1 - o1->patlen - o2->patlen;
if ((!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy);
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
erri = apatseq->hiterr[2]->val[i];
for (j=0; j < o1cHits; j++)
{
posj=apatseq->hitpos[3]->val[j];
if (posj < apatseq->seqlen)
{
posj+=o1c->patlen;
errj=apatseq->hiterr[3]->val[j];
length = 0;
if (posj > posi)
length=posj - posi + 1 - o2->patlen - o1->patlen; /* - o1->patlen : suppress by <EC> */
if (posj < posi)
length= posj + apatseq->seqlen - posi - o1->patlen - o2->patlen;
if (length &&
(!lmin || (length >= lmin)) &&
(!lmax || (length <= lmax)))
printRepeat(seq,oligo1,oligo2,&tparm,o2,o1c,'R',kingdom_mode,posi,posj,erri,errj,taxonomy,delta);
//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
}
}
}
}
}

View File

@ -1,5 +1,5 @@
MACHINE=MAC_OS_X
LIBPATH= -Llibapat -LlibecoPCR
LIBPATH= -Llibapat -LlibecoPCR -Llibthermo
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
CC=gcc

View File

@ -9,6 +9,7 @@ SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libapat.a
RANLIB=ranlib
include ../global.mk
@ -20,4 +21,4 @@ clean:
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?
$(RANLIB) $@

View File

@ -103,6 +103,7 @@ typedef struct { /* sequence */
Int32 seqlen; /* sequence length */
Int32 seqsiz; /* sequence buffer size */
Int32 datsiz; /* data buffer size */
Int32 circular;
UInt8 *data; /* data buffer */
char *cseq; /* sequence buffer */
StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */

View File

@ -80,14 +80,14 @@ int CreateS(Pattern *ppat, Int32 lalpha)
/* -------------------------------------------- */
Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
Int32 pos;
UInt32 pos;
UInt32 smask, r;
UInt8 *data;
StackiPtr *stkpos, *stkerr;
UInt32 end;
end = begin + length;
end = (end <= pseq->seqlen) ? end:pseq->seqlen;
end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
/* create local masks */
@ -127,7 +127,7 @@ Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
int e, emax, found;
Int32 pos;
UInt32 pos;
UInt32 smask, cmask, sindx;
UInt32 *pr, r[2 * MAX_PAT_ERR + 2];
UInt8 *data;
@ -135,7 +135,7 @@ Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
UInt32 end;
end = begin + length;
end = (end <= pseq->seqlen) ? end:pseq->seqlen;
end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
/* create local masks */
emax = ppat->maxerr;
@ -193,7 +193,7 @@ Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
{
int e, emax, found;
Int32 pos;
UInt32 pos;
UInt32 smask, cmask, sindx;
UInt32 *pr, r[2 * MAX_PAT_ERR + 2];
UInt8 *data;
@ -201,7 +201,7 @@ Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
UInt32 end;
end = begin + length;
end = (end <= pseq->seqlen) ? end:pseq->seqlen;
end = (end <= (size_t)(pseq->seqlen+pseq->circular)) ? end:(size_t)(pseq->seqlen+pseq->circular);
/* create local masks */
emax = ppat->maxerr;

View File

@ -15,6 +15,7 @@ SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libecoPCR.a
RANLIB= ranlib
include ../global.mk
@ -27,3 +28,4 @@ clean:
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?
$(RANLIB) $@

View File

@ -220,7 +220,7 @@ econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
* @return pointer to a taxonomy index structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename);
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2);
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);
@ -251,7 +251,7 @@ int32_t delete_apatseq(SeqPtr pseq);
PatternPtr buildPattern(const char *pat, int32_t error_max);
PatternPtr complementPattern(PatternPtr pat);
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out);
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
char *ecoComplementPattern(char *nucAcSeq);
char *ecoComplementSequence(char *nucAcSeq);

View File

@ -50,10 +50,13 @@ void EncodeSequence(SeqPtr seq)
while (*cseq) {
*data++ = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
*data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
data++;
cseq++;
}
for (i=0,cseq=seq->cseq;i < seq->circular; i++,cseq++,data++)
*data = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
for (i = 0 ; i < MAX_PATTERN ; i++)
seq->hitpos[i]->top = seq->hiterr[i]->top = 0;
@ -63,7 +66,7 @@ void EncodeSequence(SeqPtr seq)
#undef IS_UPPER
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out)
SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular)
{
int i;
@ -83,20 +86,22 @@ SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out)
}
}
out->name = in->AC;
out->seqsiz = out->seqlen = in->SQ_length;
out->circular = circular;
if (!out->data)
{
out->data = ECOMALLOC(out->seqlen *sizeof(UInt8),
out->data = ECOMALLOC((out->seqlen+circular) *sizeof(UInt8),
"Error in Allocation of a new Seq data member");
out->datsiz= out->seqlen;
out->datsiz= out->seqlen+circular;
}
else if (out->seqlen >= out->datsiz)
else if ((out->seqlen +circular) >= out->datsiz)
{
out->data = ECOREALLOC(out->data,out->seqlen,
out->data = ECOREALLOC(out->data,(out->seqlen+circular),
"Error during Seq data buffer realloc");
out->datsiz= out->seqlen;
out->datsiz= out->seqlen+circular;
}
out->cseq = in->SQ;
@ -191,4 +196,4 @@ PatternPtr complementPattern(PatternPtr pat)
return pattern;
}
}

View File

@ -104,27 +104,52 @@ char *ecoComplementSequence(char *nucAcSeq)
char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
/*
extract subsequence from nucAcSeq [begin,end[
*/
{
static char *buffer = NULL;
static int32_t buffSize= 0;
int32_t length;
length = end - begin;
if (length >= buffSize)
if (begin < end)
{
buffSize = length+1;
if (buffer)
buffer=ECOREALLOC(buffer,buffSize,
"Error in reallocating sub sequence buffer");
else
buffer=ECOMALLOC(buffSize,
"Error in allocating sub sequence buffer");
length = end - begin;
if (length >= buffSize)
{
buffSize = length+1;
if (buffer)
buffer=ECOREALLOC(buffer,buffSize,
"Error in reallocating sub sequence buffer");
else
buffer=ECOMALLOC(buffSize,
"Error in allocating sub sequence buffer");
}
strncpy(buffer,nucAcSeq + begin,length);
buffer[length]=0;
}
else
{
length = end + strlen(nucAcSeq) - begin;
if (length >= buffSize)
{
buffSize = length+1;
if (buffer)
buffer=ECOREALLOC(buffer,buffSize,
"Error in reallocating sub sequence buffer");
else
buffer=ECOMALLOC(buffSize,
"Error in allocating sub sequence buffer");
}
strncpy(buffer,nucAcSeq+begin,length - end);
strncpy(buffer+(length-end),nucAcSeq ,end);
buffer[length]=0;
}
strncpy(buffer,nucAcSeq + begin,length);
buffer[length]=0;
return buffer;
}

View File

@ -10,10 +10,11 @@ int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
taxon = eco_findtaxonbytaxid(taxonomy, taxid);
for (i=0; i < tab_len; i++)
if ( (taxon->taxid == restricted_taxid[i]) ||
(eco_isundertaxon(taxon, restricted_taxid[i])) )
return 1;
if (taxon)
for (i=0; i < tab_len; i++)
if ( (taxon->taxid == restricted_taxid[i]) ||
(eco_isundertaxon(taxon, restricted_taxid[i])) )
return 1;
return 0;
}
}

View File

@ -4,6 +4,7 @@
#include <zlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
static FILE *open_seqfile(const char *prefix,int32_t index);
@ -11,32 +12,32 @@ static FILE *open_seqfile(const char *prefix,int32_t index);
/**
 * Allocate a fresh ecoseq_t structure.
 *
 * The storage is obtained through ECOMALLOC, sized for exactly one
 * ecoseq_t.  NOTE(review): whether the returned memory is zero-filled
 * depends on ECOMALLOC, which is defined outside this file section.
 *
 * @return pointer to the newly allocated structure
 */
ecoseq_t *new_ecoseq()
{
	ecoseq_t *fresh;

	fresh = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");

	return fresh;
}
/**
 * Release an ecoseq_t together with the strings it owns.
 *
 * The AC, DE and SQ members are freed only when they are non-NULL,
 * then the structure itself is freed.
 *
 * @param seq the sequence to release (may be NULL)
 *
 * @return 0 when the sequence was released, 1 when seq was NULL
 */
int32_t delete_ecoseq(ecoseq_t * seq)
{
	/* Nothing to release: signal it with a non-zero status. */
	if (!seq)
		return 1;

	if (seq->AC)
	{
		ECOFREE(seq->AC,"Free sequence AC");
	}

	if (seq->DE)
	{
		ECOFREE(seq->DE,"Free sequence DE");
	}

	if (seq->SQ)
	{
		ECOFREE(seq->SQ,"Free sequence SQ");
	}

	/* Members are gone, now drop the container itself. */
	ECOFREE(seq,"Free sequence structure");

	return 0;
}
@ -49,9 +50,9 @@ ecoseq_t *new_ecoseq_with_data( char *AC,
ecoseq_t *tmp;
int32_t lstr;
tmp = new_ecoseq();
tmp->taxid=taxid_idx;
if (AC)
{
lstr =strlen(AC);
@ -97,12 +98,14 @@ ecoseq_t *readnext_ecoseq(FILE *f)
int32_t comp_status;
unsigned long int seqlength;
int32_t rs;
char *c;
int32_t i;
raw = read_ecorecord(f,&rs);
if (!raw)
return NULL;
if (is_big_endian())
{
raw->CSQ_length = swap_int32_t(raw->CSQ_length);
@ -110,44 +113,48 @@ ecoseq_t *readnext_ecoseq(FILE *f)
raw->SQ_length = swap_int32_t(raw->SQ_length);
raw->taxid = swap_int32_t(raw->taxid);
}
seq = new_ecoseq();
seq->taxid = raw->taxid;
seq->AC = ECOMALLOC(strlen(raw->AC) +1,
"Allocate Sequence Accesion number");
strncpy(seq->AC,raw->AC,strlen(raw->AC));
seq->DE = ECOMALLOC(raw->DE_length+1,
"Allocate Sequence definition");
strncpy(seq->DE,raw->data,raw->DE_length);
seqlength = seq->SQ_length = raw->SQ_length;
compressed = raw->data + raw->DE_length;
seq->SQ = ECOMALLOC(seqlength+1,
"Allocate sequence buffer");
comp_status = uncompress((unsigned char*)seq->SQ,
&seqlength,
(unsigned char*)compressed,
raw->CSQ_length);
if (comp_status != Z_OK)
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
for (c=seq->SQ,i=0;i<seqlength;c++,i++)
*c=toupper(*c);
return seq;
}
/**
* Open the sequences database (.sdx file)
* @param prefix name of the database (radical without extension)
* @param prefix name of the database (radical without extension)
* @param index integer
*
* @return file object
*
* @return file object
*/
FILE *open_seqfile(const char *prefix,int32_t index)
{
@ -161,22 +168,22 @@ FILE *open_seqfile(const char *prefix,int32_t index)
"%s_%03d.sdx",
prefix,
index);
fprintf(stderr,"# Coucou %s\n",filename_buffer);
// fprintf(stderr,"# Coucou %s\n",filename_buffer);
if (filename_length >= 1024)
ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
filename_buffer[filename_length]=0;
input=open_ecorecorddb(filename_buffer,&seqcount,0);
if (input)
fprintf(stderr,"# Reading file %s containing %d sequences...\n",
filename_buffer,
seqcount);
return input;
}
@ -186,38 +193,38 @@ ecoseq_t *ecoseq_iterator(const char *prefix)
static int32_t current_file_idx = 1;
static char current_prefix[1024];
ecoseq_t *seq;
if (prefix)
{
current_file_idx = 1;
if (current_seq_file)
fclose(current_seq_file);
strncpy(current_prefix,prefix,1023);
current_prefix[1024]=0;
current_seq_file = open_seqfile(current_prefix,
current_file_idx);
if (!current_seq_file)
return NULL;
}
seq = readnext_ecoseq(current_seq_file);
if (!seq && feof(current_seq_file))
{
current_file_idx++;
fclose(current_seq_file);
current_seq_file = open_seqfile(current_prefix,
current_file_idx);
if (current_seq_file)
seq = readnext_ecoseq(current_seq_file);
}
return seq;
}
}

View File

@ -10,23 +10,41 @@ static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
* @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure
*/
ecotxidx_t *read_taxonomyidx(const char *filename)
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2)
{
int32_t count;
int32_t count2;
FILE *f;
FILE *f2;
ecotxidx_t *index;
int32_t i;
f = open_ecorecorddb(filename,&count,1);
f = open_ecorecorddb(filename,&count,1);
f2 = open_ecorecorddb(filename2,&count2,0);
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1),
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
"Allocate taxonomy");
index->count=count;
index->count=count+count2;
fprintf(stderr,"Reading %d taxa...\n",count);
for (i=0; i < count; i++){
readnext_ecotaxon(f,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (int32_t)index->taxon[i].parent;
}
if (count2>0)
fprintf(stderr,"Reading %d local taxa...\n",count2);
else
fprintf(stderr,"No local taxon\n");
for (i=0; i < count2; i++){
readnext_ecotaxon(f2,&(index->taxon[count+i]));
index->taxon[count+i].parent=index->taxon + (int32_t)index->taxon[count+i].parent;
}
return index;
}
@ -111,6 +129,7 @@ ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
{
ecotaxonomy_t *tax;
char *filename;
char *filename2;
int buffsize;
tax = ECOMALLOC(sizeof(ecotaxonomy_t),
@ -120,14 +139,17 @@ ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
filename = ECOMALLOC(buffsize,
"Allocate filename");
filename2= ECOMALLOC(buffsize,
"Allocate filename");
snprintf(filename,buffsize,"%s.rdx",prefix);
tax->ranks = read_rankidx(filename);
snprintf(filename,buffsize,"%s.tdx",prefix);
snprintf(filename2,buffsize,"%s.ldx",prefix);
tax->taxons = read_taxonomyidx(filename);
tax->taxons = read_taxonomyidx(filename,filename2);
if (readAlternativeName)
{
@ -326,4 +348,4 @@ ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
return eco_findtaxonatrank(taxon,rankindex);
}
}

22
src/libthermo/Makefile Normal file
View File

@ -0,0 +1,22 @@
# Build description for the static library libthermo.a.
# The library is made of a single translation unit, nnparams.c.
SOURCES = nnparams.c
SRCS=$(SOURCES)
# Object list derived from the source list (foo.c -> foo.o).
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libthermo.a
RANLIB= ranlib
# Shared settings are pulled from the parent directory's global.mk
# (its content is not visible here).
include ../global.mk
# Default target: build the static library.
all: $(LIBFILE)
# Remove the object files and the library archive.
clean:
rm -rf $(OBJECTS) $(LIBFILE)
# Archive the objects that are newer than the library ($?), then
# run ranlib on the resulting archive ($@).
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?
$(RANLIB) $@

619
src/libthermo/nnparams.c Normal file
View File

@ -0,0 +1,619 @@
/*
* nnparams.cpp
* PHunterLib
*
* Nearest Neighbor Model / Parameters
*
* Created by Tiayyba Riaz on 7/2/09.
*
*/
#include <memory.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include"nnparams.h"
// Letter -> numeric base code table with 26 entries, one per letter A..Z.
// Only A, C, G and T receive a non-zero code (A=1, C=2, G=3, T=4); every
// other letter maps to 0.
// NOTE(review): presumably indexed by (letter - 'A'); the lookup site is
// not visible in this part of the file - confirm.
static char bpencoder[] = { 1, // A
0, // b
2, // C
0,0,0, // d, e, f
3, // G
0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
4,0, // T,U
0,0,0,0,0}; // v,w,x,y,z
// Entropy value stored in the parameter table for forbidden pairings.
// It is assigned in nparam_InitParams (set to nparm->rlogc) before the
// table is filled.
double forbidden_entropy;
// Return the starting entropy of a duplex: a fixed offset of -5.9 plus
// the concentration term nparm->rlogc (computed in nparam_InitParams as
// R * log(ctFactor)).
double nparam_GetInitialEntropy(PNNParams nparm)
{
	const double initiation = -5.9f;   // same float literal as before, widened to double

	return initiation + nparm->rlogc;
}
// Look up the enthalpy of a nearest-neighbour pair in the parameter table.
// x0,x1 and y0,y1 are expected to be numeric base codes already (not
// letters); they are passed straight to the ndH table accessor.
// NOTE(review): ndH is declared outside this file section and appears to
// read the table through the parameter named nparm - keep that name.
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
	double enthalpy = ndH(x0,x1,y0,y1);

	return enthalpy;
}
// Look up the entropy of a nearest-neighbour pair in the parameter table
// and apply the salt correction selected by nparm->saltMethod.
// x0,x1 and y0,y1 are expected to be numeric base codes already (not
// letters); they are passed straight to the ndS / ndH table accessors.
// In nparam_InitParams the codes 1..4 are used for A,C,G,T and 5 for the
// '$' end marker of dangling ends.
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
	double entropy = ndS(x0,x1,y0,y1);

	// SantaLucia correction: half of kfac is added for each strand whose
	// pair satisfies the condition below, so 0, 1 or 2 halves in total.
	if (nparm->saltMethod == SALT_METHOD_SANTALUCIA)
	{
		int firstStrand  = (x0 != 5) && (x1 >= 1) && (x1 <= 4);
		int secondStrand = (y1 != 5) && (y0 >= 1) && (y0 <= 4);

		if (firstStrand)
			entropy += 0.5*nparm->kfac;

		if (secondStrand)
			entropy += 0.5*nparm->kfac;
	}

	// Owczarzy correction: scales the pair enthalpy by a term depending on
	// the GC content and on the logarithm of the salt concentration.
	if (nparm->saltMethod == SALT_METHOD_OWCZARZY)
	{
		double lnSalt = log(nparm->kplus);

		entropy += ndH(x0,x1,y0,y1)*((4.29 * nparm->gcContent-3.95)*0.00001*lnSalt+ 0.0000094*lnSalt*lnSalt);
	}

	return entropy;
}
/* PURPOSE:   Return the melting temperature TM for a given entropy and
 *            enthalpy, assuming a one-state transition:
 *
 *                TM = dH / (dS + R ln(Ct/4))
 *
 *            The caller passes
 *                entropy  = dS + R ln(Ct/4)   (the concentration term
 *                                              must already be included!)
 *                enthalpy = dH
 *            where Ct is the strand concentration.
 *
 * PARAMETERS:
 *            entropy and enthalpy, as described above
 *
 * RETURN VALUE:
 *            enthalpy / entropy, or 0 when the model does not apply:
 *            enthalpy at or above forbidden_enthalpy, entropy not
 *            negative, or a negative quotient.
 */
double nparam_CalcTM(double entropy,double enthalpy)
{
	double tm;

	// Forbidden configuration: report absolute zero.
	if (enthalpy>=forbidden_enthalpy)
		return 0;

	// Only a negative entropy is accepted; this also rules out a
	// division by zero.
	if (!(entropy<0))
		return 0;

	tm = enthalpy/entropy;

	// A negative temperature is a model failure as well.
	return (tm<0) ? 0 : tm;
}
void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm)
{
nparm->Ct1 = c1;
nparm->Ct2 = c2;
nparm->kplus = kp;
int maxCT = 1;
if(nparm->Ct2 > nparm->Ct1)
{
maxCT = 2;
}
double ctFactor;
if(nparm->Ct1 == nparm->Ct2)
{
ctFactor = nparm->Ct1/2;
}
else if (maxCT == 1)
{
ctFactor = nparm->Ct1-nparm->Ct2/2;
}
else
{
ctFactor = nparm->Ct2-nparm->Ct1/2;
}
nparm->rlogc = R * log(ctFactor);
forbidden_entropy = nparm->rlogc;
nparm->kfac = 0.368 * log (nparm->kplus);
nparm->saltMethod = sm;
int x,y,a,b; // variables used as counters...
// Set all parameters to zero!
memset(nparm->dH,0,sizeof(nparm->dH));
memset(nparm->dS,0,sizeof(nparm->dS));
// Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small!
for (x=1;x<=4;x++)
{
for (y=1;y<=4;y++)
{
ndH(0,x,y,0)=forbidden_enthalpy;
ndS(0,x,y,0)=forbidden_entropy;
ndH(x,0,0,y)=forbidden_enthalpy;
ndS(x,0,0,y)=forbidden_entropy;
ndH(x,0,y,0)=forbidden_enthalpy;
ndS(x,0,y,0)=forbidden_entropy;
// forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap!
ndH(x,5,y,0)=forbidden_enthalpy;
ndS(x,5,y,0)=forbidden_entropy;
ndH(x,0,y,5)=forbidden_enthalpy;
ndS(x,0,y,5)=forbidden_entropy;
ndH(5,x,0,y)=forbidden_enthalpy;
ndS(5,x,0,y)=forbidden_entropy;
ndH(0,x,5,y)=forbidden_enthalpy;
ndS(0,x,5,y)=forbidden_entropy;
// forbid X$/-Y etc.
ndH(x,5,0,y)=forbidden_enthalpy;
ndS(x,5,0,y)=forbidden_entropy;
ndH(x,0,5,y)=forbidden_enthalpy;
ndS(x,0,5,y)=forbidden_entropy;
ndH(5,x,y,0)=forbidden_enthalpy;
ndS(5,x,y,0)=forbidden_entropy;
ndH(0,x,y,5)=forbidden_enthalpy;
ndS(0,x,y,5)=forbidden_entropy;
}
// also, forbid x-/-- and --/x-, i.e. no two inner gaps paired
ndH(x,0,0,0)=forbidden_enthalpy;
ndS(x,0,0,0)=forbidden_entropy;
ndH(0,0,x,0)=forbidden_enthalpy;
ndS(0,0,x,0)=forbidden_entropy;
// x-/-$
ndH(x,0,0,5)=forbidden_enthalpy;
ndS(x,0,0,5)=forbidden_entropy;
ndH(5,0,0,x)=forbidden_enthalpy;
ndS(5,0,0,x)=forbidden_entropy;
ndH(0,5,x,0)=forbidden_enthalpy;
ndS(x,0,0,5)=forbidden_entropy;
ndH(0,x,5,0)=forbidden_enthalpy;
ndS(0,x,5,0)=forbidden_entropy;
}
// forbid --/--
ndH(0,0,0,0)=forbidden_enthalpy;
ndS(0,0,0,0)=forbidden_entropy;
ndH(5,0,0,0)=forbidden_enthalpy;
ndS(5,0,0,0)=forbidden_entropy;
ndH(0,0,5,0)=forbidden_enthalpy;
ndS(0,0,5,0)=forbidden_entropy;
ndH(0,5,5,0)=forbidden_enthalpy;
ndS(0,5,5,0)=forbidden_entropy;
// Interior loops (double Mismatches)
#define iloop_entropy -0.97f
#define iloop_enthalpy 0.0f
for (x=1; x<=4; x++)
for (y=1; y<=4; y++)
for (a=1; a<=4; a++)
for (b=1; b<=4; b++)
// AT and CG pair, and as A=1, C=2, G=3, T=4 this means
// we have Watson-Crick pairs if (x+a==5) and (y+b)==5.
if (!((x+a==5)||(y+b==5)))
{
// No watson-crick-pair, i.e. double mismatch!
// set enthalpy/entropy to loop expansion!
ndH(x,y,a,b) = iloop_enthalpy;
ndS(x,y,a,b) = iloop_entropy;
}
// xy/-- and --/xy (Bulge Loops of size > 1)
#define bloop_entropy -1.3f
#define bloop_enthalpy 0.0f
for (x=1; x<=4; x++)
for (y=1; y<=4; y++)
{
ndH(x,y,0,0) = bloop_enthalpy;
ndS(x,y,0,0) = bloop_entropy;
ndH(0,0,x,y) = bloop_enthalpy;
ndS(0,0,x,y) = bloop_entropy;
}
// x-/ya abd xa/y- as well as -x/ay and ax/-y
// bulge opening and closing parameters with
// adjacent matches / mismatches
// obulge_mism and cbulge_mism chosen so high to avoid
// AAAAAAAAA
// T--G----T
// being better than
// AAAAAAAAA
// TG------T
#define obulge_match_H (-2.66f * 1000)
#define obulge_match_S -14.22f
#define cbulge_match_H (-2.66f * 1000)
#define cbulge_match_S -14.22f
#define obulge_mism_H (0.0f * 1000)
#define obulge_mism_S -6.45f
#define cbulge_mism_H 0.0f
#define cbulge_mism_S -6.45f
for (x=1; x<=4; x++)
for (y=1; y<=4; y++)
for (a=1; a<=4; a++)
{
if (x+y==5) // other base pair matches!
{
ndH(x,0,y,a)=obulge_match_H; // bulge opening
ndS(x,0,y,a)=obulge_match_S;
ndH(x,a,y,0)=obulge_match_H;
ndS(x,a,y,0)=obulge_match_S;
ndH(0,x,a,y)=cbulge_match_H; // bulge closing
ndS(0,x,a,y)=cbulge_match_S;
ndH(a,x,0,y)=cbulge_match_H;
ndS(a,x,0,y)=cbulge_match_S;
}
else
{ // mismatch in other base pair!
ndH(x,0,y,a)=obulge_mism_H; // bulge opening
ndS(x,0,y,a)=obulge_mism_S;
ndH(x,a,y,0)=obulge_mism_H;
ndS(x,a,y,0)=obulge_mism_S;
ndH(0,x,a,y)=cbulge_mism_H; // bulge closing
ndS(0,x,a,y)=cbulge_mism_S;
ndH(a,x,0,y)=cbulge_mism_H;
ndS(a,x,0,y)=cbulge_mism_S;
}
}
// Watson-Crick pairs (note that only ten are unique, as obviously
// 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc.
ndH(1,1,4,4)=-7.6f*1000; ndS(1,1,4,4)=-21.3f; // AA/TT 04
ndH(1,2,4,3)=-8.4f*1000; ndS(1,2,4,3)=-22.4f; // AC/TG adapted GT/CA
ndH(1,3,4,2)=-7.8f*1000; ndS(1,3,4,2)=-21.0f; // AG/TC adapted CT/GA
ndH(1,4,4,1)=-7.2f*1000; ndS(1,4,4,1)=-20.4f; // AT/TA 04
ndH(2,1,3,4)=-8.5f*1000; ndS(2,1,3,4)=-22.7f; // CA/GT 04
ndH(2,2,3,3)=-8.0f*1000; ndS(2,2,3,3)=-19.9f; // CC/GG adapted GG/CC
ndH(2,3,3,2)=-10.6f*1000; ndS(2,3,3,2)=-27.2f; // CG/GC 04
ndH(2,4,3,1)=-7.8f*1000; ndS(2,4,3,1)=-21.0f; // CT/GA 04
ndH(3,1,2,4)=-8.2f*1000; ndS(3,1,2,4)=-22.2f; // GA/CT 04
ndH(3,2,2,3)=-9.8f*1000; ndS(3,2,2,3)=-24.4f; // GC/CG 04
ndH(3,3,2,2)=-8.0f*1000; ndS(3,3,2,2)=-19.9f; // GG/CC 04
ndH(3,4,2,1)=-8.4f*1000; ndS(3,4,2,1)=-22.4f; // GT/CA 04
ndH(4,1,1,4)=-7.2f*1000; ndS(4,1,1,4)=-21.3f; // TA/AT 04
ndH(4,2,1,3)=-8.2f*1000; ndS(4,2,1,3)=-22.2f; // TC/AG adapted GA/CT
ndH(4,3,1,2)=-8.5f*1000; ndS(4,3,1,2)=-22.7f; // TG/AC adapted CA/GT
ndH(4,4,1,1)=-7.6f*1000; ndS(4,4,1,1)=-21.3f; // TT/AA adapted AA/TT
// A-C Mismatches (Values for pH 7.0)
ndH(1,1,2,4)=7.6f*1000; ndS(1,1,2,4)=20.2f; // AA/CT
ndH(1,1,4,2)=2.3f*1000; ndS(1,1,4,2)=4.6f; // AA/TC
ndH(1,2,2,3)=-0.7f*1000; ndS(1,2,2,3)=-3.8f; // AC/CG
ndH(1,2,4,1)=5.3f*1000; ndS(1,2,4,1)=14.6f; // AC/TA
ndH(1,3,2,2)=0.6f*1000; ndS(1,3,2,2)=-0.6f; // AG/CC
ndH(1,4,2,1)=5.3f*1000; ndS(1,4,2,1)=14.6f; // AT/CA
ndH(2,1,1,4)=3.4f*1000; ndS(2,1,1,4)=8.0f; // CA/AT
ndH(2,1,3,2)=1.9f*1000; ndS(2,1,3,2)=3.7f; // CA/GC
ndH(2,2,1,3)=5.2f*1000; ndS(2,2,1,3)=14.2f; // CC/AG
ndH(2,2,3,1)=0.6f*1000; ndS(2,2,3,1)=-0.6f; // CC/GA
ndH(2,3,1,2)=1.9f*1000; ndS(2,3,1,2)=3.7f; // CG/AC
ndH(2,4,1,1)=2.3f*1000; ndS(2,4,1,1)=4.6f; // CT/AA
ndH(3,1,2,2)=5.2f*1000; ndS(3,1,2,2)=14.2f; // GA/CC
ndH(3,2,2,1)=-0.7f*1000; ndS(3,2,2,1)=-3.8f; // GC/CA
ndH(4,1,1,2)=3.4f*1000; ndS(4,1,1,2)=8.0f; // TA/AC
ndH(4,2,1,1)=7.6f*1000; ndS(4,2,1,1)=20.2f; // TC/AA
// C-T Mismatches
ndH(1,2,4,4)=0.7f*1000; ndS(1,2,4,4)=0.2f; // AC/TT
ndH(1,4,4,2)=-1.2f*1000; ndS(1,4,4,2)=-6.2f; // AT/TC
ndH(2,1,4,4)=1.0f*1000; ndS(2,1,4,4)=0.7f; // CA/TT
ndH(2,2,3,4)=-0.8f*1000; ndS(2,2,3,4)=-4.5f; // CC/GT
ndH(2,2,4,3)=5.2f*1000; ndS(2,2,4,3)=13.5f; // CC/TG
ndH(2,3,4,2)=-1.5f*1000; ndS(2,3,4,2)=-6.1f; // CG/TC
ndH(2,4,3,2)=-1.5f*1000; ndS(2,4,3,2)=-6.1f; // CT/GC
ndH(2,4,4,1)=-1.2f*1000; ndS(2,4,4,1)=-6.2f; // CT/TA
ndH(3,2,2,4)=2.3f*1000; ndS(3,2,2,4)=5.4f; // GC/CT
ndH(3,4,2,2)=5.2f*1000; ndS(3,4,2,2)=13.5f; // GT/CC
ndH(4,1,2,4)=1.2f*1000; ndS(4,1,2,4)=0.7f; // TA/CT
ndH(4,2,2,3)=2.3f*1000; ndS(4,2,2,3)=5.4f; // TC/CG
ndH(4,2,1,4)=1.2f*1000; ndS(4,2,1,4)=0.7f; // TC/AT
ndH(4,3,2,2)=-0.8f*1000; ndS(4,3,2,2)=-4.5f; // TG/CC
ndH(4,4,2,1)=0.7f*1000; ndS(4,4,2,1)=0.2f; // TT/CA
ndH(4,4,1,2)=1.0f*1000; ndS(4,4,1,2)=0.7f; // TT/AC
// G-A Mismatches
ndH(1,1,3,4)=3.0f*1000; ndS(1,1,3,4)=7.4f; // AA/GT
ndH(1,1,4,3)=-0.6f*1000; ndS(1,1,4,3)=-2.3f; // AA/TG
ndH(1,2,3,3)=0.5f*1000; ndS(1,2,3,3)=3.2f; // AC/GG
ndH(1,3,3,2)=-4.0f*1000; ndS(1,3,3,2)=-13.2f; // AG/GC
ndH(1,3,4,1)=-0.7f*1000; ndS(1,3,4,1)=-2.3f; // AG/TA
ndH(1,4,3,1)=-0.7f*1000; ndS(1,4,3,1)=-2.3f; // AT/GA
ndH(2,1,3,3)=-0.7f*1000; ndS(2,1,3,3)=-2.3f; // CA/GG
ndH(2,3,3,1)=-4.0f*1000; ndS(2,3,3,1)=-13.2f; // CG/GA
ndH(3,1,1,4)=0.7f*1000; ndS(3,1,1,4)=0.7f; // GA/AT
ndH(3,1,2,3)=-0.6f*1000; ndS(3,1,2,3)=-1.0f; // GA/CG
ndH(3,2,1,3)=-0.6f*1000; ndS(3,2,1,3)=-1.0f; // GC/AG
ndH(3,3,1,2)=-0.7f*1000; ndS(3,3,1,2)=-2.3f; // GG/AC
ndH(3,3,2,1)=0.5f*1000; ndS(3,3,2,1)=3.2f; // GG/CA
ndH(3,4,1,1)=-0.6f*1000; ndS(3,4,1,1)=-2.3f; // GT/AA
ndH(4,1,1,3)=0.7f*1000; ndS(4,1,1,3)=0.7f; // TA/AG
ndH(4,3,1,1)=3.0f*1000; ndS(4,3,1,1)=7.4f; // TG/AA
// G-T Mismatches
ndH(1,3,4,4)=1.0f*1000; ndS(1,3,4,4)=0.9f; // AG/TT
ndH(1,4,4,3)=-2.5f*1000; ndS(1,4,4,3)=-8.3f; // AT/TG
ndH(2,3,3,4)=-4.1f*1000; ndS(2,3,3,4)=-11.7f; // CG/GT
ndH(2,4,3,3)=-2.8f*1000; ndS(2,4,3,3)=-8.0f; // CT/GG
ndH(3,1,4,4)=-1.3f*1000; ndS(3,1,4,4)=-5.3f; // GA/TT
ndH(3,2,4,3)=-4.4f*1000; ndS(3,2,4,3)=-12.3f; // GC/TG
ndH(3,3,2,4)=3.3f*1000; ndS(3,3,2,4)=10.4f; // GG/CT
ndH(3,3,4,2)=-2.8f*1000; ndS(3,3,4,2)=-8.0f; // GG/TC
// ndH(3,3,4,4)=5.8f*1000; ndS(3,3,4,4)=16.3f; // GG/TT
ndH(3,4,2,3)=-4.4f*1000; ndS(3,4,2,3)=-12.3f; // GT/CG
ndH(3,4,4,1)=-2.5f*1000; ndS(3,4,4,1)=-8.3f; // GT/TA
// ndH(3,4,4,3)=4.1f*1000; ndS(3,4,4,3)=9.5f; // GT/TG
ndH(4,1,3,4)=-0.1f*1000; ndS(4,1,3,4)=-1.7f; // TA/GT
ndH(4,2,3,3)=3.3f*1000; ndS(4,2,3,3)=10.4f; // TC/GG
ndH(4,3,1,4)=-0.1f*1000; ndS(4,3,1,4)=-1.7f; // TG/AT
ndH(4,3,3,2)=-4.1f*1000; ndS(4,3,3,2)=-11.7f; // TG/GC
// ndH(4,3,3,4)=-1.4f*1000; ndS(4,3,3,4)=-6.2f; // TG/GT
ndH(4,4,1,3)=-1.3f*1000; ndS(4,4,1,3)=-5.3f; // TT/AG
ndH(4,4,3,1)=1.0f*1000; ndS(4,4,3,1)=0.9f; // TT/GA
// ndH(4,4,3,3)=5.8f*1000; ndS(4,4,3,3)=16.3f; // TT/GG
// A-A Mismatches
ndH(1,1,1,4)=4.7f*1000; ndS(1,1,1,4)=12.9f; // AA/AT
ndH(1,1,4,1)=1.2f*1000; ndS(1,1,4,1)=1.7f; // AA/TA
ndH(1,2,1,3)=-2.9f*1000; ndS(1,2,1,3)=-9.8f; // AC/AG
ndH(1,3,1,2)=-0.9f*1000; ndS(1,3,1,2)=-4.2f; // AG/AC
ndH(1,4,1,1)=1.2f*1000; ndS(1,4,1,1)=1.7f; // AT/AA
ndH(2,1,3,1)=-0.9f*1000; ndS(2,1,3,1)=-4.2f; // CA/GA
ndH(3,1,2,1)=-2.9f*1000; ndS(3,1,2,1)=-9.8f; // GA/CA
ndH(4,1,1,1)=4.7f*1000; ndS(4,1,1,1)=12.9f; // TA/AA
// C-C Mismatches
ndH(1,2,4,2)=0.0f*1000; ndS(1,2,4,2)=-4.4f; // AC/TC
ndH(2,1,2,4)=6.1f*1000; ndS(2,1,2,4)=16.4f; // CA/CT
ndH(2,2,2,3)=3.6f*1000; ndS(2,2,2,3)=8.9f; // CC/CG
ndH(2,2,3,2)=-1.5f*1000; ndS(2,2,3,2)=-7.2f; // CC/GC
ndH(2,3,2,2)=-1.5f*1000; ndS(2,3,2,2)=-7.2f; // CG/CC
ndH(2,4,2,1)=0.0f*1000; ndS(2,4,2,1)=-4.4f; // CT/CA
ndH(3,2,2,2)=3.6f*1000; ndS(3,2,2,2)=8.9f; // GC/CC
ndH(4,2,1,2)=6.1f*1000; ndS(4,2,1,2)=16.4f; // TC/AC
// G-G Mismatches
ndH(1,3,4,3)=-3.1f*1000; ndS(1,3,4,3)=-9.5f; // AG/TG
ndH(2,3,3,3)=-4.9f*1000; ndS(2,3,3,3)=-15.3f; // CG/GG
ndH(3,1,3,4)=1.6f*1000; ndS(3,1,3,4)=3.6f; // GA/GT
ndH(3,2,3,3)=-6.0f*1000; ndS(3,2,3,3)=-15.8f; // GC/GG
ndH(3,3,2,3)=-6.0f*1000; ndS(3,3,2,3)=-15.8f; // GG/CG
ndH(3,3,3,2)=-4.9f*1000; ndS(3,3,3,2)=-15.3f; // GG/GC
ndH(3,4,3,1)=-3.1f*1000; ndS(3,4,3,1)=-9.5f; // GT/GA
ndH(4,3,1,3)=1.6f*1000; ndS(4,3,1,3)=3.6f; // TG/AG
// T-T Mismatches
ndH(1,4,4,4)=-2.7f*1000; ndS(1,4,4,4)=-10.8f; // AT/TT
ndH(2,4,3,4)=-5.0f*1000; ndS(2,4,3,4)=-15.8f; // CT/GT
ndH(3,4,2,4)=-2.2f*1000; ndS(3,4,2,4)=-8.4f; // GT/CT
ndH(4,1,4,4)=0.2f*1000; ndS(4,1,4,4)=-1.5f; // TA/TT
ndH(4,2,4,3)=-2.2f*1000; ndS(4,2,4,3)=-8.4f; // TC/TG
ndH(4,3,4,2)=-5.0f*1000; ndS(4,3,4,2)=-15.8f; // TG/TC
ndH(4,4,1,4)=0.2f*1000; ndS(4,4,1,4)=-1.5f; // TT/AT
ndH(4,4,4,1)=-2.7f*1000; ndS(4,4,4,1)=-10.8f; // TT/TA
// Dangling Ends
ndH(5,1,1,4)=-0.7f*1000; ndS(5,1,1,4)=-0.8f; // $A/AT
ndH(5,1,2,4)=4.4f*1000; ndS(5,1,2,4)=14.9f; // $A/CT
ndH(5,1,3,4)=-1.6f*1000; ndS(5,1,3,4)=-3.6f; // $A/GT
ndH(5,1,4,4)=2.9f*1000; ndS(5,1,4,4)=10.4f; // $A/TT
ndH(5,2,1,3)=-2.1f*1000; ndS(5,2,1,3)=-3.9f; // $C/AG
ndH(5,2,2,3)=-0.2f*1000; ndS(5,2,2,3)=-0.1f; // $C/CG
ndH(5,2,3,3)=-3.9f*1000; ndS(5,2,3,3)=-11.2f; // $C/GG
ndH(5,2,4,3)=-4.4f*1000; ndS(5,2,4,3)=-13.1f; // $C/TG
ndH(5,3,1,2)=-5.9f*1000; ndS(5,3,1,2)=-16.5f; // $G/AC
ndH(5,3,2,2)=-2.6f*1000; ndS(5,3,2,2)=-7.4f; // $G/CC
ndH(5,3,3,2)=-3.2f*1000; ndS(5,3,3,2)=-10.4f; // $G/GC
ndH(5,3,4,2)=-5.2f*1000; ndS(5,3,4,2)=-15.0f; // $G/TC
ndH(5,4,1,1)=-0.5f*1000; ndS(5,4,1,1)=-1.1f; // $T/AA
ndH(5,4,2,1)=4.7f*1000; ndS(5,4,2,1)=14.2f; // $T/CA
ndH(5,4,3,1)=-4.1f*1000; ndS(5,4,3,1)=-13.1f; // $T/GA
ndH(5,4,4,1)=-3.8f*1000; ndS(5,4,4,1)=-12.6f; // $T/TA
ndH(1,5,4,1)=-2.9f*1000; ndS(1,5,4,1)=-7.6f; // A$/TA
ndH(1,5,4,2)=-4.1f*1000; ndS(1,5,4,2)=-13.0f; // A$/TC
ndH(1,5,4,3)=-4.2f*1000; ndS(1,5,4,3)=-15.0f; // A$/TG
ndH(1,5,4,4)=-0.2f*1000; ndS(1,5,4,4)=-0.5f; // A$/TT
ndH(1,1,5,4)=0.2f*1000; ndS(1,1,5,4)=2.3f; // AA/$T
ndH(1,1,4,5)=-0.5f*1000; ndS(1,1,4,5)=-1.1f; // AA/T$
ndH(1,2,5,3)=-6.3f*1000; ndS(1,2,5,3)=-17.1f; // AC/$G
ndH(1,2,4,5)=4.7f*1000; ndS(1,2,4,5)=14.2f; // AC/T$
ndH(1,3,5,2)=-3.7f*1000; ndS(1,3,5,2)=-10.0f; // AG/$C
ndH(1,3,4,5)=-4.1f*1000; ndS(1,3,4,5)=-13.1f; // AG/T$
ndH(1,4,5,1)=-2.9f*1000; ndS(1,4,5,1)=-7.6f; // AT/$A
ndH(1,4,4,5)=-3.8f*1000; ndS(1,4,4,5)=-12.6f; // AT/T$
ndH(2,5,3,1)=-3.7f*1000; ndS(2,5,3,1)=-10.0f; // C$/GA
ndH(2,5,3,2)=-4.0f*1000; ndS(2,5,3,2)=-11.9f; // C$/GC
ndH(2,5,3,3)=-3.9f*1000; ndS(2,5,3,3)=-10.9f; // C$/GG
ndH(2,5,3,4)=-4.9f*1000; ndS(2,5,3,4)=-13.8f; // C$/GT
ndH(2,1,5,4)=0.6f*1000; ndS(2,1,5,4)=3.3f; // CA/$T
ndH(2,1,3,5)=-5.9f*1000; ndS(2,1,3,5)=-16.5f; // CA/G$
ndH(2,2,5,3)=-4.4f*1000; ndS(2,2,5,3)=-12.6f; // CC/$G
ndH(2,2,3,5)=-2.6f*1000; ndS(2,2,3,5)=-7.4f; // CC/G$
ndH(2,3,5,2)=-4.0f*1000; ndS(2,3,5,2)=-11.9f; // CG/$C
ndH(2,3,3,5)=-3.2f*1000; ndS(2,3,3,5)=-10.4f; // CG/G$
ndH(2,4,5,1)=-4.1f*1000; ndS(2,4,5,1)=-13.0f; // CT/$A
ndH(2,4,3,5)=-5.2f*1000; ndS(2,4,3,5)=-15.0f; // CT/G$
ndH(3,5,2,1)=-6.3f*1000; ndS(3,5,2,1)=-17.1f; // G$/CA
ndH(3,5,2,2)=-4.4f*1000; ndS(3,5,2,2)=-12.6f; // G$/CC
ndH(3,5,2,3)=-5.1f*1000; ndS(3,5,2,3)=-14.0f; // G$/CG
ndH(3,5,2,4)=-4.0f*1000; ndS(3,5,2,4)=-10.9f; // G$/CT
ndH(3,1,5,4)=-1.1f*1000; ndS(3,1,5,4)=-1.6f; // GA/$T
ndH(3,1,2,5)=-2.1f*1000; ndS(3,1,2,5)=-3.9f; // GA/C$
ndH(3,2,5,3)=-5.1f*1000; ndS(3,2,5,3)=-14.0f; // GC/$G
ndH(3,2,2,5)=-0.2f*1000; ndS(3,2,2,5)=-0.1f; // GC/C$
ndH(3,3,5,2)=-3.9f*1000; ndS(3,3,5,2)=-10.9f; // GG/$C
ndH(3,3,2,5)=-3.9f*1000; ndS(3,3,2,5)=-11.2f; // GG/C$
ndH(3,4,5,1)=-4.2f*1000; ndS(3,4,5,1)=-15.0f; // GT/$A
ndH(3,4,2,5)=-4.4f*1000; ndS(3,4,2,5)=-13.1f; // GT/C$
ndH(4,5,1,1)=0.2f*1000; ndS(4,5,1,1)=2.3f; // T$/AA
ndH(4,5,1,2)=0.6f*1000; ndS(4,5,1,2)=3.3f; // T$/AC
ndH(4,5,1,3)=-1.1f*1000; ndS(4,5,1,3)=-1.6f; // T$/AG
ndH(4,5,1,4)=-6.9f*1000; ndS(4,5,1,4)=-20.0f; // T$/AT
ndH(4,1,5,4)=-6.9f*1000; ndS(4,1,5,4)=-20.0f; // TA/$T
ndH(4,1,1,5)=-0.7f*1000; ndS(4,1,1,5)=-0.7f; // TA/A$
ndH(4,2,5,3)=-4.0f*1000; ndS(4,2,5,3)=-10.9f; // TC/$G
ndH(4,2,1,5)=4.4f*1000; ndS(4,2,1,5)=14.9f; // TC/A$
ndH(4,3,5,2)=-4.9f*1000; ndS(4,3,5,2)=-13.8f; // TG/$C
ndH(4,3,1,5)=-1.6f*1000; ndS(4,3,1,5)=-3.6f; // TG/A$
ndH(4,4,5,1)=-0.2f*1000; ndS(4,4,5,1)=-0.5f; // TT/$A
ndH(4,4,1,5)=2.9f*1000; ndS(4,4,1,5)=10.4f; // TT/A$
return;
}
/*
 * Count the 'G' and 'C' characters of a NUL-terminated sequence.
 *
 * Only the uppercase letters are recognised; every other character
 * (lowercase letters included) is ignored.
 *
 * Returns the number of matching characters.
 */
int nparam_CountGCContent(char * seq ) {
	const char *cursor;
	int gc = 0;

	for (cursor = seq; *cursor != '\0'; cursor++) {
		switch (*cursor) {
		case 'G':
		case 'C':
			gc++;
			break;
		default:
			break;
		}
	}
	return gc;
}
/*
 * Normalise a nucleotide sequence to uppercase "ACGT" text.
 *
 * inseq  : input bases, given either as letters (any case) or as the
 *          numeric codes 0..3 (0=A, 1=C, 2=G, 3=T).
 * outseq : destination buffer; the caller must provide room for the
 *          number of bases processed plus a terminating NUL.
 * len    : number of bases to convert; 0 means "use strlen(inseq)".
 *
 * On success outseq holds the NUL-terminated uppercase sequence.
 * If any other symbol is met, conversion stops and outseq[0] is set to
 * '\0', which is the error signal callers test for.
 *
 * Because byte 0 is a valid code for 'A', an explicit len that runs past
 * a text terminator converts the terminator to 'A' instead of stopping.
 */
void nparam_CleanSeq (char* inseq, char* outseq, int len)
{
	/* Only scan for a terminator when no explicit length is supplied:
	 * numeric-coded input contains embedded zero bytes and need not be
	 * NUL-terminated at all. */
	int total = (len != 0) ? len : (int) strlen (inseq);
	int pos;

	/* Non-zero marker: outseq[0] doubles as the "still valid" flag. */
	outseq[0] = 'x';

	for (pos = 0; pos < total && outseq[0]; pos++)
	{
		switch (inseq[pos])
		{
			case 'a':
			case '\0':
			case 'A':
				outseq[pos] = 'A'; break;
			case 'c':
			case '\1':
			case 'C':
				outseq[pos] = 'C'; break;
			case 'g':
			case '\2':
			case 'G':
				outseq[pos] = 'G'; break;
			case 't':
			case '\3':
			case 'T':
				outseq[pos] = 'T'; break;
			default:
				/* Unknown symbol: flag the whole sequence as invalid. */
				outseq[0] = 0;
				break;
		}
	}
	outseq[pos] = '\0';
}
//Calculate TM for given sequence against its complement
/*
 * nparm : nearest-neighbour parameter set (dH table, entropy lookup, rlogc).
 * seq   : sequence as letters or numeric codes accepted by nparam_CleanSeq.
 * len   : number of bases to use.
 *
 * For every pair of adjacent bases the enthalpy from nparm->dH and the
 * entropy from nparam_GetEntropy are accumulated, pairing each base with
 * its complement (GETREVCODE).  The entropy sum starts from
 * -5.9 + nparm->rlogc.  The totals are converted by nparam_CalcTM.
 *
 * Returns NaN when the sequence contains a symbol nparam_CleanSeq rejects,
 * or when it does not fit in the internal scratch buffer.
 *
 * NOTE(review): with len == 0 the sequence is validated over strlen(seq)
 * but no stacking term is accumulated (the loop bound is len itself);
 * behaviour kept as-is -- confirm whether any caller relies on it.
 */
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
{
	const unsigned long long allOnes = 0xFFFFFFFFFFFFFFFFLLU;
	double NaN;
	double thedH = 0;
	//double thedS = nparam_GetInitialEntropy(nparm);
	double thedS = -5.9f+nparm->rlogc;
	char c1;
	char c2;
	char c3;
	char c4;
	int i;
	int needed;
	char nseq[50];

	/* Same all-ones bit pattern as before, built without a type-punned
	 * pointer cast (which breaks strict aliasing). */
	memcpy(&NaN, &allOnes, sizeof NaN);

	/* nparam_CleanSeq writes `needed` bases plus a terminator into nseq;
	 * refuse anything that would overflow the scratch buffer. */
	needed = (len != 0) ? len : (int) strlen(seq);
	if (needed >= (int) sizeof nseq)
		return NaN;

	nparam_CleanSeq (seq, nseq, len);
	if (!nseq[0])
		return NaN;

	for (i = 1; i < len; i++)
	{
		c1 = GETREVCODE(nseq[i-1]); //nparam_getComplement(seq[i-1],1);
		c2 = GETREVCODE(nseq[i]); //nparam_getComplement(seq[i],1);
		c3 = GETNUMCODE(nseq[i-1]);
		c4 = GETNUMCODE(nseq[i]);
		thedH += nparm->dH[(int)c3][(int)c4][(int)c1][(int)c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
		thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
	}

	return nparam_CalcTM(thedS,thedH);
}
/*
 * Calculate TM for seq1 hybridised against seq2.
 *
 * nparm : nearest-neighbour parameter set (dH table, entropy lookup, rlogc).
 * seq1  : first strand, letters or numeric codes accepted by nparam_CleanSeq.
 * seq2  : second strand, same encoding; each of its bases is complemented
 *         position by position (GETREVCODE), it is not reversed here.
 * len   : number of bases to use from both sequences.
 *
 * For every pair of adjacent positions the enthalpy from nparm->dH and the
 * entropy from nparam_GetEntropy are accumulated; the entropy sum starts
 * from -5.9 + nparm->rlogc.  The totals are converted by nparam_CalcTM.
 *
 * Returns NaN when either sequence contains a symbol nparam_CleanSeq
 * rejects, or does not fit in the internal scratch buffers.
 *
 * NOTE(review): with len == 0 each sequence is validated over its own
 * strlen but no stacking term is accumulated (the loop bound is len
 * itself); behaviour kept as-is -- confirm whether any caller relies on it.
 */
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
{
	const unsigned long long allOnes = 0xFFFFFFFFFFFFFFFFLLU;
	double NaN;
	double thedH = 0;
	//double thedS = nparam_GetInitialEntropy(nparm);
	double thedS = -5.9f+nparm->rlogc;
	char c1;
	char c2;
	char c3;
	char c4;
	int i;
	int needed1;
	int needed2;
	char nseq1[50];
	char nseq2[50];

	/* Same all-ones bit pattern as before, built without a type-punned
	 * pointer cast (which breaks strict aliasing). */
	memcpy(&NaN, &allOnes, sizeof NaN);

	/* nparam_CleanSeq writes that many bases plus a terminator; refuse
	 * anything that would overflow the scratch buffers. */
	needed1 = (len != 0) ? len : (int) strlen(seq1);
	needed2 = (len != 0) ? len : (int) strlen(seq2);
	if (needed1 >= (int) sizeof nseq1 || needed2 >= (int) sizeof nseq2)
		return NaN;

	nparam_CleanSeq (seq1, nseq1, len);
	if (!nseq1[0])
		return NaN;

	nparam_CleanSeq (seq2, nseq2, len);
	if (!nseq2[0])
		return NaN;

	for (i = 1; i < len; i++)
	{
		c1 = GETREVCODE(nseq2[i-1]); //nparam_getComplement(seq[i-1],1);
		c2 = GETREVCODE(nseq2[i]); //nparam_getComplement(seq[i],1);
		c3 = GETNUMCODE(nseq1[i-1]);
		c4 = GETNUMCODE(nseq1[i]);
		thedH += nparm->dH[(int)c3][(int)c4][(int)c1][(int)c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
		thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
	}

	return nparam_CalcTM(thedS,thedH);
}
/*
 * Melting temperature from GC content only:
 *
 *     64.9 + 41 * (gc - 16.4) / length
 *
 * where gc is the value returned by nparam_CountGCContent and length is
 * strlen(seq).  seq must be NUL-terminated.
 */
double calculateMeltingTemperatureBasic (char * seq) {
	const int nbases = strlen (seq);
	const int ngc = nparam_CountGCContent (seq);

	return 64.9 + 41*(ngc - 16.4)/nbases;
}

63
src/libthermo/nnparams.h Normal file
View File

@ -0,0 +1,63 @@
/*
 * nnparams.h
 * PHunterLib
 *
 * Nearest Neighbor Model Parameters
 *
 * Created by Tiayyba Riaz on 02/07/09.
 *
 */
#ifndef NNPARAMS_H_
#define NNPARAMS_H_
#include <math.h>
#include <string.h>
//#include "../libecoprimer/ecoprimer.h"

// following defines to simplify coding...
// Both table accessors expect a variable named `nparm` (a PNNParams) to be
// in scope at the point of use.  Arguments and the whole expansion are
// parenthesised so that expression arguments expand correctly; the result
// is still an lvalue and can be assigned to.
#define ndH(a,b,c,d) (nparm->dH[(int)(a)][(int)(b)][(int)(c)][(int)(d)])
#define ndS(a,b,c,d) (nparm->dS[(int)(a)][(int)(b)][(int)(c)][(int)(d)])

#define forbidden_enthalpy 1000000000000000000.0f
#define R 1.987f
#define SALT_METHOD_SANTALUCIA 1
#define SALT_METHOD_OWCZARZY 2
#define DEF_CONC_PRIMERS 0.0000008
#define DEF_CONC_SEQUENCES 0
#define DEF_SALT 0.05

// Base letter -> numeric code, looked up in a `bpencoder` table that must be
// visible where the macro is used (it is indexed by letter - 'A').
#define GETNUMCODE(a) (bpencoder[(a) - 'A'])
// Code of the complementary base: 5 - code.  Fully parenthesised so the
// macro can be used inside a larger expression (e.g. 2*GETREVCODE(x)).
#define GETREVCODE(a) (5-bpencoder[(a) - 'A'])

extern double forbidden_entropy;

typedef struct CNNParams_st
{
	double Ct1;
	double Ct2;
	double rlogc;
	double kplus;
	double kfac;
	int saltMethod;
	double gcContent;
	double new_TM;
	double dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
	double dS[6][6][6][6];
}CNNParams, * PNNParams;

void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm);
int nparam_CountGCContent(char * seq );
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_CalcTM(double entropy,double enthalpy);
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len);
double nparam_GetInitialEntropy(PNNParams nparm) ;
double calculateMeltingTemperatureBasic (char * seq);
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
#endif

View File

@ -7,6 +7,12 @@ import sys
import time
import getopt
try:
import psycopg2
_dbenable=True
except ImportError:
_dbenable=False
#####
#
#
@ -215,7 +221,56 @@ def readTaxonomyDump(taxdir):
return taxonomy,ranks,alternativeName,index
def readTaxonomyDB(dbname):
    """Load the taxonomy from the ``ncbi_taxonomy`` schema of a PostgreSQL database.

    dbname is passed to ``psycopg2.connect`` as the database name.

    Returns the tuple ``(taxonomy, ranks, alternativeName, index)``:

    * taxonomy: list of ``[numid, rank_index, parent_index, name...]`` lists
      sorted with ``taxonCmp``; every scientific name found for a taxon is
      appended to its entry.
    * ranks: dict mapping each rank class to its position in the sorted
      list of rank classes.
    * alternativeName: list of ``(name, category, taxon_index)`` tuples.
    * index: dict mapping a numid (or an aliased old numid) to the position
      of the taxon in ``taxonomy``; aliases without a current numid map to
      ``None``.

    Raises ImportError when psycopg2 could not be imported, and KeyError
    when a taxon refers to an unknown parent, name or alias target.
    """
    if not _dbenable:
        raise ImportError("psycopg2 is required to read a taxonomy from a database")

    connection = psycopg2.connect(database=dbname)
    try:
        cursor = connection.cursor()
        try:
            cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
            taxonomy=[list(x) for x in cursor]

            cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
            ranks=cursor.fetchall()
            ranks = dict((row[0],position) for position,row in enumerate(ranks))

            print >>sys.stderr,"Sorting taxons..."
            taxonomy.sort(taxonCmp)

            print >>sys.stderr,"Indexing taxonomy..."
            index = {}
            for t in taxonomy:
                index[t[0]]=bsearchTaxon(taxonomy, t[0])

            print >>sys.stderr,"Indexing parent and rank..."
            for t in taxonomy:
                t[1]=ranks[t[1]]
                try:
                    t[2]=index[t[2]]
                except KeyError:
                    # The root (numid 1) has no parent: make it its own parent.
                    if t[2] is None and t[0]==1:
                        t[2]=index[t[0]]
                    else:
                        # Bare raise keeps the original traceback.
                        raise

            cursor.execute("select taxid,name,category from ncbi_taxonomy.name")
            alternativeName=[]
            for taxid,name,classname in cursor:
                alternativeName.append((name,classname,index[taxid]))
                if classname == 'scientific name':
                    taxonomy[index[taxid]].append(name)

            cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")

            print >>sys.stderr,"Adding taxid alias..."
            for taxid,current in cursor:
                if current is not None:
                    index[taxid]=index[current]
                else:
                    index[taxid]=None
        finally:
            cursor.close()
    finally:
        # Everything has been copied into plain Python objects; release the
        # database resources even when a query or a lookup fails.
        connection.close()

    return taxonomy,ranks,alternativeName,index
#####
#
#
@ -293,16 +348,19 @@ def emblEntryParser(entry):
######################
# Separator between the fields of a fasta title line: a semicolon followed
# by any run of non-word characters.
_fastaSplit=re.compile(r';\W*')

def parseFasta(seq):
    """Parse one fasta record given as a single string.

    The first line is the title: its leading character is dropped, the
    first word is the identifier and the remainder is split into fields
    with ``_fastaSplit``.  Fields containing ``=`` become ``key=value``
    entries (split on the first ``=`` only); the other fields are joined
    with spaces to form the definition.  The remaining lines are stripped,
    concatenated and upper-cased to form the sequence.

    Returns the tuple ``(id, sequence, definition, info)``.
    """
    lines = seq.split('\n')
    title = lines[0].strip()[1:].split(None,1)
    seqid = title[0]
    if len(title) == 2:
        field = _fastaSplit.split(title[1])
    else:
        field = []
    info = dict(x.split('=',1) for x in field if '=' in x)
    definition = ' '.join([x for x in field if '=' not in x])
    # Join the sequence lines exactly once: slicing the already-joined
    # string a second time would drop its first residue.
    sequence = ''.join([x.strip() for x in lines[1:]]).upper()
    return seqid,sequence,definition,info
@ -527,9 +585,10 @@ def ecoParseOptions(arguments):
}
o,filenames = getopt.getopt(arguments,
'ht:n:gfe',
'ht:T:n:gfe',
['help',
'taxonomy=',
'taxonomy_db=',
'name=',
'genbank',
'fasta',
@ -540,7 +599,11 @@ def ecoParseOptions(arguments):
printHelp()
exit()
elif name in ('-t','--taxonomy'):
opt['taxmod']='dump'
opt['taxdir']=value
elif name in ('-T','--taxonomy_db'):
opt['taxmod']='db'
opt['taxdb']=value
elif name in ('-n','--name'):
opt['prefix']=value
elif name in ('-g','--genbank'):
@ -578,7 +641,11 @@ if __name__ == '__main__':
opt,filenames = ecoParseOptions(sys.argv[1:])
taxonomy = readTaxonomyDump(opt['taxdir'])
if opt['taxmod']=='dump':
taxonomy = readTaxonomyDump(opt['taxdir'])
elif opt['taxmod']=='db':
taxonomy = readTaxonomyDB(opt['taxdb'])
ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])