16 Commits

Author SHA1 Message Date
68c4743303 added print help
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@198 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-04-14 11:06:26 +00:00
035495b2a1 fixed twalk action
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@197 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-26 15:15:08 +00:00
3c21789533 Removed some problems for specificity
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@196 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-26 00:22:01 +00:00
75a6dac09a git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@195 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2009-03-25 10:18:34 +00:00
6726294c78 Removed compilation errors and added new implementation of ecoComplementChar
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@194 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-22 17:44:59 +00:00
584d3c406d added compare function for amplifias
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@193 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-20 11:59:55 +00:00
0440b8d761 Switch strict_three_prime option default value to 0 as a workaround bug patch
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@192 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-18 10:04:20 +00:00
db061fbb12 Patch print function for reverse primer
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@191 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-17 10:44:00 +00:00
ac52f3303d with full coverage statistic
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@190 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-15 21:53:24 +00:00
29820c1e26 with full coverage statistic
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@189 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-15 21:52:57 +00:00
5d212d5753 with full coverage statistic
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@188 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-15 21:52:29 +00:00
9908a40aaa with full coverage statistic
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@187 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-15 21:52:06 +00:00
665b22989f first version with preliminary print function
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@186 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-12 09:55:01 +00:00
d911d6bd20 git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@185 60f365c0-8329-0410-b2a4-ec073aeeaa1d 2009-03-10 08:49:11 +00:00
dffebd5826 New version of pairing algorithm (alpha)
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@184 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-09 11:05:51 +00:00
2be5f2659b algo change test for eric
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/branches/eric-test@183 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2009-03-06 07:25:02 +00:00
61 changed files with 874 additions and 5766 deletions

364
.cproject
View File

@ -1,221 +1,151 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?> <?xml version="1.0" encoding="UTF-8"?>
<?fileVersion 4.0.0?> <?fileVersion 4.0.0?>
<cproject> <cproject>
<storageModule moduleId="org.eclipse.cdt.core.settings"> <storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396"> <cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC"> <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC">
<externalSettings/> <externalSettings/>
<extensions> <extensions>
<extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/> <extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/> <extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/> </extensions>
</extensions> </storageModule>
</storageModule> <storageModule moduleId="cdtBuildSystem" version="4.0.0">
<storageModule moduleId="cdtBuildSystem" version="4.0.0"> <configuration artifactName="ecoPrimers" buildProperties="" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg">
<configuration artifactName="ecoPrimers" buildProperties="" description="" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg"> <folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077" name="/" resourcePath="">
<folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077" name="/" resourcePath=""> <toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.766054112" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.766054112" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base"> <targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.2057035265" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/>
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.2057035265" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/> <builder id="cdt.managedbuild.target.gnu.builder.macosx.base.783726363" managedBuildOn="false" name="Gnu Make Builder.MacOSX GCC" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/>
<builder id="cdt.managedbuild.target.gnu.builder.macosx.base.783726363" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/> <tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.914103467" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base">
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.914103467" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base"> <inputType id="cdt.managedbuild.tool.macosx.c.linker.input.62980206" superClass="cdt.managedbuild.tool.macosx.c.linker.input">
<inputType id="cdt.managedbuild.tool.macosx.c.linker.input.62980206" superClass="cdt.managedbuild.tool.macosx.c.linker.input"> <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/> <additionalInput kind="additionalinput" paths="$(LIBS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/> </inputType>
</inputType> </tool>
</tool> <tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.691108439" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.691108439" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/> <tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.695639877" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base">
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.695639877" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base"> <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1507665054" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<option id="gnu.both.asm.option.include.paths.1544375094" name="Include paths (-I)" superClass="gnu.both.asm.option.include.paths" valueType="includePath"/> </tool>
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1507665054" superClass="cdt.managedbuild.tool.gnu.assembler.input"/> <tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1786370580" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
</tool> <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.454329831" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1786370580" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/> <tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base">
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.454329831" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/> <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.330854350" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base"> </tool>
<option id="gnu.c.compiler.option.include.paths.823251305" superClass="gnu.c.compiler.option.include.paths" valueType="includePath"> </toolChain>
<listOptionValue builtIn="false" value="/usr/include"/> </folderInfo>
</option> </configuration>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.330854350" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/> </storageModule>
</tool> <storageModule moduleId="scannerConfiguration">
</toolChain> <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
</folderInfo> <profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
</configuration> <buildOutputProvider>
</storageModule> <openAction enabled="true" filePath=""/>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/> <parser enabled="true"/>
<storageModule moduleId="org.eclipse.cdt.core.language.mapping"/> </buildOutputProvider>
<storageModule moduleId="scannerConfiguration"> <scannerInfoProvider id="specsFile">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/> <runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"> <parser enabled="true"/>
<buildOutputProvider> </scannerInfoProvider>
<openAction enabled="true" filePath=""/> </profile>
<parser enabled="true"/> <profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
</buildOutputProvider> <buildOutputProvider>
<scannerInfoProvider id="specsFile"> <openAction enabled="true" filePath=""/>
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/> <parser enabled="true"/>
<parser enabled="true"/> </buildOutputProvider>
</scannerInfoProvider> <scannerInfoProvider id="makefileGenerator">
</profile> <runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile"> <parser enabled="true"/>
<buildOutputProvider> </scannerInfoProvider>
<openAction enabled="true" filePath=""/> </profile>
<parser enabled="true"/> <profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
</buildOutputProvider> <buildOutputProvider>
<scannerInfoProvider id="makefileGenerator"> <openAction enabled="true" filePath=""/>
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/> <parser enabled="true"/>
<parser enabled="true"/> </buildOutputProvider>
</scannerInfoProvider> <scannerInfoProvider id="specsFile">
</profile> <runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile"> <parser enabled="true"/>
<buildOutputProvider> </scannerInfoProvider>
<openAction enabled="true" filePath=""/> </profile>
<parser enabled="true"/> <profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
</buildOutputProvider> <buildOutputProvider>
<scannerInfoProvider id="specsFile"> <openAction enabled="true" filePath=""/>
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/> <parser enabled="true"/>
<parser enabled="true"/> </buildOutputProvider>
</scannerInfoProvider> <scannerInfoProvider id="specsFile">
</profile> <runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"> <parser enabled="true"/>
<buildOutputProvider> </scannerInfoProvider>
<openAction enabled="true" filePath=""/> </profile>
<parser enabled="true"/> <profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
</buildOutputProvider> <buildOutputProvider>
<scannerInfoProvider id="specsFile"> <openAction enabled="true" filePath=""/>
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/> <parser enabled="true"/>
<parser enabled="true"/> </buildOutputProvider>
</scannerInfoProvider> <scannerInfoProvider id="specsFile">
</profile> <runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"> <parser enabled="true"/>
<buildOutputProvider> </scannerInfoProvider>
<openAction enabled="true" filePath=""/> </profile>
<parser enabled="true"/> <profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
</buildOutputProvider> <buildOutputProvider>
<scannerInfoProvider id="specsFile"> <openAction enabled="true" filePath=""/>
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/> <parser enabled="true"/>
<parser enabled="true"/> </buildOutputProvider>
</scannerInfoProvider> <scannerInfoProvider id="specsFile">
</profile> <runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile"> <parser enabled="true"/>
<buildOutputProvider> </scannerInfoProvider>
<openAction enabled="true" filePath=""/> </profile>
<parser enabled="true"/> <profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
</buildOutputProvider> <buildOutputProvider>
<scannerInfoProvider id="specsFile"> <openAction enabled="true" filePath=""/>
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/> <parser enabled="true"/>
<parser enabled="true"/> </buildOutputProvider>
</scannerInfoProvider> <scannerInfoProvider id="specsFile">
</profile> <runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP"> <parser enabled="true"/>
<buildOutputProvider> </scannerInfoProvider>
<openAction enabled="true" filePath=""/> </profile>
<parser enabled="true"/> <profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
</buildOutputProvider> <buildOutputProvider>
<scannerInfoProvider id="specsFile"> <openAction enabled="true" filePath=""/>
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/> <parser enabled="true"/>
<parser enabled="true"/> </buildOutputProvider>
</scannerInfoProvider> <scannerInfoProvider id="specsFile">
</profile> <runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC"> <parser enabled="true"/>
<buildOutputProvider> </scannerInfoProvider>
<openAction enabled="true" filePath=""/> </profile>
<parser enabled="true"/> <profile id="org.eclipse.cdt.managedbuilder.xlc.core.XLCManagedMakePerProjectProfile">
</buildOutputProvider> <buildOutputProvider>
<scannerInfoProvider id="specsFile"> <openAction enabled="false" filePath=""/>
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/> <parser enabled="false"/>
<parser enabled="true"/> </buildOutputProvider>
</scannerInfoProvider> <scannerInfoProvider id="specsFile">
</profile> <runAction arguments="-E -v ${plugin_state_location}/${specs_file}" command="${XL_compilerRoot}/xlc" useDefault="true"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396;cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077;cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909;cdt.managedbuild.tool.gnu.c.compiler.input.330854350"> <parser enabled="true"/>
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/> </scannerInfoProvider>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"> </profile>
<buildOutputProvider> <profile id="org.eclipse.cdt.managedbuilder.xlc.core.XLCManagedMakePerProjectProfileCPP">
<openAction enabled="true" filePath=""/> <buildOutputProvider>
<parser enabled="true"/> <openAction enabled="false" filePath=""/>
</buildOutputProvider> <parser enabled="false"/>
<scannerInfoProvider id="specsFile"> </buildOutputProvider>
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/> <scannerInfoProvider id="specsFile">
<parser enabled="true"/> <runAction arguments="-E -v ${plugin_state_location}/${specs_file}" command="${XL_compilerRoot}/xlC" useDefault="true"/>
</scannerInfoProvider> <parser enabled="true"/>
</profile> </scannerInfoProvider>
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile"> </profile>
<buildOutputProvider> </storageModule>
<openAction enabled="true" filePath=""/> <storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
<parser enabled="true"/> </cconfiguration>
</buildOutputProvider> </storageModule>
<scannerInfoProvider id="makefileGenerator"> <storageModule moduleId="cdtBuildSystem" version="4.0.0">
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/> <project id="ecoPrimers.null.1292969001" name="ecoPrimers"/>
<parser enabled="true"/> </storageModule>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
<buildOutputProvider>
<openAction enabled="true" filePath=""/>
<parser enabled="true"/>
</buildOutputProvider>
<scannerInfoProvider id="specsFile">
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
<parser enabled="true"/>
</scannerInfoProvider>
</profile>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="ecoPrimers.null.1292969001" name="ecoPrimers"/>
</storageModule>
</cproject> </cproject>

16
.gitignore vendored
View File

@ -1,16 +0,0 @@
# /src/
/src/*.P
/src/*.a
/src/ecoPrimer
# /src/libecoPCR/
/src/libecoPCR/*.P
/src/libecoPCR/*.a
# /src/libecoprimer/
/src/libecoprimer/*.P
/src/libecoprimer/*.a
# /src/libthermo/
/src/libthermo/*.P

View File

@ -5,26 +5,45 @@
<projects> <projects>
</projects> </projects>
<buildSpec> <buildSpec>
<buildCommand>
<name>org.python.pydev.PyDevBuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand> <buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name> <name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers> <triggers>clean,full,incremental,</triggers>
<arguments> <arguments>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary> <dictionary>
<key>?name?</key> <key>?name?</key>
<value></value> <value></value>
</dictionary> </dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary> <dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key> <key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value> <value>true</value>
</dictionary> </dictionary>
<dictionary> <dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key> <key>org.eclipse.cdt.make.core.contents</key>
<value>all</value> <value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary> </dictionary>
<dictionary> <dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key> <key>org.eclipse.cdt.make.core.buildArguments</key>
@ -35,37 +54,13 @@
<value>make</value> <value>make</value>
</dictionary> </dictionary>
<dictionary> <dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key> <key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value> <value>all</value>
</dictionary> </dictionary>
<dictionary> <dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key> <key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value> <value>true</value>
</dictionary> </dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
</arguments> </arguments>
</buildCommand> </buildCommand>
<buildCommand> <buildCommand>
@ -78,6 +73,5 @@
<nature>org.eclipse.cdt.core.cnature</nature> <nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature> <nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature> <nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.python.pydev.pythonNature</nature>
</natures> </natures>
</projectDescription> </projectDescription>

View File

@ -1,7 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?eclipse-pydev version="1.0"?>
<pydev_project>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Python 2.6</pydev_property>
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.6</pydev_property>
</pydev_project>

View File

@ -1 +0,0 @@
0.5

View File

@ -1,4 +1,4 @@
EXEC=ecoPrimers EXEC=ecoPrimer
PRIMER_SRC= ecoprimer.c PRIMER_SRC= ecoprimer.c
PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC)) PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC))
@ -6,12 +6,10 @@ PRIMER_OBJ= $(patsubst %.c,%.o,$(PRIMER_SRC))
SRCS= $(PRIMER_SRC) SRCS= $(PRIMER_SRC)
LIB= -lecoprimer -lecoPCR -lthermo -lz -lm LIB= -lecoPCR -lecoprimer -lz -lm
LIBFILE= libecoPCR/libecoPCR.a \ LIBFILE= libecoPCR/libecoPCR.a \
libecoprimer/libecoprimer.a \ libecoprimer/libecoprimer.a
libthermo/libthermo.a \
include global.mk include global.mk
@ -27,8 +25,8 @@ all: $(EXEC)
# executable compilation and link # executable compilation and link
ecoPrimers: $(PRIMER_OBJ) $(LIBFILE) ecoPrimer: $(PRIMER_OBJ) $(LIBFILE)
$(CC) -g $(LDFLAGS) -O5 -m64 -o $@ $< $(LIBPATH) $(LIB) $(CC) $(LDFLAGS) -O5 -m64 -fast -o $@ $< $(LIBPATH) $(LIB)
######## ########
@ -43,8 +41,6 @@ libecoPCR/libecoPCR.a:
libecoprimer/libecoprimer.a: libecoprimer/libecoprimer.a:
$(MAKE) -C libecoprimer $(MAKE) -C libecoprimer
libthermo/libthermo.a:
$(MAKE) -C libthermo
######## ########
# #
@ -54,26 +50,9 @@ libthermo/libthermo.a:
clean: clean:
rm -f *.o rm -f *.o
rm -f *.P
rm -f $(EXEC) rm -f $(EXEC)
$(MAKE) -C libecoPCR clean $(MAKE) -C libecoPCR clean
$(MAKE) -C libecoprimer clean $(MAKE) -C libecoprimer clean
$(MAKE) -C libthermo clean
########
#
# clean for k2 to remove .o and .P files
#
########
k2clean:
rm -f *.o
rm -f *.P
rm -f libecoPCR/*.o
rm -f libecoPCR/*.P
rm -f libecoprimer/*.o
rm -f libecoprimer/*.P
rm -f libthermo/*.o
rm -f libthermo/*.P

BIN
src/ecoPrimer Executable file

Binary file not shown.

View File

@ -6,8 +6,6 @@
*/ */
#include "libecoprimer/ecoprimer.h" #include "libecoprimer/ecoprimer.h"
#include "libecoprimer/PrimerSets.h"
#include "libecoprimer/ahocorasick.h"
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
@ -15,22 +13,10 @@
#include <getopt.h> #include <getopt.h>
#include <time.h> #include <time.h>
#include <sys/time.h> #include <sys/time.h>
#include <dlfcn.h>
#include"libthermo/nnparams.h"
#include"libthermo/thermostats.h"
#define VERSION "0.1"
#define VERSION "0.5"
/* TR: by default, statistics are made on species level*/ /* TR: by default, statistics are made on species level*/
#define DEFAULTTAXONRANK "species" #define DEFULTTAXONRANK "species"
static int cmpprintedpairs(const void* p1,const void* p2);
//float _Z27calculateMeltingTemperature_ (char * seq1, char * seq2);
pwordcount_t reduce_words_to_debug (pwordcount_t words, poptions_t options);
void print_wordwith_positions (primer_t prm, uint32_t seqdbsize, poptions_t options);
void* lib_handle = NULL;
float (*calcMelTemp)(char*, char*);
/* ----------------------------------------------- */ /* ----------------------------------------------- */
/* printout help */ /* printout help */
@ -57,11 +43,10 @@ static void PrintHelp()
PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n"); PP " database radical without any extension. For example /ecoPrimerDB/fstvert\n\n");
PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n"); PP "-e : [E]rror : max error allowed by oligonucleotide (0 by default)\n\n");
PP "-h : [H]elp - print <this> help\n\n"); PP "-h : [H]elp - print <this> help\n\n");
PP "-i : [I]gnore the given taxonomy id (define the counterexample taxon set).\n\n"); PP "-i : [I]gnore the given taxonomy id.\n\n");
PP "-l : minimum [L]ength : define the minimum amplication length. \n\n"); PP "-l : minimum [L]ength : define the minimum amplication length. \n\n");
PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n"); PP "-L : maximum [L]ength : define the maximum amplicationlength. \n\n");
PP "-r : [R]estricts the search to the given taxonomic id (restrict the example taxon set).\n\n"); PP "-r : [R]estricts the search to the given taxonomic id.\n\n");
PP "-E : [E]xception taxid allows to indicate than some subclade of example sequences are conterexamples.\n\n");
PP "-c : Consider that the database sequences are [c]ircular\n\n"); PP "-c : Consider that the database sequences are [c]ircular\n\n");
PP "-3 : Three prime strict match\n\n"); PP "-3 : Three prime strict match\n\n");
PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n"); PP "-q : Strict matching [q]uorum, percentage of the sequences in which strict primers are found. By default it is 70\n\n");
@ -69,41 +54,25 @@ static void PrintHelp()
PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n"); PP "-t : required [t]axon level for results, by default the results are computed at species level\n\n");
PP "-x : false positive quorum\n\n"); PP "-x : false positive quorum\n\n");
PP "-D : set in [d]ouble strand mode\n\n"); PP "-D : set in [d]ouble strand mode\n\n");
PP "-O : set the primer length (default 18) \n\n");
PP "-S : Set in [s]ingle strand mode\n\n"); PP "-S : Set in [s]ingle strand mode\n\n");
PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1 or OWCZARZY:2, default=1)\n\n");
PP "-a : Salt contentration in M for Tm computation (default 0.05 M)\n\n");
PP "-U : No multi match\n\n"); PP "-U : No multi match\n\n");
PP "-R : Define the [R]eference sequence identifier (must be part of example set)\n\n");
PP "-A : Print the list of all identifier of sequences present in the database\n\n");
PP "-f : Remove data mining step during strict primer identification\n\n");
PP "-v : Store statistic file about memory usage during strict primer identification\n\n");
PP "-p : Print sets of primers (may take several minutes after primers have been designed!)\n\n");
PP "-T : Ignore pairs having specificity below this Threshold\n\n");
PP "\n"); PP "\n");
PP "------------------------------------------\n"); PP "------------------------------------------\n");
PP "Table result description : \n"); PP "Table result description : \n");
PP "column 1 : serial number\n"); PP "column 1 : serial number\n");
PP "column 2 : primer1\n"); PP "column 2 : primer1\n");
PP "column 3 : primer2\n"); PP "column 3 : primer2\n");
PP "column 4 : primer1 Tm without mismatch\n"); PP "column 4 : good/bad\n");
PP "column 5 : primer1 lowest Tm against exemple sequences\n"); PP "column 5 : in sequence count\n");
PP "column 6 : primer2 Tm without mismatch\n"); PP "column 6 : out sequence count\n");
PP "column 7 : primer2 lowest Tm against exemple sequences\n"); PP "column 7 : yule\n");
PP "column 8 : primer1 G+C count\n"); PP "column 8 : in taxa count\n");
PP "column 9 : primer2 G+C count\n"); PP "column 9 : out taxa count\n");
PP "column 10 : good/bad\n"); PP "column 10 : coverage\n");
PP "column 11 : amplified example sequence count\n"); PP "column 11 : specificity\n");
PP "column 12 : amplified counterexample sequence count\n"); PP "column 12 : minimum amplified length\n");
PP "column 13 : yule\n"); PP "column 13 : maximum amplified length\n");
PP "column 14 : amplified example taxa count\n"); PP "column 14 : average amplified length\n");
PP "column 15 : amplified counterexample taxa count\n");
PP "column 16 : ratio of amplified example taxa versus all example taxa (Bc index)\n");
PP "column 17 : unambiguously identified example taxa count\n");
PP "column 18 : ratio of specificity unambiguously identified example taxa versus all example taxa (Bs index)\n");
PP "column 19 : minimum amplified length\n");
PP "column 20 : maximum amplified length\n");
PP "column 21 : average amplified length\n");
PP "------------------------------------------\n"); PP "------------------------------------------\n");
PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n"); PP " http://www.grenoble.prabi.fr/trac/ecoPrimer/\n");
PP "------------------------------------------\n\n"); PP "------------------------------------------\n\n");
@ -124,18 +93,13 @@ static void ExitUsage(int stat)
void initoptions(poptions_t options) void initoptions(poptions_t options)
{ {
options->statistics=FALSE;
options->filtering=TRUE;
options->lmin=0; //< Amplifia minimal length options->lmin=0; //< Amplifia minimal length
options->lmax=1000; //< Amplifia maximal length options->lmax=0; //< Amplifia maximal length
options->error_max=3; //**< maximum error count in fuzzy search options->error_max=3; //**< maximum error count in fuzzy search
options->primer_length=18; //**< minimal length of the primers options->primer_length=18; //**< minimal length of the primers
options->restricted_taxid=NULL; //**< limit amplification below these taxid options->restricted_taxid=NULL; //**< limit amplification below these taxid
options->ignored_taxid=NULL; //**< no amplification below these taxid options->ignored_taxid=NULL; //**< no amplification below these taxid
options->exception_taxid=NULL; //**< no amplification below these taxid
options->prefix=NULL; options->prefix=NULL;
options->reference=NULL;
options->refseq=NULL;
options->circular=0; options->circular=0;
options->doublestrand=1; options->doublestrand=1;
options->strict_quorum=0.7; options->strict_quorum=0.7;
@ -145,193 +109,84 @@ void initoptions(poptions_t options)
options->strict_three_prime=0; options->strict_three_prime=0;
options->r=0; options->r=0;
options->g=0; options->g=0;
options->e=0;
options->no_multi_match=FALSE; options->no_multi_match=FALSE;
options->pnparm = NULL; strcpy(options->taxonrank, DEFULTTAXONRANK); /*taxon level for results, species by default*/
strcpy(options->taxonrank, DEFAULTTAXONRANK); /*taxon level for results, species by default*/
options->saltmethod = SALT_METHOD_SANTALUCIA;
options->salt = DEF_SALT;
options->printAC=FALSE;
options->print_sets_of_primers = FALSE;
options->specificity_threshold = 0.6;
options->links_cnt = 1;
options->max_links_percent = -1; /*graph only those primers having maximum 15% links*/
options->filter_on_links = FALSE;
} }
void printcurrenttime ()
{
time_t now;
struct tm *ts;
char buf[80];
/* Get the current time */
now = time(NULL);
/* Format and print the time, "ddd yyyy-mm-dd hh:mm:ss zzz" */
ts = localtime(&now);
strftime(buf, sizeof(buf), "%a %Y-%m-%d %H:%M:%S %Z", ts);
fprintf(stderr,"#%d#, %s\n",(int)now, buf);
}
void printcurrenttimeinmilli()
{
struct timeval tv;
struct timezone tz;
struct tm *tm;
gettimeofday(&tv, &tz);
tm=localtime(&tv.tv_sec);
fprintf(stderr, " %d:%02d:%02d %d %d \n", tm->tm_hour, tm->tm_min,
tm->tm_sec, tv.tv_usec, tv.tv_usec/1000);
}
/*TR: Added*/
void printapair(int32_t index,ppair_t pair, poptions_t options) void printapair(int32_t index,ppair_t pair, poptions_t options)
{ {
bool_t asdirect1=pair->asdirect1; uint32_t wellidentifiedtaxa;
bool_t asdirect2=pair->asdirect2;
bool_t asdirecttmp;
word_t w1=pair->p1->word;
word_t w2=pair->p2->word;
word_t wtmp;
bool_t good1=pair->p1->good;
bool_t good2=pair->p2->good;
bool_t goodtmp;
bool_t strand;
uint32_t i, j;
float temp;
CNNParams nnparams;
//nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,DEF_SALT,SALT_METHOD_SANTALUCIA);
char *c;
char p1[32];
char p2[32];
if (!asdirect1)
w1=ecoComplementWord(w1,options->primer_length);
if (!asdirect2)
w2=ecoComplementWord(w2,options->primer_length);
if (w2 < w1)
{
wtmp=w1;
w1=w2;
w2=wtmp;
asdirecttmp=asdirect1;
asdirect1=asdirect2;
asdirect2=asdirecttmp;
goodtmp=good1;
good1=good2;
good2=goodtmp;
}
//print serial number
printf("%6d\t",index); printf("%6d\t",index);
if (pair->asdirect1)
printf("%s\t",ecoUnhashWord(pair->p1->word,options->primer_length));
else
printf("%s\t",ecoUnhashWord(ecoComplementWord(pair->p1->word,
options->primer_length),options->primer_length));
c = ecoUnhashWord(w1,options->primer_length); if (pair->asdirect2)
strcpy (p1, c); printf("%s",ecoUnhashWord(pair->p2->word,options->primer_length));
c = ecoUnhashWord(w2,options->primer_length); else
strcpy (p2, c); printf("%s",ecoUnhashWord(ecoComplementWord(pair->p2->word,
options->primer_length),options->primer_length));
//print primer1 printf("\t%c%c", "bG"[(int)pair->p1->good],"bG"[(int)pair->p2->good]);
printf("%s\t", p1);
//print primer2
printf("%s", p2);
//print primer1 melting temperature
printf ("\t%3.1f", pair->p1temp);
//print minimum melting temperature of approximate versions of primer1
printf ("\t%3.1f", pair->p1mintemp);
//print primer2 melting temperature
printf ("\t%3.1f", pair->p2temp);
//print minimum melting temperature of approximate versions of primer2
printf ("\t%3.1f", pair->p2mintemp);
//print gc contents of primer1
printf ("\t%d",nparam_CountGCContent(p1));
//print gc contents of primer2
printf ("\t%d",nparam_CountGCContent(p2));
//print good/bad pair indicator
printf("\t%c%c", "bG"[(int)good1],"bG"[(int)good2]);
//print inexample count
printf("\t%d", pair->inexample); printf("\t%d", pair->inexample);
//print out example count
printf("\t%d", pair->outexample); printf("\t%d", pair->outexample);
//print yule
printf("\t%4.3f", pair->yule); printf("\t%4.3f", pair->yule);
//print in taxa count
printf("\t%d", pair->intaxa); printf("\t%d", pair->intaxa);
//print out taxa count
printf("\t%d", pair->outtaxa); printf("\t%d", pair->outtaxa);
printf("\t%4.3f", (float)pair->intaxa/options->intaxa);
//print coverage wellidentifiedtaxa = (pair->intaxa + pair->outtaxa) - pair->notwellidentifiedtaxa;
printf("\t%4.3f", (float)pair->bc);
//print well identified taxa count //printf("\t%d", pair->notwellidentifiedtaxa);
printf("\t%d", pair->intaxa - pair->notwellidentifiedtaxa);
//printf("\t%d", (pair->intaxa + pair->outtaxa));
printf("\t%4.3f", (float)wellidentifiedtaxa/(options->intaxa + options->outtaxa));
//print specificity
printf("\t%4.3f", pair->bs);
//print min amplifia lenght
printf("\t%d", pair->mind); printf("\t%d", pair->mind);
//print max amplifia lenght
printf("\t%d", pair->maxd); printf("\t%d", pair->maxd);
printf("\t%3.2f\n", (float)pair->sumd/pair->inexample);
//print average amplifia lenght
printf("\t%3.2f", (float)pair->sumd/pair->amplifiacount);
//print amplifia information about reference sequence if specified
if (options->refseq && pair->refsequence >=0)
{
printf("\t%s:",options->reference);
strand = pair->pcr.amplifias[pair->refsequence].strand;
if (strand)
printf("join(");
else
printf("complement(");
printf("%d..%d,%d..%d",pair->pcr.amplifias[pair->refsequence].begin - options->primer_length + 1,
pair->pcr.amplifias[pair->refsequence].begin,
pair->pcr.amplifias[pair->refsequence].end + 2,
pair->pcr.amplifias[pair->refsequence].end + options->primer_length + 1
);
printf(")");
printf("\t");
for (c=pair->pcr.amplifias[pair->refsequence].amplifia,
i=pair->pcr.amplifias[pair->refsequence].begin;
i<=pair->pcr.amplifias[pair->refsequence].end;
i++,
c+=(strand)? 1:-1)
printf("%c","acgt"[(strand)? (*c):(~*c)&3]);
}
else
printf("\t\t");
/* j=0;
for (i=0; i<options->dbsize; i++)
if (pair->wellIdentifiedSeqs[i] == 1)
j++;
printf("%d", j);*/
printf("\n");
} }
static int cmpprintedpairs(const void* p1,const void* p2) uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options)
{
float s1,s2;
ppair_t pair1,pair2;
pair1=*((ppair_t*)p1);
pair2=*((ppair_t*)p2);
s1 = pair1->yule * pair1->bs;
s2 = pair2->yule * pair2->bs;
// fprintf(stderr,"s1 : %4.3f %4.3f %4.3f\n",pair1->yule , pair1->bs,s1);
// fprintf(stderr,"s2 : %4.3f %4.3f %4.3f\n\n",pair2->yule , pair2->bs,s2);
if (s1 > s2) return -1;
if (s1 < s2) return 1;
return 0;
}
uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t options, pecodnadb_t seqdb)
{ {
uint32_t i,j; uint32_t i,j;
float q,qfp; float q,qfp;
@ -346,44 +201,34 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
qfp = (float)sortedpairs[i]->outexample/options->outsamples; qfp = (float)sortedpairs[i]->outexample/options->outsamples;
else qfp=0.0; else qfp=0.0;
sortedpairs[i]->wellIdentifiedSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets
sortedpairs[i]->coveredSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets
sortedpairs[i]->quorumin = q; sortedpairs[i]->quorumin = q;
sortedpairs[i]->quorumout = qfp; sortedpairs[i]->quorumout = qfp;
sortedpairs[i]->yule = q - qfp; sortedpairs[i]->yule = q -qfp;
sortedpairs[j]=sortedpairs[i]; sortedpairs[j]=sortedpairs[i];
if (q > options->sensitivity_quorum && if (q > options->sensitivity_quorum &&
qfp < options->false_positive_quorum) qfp < options->false_positive_quorum)
{ {
//TR 05/09/10 - wellIdentified needed for primer sets (void)taxonomycoverage(sortedpairs[j],options);
sortedpairs[j]->wellIdentifiedSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array"); taxonomyspecificity(sortedpairs[j]);
sortedpairs[j]->coveredSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array"); j++;
(void)taxonomycoverage(sortedpairs[j],options, seqdb, options->dbsize);
taxonomyspecificity(sortedpairs[j], seqdb, options->dbsize);
//j++;
//if specificity less than user provieded threshold (default 60%) then ignore this pair
if (sortedpairs[j]->bs >= options->specificity_threshold)
j++;
} }
} }
qsort(sortedpairs,j,sizeof(ppair_t),cmpprintedpairs);
return j; return j;
} }
void printpairs (ppairtree_t pairs, poptions_t options)
void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy, pecodnadb_t seqdb)
{ {
ppair_t* sortedpairs; ppair_t* sortedpairs;
ppair_t* index; ppair_t* index;
ppairlist_t pl; ppairlist_t pl;
size_t i,j; size_t i,j;
size_t count; int32_t count;
char *taxon[]={"taxon","taxa"};
ecotx_t *current_taxon;
//pairset pair_sets;
pairset *pset = NULL;
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n"); //printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
@ -403,123 +248,49 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy,
for (i=0;i<pl->paircount;i++,j++) for (i=0;i<pl->paircount;i++,j++)
sortedpairs[j]=pl->pairs+i; sortedpairs[j]=pl->pairs+i;
count=filterandsortpairs(sortedpairs,pairs->count,options, seqdb); count=filterandsortpairs(sortedpairs,pairs->count,options);
getThermoProperties(sortedpairs, count, options);
fprintf(stderr,"Total good pair count : %u\n",(uint32_t)count);
printf("#\n");
printf("# ecoPrimer version %s\n",VERSION);
printf("# Rank level optimisation : %s\n", options->taxonrank);
printf("# max error count by oligonucleotide : %d\n",options->error_max);
printf("#\n");
if (options->r)
{
printf("# Restricted to %s:\n",taxon[(options->r>1) ? 1:0]);
for(i=0;i<(uint32_t)options->r;i++)
{
current_taxon=eco_findtaxonbytaxid(taxonomy,options->restricted_taxid[i]);
printf("# %d : %s (%s)\n", current_taxon->taxid,
current_taxon->name,
taxonomy->ranks->label[current_taxon->rank]
);
}
printf("#\n");
}
if (options->g)
{
printf("# Ignore %s:\n",taxon[(options->g>1) ? 1:0]);
for(i=0;i<(uint32_t)options->g;i++)
{
current_taxon=eco_findtaxonbytaxid(taxonomy,options->ignored_taxid[i]);
printf("# %d : %s (%s)\n", current_taxon->taxid,
current_taxon->name,
taxonomy->ranks->label[current_taxon->rank]
);
}
printf("#\n");
}
printf("# strict primer quorum : %3.2f\n",options->strict_quorum);
printf("# example quorum : %3.2f\n",options->sensitivity_quorum);
if (options->g + options->r)
printf("# counterexample quorum : %3.2f\n",options->false_positive_quorum);
printf("#\n");
printf("# database : %s\n",options->prefix);
printf("# Database is constituted of %5d examples corresponding to %5d %s\n",options->insamples,
options->intaxa,options->taxonrank);
printf("# and %5d counterexamples corresponding to %5d %s\n",options->outsamples,
options->outtaxa,options->taxonrank);
printf("#\n");
if (options->lmin && options->lmax)
printf("# amplifiat length between [%d,%d] bp\n",options->lmin,options->lmax);
else if (options->lmin)
printf("# amplifiat length larger than %d bp\n",options->lmin);
else if (options->lmax)
printf("# amplifiat length smaller than %d bp\n",options->lmax);
if (options->circular)
printf("# DB sequences are considered as circular\n");
else
printf("# DB sequences are considered as linear\n");
printf("# Pairs having specificity less than %0.2f will be ignored\n", options->specificity_threshold);
printf("#\n");
for (i=0;i < count;i++) for (i=0;i < count;i++)
printapair(i,sortedpairs[i],options); printapair(i,sortedpairs[i],options);
if (options->filter_on_links)
{
fprintf (stderr, "Old size: %d, ", count);
count = primers_changeSortedArray (&sortedpairs, count, options);
//count = primers_filterWithGivenLinks (&sortedpairs, count, options);
fprintf (stderr, "New size: %d\n", count);
if (count == 0)
{
fprintf (stderr, "No pairs passed the links constraints.\n");
printf ("No pairs passed the links constraints.\n");
return;
}
for (i=0;i < count;i++)
printapair(i,sortedpairs[i],options);
}
if (options->print_sets_of_primers == TRUE)
{
/*pair_sets = build_primers_set (sortedpairs, count, seqdb, options);
printf("Results from Greedy Algorithm and some other possibilities:\n");
some_other_set_possibilities (&pair_sets, sortedpairs, count, seqdb, options);
printf("Results from simulated Anealing:\n");
sets_by_SimulatedAnealing (&pair_sets, sortedpairs, count, seqdb, options);
printf("Results from Tabu Search:\n");
sets_by_TabuSearch (&pair_sets, sortedpairs, count, seqdb, options);*/
//pset = sets_by_BruteForce (sortedpairs, count, seqdb, options);
//if (pset)
/*/{
printf("Results from simulated Anealing:\n");
sets_by_SimulatedAnealing (pset, sortedpairs, count, seqdb, options);
printf("Results from Tabu Search:\n");
sets_by_TabuSearch (pset, sortedpairs, count, seqdb, options);
if (pset)
{
ECOFREE (pset->set_wellIdentifiedTaxa, "Could not free memory for pair set wi");
ECOFREE (pset, "Could not free memory for pair");
}
}*/
build_and_print_sets (sortedpairs, count, seqdb, options);
}
//primers_graph_graphviz (sortedpairs, count, options);
} }
#ifdef MASKEDCODE
void printpairs (pairscount_t pairs, poptions_t options, int32_t rankdbstats, uint32_t seqdbsize)
{
uint32_t i;
uint32_t wordsize = options->primer_length;
uint32_t quorumseqs;
double sens;
double speci;
float avg;
quorumseqs = seqdbsize * 70 / 100;
printf("primer_1\tseq_1\tPrimer_2\tseq_2\tamplifia_count\t%s_snes\t%s_spe\tmin_l\tmax_l\tavr_l\n", options->taxonrank, options->taxonrank);
for (i=0; i < pairs.paircount; i++)
{
if (quorumseqs > pairs.pairs[i].inexample) continue;
sens = (pairs.pairs[i].taxsetindex*1.0)/rankdbstats*100;
speci = (pairs.pairs[i].oktaxoncount*1.0)/pairs.pairs[i].taxsetindex*100;
avg = (pairs.pairs[i].mind+pairs.pairs[i].maxd)*1.0/2;
printf("P1\t%s", ecoUnhashWord(pairs.pairs[i].w1, wordsize));
printf("\tP2\t%s", ecoUnhashWord(pairs.pairs[i].w2, wordsize));
printf("\t%d", pairs.pairs[i].inexample);
printf("\t%3.2f", sens);
printf("\t%3.2f", speci);
printf("\t%d", pairs.pairs[i].mind);
printf("\t%d", pairs.pairs[i].maxd);
printf("\t%3.2f\n", avg);
}
}
#endif /* MASKEDCODE */
/*updateseqparams: This function counts the insample and outsample sequences /*updateseqparams: This function counts the insample and outsample sequences
* and with each sequences adds a tag of the taxon to which the sequence beongs*/ * and with each sequences adds a tag of the taxon to which the sequence beongs*/
@ -532,7 +303,7 @@ void updateseqparams (pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *taxo
for (i=0;i<seqdbsize;i++) for (i=0;i<seqdbsize;i++)
{ {
seqdb[i]->isexample=isExampleTaxon(taxonomy,seqdb[i]->taxid,options); seqdb[i]->isexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options);
if (seqdb[i]->isexample) if (seqdb[i]->isexample)
(*insamples)++; (*insamples)++;
else else
@ -569,14 +340,53 @@ void setresulttaxonrank (ecotaxonomy_t *taxonomy, poptions_t options)
} }
/* to get db stats, totals of species, genus etc....*/ /* to get db stats, totals of species, genus etc....*/
static void printAC(pecodnadb_t seqdb,uint32_t seqdbsize) #ifdef MASKEDCODE
void setoktaxforspecificity (ppairtree_t pairs)
{ {
uint32_t i; uint32_t i;
uint32_t j;
uint32_t k;
uint32_t l;
int taxcount;
int32_t taxid;
for (i=0; i< seqdbsize;i++) for (i = 0; i < pairs->paircount; i++)
printf("%15s (%8d bp ): %s\n",seqdb[i]->AC,seqdb[i]->SQ_length,seqdb[i]->DE); {
for (j = 0; j < pairs->pairs[i].taxsetindex; j++)
{
for (k = 0; k < pairs->pairs[i].taxset[j].amplifiaindex; k++)
{
taxid = 0;
taxcount = 0;
for (l = 0; l < pairs->pairs[i].ampsetindex; l++)
{
/*compare only char pointers because for equal strings we have same pointer in both sets*/
if (pairs->pairs[i].taxset[j].amplifia[k] == pairs->pairs[i].ampset[l].amplifia)
{
if (pairs->pairs[i].ampset[l].seqidindex > 1)
{
taxcount += pairs->pairs[i].ampset[l].seqidindex;
break;
}
if (taxid != pairs->pairs[i].ampset[l].taxonids[0])
{
if (!taxid) taxid = pairs->pairs[i].ampset[l].taxonids[0];
taxcount++;
}
if (taxcount > 1) break;
}
}
if (taxcount == 1) pairs->pairs[i].oktaxoncount++;
}
}
}
} }
#endif
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
pecodnadb_t seqdb; /* of type ecoseq_t */ pecodnadb_t seqdb; /* of type ecoseq_t */
@ -592,37 +402,19 @@ int main(int argc, char **argv)
uint32_t i; uint32_t i;
pwordcount_t words; pwordcount_t words;
// pwordcount_t words2;
pprimercount_t primers; pprimercount_t primers;
ppairtree_t pairs; ppairtree_t pairs;
int32_t rankdbstats = 0; int32_t rankdbstats = 0;
CNNParams nnparams; //printcurrenttime();
//return 0;
initoptions(&options); initoptions(&options);
while ((carg = getopt(argc, argv, "hAfvcUDSpbE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:T:k:M:")) != -1) { while ((carg = getopt(argc, argv, "hcUDSd:l:L:e:i:r:q:3:s:x:t:")) != -1) {
switch (carg) { switch (carg) {
/* ---------------------------- */
case 'v': /* set in single strand mode */
/* ---------------------------- */
options.statistics=TRUE;
break;
/* ---------------------------- */
case 'f': /* set in single strand mode */
/* ---------------------------- */
options.filtering=FALSE;
break;
/* ---------------------------- */
case 'A': /* set in single strand mode */
/* ---------------------------- */
options.printAC=TRUE;
break;
/* -------------------- */ /* -------------------- */
case 'd': /* database name */ case 'd': /* database name */
/* -------------------- */ /* -------------------- */
@ -713,23 +505,6 @@ int main(int argc, char **argv)
"Error on restricted_taxid reallocation"); "Error on restricted_taxid reallocation");
sscanf(optarg,"%d",&(options.restricted_taxid[options.r])); sscanf(optarg,"%d",&(options.restricted_taxid[options.r]));
options.r++; options.r++;
break;
/* ------------------------------------------ */
case 'E': /* stores the restricting search taxonomic id */
/* ------------------------------------------ */
options.exception_taxid = ECOREALLOC(options.exception_taxid,sizeof(int32_t)*(options.e+1),
"Error on exception_taxid reallocation");
sscanf(optarg,"%d",&(options.exception_taxid[options.e]));
options.e++;
break;
/* -------------------- */
case 'R': /* reference sequence */
/* -------------------- */
options.reference = ECOMALLOC(strlen(optarg)+1,
"Error on prefix allocation");
strcpy(options.reference,optarg);
break; break;
/* --------------------------------- */ /* --------------------------------- */
@ -741,72 +516,18 @@ int main(int argc, char **argv)
options.g++; options.g++;
break; break;
/* --------------------------------- */ /* -------------------- */
case 'O': /* set primer size */
/* --------------------------------- */
sscanf(optarg,"%d",&(options.primer_length));
break;
/* --------------------------------- */
case 'm': /* set salt method */
/* --------------------------------- */
sscanf(optarg,"%d",&(options.saltmethod));
break;
/* --------------------------------- */
case 'a': /* set salt */
/* --------------------------------- */
sscanf(optarg,"%f",&(options.salt));
break;
/* -------------------- */
case 'c': /* sequences are circular */ case 'c': /* sequences are circular */
/* --------------------------------- */ /* --------------------------------- */
options.circular = 1; options.circular = 1;
break; break;
/* -------------------- */
case 'p': /* print sets of primers */
/* --------------------------------- */
//options.print_sets_of_primers = TRUE;
break;
/* --------------------------------- */
case 'T': /* Ignore pairs having specificity below this Threshold */
/* --------------------------------- */
sscanf(optarg,"%f",&(options.specificity_threshold));
break;
/* --------------------------------- */
case 'M': /* Max link percentage for graph */
/* --------------------------------- */
sscanf(optarg,"%f",&(options.max_links_percent));
break;
/* --------------------------------- */
case 'k': /* links count */
/* --------------------------------- */
sscanf(optarg,"%d",&(options.links_cnt));
break;
case 'b':
options.filter_on_links = TRUE;
break;
case '?': /* bad option */ case '?': /* bad option */
/* -------------------- */ /* -------------------- */
errflag++; errflag++;
} }
} }
options.pnparm = &nnparams;
if (options.saltmethod != 2) //if not SALT_METHOD_OWCZARZY
options.saltmethod = SALT_METHOD_SANTALUCIA; //then force SALT_METHOD_SANTALUCIA
if (options.salt < 0.01 || options.salt > 0.3) //if salt value out of literature values
options.salt = DEF_SALT; //set to default
nparam_InitParams(&nnparams, DEF_CONC_PRIMERS,DEF_CONC_SEQUENCES,options.salt,options.saltmethod);
fprintf(stderr,"Reading taxonomy database ..."); fprintf(stderr,"Reading taxonomy database ...");
taxonomy = read_taxonomy(options.prefix,0); taxonomy = read_taxonomy(options.prefix,0);
@ -816,21 +537,7 @@ int main(int argc, char **argv)
fprintf(stderr,"Reading sequence database ...\n"); fprintf(stderr,"Reading sequence database ...\n");
seqdb = readdnadb(options.prefix,taxonomy,&seqdbsize, &options); seqdb = readdnadb(options.prefix,&seqdbsize);
if (options.printAC)
{
printAC(seqdb,seqdbsize);
exit(0);
}
if (options.reference)
for (i=0; i < seqdbsize;i++)
if (strcmp(seqdb[i]->AC,options.reference)==0)
{
options.refseq=seqdb[i];
options.refseqid=i;
fprintf(stderr,"Reference sequence %s identified\n",options.reference);
}
fprintf(stderr,"Ok\n"); fprintf(stderr,"Ok\n");
fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize); fprintf(stderr,"Sequence read : %d\n",(int32_t)seqdbsize);
@ -850,23 +557,11 @@ int main(int argc, char **argv)
fprintf(stderr,"\nIndexing words in sequences\n"); fprintf(stderr,"\nIndexing words in sequences\n");
printcurrenttimeinmilli();
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options); words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
fprintf(stderr,"\n Strict primer count : %d\n",words->size); printcurrenttimeinmilli();
/*/TR Testing fprintf(stderr,"\n Strict primer count : %d\n",words->size);
fprintf(stderr,"\nReducing for debugging\n");
words = reduce_words_to_debug (words, &options);
///*/
// options.filtering=FALSE;
// words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
// fprintf(stderr,"\n Strict primer count : %d\n",words2->size);
//
// fprintf(stderr,"\n\n Primer sample : \n");
// for (i=0; i<words->size; i++)
// fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);
// fprintf(stderr,"\n\n Primer sample : \n");
// for (i=0; i<words2->size; i++)
// fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words2->words[i],options.primer_length),words2->strictcount[i]);
if (options.no_multi_match) if (options.no_multi_match)
{ {
@ -879,6 +574,7 @@ int main(int argc, char **argv)
for (i=0; i<MINI(10,words->size); i++) for (i=0; i<MINI(10,words->size); i++)
fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]); fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);
fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n"); fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n");
for (i=0;i<seqdbsize;i++) for (i=0;i<seqdbsize;i++)
{ {
@ -888,13 +584,7 @@ int main(int argc, char **argv)
ECOFREE(words->strictcount,"Free strict primer count table"); ECOFREE(words->strictcount,"Free strict primer count table");
if (options.error_max == 0)//aho, if(options.error_max == 0 && 0) old primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);
primers = ahoc_lookforStrictPrimers (seqdb,seqdbsize,insamples,words,&options);
else
primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);
//for (i=0; i<primers->size; i++)
// print_wordwith_positions (primers->primers[i], seqdbsize, &options);
ECOFREE(words->words,"Free strict primer table"); ECOFREE(words->words,"Free strict primer table");
ECOFREE(words,"Free strict primer structure"); ECOFREE(words,"Free strict primer structure");
@ -909,114 +599,17 @@ int main(int argc, char **argv)
fprintf(stderr,"\n"); fprintf(stderr,"\n");
/*TR: Added*/
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
pairs = buildPrimerPairs(seqdb, seqdbsize, primers, &options);
printpairs (pairs, &options,taxonomy, seqdb); // setoktaxforspecificity (&pairs);
printpairs (pairs, &options);
//ECOFREE(pairs.pairs,"Free pairs table");
return 0; return 0;
} }
#define DEBUG_WORDS_CNT 14
pwordcount_t reduce_words_to_debug (pwordcount_t words, poptions_t options)
{
uint32_t i, k;
pwordcount_t new_words;
char *rwrd;
char dwrd[20];
/*char *strict_words[DEBUG_WORDS_CNT] = {"GAGTCTCTGCACCTATCC", "GCAATCCTGAGCCAAATC", "ACCCCTAACCACAACTCA",
"TCCGAACCGACTGATGTT", "GAAGCTTGGGTGAAACTA", "GGAGAACCAGCTAGCTCT", "GCTGGTTCTCCCCGAAAT",
"TCGATTTGGTACCGCTCT", "AAAGGAGAGAGAGGGATT", "GGATTGCTAATCCGTTGT", "CCCCCATCGTCTCACTGG",
"TGAGGCGCAGCAGTTGAC", "GCGCTACGGCGCTGAAGT", "TTTCCTGGGAGTATGGCA"};*/
char *strict_words[DEBUG_WORDS_CNT] = {"CTCCGGTCTGAACTCAGA", "TGTTGGATCAGGACATCC", "TAGATAGAAACCGACCTG",
"TGGTGCAGCCGCTATTAA", "AGATAGAAACTGACCTGG", "TGGTGCAGCCGCTATTAA", "CTAATGGTGCAGCCGCTA",
"TAGAAACTGACCTGGATT", "AGATAGAAACCGACCTGG", "ATGGTGCAGCCGCTATTA", "ATAGATAGAAACCGACCT",
"GCCGCTATTAAGGGTTCG", "GGTGCAGCCGCTATTAAG", "TAGAAACTGACCTGGATT"};
int word_seen[DEBUG_WORDS_CNT];
new_words = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure");
new_words->inseqcount = words->inseqcount;
new_words->outseqcount = words->outseqcount;
new_words->size = DEBUG_WORDS_CNT;
new_words->strictcount = ECOMALLOC((new_words->size*sizeof(uint32_t)), "Cannot allocate memory for word count table");
new_words->words = ECOMALLOC(new_words->size*sizeof(word_t), "I cannot allocate memory for debug words");
for (k = 0; k < DEBUG_WORDS_CNT; k++)
word_seen[k] = 0;
for (i=0; i < words->size; i++)
{
rwrd = ecoUnhashWord(words->words[i],options->primer_length);
strcpy (dwrd, rwrd);
rwrd = ecoUnhashWord(ecoComplementWord(words->words[i],options->primer_length),options->primer_length);
for (k = 0; k < DEBUG_WORDS_CNT; k++)
{
if (strcmp (dwrd, strict_words[k]) == 0) break;
if (strcmp (rwrd, strict_words[k]) == 0) break;
}
if (k < DEBUG_WORDS_CNT)
{
if (word_seen[k] == 0)
{
new_words->words[k] = words->words[i];
new_words->strictcount[k] = words->strictcount[i];
}
word_seen[k]++;
}
}
fprintf (stderr, "Debug Words Info:\n");
for (k = 0; k < DEBUG_WORDS_CNT; k++)
fprintf (stderr, "%s:%d\n", strict_words[k], word_seen[k]);
//clean input wods;
ECOFREE(words->words,"Clean word table");
ECOFREE(words->strictcount,"Clean word count table");
ECOFREE(words,"Clean word structure");
return new_words;
}
void print_wordwith_positions (primer_t prm, uint32_t seqdbsize, poptions_t options)
{
char *wrd;
uint32_t i, j;
char *twrd = "GCCTGTTTACCAAAAACA";
wrd = ecoUnhashWord(prm.word,options->primer_length);
if (strcmp (twrd, wrd) == 0)
{
printf ("Positions for Word: %s\n", wrd);
for (i=0; i<seqdbsize; i++)
{
if (prm.directCount[i] > 0)
{
printf ("%d:", i);
if (prm.directCount[i] == 1)
printf ("%d", prm.directPos[i].value);
else
for (j=0; j<prm.directCount[i]; j++)
printf ("%d,", prm.directPos[i].pointer[j]);
printf (" ");
}
}
printf ("\n");
for (i=0; i<seqdbsize; i++)
{
if (prm.reverseCount[i] > 0)
{
printf ("%d:", i);
if (prm.reverseCount[i] == 1)
printf ("%d", prm.reversePos[i].value);
else
for (j=0; j<prm.reverseCount[i]; j++)
printf ("%d,", prm.reversePos[i].pointer[j]);
printf (" ");
}
}
printf ("\n");
}
}

View File

@ -1,10 +1,9 @@
MACHINE=MAC_OS_X MACHINE=MAC_OS_X
LIBPATH= -LlibecoPCR -Llibecoprimer -Llibthermo LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $< MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
CC=gcc CC=gcc
CFLAGS= -W -Wall -m64 -g CFLAGS= -W -Wall -O5 -m64 -fast -g
#CFLAGS= -W -Wall -O5 -m64 -g
#CFLAGS= -W -Wall -O0 -m64 -g #CFLAGS= -W -Wall -O0 -m64 -g
#CFLAGS= -W -Wall -O5 -fast -g #CFLAGS= -W -Wall -O5 -fast -g

15
src/libecoPCR/ecoError.P Normal file
View File

@ -0,0 +1,15 @@
ecoError.o ecoError.P : ecoError.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -0,0 +1,15 @@
ecoIOUtils.o ecoIOUtils.P : ecoIOUtils.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

15
src/libecoPCR/ecoMalloc.P Normal file
View File

@ -0,0 +1,15 @@
ecoMalloc.o ecoMalloc.P : ecoMalloc.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -17,10 +17,10 @@ void eco_untrace_memory_allocation()
void ecoMallocedMemory() void ecoMallocedMemory()
{ {
//eco_amount_malloc; return eco_amount_malloc;
} }
void *eco_malloc(int64_t chunksize, void *eco_malloc(int32_t chunksize,
const char *error_message, const char *error_message,
const char *filename, const char *filename,
int32_t line) int32_t line)
@ -47,27 +47,19 @@ void *eco_malloc(int64_t chunksize,
} }
void *eco_realloc(void *chunk, void *eco_realloc(void *chunk,
int64_t newsize, int32_t newsize,
const char *error_message, const char *error_message,
const char *filename, const char *filename,
int32_t line) int32_t line)
{ {
void *newchunk; void *newchunk;
if (newsize == 0)
{
if (chunk)
free(chunk);
return NULL;
}
newchunk = realloc(chunk,newsize); newchunk = realloc(chunk,newsize);
if (!newchunk) if (!newchunk)
{
fprintf(stderr,"Requested memory : %d\n",newsize);
ecoError(ECO_MEM_ERROR,error_message,filename,line); ecoError(ECO_MEM_ERROR,error_message,filename,line);
}
if (!chunk) if (!chunk)
eco_chunk_malloc++; eco_chunk_malloc++;

View File

@ -130,14 +130,14 @@ typedef struct {
int32_t is_big_endian(); int32_t is_big_endian();
int32_t swap_int32_t(int32_t); int32_t swap_int32_t(int32_t);
void *eco_malloc(int64_t chunksize, void *eco_malloc(int32_t chunksize,
const char *error_message, const char *error_message,
const char *filename, const char *filename,
int32_t line); int32_t line);
void *eco_realloc(void *chunk, void *eco_realloc(void *chunk,
int64_t chunksize, int32_t chunksize,
const char *error_message, const char *error_message,
const char *filename, const char *filename,
int32_t line); int32_t line);
@ -219,7 +219,7 @@ econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
* @return pointer to a taxonomy index structure * @return pointer to a taxonomy index structure
*/ */
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2); ecotxidx_t *read_taxonomyidx(const char *filename);
ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName); ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName);

5
src/libecoPCR/ecodna.P Normal file
View File

@ -0,0 +1,5 @@
ecodna.o ecodna.P : ecodna.c /usr/include/string.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h ecoPCR.h \
/usr/include/stdio.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h

View File

@ -0,0 +1,5 @@
ecofilter.o ecofilter.P : ecofilter.c ecoPCR.h /usr/include/stdio.h \
/usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/sys/cdefs.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h /usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h

View File

@ -17,4 +17,3 @@ int eco_is_taxid_included( ecotaxonomy_t *taxonomy,
return 0; return 0;
} }

15
src/libecoPCR/econame.P Normal file
View File

@ -0,0 +1,15 @@
econame.o econame.P : econame.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
/usr/include/sys/wait.h /usr/include/sys/signal.h \
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

15
src/libecoPCR/ecorank.P Normal file
View File

@ -0,0 +1,15 @@
ecorank.o ecorank.P : ecorank.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
/usr/include/sys/wait.h /usr/include/sys/signal.h \
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

19
src/libecoPCR/ecoseq.P Normal file
View File

@ -0,0 +1,19 @@
ecoseq.o ecoseq.P : ecoseq.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/string.h /usr/include/zlib.h /usr/include/zconf.h \
/usr/include/sys/types.h /usr/include/unistd.h \
/usr/include/sys/unistd.h /usr/include/sys/select.h \
/usr/include/sys/_select.h

View File

@ -100,8 +100,6 @@ ecoseq_t *readnext_ecoseq(FILE *f)
int32_t comp_status; int32_t comp_status;
unsigned long int seqlength; unsigned long int seqlength;
int32_t rs; int32_t rs;
char *c;
int32_t i;
raw = read_ecorecord(f,&rs); raw = read_ecorecord(f,&rs);
@ -146,10 +144,6 @@ ecoseq_t *readnext_ecoseq(FILE *f)
if (comp_status != Z_OK) if (comp_status != Z_OK)
ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data"); ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
for (c=seq->SQ,i=0;i<seqlength;c++,i++)
*c=toupper(*c);
// fprintf(stderr,"seq name : %30s seq size : %d\n",seq->DE,seq->SQ_length);
return seq; return seq;
} }

15
src/libecoPCR/ecotax.P Normal file
View File

@ -0,0 +1,15 @@
ecotax.o ecotax.P : ecotax.c ecoPCR.h /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/string.h /usr/include/stdlib.h /usr/include/available.h \
/usr/include/sys/wait.h /usr/include/sys/signal.h \
/usr/include/sys/appleapiopts.h /usr/include/machine/signal.h \
/usr/include/i386/signal.h /usr/include/i386/_structs.h \
/usr/include/sys/_structs.h /usr/include/machine/_structs.h \
/usr/include/mach/i386/_structs.h /usr/include/sys/resource.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h

View File

@ -10,41 +10,23 @@ static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
* @param pointer to the database (.tdx file) * @param pointer to the database (.tdx file)
* @return a ecotxidx_t structure * @return a ecotxidx_t structure
*/ */
ecotxidx_t *read_taxonomyidx(const char *filename,const char *filename2) ecotxidx_t *read_taxonomyidx(const char *filename)
{ {
int32_t count; int32_t count;
int32_t count2;
FILE *f; FILE *f;
FILE *f2;
ecotxidx_t *index; ecotxidx_t *index;
int32_t i; int32_t i;
f = open_ecorecorddb(filename,&count,1); f = open_ecorecorddb(filename,&count,1);
f2 = open_ecorecorddb(filename2,&count2,0);
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1),
index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
"Allocate taxonomy"); "Allocate taxonomy");
index->count=count+count2; index->count=count;
fprintf(stderr,"Reading %d taxa...\n",count);
for (i=0; i < count; i++){ for (i=0; i < count; i++){
readnext_ecotaxon(f,&(index->taxon[i])); readnext_ecotaxon(f,&(index->taxon[i]));
index->taxon[i].parent=index->taxon + (int32_t)index->taxon[i].parent; index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
} }
if (count2>0)
fprintf(stderr,"Reading %d local taxa...\n",count2);
else
fprintf(stderr,"No local taxon\n");
for (i=0; i < count2; i++){
readnext_ecotaxon(f2,&(index->taxon[count+i]));
index->taxon[count+i].parent=index->taxon + (int32_t)index->taxon[count+i].parent;
}
return index; return index;
} }
@ -129,36 +111,33 @@ ecotaxonomy_t *read_taxonomy(const char *prefix,int32_t readAlternativeName)
{ {
ecotaxonomy_t *tax; ecotaxonomy_t *tax;
char *filename; char *filename;
char *filename2;
int buffsize; int buffsize;
tax = ECOMALLOC(sizeof(ecotaxonomy_t), tax = ECOMALLOC(sizeof(ecotaxonomy_t),
"Allocate taxonomy structure"); "Allocate taxonomy structure");
buffsize = strlen(prefix)+10; buffsize = strlen(prefix)+10;
filename = ECOMALLOC(buffsize, filename = ECOMALLOC(buffsize,
"Allocate filename"); "Allocate filename");
filename2= ECOMALLOC(buffsize,
"Allocate filename");
snprintf(filename,buffsize,"%s.rdx",prefix); snprintf(filename,buffsize,"%s.rdx",prefix);
tax->ranks = read_rankidx(filename); tax->ranks = read_rankidx(filename);
snprintf(filename,buffsize,"%s.tdx",prefix); snprintf(filename,buffsize,"%s.tdx",prefix);
snprintf(filename2,buffsize,"%s.ldx",prefix);
tax->taxons = read_taxonomyidx(filename,filename2); tax->taxons = read_taxonomyidx(filename);
if (readAlternativeName) if (readAlternativeName)
{ {
snprintf(filename,buffsize,"%s.ndx",prefix); snprintf(filename,buffsize,"%s.ndx",prefix);
tax->names=read_nameidx(filename,tax); tax->names=read_nameidx(filename,tax);
} }
else else
tax->names=NULL; tax->names=NULL;
return tax; return tax;
} }

View File

@ -13,10 +13,7 @@ SOURCES = goodtaxon.c \
pairtree.c \ pairtree.c \
pairs.c \ pairs.c \
taxstats.c \ taxstats.c \
apat_search.c \ apat_search.c
filtering.c \
PrimerSets.c \
ahocorasick.c
SRCS=$(SOURCES) SRCS=$(SOURCES)

File diff suppressed because it is too large Load Diff

View File

@ -1,58 +0,0 @@
#ifndef PRIMERSETS_H_
#define PRIMERSETS_H_
#include "ecoprimer.h"
#define PRIMERS_IN_SET_COUNT 10
typedef struct {
int *set_wellIdentifiedTaxa;
int32_t set_pairs[PRIMERS_IN_SET_COUNT];
float set_specificity;
float set_coverage;
float set_lmean;
float set_lcov;
float set_score;
int32_t set_intaxa;
int32_t set_wi_cnt;
}pairset;
typedef struct{
ppair_t* sortedpairs;
int32_t sorted_count;
pecodnadb_t seqdb;
poptions_t options;
}SetParams;
typedef struct{
float t_spc; //specificity contribution
float t_cov; //coverage contribution
float t_lmd; //link spread difference
float len; //length
float score; //score
}primerscore;
void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams);
void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams);
float get_links_distribution (int prb_idx, pairset *prob_set, SetParams *pparams);
pairset build_primers_set_greedy_spc (SetParams *pparams);
void get_set_mean_cov_stats (pairset *prob_set, SetParams *pparams);
void some_other_set_possibilities (pairset *pair_set,
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
void sets_by_SimulatedAnealing (pairset *pair_set,
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
void sets_by_TabuSearch (pairset *pair_set,
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
pairset * sets_by_BruteForce (ppair_t * sortedpairs,
int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
pairset * extend_set_randomly (pairset *pair_set, SetParams *params, int extend_to_cnt);
void build_and_print_sets (ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
int32_t get_next_option_increasing_cov (pairset *pair_set, SetParams *pparams);
void reset_set_props (pairset *pair_set, SetParams *pparams);
void primers_graph_graphviz (ppair_t * sortedpairs,
int32_t sorted_count, poptions_t options);
size_t primers_changeSortedArray (ppair_t ** pairs,
size_t sorted_count, poptions_t options);
size_t primers_filterWithGivenLinks (ppair_t ** pairs,
size_t sorted_count, poptions_t options);
#endif

View File

@ -1,479 +0,0 @@
/*
* ahocorasick.h
*
* Created on: 26 march 2011
* Author: tiayyba
*/
#include <inttypes.h>
#include "hashencoder.h"
#include "ahocorasick.h"
void ahoc_graphKeywordTree (aho_state *root);
aho_state *groot = NULL; //just for graph testing
#define BASEATINDEX(w, l, i) (uint8_t)((((w)&(0x3LLU<<(((l)-(i))*2)))>>(((l)-(i))*2)) & 0x3LLU)
void ahoc_addOutputElement (aho_state *node, bool_t isdirect, uint32_t idx)
{
if (!node) return;
if (node->output.count == 0)
node->output.out_set = ECOMALLOC(sizeof(aho_output),
"Cannot allocate memory for aho-corasick state output element");
else
node->output.out_set = ECOREALLOC(node->output.out_set, (node->output.count+1)*sizeof(aho_output),
"Cannot allocate memory for aho-corasick state output element");
node->output.out_set[node->output.count].wordidx = idx;
node->output.out_set[node->output.count].isdirect = isdirect;
node->output.count++;
}
//is the passed output element in the set
bool_t ahoc_isOutputIn (aho_state *node, aho_output ot)
{
uint32_t i;
for (i=0; i<node->output.count; i++)
if (node->output.out_set[i].isdirect == ot.isdirect && node->output.out_set[i].wordidx == ot.wordidx) return TRUE;
return FALSE;
}
//take union of output of the two nodes and put in node1
void ahoc_unionOutputElements (aho_state *node1, aho_state *node2)
{
uint32_t i;
for (i=0; i<node2->output.count; i++)
if (ahoc_isOutputIn (node1, node2->output.out_set[i]) == FALSE)
ahoc_addOutputElement (node1, node2->output.out_set[i].isdirect, node2->output.out_set[i].wordidx);
}
void ahoc_addKeyword (aho_state *root, word_t w, bool_t isdirect, uint32_t idx, poptions_t options)
{
uint32_t i;
aho_state *nextnode = root;
uint8_t basecode;
static uint32_t state_id = 0;
//fprintf (stderr, "%s\n", ecoUnhashWord(w, options->primer_length));
for (i=1; i<=options->primer_length; i++)
{
basecode = BASEATINDEX (w, options->primer_length, i);
//fprintf (stderr, "%d", basecode);
if (nextnode->next[basecode] == NULL)
{
//add new state
nextnode->next[basecode] = ECOMALLOC(sizeof(aho_state),
"Cannot allocate memory for aho-corasick state");
nextnode = nextnode->next[basecode];
//initialize state
nextnode->id = ++state_id;
nextnode->next[0]=nextnode->next[1]=nextnode->next[2]=nextnode->next[3]=NULL;
nextnode->fail = NULL;
nextnode->output.count = 0;
}
else
nextnode = nextnode->next[basecode];
}
//fprintf (stderr, "\n", basecode);
//new pattern addess so add node ouptup element
ahoc_addOutputElement (nextnode, isdirect, idx);
}
void ahoc_buildKeywordTree (aho_state *root, pwordcount_t words, poptions_t options)
{
uint32_t i;
if (!root) return;
//init root
root->id = 0;
root->next[0]=root->next[1]=root->next[2]=root->next[3]=NULL;
root->fail = NULL;
root->output.count = 0;
//now add each word as a pattern in the keyword tree
for (i=0; i<words->size; i++)
{
//add direct word
word_t w=WORD(words->words[i]);
ahoc_addKeyword (root, w, TRUE, i, options);
//add reverse word
w=ecoComplementWord(w,options->primer_length);
ahoc_addKeyword (root, w, FALSE, i, options);
}
//loop on root if some base has no out going edge from roots
for (i=0; i<4; i++)
if (root->next[i] == NULL)
root->next[i] = root;
}
void ahoc_enqueue (aho_queue *ahoqueue, aho_state *node)
{
queue_node *q;
if (node == NULL) return;
q = ECOMALLOC(sizeof(queue_node),
"Cannot allocate memory for aho-corasick queue node");
q->state_node = node;
q->next = NULL;
if (ahoqueue->first == NULL)
{
ahoqueue->first = q;
ahoqueue->last = q;
}
else
{
ahoqueue->last->next = q;
ahoqueue->last = q;
}
}
aho_state *ahoc_dequeue (aho_queue *ahoqueue)
{
aho_state *node = NULL;
queue_node *q;
if (ahoqueue->first == NULL) return node;
q = ahoqueue->first;
ahoqueue->first = q->next;
node = q->state_node;
ECOFREE (q, "Cannot free memory for aho-corasick queue node");
return node;
}
//set fail links and output sets for the keyword tree
void ahoc_updateForFailAndOutput (aho_state *root)
{
int32_t i;
aho_queue Q;
aho_state *node_r;
aho_state *node_u;
aho_state *node_v;
//empty queue
Q.first = NULL;
Q.last = NULL;
//for us alphabet has 4 elements, A=0, C=1, G=2 and T=3
for (i=0; i<4; i++)
{
if (root->next[i] != root && root->next[i] != NULL)
{
root->next[i]->fail = root;
ahoc_enqueue (&Q, root->next[i]);
}
}
//while queue not empty
while (Q.first != NULL)
{
node_r = ahoc_dequeue (&Q);
for (i=0; i<4; i++)
{
if (node_r->next[i] != NULL)
{
node_u = node_r->next[i];
ahoc_enqueue (&Q, node_u);
node_v = node_r->fail;
while (node_v->next[i] == NULL)
node_v = node_v->fail;
node_u->fail = node_v->next[i];
ahoc_unionOutputElements (node_u, node_u->fail);
}
}
}
}
void ahoc_freeKeywordTree (aho_state *node)
{
int i;
for (i=0; i<4; i++)
if (node->next[i])
ahoc_freeKeywordTree (node->next[i]);
if (node->output.count > 0)
ECOFREE (node->output.out_set, "Free failed for node output");
ECOFREE (node, "Free failed for node");
}
pprimercount_t ahoc_lookforStrictPrimers (pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
pwordcount_t words,poptions_t options)
{
aho_state automaton_root;
aho_state *curr_state;
//uint32_t inSequenceQuorum;
uint32_t outSequenceQuorum;
pprimer_t data;
pprimercount_t primers;
uint32_t i, j, k;
int32_t pos;
uint32_t lmax;
char *base;
int8_t code;
uint32_t goodPrimers=0;
static int iii=0;
//inSequenceQuorum = (uint32_t)floor((float)exampleCount * options->sensitivity_quorum);
outSequenceQuorum = (uint32_t)floor((float)(seqdbsize-exampleCount) * options->false_positive_quorum);
//fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",inSequenceQuorum,exampleCount);
fprintf(stderr," Primers should not be present in more than %d/%d counterexample sequences\n",outSequenceQuorum,(seqdbsize-exampleCount));
data = ECOMALLOC(words->size * sizeof(primer_t),
"Cannot allocate memory for fuzzy matching results");
for (i=0; i < words->size; i++)
{
data[i].word=WORD(words->words[i]);
data[i].inexample = 0;
data[i].outexample= 0;
data[i].directCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
"Cannot allocate memory for primer position");
data[i].directPos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
"Cannot allocate memory for primer position");
data[i].reverseCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
"Cannot allocate memory for primer position");
data[i].reversePos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
"Cannot allocate memory for primer position");
}
//build keywords automaton
ahoc_buildKeywordTree (&automaton_root, words, options);
//set fail links and output sets
ahoc_updateForFailAndOutput (&automaton_root);
//debug; print keywordtree in a gv file
//ahoc_graphKeywordTree (&automaton_root);
//loop on each sequence for its each base and find words
for (i=0; i < seqdbsize; i++)
{
if(database[i]->SQ_length <= options->primer_length) continue;
lmax = database[i]->SQ_length;
if (!options->circular)
lmax += options->primer_length-1;
curr_state = &automaton_root;
for (j=0,base=database[i]->SQ; j<lmax; j++,base++)
{
if (i==(uint32_t)database[i]->SQ_length) base=database[i]->SQ;
//code = encoder[(*base) - 'A'];
code = *base;
//if (iii++ < 30)
// fprintf (stderr, "%d:%d,", *base, code);
if (code < 0 || code > 3)
{
//if error char, start from root for next character
//+forget any incomplete words
curr_state = &automaton_root;
continue;
}
while (curr_state->next[code] == NULL) curr_state = curr_state->fail;
curr_state = curr_state->next[code];
//start position of primer is options->primer_length-1 chars back
pos = j-options->primer_length+1;
if (pos < 0) pos = database[i]->SQ_length+pos;
//set output, if there is some output on this state then
//+all words in the output set complete here, so increment their
//+found properties for current sequence
for (k=0; k<curr_state->output.count; k++)
{
if (curr_state->output.out_set[k].isdirect)
data[curr_state->output.out_set[k].wordidx].directCount[i]++;
else
data[curr_state->output.out_set[k].wordidx].reverseCount[i]++;
if (options->no_multi_match)
{
if ((data[curr_state->output.out_set[k].wordidx].directCount[i] +
data[curr_state->output.out_set[k].wordidx].reverseCount[i]) > 1)
//since multimach not allowd, set an indication on 1st seq position that
//+ a multimatch was found, so that this word will be filtered out
//+ and because of first postion we wont have to search the whole array
//+ to find if it voilated nomultimatch constraint for some seq
data[curr_state->output.out_set[k].wordidx].directCount[0] = 2;
else
{
if (curr_state->output.out_set[k].isdirect)
//direct word found on jth position of ith sequence
data[curr_state->output.out_set[k].wordidx].directPos[i].value = (uint32_t)pos;
else
//reverse word found on jth position of ith sequence
data[curr_state->output.out_set[k].wordidx].reversePos[i].value = (uint32_t)pos;
}
}
else
{
//okay multi match allowed
if (curr_state->output.out_set[k].isdirect)
{
if (data[curr_state->output.out_set[k].wordidx].directCount[i] == 1)
data[curr_state->output.out_set[k].wordidx].directPos[i].value = (uint32_t)pos;
else
{
//need to create or extend the positions list
if (data[curr_state->output.out_set[k].wordidx].directCount[i] == 2)
{
//for second element, first was put in .value, so dont forget to copy that in the array too
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer = ECOMALLOC(2 * sizeof(uint32_t),
"Cannot allocate memory for primer position");
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[0] = data[curr_state->output.out_set[k].wordidx].directPos[i].value;
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[1] = (uint32_t)pos;
}
else
{
//for third or greater element
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer = ECOREALLOC(data[curr_state->output.out_set[k].wordidx].directPos[i].pointer,
data[curr_state->output.out_set[k].wordidx].directCount[i] * sizeof(uint32_t),
"Cannot allocate memory for primer position");
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[data[curr_state->output.out_set[k].wordidx].directCount[i]-1] = (uint32_t)pos;
}
}
}
else
{
if (data[curr_state->output.out_set[k].wordidx].reverseCount[i] == 1)
data[curr_state->output.out_set[k].wordidx].reversePos[i].value = (uint32_t)pos;
else
{
//need to create or extend the positions list
if (data[curr_state->output.out_set[k].wordidx].reverseCount[i] == 2)
{
//for second element, first was put in .value, so dont forget to copy that in the array too
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer = ECOMALLOC(2 * sizeof(uint32_t),
"Cannot allocate memory for primer position");
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[0] = data[curr_state->output.out_set[k].wordidx].reversePos[i].value;
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[1] = (uint32_t)pos;
}
else
{
//for third or greater element
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer = ECOREALLOC(data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer,
data[curr_state->output.out_set[k].wordidx].reverseCount[i] * sizeof(uint32_t),
"Cannot allocate memory for primer position");
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[data[curr_state->output.out_set[k].wordidx].reverseCount[i]-1] = (uint32_t)pos;
}
}
}
}
//dont forget to increment inexample or outexample count, but only once for a sequence
if ((data[curr_state->output.out_set[k].wordidx].directCount[i] +
data[curr_state->output.out_set[k].wordidx].reverseCount[i]) == 1)
{
if (database[i]->isexample)
data[curr_state->output.out_set[k].wordidx].inexample++;
else
data[curr_state->output.out_set[k].wordidx].outexample++;
}
}
}
}
//Only thing that remains is to remove the failed words
for (i=0,j=0; i<words->size; i++)
{
fprintf(stderr,"Primers %5d/%lld analyzed => sequence : %s in %d example and %d counterexample sequences \r",
i+1,words->size,ecoUnhashWord(data[i].word,options->primer_length),
data[i].inexample,data[i].outexample);
//if (data[i].inexample < inSequenceQuorum || (data[i].directCount[0] == 2 && options->no_multi_match))
if (data[i].directCount[0] == 2 && options->no_multi_match)
{
//bad word, delete from the array
for (k=0; k<seqdbsize; k++)
{
if (data[i].directCount[k] > 1)
ECOFREE (data[i].directPos[k].pointer, "Cannot free position pointer.");
if (data[i].reverseCount[k] > 1)
ECOFREE (data[i].reversePos[k].pointer, "Cannot free position pointer.");
}
ECOFREE (data[i].directCount, "Cannot free position pointer.");
ECOFREE (data[i].directPos, "Cannot free position pointer.");
ECOFREE (data[i].reverseCount, "Cannot free position pointer.");
ECOFREE (data[i].reversePos, "Cannot free position pointer.");
}
else
{
//data[i].good = data[i].inexample >= inSequenceQuorum && data[i].outexample <= outSequenceQuorum;
data[i].good = data[i].outexample <= outSequenceQuorum;
goodPrimers+=data[i].good? 1:0;
if (j < i)
data[j] = data[i];
j++;
}
}
fprintf(stderr,"\n\nOn %lld analyzed primers %d respect quorum conditions\n",words->size,goodPrimers);
fprintf(stderr,"Conserved primers for further analysis : %d/%lld\n",j,words->size);
primers = ECOMALLOC(sizeof(primercount_t),"Cannot allocate memory for primer table");
primers->primers=ECOREALLOC(data,
j * sizeof(primer_t),
"Cannot reallocate memory for fuzzy matching results");
primers->size=j;
//free memory of keyword table
for (i=0; i<4; i++)
if (automaton_root.next[i] != &automaton_root)
ahoc_freeKeywordTree (automaton_root.next[i]);
return primers;
}
void ahoc_graphPrintNodesInfo (aho_state *node, FILE* gfile)
{
uint32_t i;
fprintf (gfile, "\"%d\"[\n", node->id);
fprintf (gfile, "label=\"%d\\n", node->id);
for (i=0; i<node->output.count; i++)
fprintf (gfile, "%d%c,", node->output.out_set[i].wordidx, node->output.out_set[i].isdirect?'d':'r');
fprintf (gfile, "\"\n];\n");
for (i=0; i<4; i++)
if (node->next[i] != NULL && node->next[i] != node)
ahoc_graphPrintNodesInfo (node->next[i], gfile);
}
void ahoc_graphPrintNodesLinks (aho_state *node, FILE* gfile)
{
uint32_t i;
static int j=0;
for (i=0; i<4; i++)
if (node->next[i] != NULL && node->next[i] != node)
{
fprintf (gfile, "\"%d\" -> \"%d\" [\n", node->id, node->next[i]->id);
fprintf (gfile, "label=\"%c\"\n];\n", "ACGT"[i]);
}
if (j++ < 40)
if (node->fail != NULL && node->fail != groot)
{
fprintf (gfile, "\"%d\" -> \"%d\" [\n", node->id, node->fail->id);
fprintf (gfile, "color= \"red\"\n];\n");
}
for (i=0; i<4; i++)
if (node->next[i] != NULL && node->next[i] != node)
ahoc_graphPrintNodesLinks (node->next[i], gfile);
}
void ahoc_graphKeywordTree (aho_state *root)
{
FILE *gfile;
groot=root;
gfile = fopen ("keywordtree.gv", "w");
fprintf (gfile, "digraph keywordtree {\n");
ahoc_graphPrintNodesInfo (root, gfile);
ahoc_graphPrintNodesLinks (root, gfile);
fprintf (gfile, "}\n");
fclose(gfile);
}

View File

@ -1,43 +0,0 @@
/*
* ahocorasick.h
*
* Created on: 26 march 2011
* Author: tiayyba
*/
#ifndef H_ahocorasick
#define H_ahocorasick
#include "ecoprimer.h"
typedef struct aho_output_t{
uint32_t wordidx; //index of strict word (dont save the word of 64B)
bool_t isdirect; //we need to find both direct and reverse words so we must know which one is it
}aho_output;
typedef struct aho_output_count_t{
uint32_t count;
aho_output *out_set;
}aho_output_count;
typedef struct aho_state_t{
int32_t id;
struct aho_state_t *next[4]; //for labels A=0,C=1,G=2 and T=3
struct aho_state_t *fail;
aho_output_count output;
}aho_state;
typedef struct queue_node_t {
aho_state *state_node;
struct queue_node_t *next;
}queue_node;
typedef struct{
queue_node *first;
queue_node *last;
}aho_queue;
pprimercount_t ahoc_lookforStrictPrimers (pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
pwordcount_t words,poptions_t options);
#endif /* H_ahocorasick */

View File

@ -0,0 +1,17 @@
apat_search.o apat_search.P : apat_search.c /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/string.h libstki.h ecotype.h apat.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
../libecoPCR/ecoPCR.h

View File

@ -103,7 +103,8 @@ int32_t ManberSub(pecoseq_t pseq,ppattern_t pat,
for (e = 0, pr = r + 3 ; e <= param->maxerr ; e++, pr += 2) for (e = 0, pr = r + 3 ; e <= param->maxerr ; e++, pr += 2)
*pr = cmask; *pr = cmask;
cmask = ~ param->omask; cmask = ~ param->omask; // A VOIR !!!!! omask (new) doit <20>tre compos<6F> de + et - ... Ancien omask : bits
/* init. scan */ /* init. scan */
data = (uint8_t*)(pseq->SQ); data = (uint8_t*)(pseq->SQ);

View File

@ -0,0 +1,17 @@
aproxpattern.o aproxpattern.P : aproxpattern.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h

View File

@ -39,8 +39,8 @@ ppattern_t buildPatternFromWord(word_t word, uint32_t patlen)
} }
#ifdef IS_UPPER #ifdef IS_UPPER(c)
#undef IS_UPPER #undef IS_UPPER(c)
#endif #endif
/* -------------------------------------------- */ /* -------------------------------------------- */
@ -97,8 +97,7 @@ pprimercount_t lookforAproxPrimer(pecodnadb_t database, uint32_t seqdbsize,uint3
params.circular = options->circular; params.circular = options->circular;
params.maxerr = options->error_max; params.maxerr = options->error_max;
// params.omask = (1 << options->strict_three_prime) -1; params.omask = (1 << options->strict_three_prime) -1;
params.omask = 0;
params.patlen = options->primer_length; params.patlen = options->primer_length;
positions.val=NULL; positions.val=NULL;

View File

@ -13,7 +13,6 @@
#include <stdio.h> #include <stdio.h>
#include "ecotype.h" #include "ecotype.h"
#include "../libecoPCR/ecoPCR.h" #include "../libecoPCR/ecoPCR.h"
#include "../libthermo/nnparams.h"
#include "apat.h" #include "apat.h"
#define DEBUG #define DEBUG
@ -48,7 +47,6 @@ typedef uint64_t word_t, *pword_t;
#define WORDMASK(s) ((1LLU << ((s) * 2)) -1) #define WORDMASK(s) ((1LLU << ((s) * 2)) -1)
#define LSHIFTWORD(x,s) (((x) << 2) & WORDMASK(s)) #define LSHIFTWORD(x,s) (((x) << 2) & WORDMASK(s))
#define RSHIFTWORD(x,s) (((x) & WORDMASK(s))>> 2) #define RSHIFTWORD(x,s) (((x) & WORDMASK(s))>> 2)
#define ERRORMASK(s) ((int32_t)((1LLU << (s)) -1))
#define RAPPENDBASE(x,s,c) (LSHIFTWORD((x),(s)) | (word_t)(c)) #define RAPPENDBASE(x,s,c) (LSHIFTWORD((x),(s)) | (word_t)(c))
#define LAPPENDBASE(x,s,c) (RSHIFTWORD((x),(s)) | ((word_t)((~(c)) & 3) << (((s)-1) *2))) #define LAPPENDBASE(x,s,c) (RSHIFTWORD((x),(s)) | ((word_t)((~(c)) & 3) << (((s)-1) *2)))
@ -67,27 +65,19 @@ typedef uint64_t word_t, *pword_t;
#define MINI(x,y) (((x) < (y)) ? (x):(y)) #define MINI(x,y) (((x) < (y)) ? (x):(y))
#define MAXI(x,y) (((x) < (y)) ? (y):(x)) #define MAXI(x,y) (((x) < (y)) ? (y):(x))
#define FWORDSIZE (13)
#define FWORDMASK WORDMASK(FWORDSIZE)
#define FILTERWORD(x) ((uint32_t)((x) & FWORDMASK))
#define CFILTERWORD(x,s) ((uint32_t)(((x) >> (((s)-FWORDSIZE)*2)) & FWORDMASK))
typedef struct { typedef struct {
pword_t words; pword_t words;
uint32_t *strictcount; uint32_t *strictcount;
uint32_t inseqcount; uint32_t inseqcount;
uint32_t outseqcount; uint32_t outseqcount;
uint64_t size; uint32_t size;
} wordcount_t, *pwordcount_t; } wordcount_t, *pwordcount_t;
typedef union { typedef union {
uint32_t *pointer; uint32_t *pointer;
uint32_t value; uint32_t value;
} poslist_t, *pposlist_t; } poslist_t, *ppostlist_t;
/** /**
* primer_t structure store fuzzy match positions for a primer * primer_t structure store fuzzy match positions for a primer
@ -97,10 +87,10 @@ typedef union {
typedef struct { typedef struct {
word_t word; //< code for the primer word_t word; //< code for the primer
uint32_t *directCount; //< Occurrence count on direct strand uint32_t *directCount; //< Occurrence count on direct strand
pposlist_t directPos; //< list of position list on direct strand ppostlist_t directPos; //< list of position list on direct strand
uint32_t *reverseCount; //< Occurrence count on reverse strand uint32_t *reverseCount; //< Occurrence count on reverse strand
pposlist_t reversePos; //< list of position list on reverse strand ppostlist_t reversePos; //< list of position list on reverse strand
bool_t good; //< primer match more than quorum example and no bool_t good; //< primer match more than quorum example and no
// more counterexample quorum. // more counterexample quorum.
@ -135,8 +125,6 @@ typedef struct {
bool_t strand; bool_t strand;
const char *amplifia; const char *amplifia;
int32_t length; int32_t length;
uint32_t begin;
uint32_t end;
} amplifia_t, *pamplifia_t; } amplifia_t, *pamplifia_t;
typedef struct { typedef struct {
@ -173,34 +161,22 @@ typedef struct {
uint32_t outtaxa; //< counterexample taxa count uint32_t outtaxa; //< counterexample taxa count
uint32_t notwellidentifiedtaxa; uint32_t notwellidentifiedtaxa;
int *wellIdentifiedSeqs; //< an array having elements equla to total seqs
// values are either 0 or 1, if seq is well identified
// its 1 else 0
int *coveredSeqs; //< an array having elements equal to total seqs, 1 if seq is covered else 0
// these statistics are relative to inexample sequences // these statistics are relative to inexample sequences
uint32_t mind; //< minimum distance between primers uint32_t mind; //< minimum distance between primers
uint32_t maxd; //< maximum distance between primers uint32_t maxd; //< maximum distance between primers
uint32_t sumd; //< distance sum uint32_t sumd; //< distance sum
uint32_t amplifiacount;
float yule; float yule;
float quorumin; float quorumin;
float quorumout; float quorumout;
float bs;
float bc;
int32_t refsequence;
// //
// uint32_t taxsetcount; // uint32_t taxsetcount;
// uint32_t taxsetindex; // uint32_t taxsetindex;
// ptaxampset_t taxset; // ptaxampset_t taxset;
// //
// uint32_t oktaxoncount; // uint32_t oktaxoncount;
uint32_t curseqid;
float p1temp; //strict primer1 melting temperature
float p1mintemp; //approx primer1 minimum melting temperature
float p2temp; //strict primer2 melting temperature
float p2mintemp; //approx primer2 minimum melting temperature
} pair_t, *ppair_t; } pair_t, *ppair_t;
/*TR: Added*/ /*TR: Added*/
@ -252,20 +228,13 @@ typedef struct {
}taxontoamp_t, *ptaxontoamp_t; }taxontoamp_t, *ptaxontoamp_t;
typedef struct { typedef struct {
bool_t printAC;
bool_t statistics;
bool_t filtering;
uint32_t lmin; //**< Amplifia minimal length uint32_t lmin; //**< Amplifia minimal length
uint32_t lmax; //**< Amplifia maximal length uint32_t lmax; //**< Amplifia maximal length
uint32_t error_max; //**< maximum error count in fuzzy search uint32_t error_max; //**< maximum error count in fuzzy search
uint32_t primer_length; //**< minimal length of the primers uint32_t primer_length; //**< minimal length of the primers
int32_t *restricted_taxid; //**< limit amplification below these taxid int32_t *restricted_taxid; //**< limit amplification below these taxid
int32_t *ignored_taxid; //**< no amplification below these taxid int32_t *ignored_taxid; //**< no amplification below these taxid
int32_t *exception_taxid;
char *prefix; char *prefix;
char *reference;
pecoseq_t refseq;
uint32_t refseqid;
uint32_t circular; uint32_t circular;
uint32_t doublestrand; uint32_t doublestrand;
float strict_quorum; float strict_quorum;
@ -275,7 +244,6 @@ typedef struct {
uint32_t strict_three_prime; uint32_t strict_three_prime;
int32_t r; //**< count of restrited taxa (restricted_taxid array size) int32_t r; //**< count of restrited taxa (restricted_taxid array size)
int32_t g; //**< count of ignored taxa (ignored_taxid array size) int32_t g; //**< count of ignored taxa (ignored_taxid array size)
int32_t e; //**< count of ignored taxa (ignored_taxid array size)
bool_t no_multi_match; bool_t no_multi_match;
char taxonrank[20]; //TR to count ranks against a pair char taxonrank[20]; //TR to count ranks against a pair
int32_t taxonrankidx; //TR to count ranks against a pair int32_t taxonrankidx; //TR to count ranks against a pair
@ -287,14 +255,6 @@ typedef struct {
int32_t outsamples; int32_t outsamples;
int32_t intaxa; int32_t intaxa;
int32_t outtaxa; int32_t outtaxa;
int saltmethod;
float salt;
PNNParams pnparm;
bool_t print_sets_of_primers;
float specificity_threshold;
int links_cnt;
float max_links_percent;
bool_t filter_on_links;
} options_t, *poptions_t; } options_t, *poptions_t;
typedef ecoseq_t **pecodnadb_t; typedef ecoseq_t **pecodnadb_t;
@ -302,15 +262,12 @@ typedef ecoseq_t **pecodnadb_t;
void sortword(pword_t table,uint32_t N); void sortword(pword_t table,uint32_t N);
pecodnadb_t readdnadb(const char *name, ecotaxonomy_t *taxonomy, uint32_t *size,poptions_t options); pecodnadb_t readdnadb(const char *name, uint32_t *size);
int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options); int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
int isExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
int isCounterExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options);
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq); uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq);
pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size,int32_t *neededWords,uint32_t neededWordCount, pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size);
int32_t quorum);
uint32_t ecoCompactHashSequence(pword_t dest,uint32_t size); uint32_t ecoCompactHashSequence(pword_t dest,uint32_t size);
const char* ecoUnhashWord(word_t word,uint32_t size); const char* ecoUnhashWord(word_t word,uint32_t size);
word_t ecoComplementWord(word_t word,uint32_t size); word_t ecoComplementWord(word_t word,uint32_t size);
@ -318,8 +275,8 @@ uint32_t ecoFindWord(pwordcount_t table,word_t word);
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum); void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum);
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount); pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq);
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount); void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq);
pqueue_t newQueue(pqueue_t queue, uint32_t size); pqueue_t newQueue(pqueue_t queue, uint32_t size);
pqueue_t resizeQueue(pqueue_t queue, uint32_t size); pqueue_t resizeQueue(pqueue_t queue, uint32_t size);
@ -354,13 +311,8 @@ int32_t getrankdbstats(pecodnadb_t seqdb,
uint32_t seqdbsize, uint32_t seqdbsize,
ecotaxonomy_t *taxonomy, ecotaxonomy_t *taxonomy,
poptions_t options); poptions_t options);
float taxonomycoverage(ppair_t pair, poptions_t options, pecodnadb_t seqdb,uint32_t seqdbsize); float taxonomycoverage(ppair_t pair, poptions_t options);
char ecoComplementChar(char base); char ecoComplementChar(char base);
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize); void taxonomyspecificity (ppair_t pair);
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum);
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize);
#endif /* EPSORT_H_ */ #endif /* EPSORT_H_ */

View File

@ -1,222 +0,0 @@
/*
* filtering.c
*
* Created on: 12 mai 2009
* Author: coissac
*/
#include "ecoprimer.h"
#include <string.h>
#include <math.h>
#include "hashencoder.h"
static int32_t *ecoFilteringHashSequence(int32_t *dest,
uint32_t circular,
uint32_t doublestrand,
ecoseq_t *seq,
uint32_t *size);
static int32_t *ecoFilteringHashSequence(int32_t *dest,
uint32_t circular,
uint32_t doublestrand,
ecoseq_t *seq,
uint32_t *size)
{
/*
* This function aims at building a vector of count for every possible hash codes
*
* The function must be applied once on each sequence
*
* The function allocates memory on the first call for the dest table
* The function also allocates memory for the static temporary table in_last_seq and
* the function must be called with *dest == -1 in order to free this temporary table
*
*/
static char *in_last_seq=NULL;
uint32_t i=0;
uint32_t j;
char *base;
int8_t code;
int32_t error=0;
word_t word=0;
word_t antiword=0;
uint32_t goodword;
uint32_t lmax=0;
// run on the first call;
if (dest==(void*)-1)
{
if (in_last_seq) ECOFREE(in_last_seq,"Free in last seq table");
return NULL;
}
/* FWORDSIZE = 13 => *size = 67 108 864
* FWORDSIZE = 14 => *size = 268 435 456
* FWORDSIZE = 15 => *size = 1 073 741 824
*/
*size = pow(4,FWORDSIZE);
/*
* in_last_seq is a vector of char as it is just to avoid counting twice (or more) a hash code (a DNA word)
* it is set to zero (with memset) and then filled with ones for each word belonging to the sequence
*/
if (!in_last_seq)
in_last_seq = ECOMALLOC(*size*sizeof(char),
"Cannot allocate filtering hash table");
memset(in_last_seq,0,*size*sizeof(char));
/*
* Allocate (on first call) the memory for the table of counts
*/
if (!dest)
{
dest = ECOMALLOC(*size*sizeof(int32_t),
"Cannot allocate filtering hash table");
memset(dest,0,*size*sizeof(int32_t));
}
lmax = seq->SQ_length;
if (!circular)
lmax-= FWORDSIZE-1;
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i));
/*
* Compute first word of seq
*/
for (i=0, base = seq->SQ; i < FWORDSIZE && i < lmax; i++,base++)
{
error<<= 1;
error&=ERRORMASK(FWORDSIZE);
code = encoder[(*base) - 'A'];
if (code <0)
{
code = 0;
error|= 1;
}
word=RAPPENDBASE(word,FWORDSIZE,code);
if (doublestrand)
antiword=LAPPENDBASE(antiword,FWORDSIZE,code);
}
if (!error && i==FWORDSIZE)
{
goodword=(uint32_t)((doublestrand) ? MINI(word,antiword):word);
/*
* FB: I don't think the test is necessary as the table has just been initialized
*/
if (!in_last_seq[goodword])
{
in_last_seq[goodword]=1;
dest[goodword]++;
}
}
/*
* compute and store counts (avoid counting twice a word) for the other words of the seq
*/
for (j=1; j < lmax; j++,i++,base++)
{
// DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
/* roll over the sequence for circular ones */
if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
error<<= 1;
error&=ERRORMASK(FWORDSIZE);
//code = -1;
//if((*base) >= 'A' && (*base) <= 'Z')
code = encoder[(*base) - 'A'];
if (code <0)
{
code = 0;
error|= 1;
}
word=RAPPENDBASE(word,FWORDSIZE,code);
if (doublestrand)
antiword=LAPPENDBASE(antiword,FWORDSIZE,code);
if (!error)
{
if (doublestrand)
goodword=(uint32_t)MINI(word,antiword);
else
goodword=word;
if (!in_last_seq[goodword])
{
in_last_seq[goodword]=1;
dest[goodword]++;
}
}
}
return dest;
}
int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options,uint32_t *size,int32_t sequenceQuorum)
{
int32_t *wordscount=NULL;
int32_t keep=0;
uint32_t i,j=0;
for (i=0;i<seqdbsize;i++)
{
if (database[i]->isexample && database[i]->SQ_length > options->primer_length)
{
j++;
wordscount=ecoFilteringHashSequence(wordscount,
options->circular,
options->doublestrand,
database[i],
size);
}
fprintf(stderr," Filtered sequences %5u/%5u \r",j,exampleCount);
}
fprintf(stderr,"\n");
for (i=0;i<*size;i++)
if (wordscount[i] >= sequenceQuorum)
keep++;
(void)ecoFilteringHashSequence((int32_t*)-1,
options->circular,
options->doublestrand,
NULL,
NULL);
fprintf(stderr,"ok\n Considered word of size %d for filtering : %d\n",FWORDSIZE,keep);
return wordscount;
}

View File

@ -0,0 +1,17 @@
goodtaxon.o goodtaxon.P : goodtaxon.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -12,53 +12,16 @@ int isGoodTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options)
{ {
int result; int result;
result=((options->r == 0) || (eco_is_taxid_included(taxonomy,
options->restricted_taxid,
options->r,
taxonomy->taxons->taxon[taxon].taxid)
)) &&
((options->e == 0) || !(eco_is_taxid_included(taxonomy,
options->exception_taxid,
options->e,
taxonomy->taxons->taxon[taxon].taxid)
));
return result;
}
int isExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options)
{
int result;
result=( (options->r == 0) || (eco_is_taxid_included(taxonomy, result=( (options->r == 0) || (eco_is_taxid_included(taxonomy,
options->restricted_taxid, options->restricted_taxid,
options->r, options->r,
taxonomy->taxons->taxon[taxon].taxid) taxonomy->taxons->taxon[taxon].taxid)
)) && )) &&
((options->e == 0) || !(eco_is_taxid_included(taxonomy, ((options->g == 0) || !(eco_is_taxid_included(taxonomy,
options->exception_taxid, options->ignored_taxid,
options->e, options->g,
taxonomy->taxons->taxon[taxon].taxid) taxonomy->taxons->taxon[taxon].taxid)
)); ));
return result; return result;
} }
int isCounterExampleTaxon(ecotaxonomy_t *taxonomy,int32_t taxon,poptions_t options)
{
int result;
result=((options->g != 0) && (eco_is_taxid_included(taxonomy,
options->ignored_taxid,
options->g,
taxonomy->taxons->taxon[taxon].taxid))
) || ((options->e != 0) && (eco_is_taxid_included(taxonomy,
options->exception_taxid,
options->e,
taxonomy->taxons->taxon[taxon].taxid))
);
return result;
}

View File

@ -1,21 +0,0 @@
/*
* hashencoder.h
*
* Created on: 12 mai 2009
* Author: coissac
*/
#ifndef HASHENCODER_H_
#define HASHENCODER_H_
static int8_t encoder[] = {0, // A
-1, // b
1, // C
-1,-1,-1, // d, e, f
2, // G
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // h,i,j,k,l,m,n,o,p,q,r,s
3,3, // T,U
-1,-1,-1,-1,-1}; // v,w,x,y,z
#endif /* HASHENCODER_H_ */

View File

@ -0,0 +1,17 @@
hashsequence.o hashsequence.P : hashsequence.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -10,7 +10,15 @@
static int cmpword(const void *x,const void *y); static int cmpword(const void *x,const void *y);
#include "hashencoder.h" static int8_t encoder[] = {0, // A
-1, // b
1, // C
-1,-1,-1, // d, e, f
2, // G
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // h,i,j,k,l,m,n,o,p,q,r,s
3,3, // T,U
-1,-1,-1,-1,-1}; // v,w,x,y,z
uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq) uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
{ {
@ -23,29 +31,8 @@ uint32_t ecoWordCount(uint32_t wordsize, uint32_t circular, ecoseq_t *seq)
return wordcount; return wordcount;
} }
pword_t ecoHashSequence(pword_t dest, pword_t ecoHashSequence(pword_t dest, uint32_t wordsize, uint32_t circular, uint32_t doublestrand, ecoseq_t *seq,uint32_t *size)
uint32_t wordsize,
uint32_t circular,
uint32_t doublestrand,
ecoseq_t *seq,
uint32_t *size,
int32_t *neededWords,
uint32_t neededWordCount,
int32_t quorum)
{ {
/*
* dest / out : words of the hashed sequence position per position
* wordsize / in : size of the word to be hashed (record error for that size) BUT not equal to FWORDSIZE ...
* ... the size of the word REALLY returned as a result
* circular / in : is the sequence circular
* doublestrand / in : if we have to hash on both strands of the sequence
* seq / in : the sequence in ecoseq format
* size / out : number of hashed words (size of the dest vector)
* neededWordCount / in : table hash codes of word counts in the full DB (used to filter words)
* quorum / in : minimum quorum used to filter words based on the neededWordCount table
*/
uint32_t i=0; uint32_t i=0;
uint32_t j; uint32_t j;
char *base; char *base;
@ -53,7 +40,6 @@ pword_t ecoHashSequence(pword_t dest,
int32_t error=0; int32_t error=0;
word_t word=0; word_t word=0;
word_t antiword=0; word_t antiword=0;
word_t goodword;
uint32_t lmax=0; uint32_t lmax=0;
(*size)=0; (*size)=0;
@ -67,13 +53,11 @@ pword_t ecoHashSequence(pword_t dest,
"I cannot allocate memory for sequence hashing" "I cannot allocate memory for sequence hashing"
); );
//DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i)); // DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,i,(seq->SQ+i));
for (i=0, base = seq->SQ; i < wordsize && i < lmax; i++,base++) for (i=0, base = seq->SQ; i < wordsize && i < lmax; i++,base++)
{ {
error<<= 1; error<<= 1;
error&=ERRORMASK(wordsize);
code = encoder[(*base) - 'A']; code = encoder[(*base) - 'A'];
if (code <0) if (code <0)
@ -84,22 +68,10 @@ pword_t ecoHashSequence(pword_t dest,
word=RAPPENDBASE(word,wordsize,code); word=RAPPENDBASE(word,wordsize,code);
if (doublestrand) if (doublestrand)
antiword=LAPPENDBASE(antiword,wordsize,code); antiword=LAPPENDBASE(antiword,wordsize,code);
if (neededWordCount && i>=(FWORDSIZE-1))
{
goodword = (doublestrand) ? MINI(FILTERWORD(word),CFILTERWORD(antiword,wordsize)):FILTERWORD(word);
if (neededWords[(uint32_t)goodword]<quorum)
error|= (1 << (FWORDSIZE-1));
}
} }
if (!error && i==wordsize) if (!error && i==wordsize)
{ {
dest[*size]=(doublestrand) ? MINI(word,antiword):word; dest[*size]=(doublestrand) ? MINI(word,antiword):word;
@ -110,14 +82,12 @@ pword_t ecoHashSequence(pword_t dest,
for (j=1; j < lmax; j++,i++,base++) for (j=1; j < lmax; j++,i++,base++)
{ {
//DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j)); // DEBUG_LOG("Sequence %s @ %d : %18.18s",seq->AC,j,(seq->SQ+j));
/* roll over the sequence for circular ones */ /* roll over the sequence for circular ones */
if (i==(uint32_t)seq->SQ_length) base=seq->SQ; if (i==(uint32_t)seq->SQ_length) base=seq->SQ;
error<<= 1; error<<= 1;
error&=ERRORMASK(wordsize);
code = encoder[(*base) - 'A']; code = encoder[(*base) - 'A'];
if (code <0) if (code <0)
@ -130,17 +100,6 @@ pword_t ecoHashSequence(pword_t dest,
if (doublestrand) if (doublestrand)
antiword=LAPPENDBASE(antiword,wordsize,code); antiword=LAPPENDBASE(antiword,wordsize,code);
if (neededWordCount)
{
goodword = (doublestrand) ? MINI(FILTERWORD(word),CFILTERWORD(antiword,wordsize)):FILTERWORD(word);
if (neededWords[(uint32_t)goodword]<quorum)
error|= (1 << (FWORDSIZE-1));
// else
// DEBUG_LOG("%s goodword = %p %d/%d (pos:%d error:%d)",seq->AC,goodword,neededWords[(uint32_t)goodword],quorum,i,error);
}
if (!error) if (!error)
{ {
dest[*size]=(doublestrand) ? MINI(word,antiword):word; dest[*size]=(doublestrand) ? MINI(word,antiword):word;
@ -148,34 +107,24 @@ pword_t ecoHashSequence(pword_t dest,
} }
} }
//DEBUG_LOG("%s goodword = %d",seq->AC,*size);
return dest; return dest;
} }
uint32_t ecoCompactHashSequence(pword_t table,uint32_t size) uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
{ {
/*
*
* MULTIWORD is a word occurring more than once in a sequence
*
*/
uint32_t i,j; uint32_t i,j;
word_t current; word_t current;
// bool_t here=FALSE;
sortword(table,size); sortword(table,size);
current = 0; current = 0;
current=SETMULTIWORD(current); /* build impossible word for the first loop cycle */ current=SETMULTIWORD(current); /* build impossible word for the first loop cycle */
// if (strcmp(ecoUnhashWord(table[size-1],18),"GTTTGTTCAACGATTAAA")==0)
// here=TRUE;
for (i=0,j=0; j < size;j++) for (i=0,j=0; j < size;j++)
{ {
if (WORD(table[j])!=current) if (table[j]!=current)
{ {
current =table[j]; current =table[j];
table[i]=current; table[i]=current;
@ -185,9 +134,6 @@ uint32_t ecoCompactHashSequence(pword_t table,uint32_t size)
table[i]=SETMULTIWORD(table[i]); table[i]=SETMULTIWORD(table[i]);
} }
// if (strcmp(ecoUnhashWord(WORD(table[i-1]),18),"TACGACCTCGATGTTGGA")==0)
// DEBUG_LOG("winner %d",i)
return i; return i;
} }

View File

@ -0,0 +1,17 @@
libstki.o libstki.P : libstki.c /usr/include/stdio.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/sys/cdefs.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/string.h libstki.h ecotype.h ecoprimer.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
../libecoPCR/ecoPCR.h apat.h debug.h

17
src/libecoprimer/merge.P Normal file
View File

@ -0,0 +1,17 @@
merge.o merge.P : merge.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -10,7 +10,7 @@
static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2); static pmerge_t mergeInit(pmerge_t merge,pwordcount_t data,uint32_t s1,uint32_t s2);
static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data, uint32_t s1, uint32_t s2) static pmerge_t mergeInit(pmerge_t merge, pwordcount_t data,uint32_t s1,uint32_t s2)
{ {
merge->words = data->words; merge->words = data->words;
merge->count = data->strictcount; merge->count = data->strictcount;
@ -26,15 +26,6 @@ typedef enum {S1=1,S2=2,STACK=3} source_t;
void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum) void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,uint32_t seqQuorum)
{ {
/*
* data / in out : the table that contains the two parts to be merged
* s1 / in : end of the first part of the table
* s2 / in : end of the second part of the table
* remainingSeq / in : the number of remaining seqs to be added to the table
* seqQuorum / in : the minimum number of sequences in which a pattern must appear
*/
merge_t merged; merge_t merged;
source_t source; source_t source;
word_t currentword,tmpword; word_t currentword,tmpword;
@ -47,31 +38,11 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
// DEBUG_LOG("Coucou %p s1= %d s2= %d",data,s1,s2) // DEBUG_LOG("Coucou %p s1= %d s2= %d",data,s1,s2)
/*
* init the merged structure (used only for coding convenience, never returned, allocated on the C-stack)
* note that :
* merged.words : hashcodes (initialized to data->words)
* merged.count : counts of each word (initialized to data->strictcount)
* merged.read1 : index of the first word of the first subtable (initialized to 0)
* merged.read1 : index of the first word of the first subtable (initialized to 0)
* merged.read2 : index of the first word of the second subtable (initialized to s1)
* merged.size : total size of the table (initialized to s1+s2)
*
* allocate a new stack of size min(s1, s2)
*/
(void)mergeInit(&merged,data,s1,s2); (void)mergeInit(&merged,data,s1,s2);
(void)newQueue(&queue,MINI(s1,s2)); (void)newQueue(&queue,MINI(s1,s2));
while (merged.read1 < s1 && merged.read2 < merged.size)
/* true until
* merged.read1 == s1 AND merged.read2 == merged.size, i.e. ALL words have been processed
*/
while (merged.read1 < s1 || merged.read2 < merged.size)
{ {
/*
* initialize current{word,count} from either STACK (if not empty) or first table (S1)
*/
if (! queue.empty) if (! queue.empty)
{ {
currentword = queue.words[queue.pop]; currentword = queue.words[queue.pop];
@ -85,34 +56,18 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
source=S1; source=S1;
} }
/* if (WORD(currentword) > WORD(merged.words[merged.read2]))
* IF there are some words in the second subtable remaining to be processed AND
* its first word is lower than current word
* THEN initialize current{word,count} from the second table (S2)
*
*/
if (merged.read2 < merged.size &&
WORD(currentword) > WORD(merged.words[merged.read2]))
{ {
currentword = merged.words[merged.read2]; currentword = merged.words[merged.read2];
currentcount = merged.count[merged.read2]; currentcount = merged.count[merged.read2];
source = S2; source = S2;
} }
/*
* record if the two words in the both subtable are the same
*/
same = (source != S2) && (WORD(currentword) == WORD(merged.words[merged.read2])); same = (source != S2) && (WORD(currentword) == WORD(merged.words[merged.read2]));
nsame+=same; nsame+=same;
// DEBUG_LOG("Merging : r1 = %d s1 = %d r2 = %d size = %d word = %s source=%u same=%u",merged.read1,s1,merged.read2-s1,merged.size,ecoUnhashWord(currentword,18),source,same) // DEBUG_LOG("Merging : r1 = %d s1 = %d r2 = %d size = %d word = %s source=%u same=%u",merged.read1,s1,merged.read2-s1,merged.size,ecoUnhashWord(currentword,18),source,same)
/*
* merge step (AND apply the quorum property)
* update merged.read1 AND/OR merged.read2
* record the word and its count in the table
*/
tmpword = merged.words[merged.write]; tmpword = merged.words[merged.write];
tmpcount= merged.count[merged.write]; tmpcount= merged.count[merged.write];
@ -158,15 +113,7 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
// DEBUG_LOG("r1 : %d r2 : %d qsize : %d nsame : %d tot : %d write : %s count : %d source : %d size : %d pop : %d push : %d empty : %d",merged.read1,merged.read2-s1,qsize,nsame,qsize+nsame,ecoUnhashWord(currentword,18),currentcount,source,queue.size,queue.pop,queue.push,queue.empty) // DEBUG_LOG("r1 : %d r2 : %d qsize : %d nsame : %d tot : %d write : %s count : %d source : %d size : %d pop : %d push : %d empty : %d",merged.read1,merged.read2-s1,qsize,nsame,qsize+nsame,ecoUnhashWord(currentword,18),currentcount,source,queue.size,queue.pop,queue.push,queue.empty)
/*
* finish the merging with words not processed (AND apply the quorum property)
* they are stored in the second subtable (IF)
* OR in the queue (ELSE)
*/
if (merged.read2 < merged.size) if (merged.read2 < merged.size)
{
//DEBUG_LOG("end1 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
for (;merged.read2 < merged.size;merged.read2++) for (;merged.read2 < merged.size;merged.read2++)
{ {
merged.words[merged.write]=merged.words[merged.read2]; merged.words[merged.write]=merged.words[merged.read2];
@ -175,10 +122,7 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
merged.write++; merged.write++;
} }
} else while (! queue.empty)
else {
//DEBUG_LOG("end2 %d %d/%d %d/%d",merged.write,merged.read1,s1,merged.read2,merged.size);
while (! queue.empty)
{ {
// DEBUG_LOG("write : %s count : %d write : %d size : %d pop : %d push : %d empty : %d",ecoUnhashWord(queue.words[queue.pop],18),queue.count[queue.pop],merged.write,queue.size,queue.pop,queue.push,queue.empty) // DEBUG_LOG("write : %s count : %d write : %d size : %d pop : %d push : %d empty : %d",ecoUnhashWord(queue.words[queue.pop],18),queue.count[queue.pop],merged.write,queue.size,queue.pop,queue.push,queue.empty)
merged.words[merged.write]=queue.words[queue.pop]; merged.words[merged.write]=queue.words[queue.pop];
@ -187,7 +131,6 @@ void ecomerge(pwordcount_t data,uint32_t s1,uint32_t s2,uint32_t remainingSeq,ui
if (remainingSeq + merged.count[merged.write] >= seqQuorum) if (remainingSeq + merged.count[merged.write] >= seqQuorum)
merged.write++; merged.write++;
} }
}
data->size = merged.write; data->size = merged.write;

17
src/libecoprimer/pairs.P Normal file
View File

@ -0,0 +1,17 @@
pairs.o pairs.P : pairs.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/string.h

View File

@ -8,7 +8,6 @@
#include "ecoprimer.h" #include "ecoprimer.h"
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include "../libthermo/thermostats.h"
static void buildPrimerPairsForOneSeq(uint32_t seqid, static void buildPrimerPairsForOneSeq(uint32_t seqid,
pecodnadb_t seqdb, pecodnadb_t seqdb,
@ -164,7 +163,9 @@ ppairtree_t buildPrimerPairs(pecodnadb_t seqdb,uint32_t seqdbsize,pprimercount_t
{ {
buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options); buildPrimerPairsForOneSeq(i, seqdb, primers, primerpairs, options);
} }
return primerpairs; return primerpairs;
} }
#define DMAX (2000000000) #define DMAX (2000000000)
@ -179,20 +180,13 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
uint32_t i,j,k; uint32_t i,j,k;
uint32_t matchcount=0; uint32_t matchcount=0;
pprimermatch_t matches = NULL; pprimermatch_t matches = NULL;
//primermatchcount_t seqmatchcount; primermatchcount_t seqmatchcount;
ppair_t pcurrent; ppair_t pcurrent;
pair_t current; pair_t current;
pprimer_t wswp; pprimer_t wswp;
bool_t bswp; bool_t bswp;
size_t distance; size_t distance;
bool_t strand; bool_t strand;
//char prmr[50];
//float mtemp;
word_t w1, w1a, omask = (0x1L << (options->strict_three_prime*2)) -1;
word_t w2, w2a;//, wtmp;
uint32_t bp1,bp2;
//prmr[options->primer_length] = '\0';
for (i=0;i < primers->size; i++) for (i=0;i < primers->size; i++)
{ {
@ -211,8 +205,8 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
{ {
if (primers->primers[i].directCount[seqid]==1) if (primers->primers[i].directCount[seqid]==1)
{ {
matches[j].primer = primers->primers+i; matches[j].primer = primers->primers+i;
matches[j].strand=TRUE; matches[j].strand=TRUE;
matches[j].position=primers->primers[i].directPos[seqid].value; matches[j].position=primers->primers[i].directPos[seqid].value;
j++; j++;
} }
@ -228,8 +222,8 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
{ {
if (primers->primers[i].reverseCount[seqid]==1) if (primers->primers[i].reverseCount[seqid]==1)
{ {
matches[j].primer = primers->primers+i; matches[j].primer = primers->primers+i;
matches[j].strand=FALSE; matches[j].strand=FALSE;
matches[j].position=primers->primers[i].reversePos[seqid].value; matches[j].position=primers->primers[i].reversePos[seqid].value;
j++; j++;
} }
@ -252,17 +246,11 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
{ {
// For all primers matching the sequence // For all primers matching the sequence
/*for(j=i+1; for(j=i+1;
(j<matchcount) (j<matchcount)
&& ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax); && ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
j++ j++
)//*/ )
for (j=i+1; j<matchcount; j++)
{
if (matches[j].position - matches[i].position <= options->primer_length) continue;
distance = matches[j].position - matches[i].position - options->primer_length;
if (distance >= options->lmax) break;
// For all not too far primers // For all not too far primers
@ -270,7 +258,9 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
&& (distance > options->lmin) && (distance > options->lmin)
) )
{ {
// If possible primer pair // If possible primer pair
current.p1 = matches[i].primer; current.p1 = matches[i].primer;
current.asdirect1=matches[i].strand; current.asdirect1=matches[i].strand;
current.p2 = matches[j].primer; current.p2 = matches[j].primer;
@ -278,17 +268,12 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
current.maxd=DMAX; current.maxd=DMAX;
current.mind=DMAX; current.mind=DMAX;
current.sumd=0; current.sumd=0;
current.amplifiacount=0;
current.inexample=0; current.inexample=0;
current.outexample=0; current.outexample=0;
current.curseqid = 0;
current.refsequence=-1;
//current.p1temp = 100;
//current.p1mintemp = 100;
//current.p2temp = 100;
//current.p2mintemp = 100;
// Standardize the pair // Standardize the pair
strand = current.p2->word > current.p1->word; strand = current.p2->word > current.p1->word;
if (!strand) if (!strand)
{ {
@ -302,42 +287,6 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
} }
//Code to make sure that if -3 option is given then
//3' end must match upto given number of base pairs
if (options->strict_three_prime > 0)
{
w1 = current.p1->word;
w2 = current.p2->word;
if (!current.asdirect1) //make sure that word is from 5' to 3'
w1=ecoComplementWord(w1,options->primer_length);
if (!current.asdirect2) //make sure that word is from 5' to 3'
w2=ecoComplementWord(w2,options->primer_length);
//now both w1 and w2 are from 5' to 3' end
bp1 = matches[i].position;
bp2 = matches[j].position;
if (!strand)
{
bp1 = matches[j].position;
bp2 = matches[i].position;
}
//get word of first approximate repeat
w1a = extractSite(seqdb[seqid]->SQ,bp1,options->primer_length,strand);
//get word of second approximate repeat
w2a = extractSite(seqdb[seqid]->SQ,bp2,options->primer_length,!strand);
w1 = w1 & omask; //keep only strict_three_prime bases on the right (3') end
w2 = w2 & omask; //keep only strict_three_prime bases on the right (3') end
w1a = w1a & omask; //keep only strict_three_prime bases on the right (3') end
w2a = w2a & omask; //keep only strict_three_prime bases on the right (3') end
//now check that both words and primers of amplifia have same bases on 3' end
if ((w1 ^ w1a) != 0) continue;
if ((w2 ^ w2a) != 0) continue;
}
// Look for the new pair in already seen pairs // Look for the new pair in already seen pairs
pcurrent = insertpair(current,pairs); pcurrent = insertpair(current,pairs);
@ -346,9 +295,8 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
if (seqdb[seqid]->isexample) if (seqdb[seqid]->isexample)
{ {
//pcurrent->inexample++; pcurrent->inexample++;
pcurrent->sumd+=distance; pcurrent->sumd+=distance;
pcurrent->amplifiacount++;
if ((pcurrent->maxd==DMAX) || (distance > pcurrent->maxd)) if ((pcurrent->maxd==DMAX) || (distance > pcurrent->maxd))
pcurrent->maxd = distance; pcurrent->maxd = distance;
@ -356,33 +304,11 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
if (distance < pcurrent->mind) if (distance < pcurrent->mind)
pcurrent->mind = distance; pcurrent->mind = distance;
} }
//else else
// pcurrent->outexample++; pcurrent->outexample++;
//for each pair we save current sequence id in the pair if ((pcurrent->outexample+pcurrent->inexample)==1)
//when we see this pair for the first time in currnet sequence
//because we want to increment inexample & outexample count
//only once for one sequence
if (pcurrent->curseqid != (seqid+1))
{
if (seqdb[seqid]->isexample)
pcurrent->inexample++;
else
pcurrent->outexample++;
if (pcurrent->curseqid != 0)
pcurrent->curseqid = seqid+1;
}
/*if ((pcurrent->outexample+pcurrent->inexample)==0)
{
fprintf(stderr,"pcurrent->outexample+pcurrent->inexample=0!\n");
exit(0);
}*/
if (pcurrent->curseqid == 0)//((pcurrent->outexample+pcurrent->inexample)==1)
{ {
pcurrent->curseqid = seqid+1;
paircount++; paircount++;
pcurrent->pcr.ampslot=200; pcurrent->pcr.ampslot=200;
pcurrent->pcr.ampcount=0; pcurrent->pcr.ampcount=0;
@ -400,36 +326,15 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
} }
} }
if (seqid==options->refseqid)
pcurrent->refsequence=seqid;
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].length=distance; pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].length=distance;
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].sequence=seqdb[seqid]; pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].sequence=seqdb[seqid];
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].strand=strand; pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].strand=strand;
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].begin=matches[i].position + options->primer_length;
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].end= matches[j].position - 1;
if (strand) if (strand)
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[i].position + options->primer_length; pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[i].position + options->primer_length;
else else
pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ; pcurrent->pcr.amplifias[pcurrent->pcr.ampcount].amplifia= seqdb[seqid]->SQ + matches[j].position - 1 ;
/*strncpy (prmr, seqdb[seqid]->SQ + matches[i].position, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
if (mtemp < pcurrent->p1mintemp)
pcurrent->p1mintemp = mtemp;
//fprintf (stderr, "prmr1: %s\n", seqdb[seqid]->SQ);
strncpy (prmr, seqdb[seqid]->SQ + matches[j].position, options->primer_length);
mtemp = nparam_CalcSelfTM (options->pnparm, prmr, options->primer_length) - 273.0;
if (mtemp < pcurrent->p2mintemp)
pcurrent->p2mintemp = mtemp;
//fprintf (stderr, "prmr2: %s\n", prmr);
if (pcurrent->p1temp == 100)
pcurrent->p1temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p1->word, options->primer_length), 0) - 273.0;
if (pcurrent->p2temp == 100)
pcurrent->p2temp = nparam_CalcSelfTM (options->pnparm, ecoUnhashWord(pcurrent->p2->word, options->primer_length), 0) - 273.0;
*/
pcurrent->pcr.ampcount++; pcurrent->pcr.ampcount++;
// fprintf(stderr,"%c%c W1 : %s direct : %c", // fprintf(stderr,"%c%c W1 : %s direct : %c",
// "bG"[(int)pcurrent->p1->good], // "bG"[(int)pcurrent->p1->good],
@ -452,9 +357,9 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
// //
} }
}
} }
} }
pairs->count=paircount; pairs->count=paircount;
} }

View File

@ -0,0 +1,17 @@
pairtree.o pairtree.P : pairtree.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/search.h

17
src/libecoprimer/queue.P Normal file
View File

@ -0,0 +1,17 @@
queue.o queue.P : queue.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -0,0 +1,17 @@
readdnadb.o readdnadb.P : readdnadb.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h

View File

@ -7,7 +7,7 @@
#include "ecoprimer.h" #include "ecoprimer.h"
pecodnadb_t readdnadb(const char *name, ecotaxonomy_t *taxonomy, uint32_t *size,poptions_t options) pecodnadb_t readdnadb(const char *name, uint32_t *size)
{ {
ecoseq_t *seq; ecoseq_t *seq;
uint32_t buffsize=100; uint32_t buffsize=100;
@ -18,42 +18,18 @@ pecodnadb_t readdnadb(const char *name, ecotaxonomy_t *taxonomy, uint32_t *size,
for(seq=ecoseq_iterator(name), *size=0; for(seq=ecoseq_iterator(name), *size=0;
seq; seq;
seq=ecoseq_iterator(NULL) seq=ecoseq_iterator(NULL), (*size)++
) )
{ {
if (isExampleTaxon(taxonomy,seq->taxid,options) || if (*size==buffsize)
isCounterExampleTaxon(taxonomy,seq->taxid,options)) {
{ buffsize*=2;
if (*size==buffsize) db = ECOREALLOC(db,buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory");
{ }
buffsize*=2; db[*size]=seq;
db = ECOREALLOC(db,buffsize*sizeof(ecoseq_t*),"I cannot allocate db memory");
}
db[*size]=seq;
(*size)++;
}
else
{
delete_ecoseq(seq);
}
}; };
db = ECOREALLOC(db,(*size)*sizeof(ecoseq_t*),"I cannot allocate db memory"); db = ECOREALLOC(db,(*size)*sizeof(ecoseq_t*),"I cannot allocate db memory");
return db; return db;
} }
void printSeqTest(pecodnadb_t seqdb,uint32_t seqdbsize)
{
uint32_t i;
char ch[11];
ch [10] = '\0';
for (i=0; i < seqdbsize; i++)
{
strncpy (ch, seqdb[i]->SQ, 10);
fprintf (stderr, "seq %d = %s\n", i, ch);
}
exit (0);
}

View File

@ -0,0 +1,10 @@
smothsort.o smothsort.P : smothsort.c /usr/include/assert.h /usr/include/sys/cdefs.h \
/usr/include/stdio.h /usr/include/_types.h /usr/include/sys/_types.h \
/usr/include/machine/_types.h /usr/include/i386/_types.h \
/usr/include/sys/types.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/machine/endian.h /usr/include/i386/endian.h \
/usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/sys/_structs.h \
/usr/include/inttypes.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h

View File

@ -35,7 +35,6 @@
* @author Pekka Pessi <Pekka.Pessi@nokia.com> * @author Pekka Pessi <Pekka.Pessi@nokia.com>
*/ */
#include <stdlib.h> /* FB for NULL */
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>

View File

@ -0,0 +1,17 @@
sortmatch.o sortmatch.P : sortmatch.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h

View File

@ -0,0 +1,17 @@
sortword.o sortword.P : sortword.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/math.h /usr/include/architecture/i386/math.h

View File

@ -0,0 +1,18 @@
strictprimers.o strictprimers.P : strictprimers.c ecoprimer.h /usr/include/inttypes.h \
/usr/include/sys/cdefs.h /usr/include/_types.h \
/usr/include/sys/_types.h /usr/include/machine/_types.h \
/usr/include/i386/_types.h \
/usr/lib/gcc/i686-apple-darwin9/4.0.1/include/stdint.h \
/usr/include/stdlib.h /usr/include/available.h /usr/include/sys/wait.h \
/usr/include/sys/signal.h /usr/include/sys/appleapiopts.h \
/usr/include/machine/signal.h /usr/include/i386/signal.h \
/usr/include/i386/_structs.h /usr/include/sys/_structs.h \
/usr/include/machine/_structs.h /usr/include/mach/i386/_structs.h \
/usr/include/sys/resource.h /usr/include/machine/endian.h \
/usr/include/i386/endian.h /usr/include/sys/_endian.h \
/usr/include/libkern/_OSByteOrder.h \
/usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \
/usr/include/machine/types.h /usr/include/i386/types.h \
/usr/include/stdio.h ecotype.h ../libecoPCR/ecoPCR.h apat.h libstki.h \
debug.h /usr/include/string.h /usr/include/math.h \
/usr/include/architecture/i386/math.h

View File

@ -5,50 +5,11 @@
* Author: coissac * Author: coissac
*/ */
#define _GNU_SOURCE
#include "ecoprimer.h" #include "ecoprimer.h"
#include <string.h> #include <string.h>
#include <math.h> #include <math.h>
#include <sys/resource.h>
#include <unistd.h>
#include <stdio.h>
#ifndef RUSAGE_SELF pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,ecoseq_t *seq)
#define RUSAGE_SELF 0
#define RUSAGE_CHILDREN -1
#endif
static double timeval_subtract (struct timeval *x, struct timeval *y);
/* Subtract the `struct timeval' values X and Y,
Return elapsed secondes as a double. */
double timeval_subtract (struct timeval *x, struct timeval *y)
{
struct timeval result;
/* Perform the carry for the later subtraction by updating y. */
if (x->tv_usec < y->tv_usec) {
int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
y->tv_usec -= 1000000 * nsec;
y->tv_sec += nsec;
}
if (x->tv_usec - y->tv_usec > 1000000) {
int nsec = (x->tv_usec - y->tv_usec) / 1000000;
y->tv_usec += 1000000 * nsec;
y->tv_sec -= nsec;
}
/* Compute the time remaining to wait.
tv_usec is certainly positive. */
result.tv_sec = x->tv_sec - y->tv_sec;
result.tv_usec = x->tv_usec - y->tv_usec;
return (double)result.tv_sec + (double)result.tv_usec/1e6;
}
pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount)
{ {
uint32_t i; uint32_t i;
uint32_t buffsize; uint32_t buffsize;
@ -65,7 +26,7 @@ pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
if (seq) if (seq)
{ {
table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize,neededWords,neededWordCount,seqQuorum); table->words = ecoHashSequence(NULL,wordsize,circular,doublestrand,seq,&buffsize);
table->size = ecoCompactHashSequence(table->words,buffsize); table->size = ecoCompactHashSequence(table->words,buffsize);
table->inseqcount=1; table->inseqcount=1;
@ -79,7 +40,7 @@ pwordcount_t initCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
return table; return table;
} }
void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq,int32_t *neededWords,uint32_t neededWordCount) void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circular, uint32_t doublestrand,uint32_t exampleCount,uint32_t seqQuorum,ecoseq_t *seq)
{ {
uint32_t buffersize; uint32_t buffersize;
pword_t newtable; pword_t newtable;
@ -89,52 +50,33 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
buffersize = table->size + ecoWordCount(wordsize,circular,seq); buffersize = table->size + ecoWordCount(wordsize,circular,seq);
table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t), table->words = ECOREALLOC(table->words,buffersize*sizeof(word_t),
"\n\nCannot allocate memory to extend word table" ); "Cannot allocate memory to extend word table");
/*
* newtable is a pointer on the memory planed to be used for the new sequence (ecoWordCount new hash codes max)
*/
newtable = table->words + table->size; newtable = table->words + table->size;
// DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable); // DEBUG_LOG("Words = %x (%u) new = %x", table->words,table->size,newtable);
(void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize,neededWords,neededWordCount,seqQuorum); (void)ecoHashSequence(newtable,wordsize,circular,doublestrand,seq,&newsize);
// DEBUG_LOG("new seq wordCount : %d",newsize); // DEBUG_LOG("new seq wordCount : %d",newsize);
/*
* at this stage, new hash codes have been added in the table but the table is not sorted
*/
newsize = ecoCompactHashSequence(newtable,newsize); newsize = ecoCompactHashSequence(newtable,newsize);
/*
* new hash codes have now been sorted BUT the whole table is not.
* MULTIWORDS have been tagged (and compacted)
*/
// DEBUG_LOG("compacted wordCount : %d",newsize); // DEBUG_LOG("compacted wordCount : %d",newsize);
buffersize = table->size + newsize; buffersize = table->size + newsize;
/*
* buffersize is now set to the REAL size used by the table (but the memory chunck may be larger)
*/
// resize the count buffer // resize the count buffer
table->inseqcount++; table->inseqcount++;
//fprintf (stderr, "\nOldAddress: %x", table->strictcount);
table->strictcount = ECOREALLOC(table->strictcount,(buffersize+5000)*sizeof(uint32_t),
"Cannot allocate memory to extend example word count table");
//fprintf (stderr, " NewAddress: %x\n", table->strictcount);
table->strictcount = ECOREALLOC(table->strictcount,buffersize*sizeof(uint32_t),
"Cannot allocate memory to extend example word count table");
for (i=table->size; i < buffersize; i++) for (i=table->size; i < buffersize; i++)
table->strictcount[i]=1; table->strictcount[i]=1;
/*
* new words in the table are set to a count of ONE
*/
// Now we have to merge in situ the two tables // Now we have to merge in situ the two tables
@ -146,64 +88,23 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize, pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
uint32_t exampleCount,poptions_t options) uint32_t exampleCount,poptions_t options)
{ {
struct rusage start; uint32_t i;
struct rusage usage;
double seconde;
char *logfilename;
FILE *logfile;
uint32_t i, j;
bool_t first=TRUE;
pwordcount_t strictprimers=NULL; pwordcount_t strictprimers=NULL;
uint64_t totallength=0;
uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum); uint32_t sequenceQuorum = (uint32_t)floor((float)exampleCount * options->strict_quorum);
int32_t *neededWords;
uint32_t neededWordCount;
fprintf(stderr,"Filtering... ");
if (options->filtering)
neededWords = filteringSeq(database,seqdbsize,exampleCount,options,&neededWordCount,(int32_t)sequenceQuorum);
else
{
neededWordCount=0;
neededWords=NULL;
}
if (options->statistics)
{
asprintf(&logfilename,"ecoprimer_%d.log",getpid());
logfile = fopen(logfilename,"w");
fprintf(logfile,"# seq\tlength\tsize\ttime\tspeed\n");
fclose(logfile);
}
fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",sequenceQuorum,exampleCount); fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",sequenceQuorum,exampleCount);
strictprimers = initCountTable(NULL,options->primer_length, strictprimers = initCountTable(NULL,options->primer_length,
options->circular, options->circular,
options->doublestrand, options->doublestrand,
0, NULL);
NULL,NULL,0);
getrusage(RUSAGE_SELF,&start);
for (i=0;i<seqdbsize;i++) for (i=0;i<seqdbsize;i++)
{ {
if (database[i]->isexample && database[i]->SQ_length > options->primer_length) if (database[i]->isexample)
{ {
if (strictprimers->size)
if (first)
{
strictprimers = initCountTable(strictprimers,options->primer_length,
options->circular,
options->doublestrand,
sequenceQuorum,
database[i],neededWords,neededWordCount);
first=FALSE;
}
else
{ {
uint32_t s; uint32_t s;
s = strictprimers->size; s = strictprimers->size;
@ -213,29 +114,19 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
options->doublestrand, options->doublestrand,
exampleCount, exampleCount,
sequenceQuorum, sequenceQuorum,
database[i],neededWords,neededWordCount); database[i]);
};
totallength+=database[i]->SQ_length;
getrusage(RUSAGE_SELF,&usage);
if (options->statistics)
{
asprintf(&logfilename,"ecoprimer_%d.log",getpid());
logfile = fopen(logfilename,"a");
seconde = timeval_subtract(&(usage.ru_utime),&(start.ru_utime)) +
timeval_subtract(&(usage.ru_stime),&(start.ru_stime));
fprintf(logfile,"%d\t%llu\t%lu\t%8.3f\t%8.3e\n",i,
(long long unsigned)totallength,
strictprimers->size*(sizeof(int64_t)+sizeof(int32_t)),
seconde,seconde/(double)totallength);
fclose(logfile);
} }
else
strictprimers = initCountTable(strictprimers,options->primer_length,
options->circular,
options->doublestrand,
database[i]);
} }
else else
strictprimers->outseqcount++; strictprimers->outseqcount++;
fprintf(stderr," Indexed sequences %5d/%5d : considered words %-10llu \r", fprintf(stderr," Indexed sequences %5d/%5d : considered words %-10d \r",(int32_t)i+1,(int32_t)seqdbsize,strictprimers->size);
(int32_t)i+1,(int32_t)seqdbsize,
(long long unsigned)strictprimers->size);
// DEBUG_LOG("First word : %s ==> %d",ecoUnhashWord(strictprimers->words[0],18),strictprimers->incount[0]) // DEBUG_LOG("First word : %s ==> %d",ecoUnhashWord(strictprimers->words[0],18),strictprimers->incount[0])
// DEBUG_LOG("Second word : %s ==> %d",ecoUnhashWord(strictprimers->words[1],18),strictprimers->incount[1]) // DEBUG_LOG("Second word : %s ==> %d",ecoUnhashWord(strictprimers->words[1],18),strictprimers->incount[1])
@ -248,39 +139,6 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
sizeof(word_t)*strictprimers->size, sizeof(word_t)*strictprimers->size,
"Cannot reallocate strict primer table"); "Cannot reallocate strict primer table");
if (neededWords)
ECOFREE(neededWords,"Clean needed word table");
//TR: Somehow for some primers strictcount value is extremely large hence invalid
//we need to remove these primers from the list
j = strictprimers->size+1;
for (i=0; i<strictprimers->size; i++)
{
if (strictprimers->strictcount[i] > seqdbsize)
{
if (j == (strictprimers->size+1))
j = i;
}
if (j < i && strictprimers->strictcount[i] <= seqdbsize)
{
strictprimers->words[j] = strictprimers->words[i];
strictprimers->strictcount[j] = strictprimers->strictcount[i];
j++;
}
}
if (j < strictprimers->size)
{
strictprimers->size = j;
strictprimers->strictcount = ECOREALLOC(strictprimers->strictcount,
sizeof(uint32_t)*strictprimers->size,
"Cannot reallocate strict primer count table");
strictprimers->words = ECOREALLOC(strictprimers->words,
sizeof(word_t)*strictprimers->size,
"Cannot reallocate strict primer table");
}
return strictprimers; return strictprimers;
} }

View File

@ -6,46 +6,10 @@
*/ */
#include <search.h> #include <search.h>
//void tdestroy (void *root, void (*free_node)(void *nodep));
#include "ecoprimer.h" #include "ecoprimer.h"
static int cmptaxon(const void *t1, const void* t2); static int cmptaxon(const void *t1, const void* t2);
void **tree_root = NULL;
int delete_passes = 0;
void delete_twalkaction (const void *node, VISIT order, int level)
{
switch (order)
{
case preorder:
delete_passes++;
break;
case postorder:
delete_passes++;
break;
case endorder:
delete_passes++;
break;
case leaf:
if (tree_root)
tdelete (node, tree_root,cmptaxon);
delete_passes++;
break;
}
}
void free_tree_nodes (void *tree)
{
while (1)
{
delete_passes = 0;
twalk (tree, delete_twalkaction);
if (delete_passes <= 1) break;
}
}
static int cmptaxon(const void *t1, const void* t2) static int cmptaxon(const void *t1, const void* t2)
{ {
const size_t taxid1=(size_t)t1; const size_t taxid1=(size_t)t1;
@ -71,12 +35,7 @@ int32_t counttaxon(int32_t taxid)
if (taxid==-1) if (taxid==-1)
{ {
if (taxontree) if (taxontree)
{
tree_root = (void **)&taxontree;
//free_tree_nodes (taxontree);
ECOFREE(taxontree,"Free taxon tree"); ECOFREE(taxontree,"Free taxon tree");
tree_root = NULL;
}
taxontree=NULL; taxontree=NULL;
taxoncount=0; taxoncount=0;
return 0; return 0;
@ -88,6 +47,7 @@ int32_t counttaxon(int32_t taxid)
tsearch((void*)((size_t)taxid),&taxontree,cmptaxon); tsearch((void*)((size_t)taxid),&taxontree,cmptaxon);
taxoncount++; taxoncount++;
} }
return taxoncount; return taxoncount;
} }
@ -100,12 +60,11 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
ecotx_t *tmptaxon; ecotx_t *tmptaxon;
counttaxon(-1); counttaxon(-1);
options->intaxa = 0;
for (i=0;i<seqdbsize;i++) for (i=0;i<seqdbsize;i++)
{ {
taxon = &(taxonomy->taxons->taxon[seqdb[i]->taxid]); taxon = &(taxonomy->taxons->taxon[seqdb[i]->taxid]);
seqdb[i]->isexample=isExampleTaxon(taxonomy,seqdb[i]->taxid,options); seqdb[i]->isexample=isGoodTaxon(taxonomy,seqdb[i]->taxid,options);
tmptaxon = eco_findtaxonatrank(taxon, tmptaxon = eco_findtaxonatrank(taxon,
options->taxonrankidx); options->taxonrankidx);
@ -126,7 +85,6 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
} }
counttaxon(-1); counttaxon(-1);
options->outtaxa = 0;
for (i=0;i<seqdbsize;i++) for (i=0;i<seqdbsize;i++)
{ {
@ -138,44 +96,33 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
} }
float taxonomycoverage(ppair_t pair, poptions_t options, pecodnadb_t seqdb,uint32_t seqdbsize) float taxonomycoverage(ppair_t pair, poptions_t options)
{ {
int32_t seqcount; int32_t seqcount;
int32_t i; int32_t i;
int32_t incount=0; int32_t incount=0;
int32_t outcount=0; int32_t outcount=0;
uint32_t j;
memset (pair->coveredSeqs, 0, seqdbsize*sizeof (int));
seqcount=pair->pcr.ampcount; seqcount=pair->pcr.ampcount;
counttaxon(-1); counttaxon(-1);
for (i=0; i < seqcount; i++) for (i=0; i < seqcount; i++)
if (pair->pcr.amplifias[i].sequence->isexample if (pair->pcr.amplifias[i].sequence->isexample)
&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0 )
{
incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid); incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
for (j=0; j<seqdbsize; j++)
if (pair->pcr.amplifias[i].sequence == seqdb[j])
{pair->coveredSeqs[j] = 1; break;}
}
counttaxon(-1); counttaxon(-1);
for (i=0; i < seqcount; i++) for (i=0; i < seqcount; i++)
if (!pair->pcr.amplifias[i].sequence->isexample if (!pair->pcr.amplifias[i].sequence->isexample)
&& pair->pcr.amplifias[i].sequence->ranktaxonid)
outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid); outcount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
pair->intaxa=incount; pair->intaxa=incount;
pair->outtaxa=outcount; pair->outtaxa=outcount;
pair->bc=(float)incount/options->intaxa; return (float)incount/options->intaxa;
return pair->bc;
} }
/*
static int cmpamp(const void *ampf1, const void* ampf2) static int cmpamp(const void *ampf1, const void* ampf2)
{ {
int i; int i;
@ -194,14 +141,12 @@ static int cmpamp(const void *ampf1, const void* ampf2)
{ {
incr = -1; incr = -1;
j = pampf1->length - 1; j = pampf1->length - 1;
if (pampf2->strand) if (pampf2->strand)
{ {
pampf1 = (pamptotaxon_t) ampf2; pampf1 = (pamptotaxon_t) ampf2;
pampf2 = (pamptotaxon_t) ampf1; pampf2 = (pamptotaxon_t) ampf1;
chd = 1; chd = 1;
} }
//j = pampf2->length - 1; should have been here and pampf2 instead of pampf1?
} }
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length; len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
@ -221,81 +166,20 @@ static int cmpamp(const void *ampf1, const void* ampf2)
if (pampf1->length > pampf2->length) return chd ? -1: 1; if (pampf1->length > pampf2->length) return chd ? -1: 1;
if (pampf2->length > pampf1->length) return chd ? 1: -1; if (pampf2->length > pampf1->length) return chd ? 1: -1;
return 0;
}*/
static int cmpamp(const void *ampf1, const void* ampf2)
{
int i;
char cd1;
char cd2;
int len = 0;
char *ch1;
char *ch2;
int incr1;
int incr2;
pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;
ch1 = pampf1->amplifia;
ch2 = pampf2->amplifia;
incr1 = 1;
incr2 = 1;
if (!pampf1->strand)
incr1 = -1;
if (!pampf2->strand)
incr2 = -1;
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
for (i = 0; i < len; i++)
{
cd1 = *ch1;
if (incr1 == -1)
cd1 = ecoComplementChar(*ch1);
cd2 = *ch2;
if (incr2 == -1)
cd2 = ecoComplementChar(*ch2);
if (cd1 < cd2) return -1;
if (cd2 < cd1) return 1;
ch1 += incr1;
ch2 += incr2;
}
if (pampf1->length > pampf2->length) return 1;
if (pampf2->length > pampf1->length) return -1;
return 0; return 0;
} }
void twalkaction (const void *node, VISIT order, int level) void twalkaction (const void *node, VISIT order, int level)
{ {
int32_t *taxid = (int32_t*)node; const size_t taxid=(size_t)node;
//const size_t taxid=(size_t)node; counttaxon(taxid);
//printf ("\t%d:%p, ", *taxid, node);
counttaxon(*taxid);
} }
int32_t gtxid; void taxonomyspecificity (ppair_t pair)
void twalkaction2 (const void *node, VISIT order, int level)
{ {
int32_t *pt = (int32_t *) node; uint32_t i;
gtxid = *pt;
}
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
{
uint32_t i, j;
uint32_t ampfindex = 0; uint32_t ampfindex = 0;
int32_t taxid; int32_t taxid;
uint32_t wellidentifiedcount;
void *ampftree = NULL; void *ampftree = NULL;
pamptotaxon_t pcurrentampf; pamptotaxon_t pcurrentampf;
pamptotaxon_t *ptmp; pamptotaxon_t *ptmp;
@ -306,73 +190,35 @@ void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
{ {
/*populate taxon ids tree against each unique amplifia /*populate taxon ids tree against each unique amplifia
i.e set of taxon ids for each amplifia*/ i.e set of taxon ids for each amplifia*/
if (pair->pcr.amplifias[i].sequence->isexample) ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia;
ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand;
ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length;
pcurrentampf = &ampfwithtaxtree[ampfindex];
taxid = pair->pcr.amplifias[i].sequence->ranktaxonid;
ptmp = tfind((const void*)pcurrentampf, &ampftree, cmpamp);
if (ptmp == NULL)
{ {
ampfwithtaxtree[ampfindex].amplifia = pair->pcr.amplifias[i].amplifia;
ampfwithtaxtree[ampfindex].strand = pair->pcr.amplifias[i].strand;
ampfwithtaxtree[ampfindex].length = pair->pcr.amplifias[i].length;
pcurrentampf = &ampfwithtaxtree[ampfindex]; pcurrentampf = &ampfwithtaxtree[ampfindex];
taxid = pair->pcr.amplifias[i].sequence->ranktaxonid; tsearch((void*)pcurrentampf,&ampftree,cmpamp);
ptmp = tfind((const void*)pcurrentampf, &ampftree, cmpamp); ampfindex++;
if (ptmp == NULL) }
{ else
pcurrentampf = &ampfwithtaxtree[ampfindex]; pcurrentampf = *ptmp;
tsearch((void*)pcurrentampf,&ampftree,cmpamp);
ampfindex++;
}
else
pcurrentampf = *ptmp;
if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL) if (tfind((void*)((size_t)taxid), &(pcurrentampf->taxontree), cmptaxon) == NULL)
{ {
pcurrentampf->taxoncount++; pcurrentampf->taxoncount++;
tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon); tsearch((void*)((size_t)taxid),&(pcurrentampf->taxontree),cmptaxon);
}
} }
} }
memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int)); counttaxon(-1);
//counttaxon(-1);
for (i = 0; i < ampfindex; i++) for (i = 0; i < ampfindex; i++)
{ {
if (ampfwithtaxtree[i].taxoncount > 1) if (ampfwithtaxtree[i].taxoncount > 1)
{ twalk(ampfwithtaxtree[i].taxontree, twalkaction);
//printf ("\nampfwithtaxtree[i].taxoncount: %d\n", ampfwithtaxtree[i].taxoncount);
//twalk(ampfwithtaxtree[i].taxontree, twalkaction);
}
//TR 5/9/10 - added code for well identified seqs
else if(ampfwithtaxtree[i].taxoncount == 1) /*well identified*/
{
gtxid = -1;
twalk(ampfwithtaxtree[i].taxontree, twalkaction2);
if (gtxid != -1)
{
for (j = 0; j < seqdbsize; j++)
if (seqdb[j]->ranktaxonid == gtxid
&& seqdb[j]->isexample
&&(pair->p1->directCount[j] > 0
|| pair->p1->reverseCount[j] > 0)
&& (pair->p2->directCount[j] > 0
|| pair->p2->reverseCount[j] > 0))
{
pair->wellIdentifiedSeqs[j] = 1;
}
}
}
} }
//printf ("\n");
counttaxon(-1);
wellidentifiedcount = 0;
for (j = 0; j < seqdbsize; j++)
if (pair->wellIdentifiedSeqs[j] == 1)
counttaxon(seqdb[j]->ranktaxonid);
wellidentifiedcount = counttaxon(-2);
//pair->notwellidentifiedtaxa = counttaxon(-2);
pair->notwellidentifiedtaxa = (pair->intaxa-wellidentifiedcount); //counttaxon(-2);
//pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa;
pair->bs = ((float)wellidentifiedcount) / (float)pair->intaxa;
pair->notwellidentifiedtaxa = counttaxon(-2);
ECOFREE (ampfwithtaxtree, "Free amplifia table"); ECOFREE (ampfwithtaxtree, "Free amplifia table");
} }

View File

@ -1,23 +0,0 @@
SOURCES = nnparams.c \
thermostats.c
SRCS=$(SOURCES)
OBJECTS= $(patsubst %.c,%.o,$(SOURCES))
LIBFILE= libthermo.a
RANLIB= ranlib
include ../global.mk
all: $(LIBFILE)
clean:
rm -rf $(OBJECTS) $(LIBFILE)
$(LIBFILE): $(OBJECTS)
ar -cr $@ $?
$(RANLIB) $@

View File

@ -1,600 +0,0 @@
/*
* nnparams.cpp
* PHunterLib
*
* Nearest Neighbor Model / Parameters
*
* Created by Tiayyba Riaz on 7/2/09.
*
*/
#include <memory.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include"nnparams.h"
double forbidden_entropy;
double nparam_GetInitialEntropy(PNNParams nparm)
{
return -5.9f+nparm->rlogc;
}
//Retrieve Enthalpy for given NN-Pair from parameter table
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
return ndH(x0,x1,y0,y1); //xx, yx are already numbers
}
//Retrieve Entropy for given NN-Pair from parameter table
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1)
{
//xx and yx are already numbers
char nx0=x0;//nparam_convertNum(x0);
char nx1=x1;//nparam_convertNum(x1);
char ny0=y0;//nparam_convertNum(y0);
char ny1=y1;//nparam_convertNum(y1);
double answer = ndS(nx0,nx1,ny0,ny1);
/*Salt correction Santalucia*/
if (nparm->saltMethod == SALT_METHOD_SANTALUCIA) {
if(nx0!=5 && 1<= nx1 && nx1<=4) {
answer += 0.5*nparm->kfac;
}
if(ny1!=5 && 1<= ny0 && ny0<=4) {
answer += 0.5*nparm->kfac;
}
}
/*Salt correction Owczarzy*/
if (nparm->saltMethod == SALT_METHOD_OWCZARZY) {
double logk = log(nparm->kplus);
answer += ndH(nx0,nx1,ny0,ny1)*((4.29 * nparm->gcContent-3.95)*0.00001*logk+ 0.0000094*logk*logk);
}
return answer;
}
/* PURPOSE: Return melting temperature TM for given entropy and enthalpy
* Assuming a one-state transition and using the formula
* TM = dH / (dS + R ln(Ct/4))
* entropy = dS + R ln Ct/4 (must already be included!)
* enthaklpy = dH
* where
* dH = enthalpy
* dS = entropy
* R = Boltzmann factor
* Ct = Strand Concentration
*
* PARAMETERS:
* entrypy and enthalpy
*
* RETURN VALUE:
* temperature
*/
double nparam_CalcTM(double entropy,double enthalpy)
{
double tm = 0; // absolute zero - return if model fails!
if (enthalpy>=forbidden_enthalpy) //||(entropy==-cfact))
return 0;
if (entropy<0) // avoid division by zero and model errors!
{
tm = enthalpy/entropy;// - kfac; //LKFEB
if (tm<0)
return 0;
}
return tm;
}
void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm)
{
nparm->Ct1 = c1;
nparm->Ct2 = c2;
nparm->kplus = kp;
int maxCT = 1;
if(nparm->Ct2 > nparm->Ct1)
{
maxCT = 2;
}
double ctFactor;
if(nparm->Ct1 == nparm->Ct2)
{
ctFactor = nparm->Ct1/2;
}
else if (maxCT == 1)
{
ctFactor = nparm->Ct1-nparm->Ct2/2;
}
else
{
ctFactor = nparm->Ct2-nparm->Ct1/2;
}
nparm->rlogc = R * log(ctFactor);
forbidden_entropy = nparm->rlogc;
nparm->kfac = 0.368 * log (nparm->kplus);
nparm->saltMethod = sm;
int x,y,a,b; // variables used as counters...
// Set all parameters to zero!
memset(nparm->dH,0,sizeof(nparm->dH));
memset(nparm->dS,0,sizeof(nparm->dS));
// Set all X-/Y-, -X/Y- and X-/-Y so, that TM will be VERY small!
for (x=1;x<=4;x++)
{
for (y=1;y<=4;y++)
{
ndH(0,x,y,0)=forbidden_enthalpy;
ndS(0,x,y,0)=forbidden_entropy;
ndH(x,0,0,y)=forbidden_enthalpy;
ndS(x,0,0,y)=forbidden_entropy;
ndH(x,0,y,0)=forbidden_enthalpy;
ndS(x,0,y,0)=forbidden_entropy;
// forbid X-/Y$ and X$/Y- etc., i.e. terminal must not be paired with gap!
ndH(x,5,y,0)=forbidden_enthalpy;
ndS(x,5,y,0)=forbidden_entropy;
ndH(x,0,y,5)=forbidden_enthalpy;
ndS(x,0,y,5)=forbidden_entropy;
ndH(5,x,0,y)=forbidden_enthalpy;
ndS(5,x,0,y)=forbidden_entropy;
ndH(0,x,5,y)=forbidden_enthalpy;
ndS(0,x,5,y)=forbidden_entropy;
// forbid X$/-Y etc.
ndH(x,5,0,y)=forbidden_enthalpy;
ndS(x,5,0,y)=forbidden_entropy;
ndH(x,0,5,y)=forbidden_enthalpy;
ndS(x,0,5,y)=forbidden_entropy;
ndH(5,x,y,0)=forbidden_enthalpy;
ndS(5,x,y,0)=forbidden_entropy;
ndH(0,x,y,5)=forbidden_enthalpy;
ndS(0,x,y,5)=forbidden_entropy;
}
// also, forbid x-/-- and --/x-, i.e. no two inner gaps paired
ndH(x,0,0,0)=forbidden_enthalpy;
ndS(x,0,0,0)=forbidden_entropy;
ndH(0,0,x,0)=forbidden_enthalpy;
ndS(0,0,x,0)=forbidden_entropy;
// x-/-$
ndH(x,0,0,5)=forbidden_enthalpy;
ndS(x,0,0,5)=forbidden_entropy;
ndH(5,0,0,x)=forbidden_enthalpy;
ndS(5,0,0,x)=forbidden_entropy;
ndH(0,5,x,0)=forbidden_enthalpy;
ndS(x,0,0,5)=forbidden_entropy;
ndH(0,x,5,0)=forbidden_enthalpy;
ndS(0,x,5,0)=forbidden_entropy;
}
// forbid --/--
ndH(0,0,0,0)=forbidden_enthalpy;
ndS(0,0,0,0)=forbidden_entropy;
ndH(5,0,0,0)=forbidden_enthalpy;
ndS(5,0,0,0)=forbidden_entropy;
ndH(0,0,5,0)=forbidden_enthalpy;
ndS(0,0,5,0)=forbidden_entropy;
ndH(0,5,5,0)=forbidden_enthalpy;
ndS(0,5,5,0)=forbidden_entropy;
// Interior loops (double Mismatches)
#define iloop_entropy -0.97f
#define iloop_enthalpy 0.0f
for (x=1; x<=4; x++)
for (y=1; y<=4; y++)
for (a=1; a<=4; a++)
for (b=1; b<=4; b++)
// AT and CG pair, and as A=1, C=2, G=3, T=4 this means
// we have Watson-Crick pairs if (x+a==5) and (y+b)==5.
if (!((x+a==5)||(y+b==5)))
{
// No watson-crick-pair, i.e. double mismatch!
// set enthalpy/entropy to loop expansion!
ndH(x,y,a,b) = iloop_enthalpy;
ndS(x,y,a,b) = iloop_entropy;
}
// xy/-- and --/xy (Bulge Loops of size > 1)
#define bloop_entropy -1.3f
#define bloop_enthalpy 0.0f
for (x=1; x<=4; x++)
for (y=1; y<=4; y++)
{
ndH(x,y,0,0) = bloop_enthalpy;
ndS(x,y,0,0) = bloop_entropy;
ndH(0,0,x,y) = bloop_enthalpy;
ndS(0,0,x,y) = bloop_entropy;
}
// x-/ya abd xa/y- as well as -x/ay and ax/-y
// bulge opening and closing parameters with
// adjacent matches / mismatches
// obulge_mism and cbulge_mism chosen so high to avoid
// AAAAAAAAA
// T--G----T
// being better than
// AAAAAAAAA
// TG------T
#define obulge_match_H (-2.66f * 1000)
#define obulge_match_S -14.22f
#define cbulge_match_H (-2.66f * 1000)
#define cbulge_match_S -14.22f
#define obulge_mism_H (0.0f * 1000)
#define obulge_mism_S -6.45f
#define cbulge_mism_H 0.0f
#define cbulge_mism_S -6.45f
for (x=1; x<=4; x++)
for (y=1; y<=4; y++)
for (a=1; a<=4; a++)
{
if (x+y==5) // other base pair matches!
{
ndH(x,0,y,a)=obulge_match_H; // bulge opening
ndS(x,0,y,a)=obulge_match_S;
ndH(x,a,y,0)=obulge_match_H;
ndS(x,a,y,0)=obulge_match_S;
ndH(0,x,a,y)=cbulge_match_H; // bulge closing
ndS(0,x,a,y)=cbulge_match_S;
ndH(a,x,0,y)=cbulge_match_H;
ndS(a,x,0,y)=cbulge_match_S;
}
else
{ // mismatch in other base pair!
ndH(x,0,y,a)=obulge_mism_H; // bulge opening
ndS(x,0,y,a)=obulge_mism_S;
ndH(x,a,y,0)=obulge_mism_H;
ndS(x,a,y,0)=obulge_mism_S;
ndH(0,x,a,y)=cbulge_mism_H; // bulge closing
ndS(0,x,a,y)=cbulge_mism_S;
ndH(a,x,0,y)=cbulge_mism_H;
ndS(a,x,0,y)=cbulge_mism_S;
}
}
// Watson-Crick pairs (note that only ten are unique, as obviously
// 5'-AG-3'/3'-TC-5' = 5'-CT-3'/3'-GA-5' etc.
ndH(1,1,4,4)=-7.6f*1000; ndS(1,1,4,4)=-21.3f; // AA/TT 04
ndH(1,2,4,3)=-8.4f*1000; ndS(1,2,4,3)=-22.4f; // AC/TG adapted GT/CA
ndH(1,3,4,2)=-7.8f*1000; ndS(1,3,4,2)=-21.0f; // AG/TC adapted CT/GA
ndH(1,4,4,1)=-7.2f*1000; ndS(1,4,4,1)=-20.4f; // AT/TA 04
ndH(2,1,3,4)=-8.5f*1000; ndS(2,1,3,4)=-22.7f; // CA/GT 04
ndH(2,2,3,3)=-8.0f*1000; ndS(2,2,3,3)=-19.9f; // CC/GG adapted GG/CC
ndH(2,3,3,2)=-10.6f*1000; ndS(2,3,3,2)=-27.2f; // CG/GC 04
ndH(2,4,3,1)=-7.8f*1000; ndS(2,4,3,1)=-21.0f; // CT/GA 04
ndH(3,1,2,4)=-8.2f*1000; ndS(3,1,2,4)=-22.2f; // GA/CT 04
ndH(3,2,2,3)=-9.8f*1000; ndS(3,2,2,3)=-24.4f; // GC/CG 04
ndH(3,3,2,2)=-8.0f*1000; ndS(3,3,2,2)=-19.9f; // GG/CC 04
ndH(3,4,2,1)=-8.4f*1000; ndS(3,4,2,1)=-22.4f; // GT/CA 04
ndH(4,1,1,4)=-7.2f*1000; ndS(4,1,1,4)=-21.3f; // TA/AT 04
ndH(4,2,1,3)=-8.2f*1000; ndS(4,2,1,3)=-22.2f; // TC/AG adapted GA/CT
ndH(4,3,1,2)=-8.5f*1000; ndS(4,3,1,2)=-22.7f; // TG/AC adapted CA/GT
ndH(4,4,1,1)=-7.6f*1000; ndS(4,4,1,1)=-21.3f; // TT/AA adapted AA/TT
// A-C Mismatches (Values for pH 7.0)
ndH(1,1,2,4)=7.6f*1000; ndS(1,1,2,4)=20.2f; // AA/CT
ndH(1,1,4,2)=2.3f*1000; ndS(1,1,4,2)=4.6f; // AA/TC
ndH(1,2,2,3)=-0.7f*1000; ndS(1,2,2,3)=-3.8f; // AC/CG
ndH(1,2,4,1)=5.3f*1000; ndS(1,2,4,1)=14.6f; // AC/TA
ndH(1,3,2,2)=0.6f*1000; ndS(1,3,2,2)=-0.6f; // AG/CC
ndH(1,4,2,1)=5.3f*1000; ndS(1,4,2,1)=14.6f; // AT/CA
ndH(2,1,1,4)=3.4f*1000; ndS(2,1,1,4)=8.0f; // CA/AT
ndH(2,1,3,2)=1.9f*1000; ndS(2,1,3,2)=3.7f; // CA/GC
ndH(2,2,1,3)=5.2f*1000; ndS(2,2,1,3)=14.2f; // CC/AG
ndH(2,2,3,1)=0.6f*1000; ndS(2,2,3,1)=-0.6f; // CC/GA
ndH(2,3,1,2)=1.9f*1000; ndS(2,3,1,2)=3.7f; // CG/AC
ndH(2,4,1,1)=2.3f*1000; ndS(2,4,1,1)=4.6f; // CT/AA
ndH(3,1,2,2)=5.2f*1000; ndS(3,1,2,2)=14.2f; // GA/CC
ndH(3,2,2,1)=-0.7f*1000; ndS(3,2,2,1)=-3.8f; // GC/CA
ndH(4,1,1,2)=3.4f*1000; ndS(4,1,1,2)=8.0f; // TA/AC
ndH(4,2,1,1)=7.6f*1000; ndS(4,2,1,1)=20.2f; // TC/AA
// C-T Mismatches
ndH(1,2,4,4)=0.7f*1000; ndS(1,2,4,4)=0.2f; // AC/TT
ndH(1,4,4,2)=-1.2f*1000; ndS(1,4,4,2)=-6.2f; // AT/TC
ndH(2,1,4,4)=1.0f*1000; ndS(2,1,4,4)=0.7f; // CA/TT
ndH(2,2,3,4)=-0.8f*1000; ndS(2,2,3,4)=-4.5f; // CC/GT
ndH(2,2,4,3)=5.2f*1000; ndS(2,2,4,3)=13.5f; // CC/TG
ndH(2,3,4,2)=-1.5f*1000; ndS(2,3,4,2)=-6.1f; // CG/TC
ndH(2,4,3,2)=-1.5f*1000; ndS(2,4,3,2)=-6.1f; // CT/GC
ndH(2,4,4,1)=-1.2f*1000; ndS(2,4,4,1)=-6.2f; // CT/TA
ndH(3,2,2,4)=2.3f*1000; ndS(3,2,2,4)=5.4f; // GC/CT
ndH(3,4,2,2)=5.2f*1000; ndS(3,4,2,2)=13.5f; // GT/CC
ndH(4,1,2,4)=1.2f*1000; ndS(4,1,2,4)=0.7f; // TA/CT
ndH(4,2,2,3)=2.3f*1000; ndS(4,2,2,3)=5.4f; // TC/CG
ndH(4,2,1,4)=1.2f*1000; ndS(4,2,1,4)=0.7f; // TC/AT
ndH(4,3,2,2)=-0.8f*1000; ndS(4,3,2,2)=-4.5f; // TG/CC
ndH(4,4,2,1)=0.7f*1000; ndS(4,4,2,1)=0.2f; // TT/CA
ndH(4,4,1,2)=1.0f*1000; ndS(4,4,1,2)=0.7f; // TT/AC
// G-A Mismatches
ndH(1,1,3,4)=3.0f*1000; ndS(1,1,3,4)=7.4f; // AA/GT
ndH(1,1,4,3)=-0.6f*1000; ndS(1,1,4,3)=-2.3f; // AA/TG
ndH(1,2,3,3)=0.5f*1000; ndS(1,2,3,3)=3.2f; // AC/GG
ndH(1,3,3,2)=-4.0f*1000; ndS(1,3,3,2)=-13.2f; // AG/GC
ndH(1,3,4,1)=-0.7f*1000; ndS(1,3,4,1)=-2.3f; // AG/TA
ndH(1,4,3,1)=-0.7f*1000; ndS(1,4,3,1)=-2.3f; // AT/GA
ndH(2,1,3,3)=-0.7f*1000; ndS(2,1,3,3)=-2.3f; // CA/GG
ndH(2,3,3,1)=-4.0f*1000; ndS(2,3,3,1)=-13.2f; // CG/GA
ndH(3,1,1,4)=0.7f*1000; ndS(3,1,1,4)=0.7f; // GA/AT
ndH(3,1,2,3)=-0.6f*1000; ndS(3,1,2,3)=-1.0f; // GA/CG
ndH(3,2,1,3)=-0.6f*1000; ndS(3,2,1,3)=-1.0f; // GC/AG
ndH(3,3,1,2)=-0.7f*1000; ndS(3,3,1,2)=-2.3f; // GG/AC
ndH(3,3,2,1)=0.5f*1000; ndS(3,3,2,1)=3.2f; // GG/CA
ndH(3,4,1,1)=-0.6f*1000; ndS(3,4,1,1)=-2.3f; // GT/AA
ndH(4,1,1,3)=0.7f*1000; ndS(4,1,1,3)=0.7f; // TA/AG
ndH(4,3,1,1)=3.0f*1000; ndS(4,3,1,1)=7.4f; // TG/AA
// G-T Mismatches
ndH(1,3,4,4)=1.0f*1000; ndS(1,3,4,4)=0.9f; // AG/TT
ndH(1,4,4,3)=-2.5f*1000; ndS(1,4,4,3)=-8.3f; // AT/TG
ndH(2,3,3,4)=-4.1f*1000; ndS(2,3,3,4)=-11.7f; // CG/GT
ndH(2,4,3,3)=-2.8f*1000; ndS(2,4,3,3)=-8.0f; // CT/GG
ndH(3,1,4,4)=-1.3f*1000; ndS(3,1,4,4)=-5.3f; // GA/TT
ndH(3,2,4,3)=-4.4f*1000; ndS(3,2,4,3)=-12.3f; // GC/TG
ndH(3,3,2,4)=3.3f*1000; ndS(3,3,2,4)=10.4f; // GG/CT
ndH(3,3,4,2)=-2.8f*1000; ndS(3,3,4,2)=-8.0f; // GG/TC
// ndH(3,3,4,4)=5.8f*1000; ndS(3,3,4,4)=16.3f; // GG/TT
ndH(3,4,2,3)=-4.4f*1000; ndS(3,4,2,3)=-12.3f; // GT/CG
ndH(3,4,4,1)=-2.5f*1000; ndS(3,4,4,1)=-8.3f; // GT/TA
// ndH(3,4,4,3)=4.1f*1000; ndS(3,4,4,3)=9.5f; // GT/TG
ndH(4,1,3,4)=-0.1f*1000; ndS(4,1,3,4)=-1.7f; // TA/GT
ndH(4,2,3,3)=3.3f*1000; ndS(4,2,3,3)=10.4f; // TC/GG
ndH(4,3,1,4)=-0.1f*1000; ndS(4,3,1,4)=-1.7f; // TG/AT
ndH(4,3,3,2)=-4.1f*1000; ndS(4,3,3,2)=-11.7f; // TG/GC
// ndH(4,3,3,4)=-1.4f*1000; ndS(4,3,3,4)=-6.2f; // TG/GT
ndH(4,4,1,3)=-1.3f*1000; ndS(4,4,1,3)=-5.3f; // TT/AG
ndH(4,4,3,1)=1.0f*1000; ndS(4,4,3,1)=0.9f; // TT/GA
// ndH(4,4,3,3)=5.8f*1000; ndS(4,4,3,3)=16.3f; // TT/GG
// A-A Mismatches
ndH(1,1,1,4)=4.7f*1000; ndS(1,1,1,4)=12.9f; // AA/AT
ndH(1,1,4,1)=1.2f*1000; ndS(1,1,4,1)=1.7f; // AA/TA
ndH(1,2,1,3)=-2.9f*1000; ndS(1,2,1,3)=-9.8f; // AC/AG
ndH(1,3,1,2)=-0.9f*1000; ndS(1,3,1,2)=-4.2f; // AG/AC
ndH(1,4,1,1)=1.2f*1000; ndS(1,4,1,1)=1.7f; // AT/AA
ndH(2,1,3,1)=-0.9f*1000; ndS(2,1,3,1)=-4.2f; // CA/GA
ndH(3,1,2,1)=-2.9f*1000; ndS(3,1,2,1)=-9.8f; // GA/CA
ndH(4,1,1,1)=4.7f*1000; ndS(4,1,1,1)=12.9f; // TA/AA
// C-C Mismatches
ndH(1,2,4,2)=0.0f*1000; ndS(1,2,4,2)=-4.4f; // AC/TC
ndH(2,1,2,4)=6.1f*1000; ndS(2,1,2,4)=16.4f; // CA/CT
ndH(2,2,2,3)=3.6f*1000; ndS(2,2,2,3)=8.9f; // CC/CG
ndH(2,2,3,2)=-1.5f*1000; ndS(2,2,3,2)=-7.2f; // CC/GC
ndH(2,3,2,2)=-1.5f*1000; ndS(2,3,2,2)=-7.2f; // CG/CC
ndH(2,4,2,1)=0.0f*1000; ndS(2,4,2,1)=-4.4f; // CT/CA
ndH(3,2,2,2)=3.6f*1000; ndS(3,2,2,2)=8.9f; // GC/CC
ndH(4,2,1,2)=6.1f*1000; ndS(4,2,1,2)=16.4f; // TC/AC
// G-G Mismatches
ndH(1,3,4,3)=-3.1f*1000; ndS(1,3,4,3)=-9.5f; // AG/TG
ndH(2,3,3,3)=-4.9f*1000; ndS(2,3,3,3)=-15.3f; // CG/GG
ndH(3,1,3,4)=1.6f*1000; ndS(3,1,3,4)=3.6f; // GA/GT
ndH(3,2,3,3)=-6.0f*1000; ndS(3,2,3,3)=-15.8f; // GC/GG
ndH(3,3,2,3)=-6.0f*1000; ndS(3,3,2,3)=-15.8f; // GG/CG
ndH(3,3,3,2)=-4.9f*1000; ndS(3,3,3,2)=-15.3f; // GG/GC
ndH(3,4,3,1)=-3.1f*1000; ndS(3,4,3,1)=-9.5f; // GT/GA
ndH(4,3,1,3)=1.6f*1000; ndS(4,3,1,3)=3.6f; // TG/AG
// T-T Mismatches
ndH(1,4,4,4)=-2.7f*1000; ndS(1,4,4,4)=-10.8f; // AT/TT
ndH(2,4,3,4)=-5.0f*1000; ndS(2,4,3,4)=-15.8f; // CT/GT
ndH(3,4,2,4)=-2.2f*1000; ndS(3,4,2,4)=-8.4f; // GT/CT
ndH(4,1,4,4)=0.2f*1000; ndS(4,1,4,4)=-1.5f; // TA/TT
ndH(4,2,4,3)=-2.2f*1000; ndS(4,2,4,3)=-8.4f; // TC/TG
ndH(4,3,4,2)=-5.0f*1000; ndS(4,3,4,2)=-15.8f; // TG/TC
ndH(4,4,1,4)=0.2f*1000; ndS(4,4,1,4)=-1.5f; // TT/AT
ndH(4,4,4,1)=-2.7f*1000; ndS(4,4,4,1)=-10.8f; // TT/TA
// Dangling Ends
ndH(5,1,1,4)=-0.7f*1000; ndS(5,1,1,4)=-0.8f; // $A/AT
ndH(5,1,2,4)=4.4f*1000; ndS(5,1,2,4)=14.9f; // $A/CT
ndH(5,1,3,4)=-1.6f*1000; ndS(5,1,3,4)=-3.6f; // $A/GT
ndH(5,1,4,4)=2.9f*1000; ndS(5,1,4,4)=10.4f; // $A/TT
ndH(5,2,1,3)=-2.1f*1000; ndS(5,2,1,3)=-3.9f; // $C/AG
ndH(5,2,2,3)=-0.2f*1000; ndS(5,2,2,3)=-0.1f; // $C/CG
ndH(5,2,3,3)=-3.9f*1000; ndS(5,2,3,3)=-11.2f; // $C/GG
ndH(5,2,4,3)=-4.4f*1000; ndS(5,2,4,3)=-13.1f; // $C/TG
ndH(5,3,1,2)=-5.9f*1000; ndS(5,3,1,2)=-16.5f; // $G/AC
ndH(5,3,2,2)=-2.6f*1000; ndS(5,3,2,2)=-7.4f; // $G/CC
ndH(5,3,3,2)=-3.2f*1000; ndS(5,3,3,2)=-10.4f; // $G/GC
ndH(5,3,4,2)=-5.2f*1000; ndS(5,3,4,2)=-15.0f; // $G/TC
ndH(5,4,1,1)=-0.5f*1000; ndS(5,4,1,1)=-1.1f; // $T/AA
ndH(5,4,2,1)=4.7f*1000; ndS(5,4,2,1)=14.2f; // $T/CA
ndH(5,4,3,1)=-4.1f*1000; ndS(5,4,3,1)=-13.1f; // $T/GA
ndH(5,4,4,1)=-3.8f*1000; ndS(5,4,4,1)=-12.6f; // $T/TA
ndH(1,5,4,1)=-2.9f*1000; ndS(1,5,4,1)=-7.6f; // A$/TA
ndH(1,5,4,2)=-4.1f*1000; ndS(1,5,4,2)=-13.0f; // A$/TC
ndH(1,5,4,3)=-4.2f*1000; ndS(1,5,4,3)=-15.0f; // A$/TG
ndH(1,5,4,4)=-0.2f*1000; ndS(1,5,4,4)=-0.5f; // A$/TT
ndH(1,1,5,4)=0.2f*1000; ndS(1,1,5,4)=2.3f; // AA/$T
ndH(1,1,4,5)=-0.5f*1000; ndS(1,1,4,5)=-1.1f; // AA/T$
ndH(1,2,5,3)=-6.3f*1000; ndS(1,2,5,3)=-17.1f; // AC/$G
ndH(1,2,4,5)=4.7f*1000; ndS(1,2,4,5)=14.2f; // AC/T$
ndH(1,3,5,2)=-3.7f*1000; ndS(1,3,5,2)=-10.0f; // AG/$C
ndH(1,3,4,5)=-4.1f*1000; ndS(1,3,4,5)=-13.1f; // AG/T$
ndH(1,4,5,1)=-2.9f*1000; ndS(1,4,5,1)=-7.6f; // AT/$A
ndH(1,4,4,5)=-3.8f*1000; ndS(1,4,4,5)=-12.6f; // AT/T$
ndH(2,5,3,1)=-3.7f*1000; ndS(2,5,3,1)=-10.0f; // C$/GA
ndH(2,5,3,2)=-4.0f*1000; ndS(2,5,3,2)=-11.9f; // C$/GC
ndH(2,5,3,3)=-3.9f*1000; ndS(2,5,3,3)=-10.9f; // C$/GG
ndH(2,5,3,4)=-4.9f*1000; ndS(2,5,3,4)=-13.8f; // C$/GT
ndH(2,1,5,4)=0.6f*1000; ndS(2,1,5,4)=3.3f; // CA/$T
ndH(2,1,3,5)=-5.9f*1000; ndS(2,1,3,5)=-16.5f; // CA/G$
ndH(2,2,5,3)=-4.4f*1000; ndS(2,2,5,3)=-12.6f; // CC/$G
ndH(2,2,3,5)=-2.6f*1000; ndS(2,2,3,5)=-7.4f; // CC/G$
ndH(2,3,5,2)=-4.0f*1000; ndS(2,3,5,2)=-11.9f; // CG/$C
ndH(2,3,3,5)=-3.2f*1000; ndS(2,3,3,5)=-10.4f; // CG/G$
ndH(2,4,5,1)=-4.1f*1000; ndS(2,4,5,1)=-13.0f; // CT/$A
ndH(2,4,3,5)=-5.2f*1000; ndS(2,4,3,5)=-15.0f; // CT/G$
ndH(3,5,2,1)=-6.3f*1000; ndS(3,5,2,1)=-17.1f; // G$/CA
ndH(3,5,2,2)=-4.4f*1000; ndS(3,5,2,2)=-12.6f; // G$/CC
ndH(3,5,2,3)=-5.1f*1000; ndS(3,5,2,3)=-14.0f; // G$/CG
ndH(3,5,2,4)=-4.0f*1000; ndS(3,5,2,4)=-10.9f; // G$/CT
ndH(3,1,5,4)=-1.1f*1000; ndS(3,1,5,4)=-1.6f; // GA/$T
ndH(3,1,2,5)=-2.1f*1000; ndS(3,1,2,5)=-3.9f; // GA/C$
ndH(3,2,5,3)=-5.1f*1000; ndS(3,2,5,3)=-14.0f; // GC/$G
ndH(3,2,2,5)=-0.2f*1000; ndS(3,2,2,5)=-0.1f; // GC/C$
ndH(3,3,5,2)=-3.9f*1000; ndS(3,3,5,2)=-10.9f; // GG/$C
ndH(3,3,2,5)=-3.9f*1000; ndS(3,3,2,5)=-11.2f; // GG/C$
ndH(3,4,5,1)=-4.2f*1000; ndS(3,4,5,1)=-15.0f; // GT/$A
ndH(3,4,2,5)=-4.4f*1000; ndS(3,4,2,5)=-13.1f; // GT/C$
ndH(4,5,1,1)=0.2f*1000; ndS(4,5,1,1)=2.3f; // T$/AA
ndH(4,5,1,2)=0.6f*1000; ndS(4,5,1,2)=3.3f; // T$/AC
ndH(4,5,1,3)=-1.1f*1000; ndS(4,5,1,3)=-1.6f; // T$/AG
ndH(4,5,1,4)=-6.9f*1000; ndS(4,5,1,4)=-20.0f; // T$/AT
ndH(4,1,5,4)=-6.9f*1000; ndS(4,1,5,4)=-20.0f; // TA/$T
ndH(4,1,1,5)=-0.7f*1000; ndS(4,1,1,5)=-0.7f; // TA/A$
ndH(4,2,5,3)=-4.0f*1000; ndS(4,2,5,3)=-10.9f; // TC/$G
ndH(4,2,1,5)=4.4f*1000; ndS(4,2,1,5)=14.9f; // TC/A$
ndH(4,3,5,2)=-4.9f*1000; ndS(4,3,5,2)=-13.8f; // TG/$C
ndH(4,3,1,5)=-1.6f*1000; ndS(4,3,1,5)=-3.6f; // TG/A$
ndH(4,4,5,1)=-0.2f*1000; ndS(4,4,5,1)=-0.5f; // TT/$A
ndH(4,4,1,5)=2.9f*1000; ndS(4,4,1,5)=10.4f; // TT/A$
return;
}
int nparam_CountGCContent(char * seq ) {
int lseq = strlen(seq);
int k;
double count = 0;
for( k=0;k<lseq;k++) {
if (seq[k] == 'G' || seq[k] == 'C' ) {
count+=1;
}
}
return count;
}
void nparam_CleanSeq (char* inseq, char* outseq, int len)
{
int seqlen = strlen (inseq);
int i, j;
if (len != 0)
seqlen = len;
outseq[0]='x';
for (i = 0, j = 0; i < seqlen && outseq[0]; i++,j++)
{
switch (inseq[i])
{
case 'a':
case '\0':
case 'A':
outseq[j] = 'A'; break;
case 'c':
case '\1':
case 'C':
outseq[j] = 'C'; break;
case 'g':
case '\2':
case 'G':
outseq[j] = 'G'; break;
case 't':
case '\3':
case 'T':
outseq[j] = 'T'; break;
default:
outseq[0]=0;
}
}
outseq[j] = '\0';
}
//Calculate TM for given sequence against its complement
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len)
{
double thedH = 0;
//double thedS = nparam_GetInitialEntropy(nparm);
double thedS = -5.9f+nparm->rlogc;
double mtemp;
char c1;
char c2;
char c3;
char c4;
unsigned int i;
char nseq[50];
char *useq = seq;
nparam_CleanSeq (seq, nseq, len);
useq = nseq;
for ( i=1;i<len;i++)
{
c1 = GETREVCODE(useq[i-1]); //nparam_getComplement(seq[i-1],1);
c2 = GETREVCODE(useq[i]); //nparam_getComplement(seq[i],1);
c3 = GETNUMCODE(useq[i-1]);
c4 = GETNUMCODE(useq[i]);
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
//printf("------------------\n");
mtemp = nparam_CalcTM(thedS,thedH);
//fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s rloc=%f\n", thedH, thedS, useq, nparm->rlogc);
//exit (0);
return mtemp;
}
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len)
{
double thedH = 0;
//double thedS = nparam_GetInitialEntropy(nparm);
double thedS = -5.9f+nparm->rlogc;
double mtemp;
char c1;
char c2;
char c3;
char c4;
unsigned int i;
char nseq1[50];
char nseq2[50];
char *useq1;
char *useq2;
nparam_CleanSeq (seq1, nseq1, len);
useq1 = nseq1;
nparam_CleanSeq (seq2, nseq2, len);
useq2 = nseq2;
//fprintf (stderr,"Primer : %s\n",useq);
for ( i=1;i<len;i++)
{
c1 = GETREVCODE(useq2[i-1]); //nparam_getComplement(seq[i-1],1);
c2 = GETREVCODE(useq2[i]); //nparam_getComplement(seq[i],1);
c3 = GETNUMCODE(useq1[i-1]);
c4 = GETNUMCODE(useq1[i]);
//fprintf (stderr,"Primer : %s %f %f %d %d, %d %d %f\n",useq,thedH,thedS,(int)c3,(int)c4,(int)c1,(int)c2,nparam_GetEnthalpy(nparm, c3,c4,c1,c2));
thedH += nparm->dH[c3][c4][c1][c2];//nparam_GetEnthalpy(nparm, c3,c4,c1,c2);
thedS += nparam_GetEntropy(nparm, c3,c4,c1,c2);
}
//fprintf(stderr,"------------------\n");
mtemp = nparam_CalcTM(thedS,thedH);
//if (mtemp == 0)
//{
// fprintf(stderr,"Enthalpy: %f, entropy: %f, seq: %s\n", thedH, thedS, useq);
//exit (0);
//}
return mtemp;
}
double calculateMeltingTemperatureBasic (char * seq) {
int gccount;
double temp;
int seqlen;
seqlen = strlen (seq);
gccount = nparam_CountGCContent (seq);
temp = 64.9 + 41*(gccount - 16.4)/seqlen;
return temp;
}

View File

@ -1,72 +0,0 @@
/*
* nnparams.h
* PHunterLib
*
* Nearest Neighbor Model Parameters
*
* Created by Tiayyba Riaz on 02/07/09.
*
*/
#ifndef NNPARAMS_H_
#define NNPARAMS_H_
#include <math.h>
#include <string.h>
//#include "../libecoprimer/ecoprimer.h"
// following defines to simplify coding...
#define ndH(a,b,c,d) nparm->dH[a][b][c][d]
#define ndS(a,b,c,d) nparm->dS[a][b][c][d]
#define forbidden_enthalpy 1000000000000000000.0f
#define R 1.987f
#define SALT_METHOD_SANTALUCIA 1
#define SALT_METHOD_OWCZARZY 2
#define DEF_CONC_PRIMERS 0.0000008
#define DEF_CONC_SEQUENCES 0
#define DEF_SALT 0.05
#define GETNUMCODE(a) bpencoder[a - 'A']
#define GETREVCODE(a) 5-bpencoder[a - 'A']
extern double forbidden_entropy;
static char bpencoder[] = { 1, // A
0, // b
2, // C
0,0,0, // d, e, f
3, // G
0,0,0,0,0,0,0,0,0,0,0,0, // h,i,j,k,l,m,n,o,p,q,r,s
4,0, // T,U
0,0,0,0,0}; // v,w,x,y,z
typedef struct CNNParams_st
{
double Ct1;
double Ct2;
double rlogc;
double kplus;
double kfac;
int saltMethod;
double gcContent;
double new_TM;
double dH[6][6][6][6]; // A-C-G-T + gap + initiation (dangling end, $ sign)
double dS[6][6][6][6];
}CNNParams, * PNNParams;
void nparam_InitParams(PNNParams nparm, double c1, double c2, double kp, int sm);
int nparam_CountGCContent(char * seq );
double nparam_GetEntropy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_GetEnthalpy(PNNParams nparm, char x0, char x1, char y0, char y1);
double nparam_CalcTM(double entropy,double enthalpy);
double nparam_CalcSelfTM(PNNParams nparm, char* seq, int len);
double nparam_CalcTwoTM(PNNParams nparm, char* seq1, char* seq2, int len);
double nparam_GetInitialEntropy(PNNParams nparm) ;
double calculateMeltingTemperatureBasic (char * seq);
//void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
#endif

View File

@ -1,115 +0,0 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include "thermostats.h"
word_t extractSite(char* sequence, size_t begin, size_t length, bool_t strand)
{
char *c;
char *start;
uint32_t l;
word_t site = 0;
start=sequence+begin;
if (!strand)
start+=length-1;
for (c=start,
l=0;
l<length;
l++,
c+=(strand)? 1:-1)
site = (site << 2) | ((strand)? (*c):(~*c)&3);
return site;
}
void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options)
{
size_t i, j,k,l;
uint32_t bp1,bp2;
uint32_t ep1,ep2;
word_t w1;
word_t w2;
bool_t strand;
char *sq,*sq1,*sq2,*c;
char prmrd[50];
char prmrr[50];
char sqsite[50];
double mtemp;
for (i = 0; i < count; i++)
{
w1 = pairs[i]->p1->word;
w2 = pairs[i]->p2->word;
if (!pairs[i]->asdirect1)
w1=ecoComplementWord(w1,options->primer_length);
if (!pairs[i]->asdirect2)
w2=ecoComplementWord(w2,options->primer_length);
strncpy(prmrd,ecoUnhashWord(w1, options->primer_length),options->primer_length);
strncpy(prmrr,ecoUnhashWord(w2, options->primer_length),options->primer_length);
prmrd[options->primer_length]=0;
prmrr[options->primer_length]=0;
pairs[i]->p1temp = nparam_CalcSelfTM (options->pnparm, prmrd, options->primer_length) - 273.0;
pairs[i]->p2temp = nparam_CalcSelfTM (options->pnparm, prmrr, options->primer_length) - 273.0;
pairs[i]->p1mintemp = 100;
pairs[i]->p2mintemp = 100;
for (j = 0; j < pairs[i]->pcr.ampcount; j++)
if (pairs[i]->pcr.amplifias[j].sequence->isexample)
{
sq = pairs[i]->pcr.amplifias[j].sequence->SQ;
strand = pairs[i]->pcr.amplifias[j].strand;
bp1 = pairs[i]->pcr.amplifias[j].begin - options->primer_length;
bp2 = pairs[i]->pcr.amplifias[j].end + 1;
if (!strand)
{
uint32_t tmp;
tmp=bp1;
bp1=bp2;
bp2=tmp;
}
// printf("%s : %s, %c",prmrd,
// ecoUnhashWord(extractSite(sq,bp1,options->primer_length,strand),options->primer_length),
// "rd"[strand]);
mtemp = nparam_CalcTwoTM(options->pnparm,
prmrd,
ecoUnhashWord(extractSite(sq,bp1,options->primer_length,strand),options->primer_length),
options->primer_length) - 273.0;
// printf(" %4.2f %4.2f\n",pairs[i]->p1temp,mtemp);
if (mtemp < pairs[i]->p1mintemp)
pairs[i]->p1mintemp = mtemp;
// printf("%s : %s, %c\n",prmrr,ecoUnhashWord(extractSite(sq,bp2,options->primer_length,!strand),options->primer_length),
// "rd"[strand]);
//
mtemp = nparam_CalcTwoTM(options->pnparm,
prmrr,
ecoUnhashWord(extractSite(sq,bp2,options->primer_length,!strand),options->primer_length),
options->primer_length) - 273.0;
if (mtemp < pairs[i]->p2mintemp)
pairs[i]->p2mintemp = mtemp;
}
if (w2 < w1)
{
mtemp = pairs[i]->p1temp;
pairs[i]->p1temp = pairs[i]->p2temp;
pairs[i]->p2temp = mtemp;
mtemp = pairs[i]->p1mintemp;
pairs[i]->p1mintemp = pairs[i]->p2mintemp;
pairs[i]->p2mintemp = mtemp;
}
}
}

View File

@ -1,9 +0,0 @@
#ifndef THERMOSTATS_H_
#define THERMOSTATS_H_
#include "../libecoprimer/ecoprimer.h"
void getThermoProperties (ppair_t* pairs, size_t count, poptions_t options);
word_t extractSite(char* sequence, size_t begin, size_t length, bool_t strand);
#endif

View File

@ -1,651 +0,0 @@
#!/usr/bin/env python
import re
import gzip
import struct
import sys
import time
import getopt
try:
import psycopg2
_dbenable=True
except ImportError:
_dbenable=False
#####
#
#
# Generic file function
#
#
#####
def universalOpen(file):
if isinstance(file,str):
if file[-3:] == '.gz':
rep = gzip.open(file)
else:
rep = open(file)
else:
rep = file
return rep
def universalTell(file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
return file.tell()
def fileSize(file):
if isinstance(file, gzip.GzipFile):
file=file.myfileobj
pos = file.tell()
file.seek(0,2)
length = file.tell()
file.seek(pos,0)
return length
def progressBar(pos,max,reset=False,delta=[]):
if reset:
del delta[:]
if not delta:
delta.append(time.time())
delta.append(time.time())
delta[1]=time.time()
elapsed = delta[1]-delta[0]
percent = float(pos)/max * 100
remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
bar = '#' * int(percent/2)
bar+= '|/-\\-'[pos % 5]
bar+= ' ' * (50 - int(percent/2))
sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
#####
#
#
# NCBI Dump Taxonomy reader
#
#
#####
def endLessIterator(endedlist):
for x in endedlist:
yield x
while(1):
yield endedlist[-1]
class ColumnFile(object):
def __init__(self,stream,sep=None,strip=True,types=None):
if isinstance(stream,str):
self._stream = open(stream)
elif hasattr(stream,'next'):
self._stream = stream
else:
raise ValueError,'stream must be string or an iterator'
self._delimiter=sep
self._strip=strip
if types:
self._types=[x for x in types]
for i in xrange(len(self._types)):
if self._types[i] is bool:
self._types[i]=ColumnFile.str2bool
else:
self._types=None
def str2bool(x):
return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
str2bool = staticmethod(str2bool)
def __iter__(self):
return self
def next(self):
ligne = self._stream.next()
data = ligne.split(self._delimiter)
if self._strip or self._types:
data = [x.strip() for x in data]
if self._types:
it = endLessIterator(self._types)
data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
return data
def taxonCmp(t1,t2):
if t1[0] < t2[0]:
return -1
elif t1[0] > t2[0]:
return +1
return 0
def bsearchTaxon(taxonomy,taxid):
taxCount = len(taxonomy)
begin = 0
end = taxCount
oldcheck=taxCount
check = begin + end / 2
while check != oldcheck and taxonomy[check][0]!=taxid :
if taxonomy[check][0] < taxid:
begin=check
else:
end=check
oldcheck=check
check = (begin + end) / 2
if taxonomy[check][0]==taxid:
return check
else:
return None
def readNodeTable(file):
file = universalOpen(file)
nodes = ColumnFile(file,
sep='|',
types=(int,int,str,
str,str,bool,
int,bool,int,
bool,bool,bool,str))
print >>sys.stderr,"Reading taxonomy dump file..."
taxonomy=[[n[0],n[2],n[1]] for n in nodes]
print >>sys.stderr,"List all taxonomy rank..."
ranks =list(set(x[1] for x in taxonomy))
ranks.sort()
ranks = dict(map(None,ranks,xrange(len(ranks))))
print >>sys.stderr,"Sorting taxons..."
taxonomy.sort(taxonCmp)
print >>sys.stderr,"Indexing taxonomy..."
index = {}
for t in taxonomy:
index[t[0]]=bsearchTaxon(taxonomy, t[0])
print >>sys.stderr,"Indexing parent and rank..."
for t in taxonomy:
t[1]=ranks[t[1]]
t[2]=index[t[2]]
return taxonomy,ranks,index
def nameIterator(file):
file = universalOpen(file)
names = ColumnFile(file,
sep='|',
types=(int,str,
str,str))
for taxid,name,unique,classname,white in names:
yield taxid,name,classname
def mergedNodeIterator(file):
file = universalOpen(file)
merged = ColumnFile(file,
sep='|',
types=(int,int,str))
for taxid,current,white in merged:
yield taxid,current
def deletedNodeIterator(file):
file = universalOpen(file)
deleted = ColumnFile(file,
sep='|',
types=(int,str))
for taxid,white in deleted:
yield taxid
def readTaxonomyDump(taxdir):
taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
print >>sys.stderr,"Adding scientific name..."
alternativeName=[]
for taxid,name,classname in nameIterator('%s/names.dmp' % taxdir):
alternativeName.append((name,classname,index[taxid]))
if classname == 'scientific name':
taxonomy[index[taxid]].append(name)
print >>sys.stderr,"Adding taxid alias..."
for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
index[taxid]=index[current]
print >>sys.stderr,"Adding deleted taxid..."
for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
index[taxid]=None
return taxonomy,ranks,alternativeName,index
def readTaxonomyDB(dbname):
connection = psycopg2.connect(database=dbname)
cursor = connection.cursor()
cursor.execute("select numid,rank,parent from ncbi_taxonomy.taxon")
taxonomy=[list(x) for x in cursor]
cursor.execute("select rank_class from ncbi_taxonomy.taxon_rank_class order by rank_class")
ranks=cursor.fetchall()
ranks = dict(map(None,(x[0] for x in ranks),xrange(len(ranks))))
print >>sys.stderr,"Sorting taxons..."
taxonomy.sort(taxonCmp)
print >>sys.stderr,"Indexing taxonomy..."
index = {}
for t in taxonomy:
index[t[0]]=bsearchTaxon(taxonomy, t[0])
print >>sys.stderr,"Indexing parent and rank..."
for t in taxonomy:
t[1]=ranks[t[1]]
try:
t[2]=index[t[2]]
except KeyError,e:
if t[2] is None and t[0]==1:
t[2]=index[t[0]]
else:
raise e
cursor.execute("select taxid,name,category from ncbi_taxonomy.name")
alternativeName=[]
for taxid,name,classname in cursor:
alternativeName.append((name,classname,index[taxid]))
if classname == 'scientific name':
taxonomy[index[taxid]].append(name)
cursor.execute("select old_numid,current_numid from ncbi_taxonomy.taxon_id_alias")
print >>sys.stderr,"Adding taxid alias..."
for taxid,current in cursor:
if current is not None:
index[taxid]=index[current]
else:
index[taxid]=None
return taxonomy,ranks,alternativeName,index
#####
#
#
# Genbank/EMBL sequence reader
#
#
#####
def entryIterator(file):
file = universalOpen(file)
rep =[]
for ligne in file:
rep.append(ligne)
if ligne == '//\n':
rep = ''.join(rep)
yield rep
rep = []
def fastaEntryIterator(file):
file = universalOpen(file)
rep =[]
for ligne in file:
if ligne[0] == '>' and rep:
rep = ''.join(rep)
yield rep
rep = []
rep.append(ligne)
if rep:
rep = ''.join(rep)
yield rep
_cleanSeq = re.compile('[ \n0-9]+')
def cleanSeq(seq):
return _cleanSeq.sub('',seq)
_gbParseID = re.compile('(?<=^LOCUS {7})[^ ]+(?= )',re.MULTILINE)
_gbParseDE = re.compile('(?<=^DEFINITION {2}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)
_gbParseSQ = re.compile('(?<=^ORIGIN).+?(?=^//$)',re.MULTILINE+re.DOTALL)
_gbParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')
def genbankEntryParser(entry):
Id = _gbParseID.findall(entry)[0]
De = ' '.join(_gbParseDE.findall(entry)[0].split())
Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper())
try:
Tx = int(_gbParseTX.findall(entry)[0])
except IndexError:
Tx = None
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
######################
_cleanDef = re.compile('[\nDE]')
def cleanDef(definition):
return _cleanDef.sub('',definition)
_emblParseID = re.compile('(?<=^ID {3})[^ ]+(?=;)',re.MULTILINE)
_emblParseDE = re.compile('(?<=^DE {3}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)
_emblParseSQ = re.compile('(?<=^ ).+?(?=^//$)',re.MULTILINE+re.DOTALL)
_emblParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')
def emblEntryParser(entry):
Id = _emblParseID.findall(entry)[0]
De = ' '.join(cleanDef(_emblParseDE.findall(entry)[0]).split())
Sq = cleanSeq(_emblParseSQ.findall(entry)[0].upper())
try:
Tx = int(_emblParseTX.findall(entry)[0])
except IndexError:
Tx = None
return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
######################
_fastaSplit=re.compile(';\W*')
def parseFasta(seq):
seq=seq.split('\n')
title = seq[0].strip()[1:].split(None,1)
id=title[0]
if len(title) == 2:
field = _fastaSplit.split(title[1])
else:
field=[]
info = dict(x.split('=',1) for x in field if '=' in x)
definition = ' '.join([x for x in field if '=' not in x])
seq=(''.join([x.strip() for x in seq[1:]])).upper()
return id,seq,definition,info
def fastaEntryParser(entry):
id,seq,definition,info = parseFasta(entry)
Tx = info.get('taxid',None)
if Tx is not None:
Tx=int(Tx)
return {'id':id,'taxid':Tx,'definition':definition,'sequence':seq}
def sequenceIteratorFactory(entryParser,entryIterator):
def sequenceIterator(file):
for entry in entryIterator(file):
yield entryParser(entry)
return sequenceIterator
def taxonomyInfo(entry,connection):
taxid = entry['taxid']
curseur = connection.cursor()
curseur.execute("""
select taxid,species,genus,family,
taxonomy.scientificName(taxid) as sn,
taxonomy.scientificName(species) as species_sn,
taxonomy.scientificName(genus) as genus_sn,
taxonomy.scientificName(family) as family_sn
from
(
select alias as taxid,
taxonomy.getSpecies(alias) as species,
taxonomy.getGenus(alias) as genus,
taxonomy.getFamily(alias) as family
from taxonomy.aliases
where id=%d ) as tax
""" % taxid)
rep = curseur.fetchone()
entry['current_taxid']=rep[0]
entry['species']=rep[1]
entry['genus']=rep[2]
entry['family']=rep[3]
entry['scientific_name']=rep[4]
entry['species_sn']=rep[5]
entry['genus_sn']=rep[6]
entry['family_sn']=rep[7]
return entry
#####
#
#
# Binary writer
#
#
#####
def ecoSeqPacker(sq):
compactseq = gzip.zlib.compress(sq['sequence'],9)
cptseqlength = len(compactseq)
delength = len(sq['definition'])
totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength
packed = struct.pack('> I I 20s I I I %ds %ds' % (delength,cptseqlength),
totalSize,
sq['taxid'],
sq['id'],
delength,
len(sq['sequence']),
cptseqlength,
sq['definition'],
compactseq)
assert len(packed) == totalSize+4, "error in sequence packing"
return packed
def ecoTaxPacker(tx):
namelength = len(tx[3])
totalSize = 4 + 4 + 4 + 4 + namelength
packed = struct.pack('> I I I I I %ds' % namelength,
totalSize,
tx[0],
tx[1],
tx[2],
namelength,
tx[3])
return packed
def ecoRankPacker(rank):
namelength = len(rank)
packed = struct.pack('> I %ds' % namelength,
namelength,
rank)
return packed
def ecoNamePacker(name):
namelength = len(name[0])
classlength= len(name[1])
totalSize = namelength + classlength + 4 + 4 + 4 + 4
packed = struct.pack('> I I I I I %ds %ds' % (namelength,classlength),
totalSize,
int(name[1]=='scientific name'),
namelength,
classlength,
name[2],
name[0],
name[1])
return packed
def ecoSeqWriter(file,input,taxindex,parser):
output = open(file,'wb')
input = universalOpen(input)
inputsize = fileSize(input)
entries = parser(input)
seqcount=0
skipped = []
output.write(struct.pack('> I',seqcount))
progressBar(1, inputsize,reset=True)
for entry in entries:
if entry['taxid'] is not None:
try:
entry['taxid']=taxindex[entry['taxid']]
except KeyError:
entry['taxid']=None
if entry['taxid'] is not None:
seqcount+=1
output.write(ecoSeqPacker(entry))
else:
skipped.append(entry['id'])
where = universalTell(input)
progressBar(where, inputsize)
print >>sys.stderr," Readed sequences : %d " % seqcount,
else:
skipped.append(entry['id'])
print >>sys.stderr
output.seek(0,0)
output.write(struct.pack('> I',seqcount))
output.close()
return skipped
def ecoTaxWriter(file,taxonomy):
output = open(file,'wb')
output.write(struct.pack('> I',len(taxonomy)))
for tx in taxonomy:
output.write(ecoTaxPacker(tx))
output.close()
def ecoRankWriter(file,ranks):
output = open(file,'wb')
output.write(struct.pack('> I',len(ranks)))
rankNames = ranks.keys()
rankNames.sort()
for rank in rankNames:
output.write(ecoRankPacker(rank))
output.close()
def nameCmp(n1,n2):
name1=n1[0].upper()
name2=n2[0].upper()
if name1 < name2:
return -1
elif name1 > name2:
return 1
return 0
def ecoNameWriter(file,names):
output = open(file,'wb')
output.write(struct.pack('> I',len(names)))
names.sort(nameCmp)
for name in names:
output.write(ecoNamePacker(name))
output.close()
def ecoDBWriter(prefix,taxonomy,seqFileNames,parser):
ecoRankWriter('%s.rdx' % prefix, taxonomy[1])
ecoTaxWriter('%s.tdx' % prefix, taxonomy[0])
ecoNameWriter('%s.ndx' % prefix, taxonomy[2])
filecount = 0
for filename in seqFileNames:
filecount+=1
sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount),
filename,
taxonomy[3],
parser)
if sk:
print >>sys.stderr,"Skipped entry :"
print >>sys.stderr,sk
def ecoParseOptions(arguments):
opt = {
'prefix' : 'ecodb',
'taxdir' : 'taxdump',
'parser' : sequenceIteratorFactory(genbankEntryParser,
entryIterator)
}
o,filenames = getopt.getopt(arguments,
'ht:T:n:gfe',
['help',
'taxonomy=',
'taxonomy_db=',
'name=',
'genbank',
'fasta',
'embl'])
for name,value in o:
if name in ('-h','--help'):
printHelp()
exit()
elif name in ('-t','--taxonomy'):
opt['taxmod']='dump'
opt['taxdir']=value
elif name in ('-T','--taxonomy_db'):
opt['taxmod']='db'
opt['taxdb']=value
elif name in ('-n','--name'):
opt['prefix']=value
elif name in ('-g','--genbank'):
opt['parser']=sequenceIteratorFactory(genbankEntryParser,
entryIterator)
elif name in ('-f','--fasta'):
opt['parser']=sequenceIteratorFactory(fastaEntryParser,
fastaEntryIterator)
elif name in ('-e','--embl'):
opt['parser']=sequenceIteratorFactory(emblEntryParser,
entryIterator)
else:
raise ValueError,'Unknown option %s' % name
return opt,filenames
def printHelp():
print "-----------------------------------"
print " ecoPCRFormat.py"
print "-----------------------------------"
print "ecoPCRFormat.py [option] <argument>"
print "-----------------------------------"
print "-e --embl :[E]mbl format"
print "-f --fasta :[F]asta format"
print "-g --genbank :[G]enbank format"
print "-h --help :[H]elp - print this help"
print "-n --name :[N]ame of the new database created"
print "-t --taxonomy :[T]axonomy - path to the taxonomy database"
print " :bcp-like dump from GenBank taxonomy database."
print "-----------------------------------"
if __name__ == '__main__':
opt,filenames = ecoParseOptions(sys.argv[1:])
if opt['taxmod']=='dump':
taxonomy = readTaxonomyDump(opt['taxdir'])
elif opt['taxmod']=='db':
taxonomy = readTaxonomyDB(opt['taxdb'])
ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])