My complete changes on my laptop, with specificity bug fix + ahocorasick + sets
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@393 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
364
.cproject
364
.cproject
@ -1,151 +1,221 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="ecoPrimers" buildProperties="" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg">
|
||||
<folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077" name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.766054112" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.2057035265" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/>
|
||||
<builder id="cdt.managedbuild.target.gnu.builder.macosx.base.783726363" managedBuildOn="false" name="Gnu Make Builder.MacOSX GCC" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.914103467" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.c.linker.input.62980206" superClass="cdt.managedbuild.tool.macosx.c.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.691108439" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.695639877" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1507665054" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1786370580" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.454329831" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.330854350" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="makefileGenerator">
|
||||
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.xlc.core.XLCManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="false" filePath=""/>
|
||||
<parser enabled="false"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -v ${plugin_state_location}/${specs_file}" command="${XL_compilerRoot}/xlc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.xlc.core.XLCManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="false" filePath=""/>
|
||||
<parser enabled="false"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -v ${plugin_state_location}/${specs_file}" command="${XL_compilerRoot}/xlC" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="ecoPrimers.null.1292969001" name="ecoPrimers"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="ecoPrimers" buildProperties="" description="" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg">
|
||||
<folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077" name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.766054112" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.2057035265" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/>
|
||||
<builder id="cdt.managedbuild.target.gnu.builder.macosx.base.783726363" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.914103467" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.c.linker.input.62980206" superClass="cdt.managedbuild.tool.macosx.c.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.691108439" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.695639877" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base">
|
||||
<option id="gnu.both.asm.option.include.paths.1544375094" name="Include paths (-I)" superClass="gnu.both.asm.option.include.paths" valueType="includePath"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1507665054" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1786370580" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.454329831" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base">
|
||||
<option id="gnu.c.compiler.option.include.paths.823251305" superClass="gnu.c.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/usr/include"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.330854350" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.language.mapping"/>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="makefileGenerator">
|
||||
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396;cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077;cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909;cdt.managedbuild.tool.gnu.c.compiler.input.330854350">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="makefileGenerator">
|
||||
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
|
||||
<buildOutputProvider>
|
||||
<openAction enabled="true" filePath=""/>
|
||||
<parser enabled="true"/>
|
||||
</buildOutputProvider>
|
||||
<scannerInfoProvider id="specsFile">
|
||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||
<parser enabled="true"/>
|
||||
</scannerInfoProvider>
|
||||
</profile>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="ecoPrimers.null.1292969001" name="ecoPrimers"/>
|
||||
</storageModule>
|
||||
</cproject>
|
||||
|
220
src/ecoprimer.c
220
src/ecoprimer.c
@ -7,6 +7,7 @@
|
||||
|
||||
#include "libecoprimer/ecoprimer.h"
|
||||
#include "libecoprimer/PrimerSets.h"
|
||||
#include "libecoprimer/ahocorasick.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
@ -25,6 +26,8 @@
|
||||
|
||||
static int cmpprintedpairs(const void* p1,const void* p2);
|
||||
//float _Z27calculateMeltingTemperature_ (char * seq1, char * seq2);
|
||||
pwordcount_t reduce_words_to_debug (pwordcount_t words, poptions_t options);
|
||||
void print_wordwith_positions (primer_t prm, uint32_t seqdbsize, poptions_t options);
|
||||
|
||||
void* lib_handle = NULL;
|
||||
float (*calcMelTemp)(char*, char*);
|
||||
@ -71,12 +74,12 @@ static void PrintHelp()
|
||||
PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1 or OWCZARZY:2, default=1)\n\n");
|
||||
PP "-a : Salt contentration in M for Tm computation (default 0.05 M)\n\n");
|
||||
PP "-U : No multi match\n\n");
|
||||
PP "-U : Define the [R]eference sequence identifier (must be part of example set)\n\n");
|
||||
PP "-R : Define the [R]eference sequence identifier (must be part of example set)\n\n");
|
||||
PP "-A : Print the list of all identifier of sequences present in the database\n\n");
|
||||
PP "-f : Remove data mining step during strict primer identification\n\n");
|
||||
PP "-v : Store statistic file about memory usage during strict primer identification\n\n");
|
||||
PP "-p : Print sets of primers\n\n");
|
||||
PP "-T : Ignore pairs having specificity below this Threshold\n\n");
|
||||
PP "-p : Print sets of primers (may take several minutes after primers have been designed!)\n\n");
|
||||
PP "-T : Ignore pairs having specificity below this Threshold\n\n");
|
||||
PP "\n");
|
||||
PP "------------------------------------------\n");
|
||||
PP "Table result description : \n");
|
||||
@ -151,6 +154,9 @@ void initoptions(poptions_t options)
|
||||
options->printAC=FALSE;
|
||||
options->print_sets_of_primers = FALSE;
|
||||
options->specificity_threshold = 0.6;
|
||||
options->links_cnt = 1;
|
||||
options->max_links_percent = -1; /*graph only those primers having maximum 15% links*/
|
||||
options->filter_on_links = FALSE;
|
||||
}
|
||||
|
||||
void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
@ -165,7 +171,7 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
bool_t good2=pair->p2->good;
|
||||
bool_t goodtmp;
|
||||
bool_t strand;
|
||||
uint32_t i;
|
||||
uint32_t i, j;
|
||||
float temp;
|
||||
CNNParams nnparams;
|
||||
|
||||
@ -296,6 +302,12 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||
else
|
||||
printf("\t\t");
|
||||
|
||||
/* j=0;
|
||||
for (i=0; i<options->dbsize; i++)
|
||||
if (pair->wellIdentifiedSeqs[i] == 1)
|
||||
j++;
|
||||
printf("%d", j);*/
|
||||
|
||||
printf("\n");
|
||||
|
||||
}
|
||||
@ -335,6 +347,7 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
||||
else qfp=0.0;
|
||||
|
||||
sortedpairs[i]->wellIdentifiedSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets
|
||||
sortedpairs[i]->coveredSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets
|
||||
sortedpairs[i]->quorumin = q;
|
||||
sortedpairs[i]->quorumout = qfp;
|
||||
sortedpairs[i]->yule = q - qfp;
|
||||
@ -345,13 +358,13 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
||||
{
|
||||
//TR 05/09/10 - wellIdentified needed for primer sets
|
||||
sortedpairs[j]->wellIdentifiedSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array");
|
||||
(void)taxonomycoverage(sortedpairs[j],options);
|
||||
sortedpairs[j]->coveredSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array");
|
||||
(void)taxonomycoverage(sortedpairs[j],options, seqdb, options->dbsize);
|
||||
taxonomyspecificity(sortedpairs[j], seqdb, options->dbsize);
|
||||
//j++;
|
||||
//if specificity is less than the user-provided threshold (default 60%) then ignore this pair
|
||||
if (sortedpairs[j]->bs >= options->specificity_threshold)
|
||||
j++;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@ -369,7 +382,8 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy,
|
||||
size_t count;
|
||||
char *taxon[]={"taxon","taxa"};
|
||||
ecotx_t *current_taxon;
|
||||
pairset pair_sets;
|
||||
//pairset pair_sets;
|
||||
pairset *pset = NULL;
|
||||
|
||||
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
|
||||
|
||||
@ -388,7 +402,7 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy,
|
||||
|
||||
for (i=0;i<pl->paircount;i++,j++)
|
||||
sortedpairs[j]=pl->pairs+i;
|
||||
|
||||
|
||||
count=filterandsortpairs(sortedpairs,pairs->count,options, seqdb);
|
||||
getThermoProperties(sortedpairs, count, options);
|
||||
|
||||
@ -451,15 +465,55 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy,
|
||||
printf("# DB sequences are considered as linear\n");
|
||||
printf("# Pairs having specificity less than %0.2f will be ignored\n", options->specificity_threshold);
|
||||
printf("#\n");
|
||||
|
||||
|
||||
for (i=0;i < count;i++)
|
||||
printapair(i,sortedpairs[i],options);
|
||||
|
||||
if (options->filter_on_links)
|
||||
{
|
||||
fprintf (stderr, "Old size: %d, ", count);
|
||||
count = primers_changeSortedArray (&sortedpairs, count, options);
|
||||
//count = primers_filterWithGivenLinks (&sortedpairs, count, options);
|
||||
fprintf (stderr, "New size: %d\n", count);
|
||||
|
||||
if (count == 0)
|
||||
{
|
||||
fprintf (stderr, "No pairs passed the links constraints.\n");
|
||||
printf ("No pairs passed the links constraints.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
for (i=0;i < count;i++)
|
||||
printapair(i,sortedpairs[i],options);
|
||||
}
|
||||
|
||||
if (options->print_sets_of_primers == TRUE)
|
||||
{
|
||||
pair_sets = build_primers_set (sortedpairs, count, seqdb, options);
|
||||
/*pair_sets = build_primers_set (sortedpairs, count, seqdb, options);
|
||||
printf("Results from Greedy Algorithm and some other possibilities:\n");
|
||||
some_other_set_possibilities (&pair_sets, sortedpairs, count, seqdb, options);
|
||||
printf("Results from simulated Anealing:\n");
|
||||
sets_by_SimulatedAnealing (&pair_sets, sortedpairs, count, seqdb, options);
|
||||
printf("Results from Tabu Search:\n");
|
||||
sets_by_TabuSearch (&pair_sets, sortedpairs, count, seqdb, options);*/
|
||||
//pset = sets_by_BruteForce (sortedpairs, count, seqdb, options);
|
||||
//if (pset)
|
||||
/*/{
|
||||
printf("Results from simulated Anealing:\n");
|
||||
sets_by_SimulatedAnealing (pset, sortedpairs, count, seqdb, options);
|
||||
printf("Results from Tabu Search:\n");
|
||||
sets_by_TabuSearch (pset, sortedpairs, count, seqdb, options);
|
||||
|
||||
if (pset)
|
||||
{
|
||||
ECOFREE (pset->set_wellIdentifiedTaxa, "Could not free memory for pair set wi");
|
||||
ECOFREE (pset, "Could not free memory for pair");
|
||||
}
|
||||
}*/
|
||||
build_and_print_sets (sortedpairs, count, seqdb, options);
|
||||
}
|
||||
//primers_graph_graphviz (sortedpairs, count, options);
|
||||
}
|
||||
|
||||
|
||||
@ -545,7 +599,7 @@ int main(int argc, char **argv)
|
||||
|
||||
initoptions(&options);
|
||||
|
||||
while ((carg = getopt(argc, argv, "hAfvcUDSpE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:T:")) != -1) {
|
||||
while ((carg = getopt(argc, argv, "hAfvcUDSpbE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:T:k:M:")) != -1) {
|
||||
|
||||
switch (carg) {
|
||||
/* ---------------------------- */
|
||||
@ -711,15 +765,31 @@ int main(int argc, char **argv)
|
||||
/* -------------------- */
|
||||
case 'p': /* print sets of primers */
|
||||
/* --------------------------------- */
|
||||
options.print_sets_of_primers = TRUE;
|
||||
//options.print_sets_of_primers = TRUE;
|
||||
break;
|
||||
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'T': /* Ignore pairs having specificity below this Threshold */
|
||||
/* --------------------------------- */
|
||||
case 'T': /* Ignore pairs having specificity below this Threshold */
|
||||
sscanf(optarg,"%f",&(options.specificity_threshold));
|
||||
break;
|
||||
|
||||
/* --------------------------------- */
|
||||
sscanf(optarg,"%f",&(options.specificity_threshold));
|
||||
case 'M': /* Max link percentage for graph */
|
||||
/* --------------------------------- */
|
||||
sscanf(optarg,"%f",&(options.max_links_percent));
|
||||
break;
|
||||
|
||||
|
||||
/* --------------------------------- */
|
||||
case 'k': /* links count */
|
||||
/* --------------------------------- */
|
||||
sscanf(optarg,"%d",&(options.links_cnt));
|
||||
break;
|
||||
|
||||
case 'b':
|
||||
options.filter_on_links = TRUE;
|
||||
break;
|
||||
|
||||
case '?': /* bad option */
|
||||
/* -------------------- */
|
||||
errflag++;
|
||||
@ -779,7 +849,11 @@ int main(int argc, char **argv)
|
||||
|
||||
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
|
||||
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
|
||||
|
||||
|
||||
/*/TR Testing
|
||||
fprintf(stderr,"\nReducing for debugging\n");
|
||||
words = reduce_words_to_debug (words, &options);
|
||||
///*/
|
||||
// options.filtering=FALSE;
|
||||
// words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
|
||||
// fprintf(stderr,"\n Strict primer count : %d\n",words2->size);
|
||||
@ -802,7 +876,6 @@ int main(int argc, char **argv)
|
||||
for (i=0; i<MINI(10,words->size); i++)
|
||||
fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);
|
||||
|
||||
|
||||
fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n");
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
@ -812,7 +885,13 @@ int main(int argc, char **argv)
|
||||
|
||||
ECOFREE(words->strictcount,"Free strict primer count table");
|
||||
|
||||
primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);
|
||||
if (options.error_max == 0)//aho, if(options.error_max == 0 && 0) old
|
||||
primers = ahoc_lookforStrictPrimers (seqdb,seqdbsize,insamples,words,&options);
|
||||
else
|
||||
primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);
|
||||
|
||||
//for (i=0; i<primers->size; i++)
|
||||
// print_wordwith_positions (primers->primers[i], seqdbsize, &options);
|
||||
|
||||
ECOFREE(words->words,"Free strict primer table");
|
||||
ECOFREE(words,"Free strict primer structure");
|
||||
@ -833,3 +912,108 @@ int main(int argc, char **argv)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define DEBUG_WORDS_CNT 14
|
||||
/*
 * Debugging aid: replace the strict word table by a table holding only the
 * words of a hard-coded list of DEBUG_WORDS_CNT primers.
 *
 * Every input word is decoded (and so is its complement, as returned by
 * ecoComplementWord) and compared with the debug list. The first input word
 * matching entry k is copied, with its strict count, into slot k of the
 * result. A summary of how often each debug word was seen is printed on
 * stderr.
 *
 * words   : input table; it is freed by this function.
 * options : only options->primer_length is read.
 *
 * Returns a newly allocated table of exactly DEBUG_WORDS_CNT entries. Slots
 * whose debug word was never matched hold word 0 with a count of 0. Entries
 * 5 and 13 of the list duplicate entries 3 and 7, and the scan stops at the
 * first matching entry, so those two slots are always empty.
 */
pwordcount_t reduce_words_to_debug (pwordcount_t words, poptions_t options)
{
	/* alternative list kept for reference:
	   "GAGTCTCTGCACCTATCC", "GCAATCCTGAGCCAAATC", "ACCCCTAACCACAACTCA",
	   "TCCGAACCGACTGATGTT", "GAAGCTTGGGTGAAACTA", "GGAGAACCAGCTAGCTCT", "GCTGGTTCTCCCCGAAAT",
	   "TCGATTTGGTACCGCTCT", "AAAGGAGAGAGAGGGATT", "GGATTGCTAATCCGTTGT", "CCCCCATCGTCTCACTGG",
	   "TGAGGCGCAGCAGTTGAC", "GCGCTACGGCGCTGAAGT", "TTTCCTGGGAGTATGGCA" */
	static const char * const strict_words[DEBUG_WORDS_CNT] = {"CTCCGGTCTGAACTCAGA", "TGTTGGATCAGGACATCC", "TAGATAGAAACCGACCTG",
			"TGGTGCAGCCGCTATTAA", "AGATAGAAACTGACCTGG", "TGGTGCAGCCGCTATTAA", "CTAATGGTGCAGCCGCTA",
			"TAGAAACTGACCTGGATT", "AGATAGAAACCGACCTGG", "ATGGTGCAGCCGCTATTA", "ATAGATAGAAACCGACCT",
			"GCCGCTATTAAGGGTTCG", "GGTGCAGCCGCTATTAAG", "TAGAAACTGACCTGGATT"};
	uint32_t i, k;
	pwordcount_t new_words;
	char *rwrd;
	char dwrd[20];
	int word_seen[DEBUG_WORDS_CNT];

	new_words = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure");
	new_words->inseqcount = words->inseqcount;
	new_words->outseqcount = words->outseqcount;
	new_words->size = DEBUG_WORDS_CNT;
	new_words->strictcount = ECOMALLOC((new_words->size*sizeof(uint32_t)), "Cannot allocate memory for word count table");
	new_words->words = ECOMALLOC(new_words->size*sizeof(word_t), "I cannot allocate memory for debug words");

	/* give every slot a defined content, even if its word is never found */
	for (k = 0; k < DEBUG_WORDS_CNT; k++)
	{
		word_seen[k] = 0;
		new_words->words[k] = 0;
		new_words->strictcount[k] = 0;
	}

	for (i=0; i < words->size; i++)
	{
		/* The direct form is copied before the second ecoUnhashWord call
		   (presumably both calls return the same buffer - TODO confirm).
		   The copy is bounded: a word too long for dwrd is truncated and
		   then simply cannot match any 18-base debug word. */
		rwrd = ecoUnhashWord(words->words[i],options->primer_length);
		snprintf (dwrd, sizeof dwrd, "%s", rwrd);
		rwrd = ecoUnhashWord(ecoComplementWord(words->words[i],options->primer_length),options->primer_length);

		for (k = 0; k < DEBUG_WORDS_CNT; k++)
		{
			if (strcmp (dwrd, strict_words[k]) == 0) break;
			if (strcmp (rwrd, strict_words[k]) == 0) break;
		}

		if (k < DEBUG_WORDS_CNT)
		{
			/* only the first occurrence is kept, later ones are just counted */
			if (word_seen[k] == 0)
			{
				new_words->words[k] = words->words[i];
				new_words->strictcount[k] = words->strictcount[i];
			}
			word_seen[k]++;
		}
	}

	fprintf (stderr, "Debug Words Info:\n");
	for (k = 0; k < DEBUG_WORDS_CNT; k++)
		fprintf (stderr, "%s:%d\n", strict_words[k], word_seen[k]);

	/* release the input table, the caller continues with the reduced one */
	ECOFREE(words->words,"Clean word table");
	ECOFREE(words->strictcount,"Clean word count table");
	ECOFREE(words,"Clean word structure");

	return new_words;
}
|
||||
|
||||
/*
 * Debugging aid: when the primer decodes to the hard-coded word
 * "GCCTGTTTACCAAAAACA", print its match positions on stdout.
 *
 * Two lines are written, one for the direct and one for the reverse matches.
 * Each sequence having at least one match appears as "seqindex:positions ".
 * A single position is printed bare, several positions are each followed by
 * a comma. Nothing is printed for any other primer.
 */
void print_wordwith_positions (primer_t prm, uint32_t seqdbsize, poptions_t options)
{
	char *target = "GCCTGTTTACCAAAAACA";
	char *decoded = ecoUnhashWord(prm.word,options->primer_length);
	uint32_t seq, hit;

	if (strcmp (target, decoded) != 0)
		return;

	printf ("Positions for Word: %s\n", decoded);

	/* direct strand */
	for (seq=0; seq<seqdbsize; seq++)
	{
		if (prm.directCount[seq] > 0)
		{
			printf ("%d:", seq);
			if (prm.directCount[seq] != 1)
			{
				for (hit=0; hit<prm.directCount[seq]; hit++)
					printf ("%d,", prm.directPos[seq].pointer[hit]);
			}
			else
			{
				printf ("%d", prm.directPos[seq].value);
			}
			printf (" ");
		}
	}
	printf ("\n");

	/* reverse strand */
	for (seq=0; seq<seqdbsize; seq++)
	{
		if (prm.reverseCount[seq] > 0)
		{
			printf ("%d:", seq);
			if (prm.reverseCount[seq] != 1)
			{
				for (hit=0; hit<prm.reverseCount[seq]; hit++)
					printf ("%d,", prm.reversePos[seq].pointer[hit]);
			}
			else
			{
				printf ("%d", prm.reversePos[seq].value);
			}
			printf (" ");
		}
	}
	printf ("\n");
}
|
||||
|
@ -3,7 +3,8 @@ LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer -Llibthermo
|
||||
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
|
||||
|
||||
CC=gcc
|
||||
CFLAGS= -W -Wall -O5 -m64
|
||||
CFLAGS= -W -Wall -m64 -g
|
||||
#CFLAGS= -W -Wall -O5 -m64 -g
|
||||
#CFLAGS= -W -Wall -O0 -m64 -g
|
||||
#CFLAGS= -W -Wall -O5 -fast -g
|
||||
|
||||
|
@ -15,7 +15,8 @@ SOURCES = goodtaxon.c \
|
||||
taxstats.c \
|
||||
apat_search.c \
|
||||
filtering.c \
|
||||
PrimerSets.c
|
||||
PrimerSets.c \
|
||||
ahocorasick.c
|
||||
|
||||
SRCS=$(SOURCES)
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -13,6 +13,8 @@ typedef struct {
|
||||
float set_lmean;
|
||||
float set_lcov;
|
||||
float set_score;
|
||||
int32_t set_intaxa;
|
||||
int32_t set_wi_cnt;
|
||||
}pairset;
|
||||
|
||||
typedef struct{
|
||||
@ -33,9 +35,24 @@ typedef struct{
|
||||
void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams);
|
||||
void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams);
|
||||
float get_links_distribution (int prb_idx, pairset *prob_set, SetParams *pparams);
|
||||
pairset build_primers_set (ppair_t* sortedpairs, int32_t sorted_count, pecodnadb_t seqdb,
|
||||
poptions_t options);
|
||||
pairset build_primers_set_greedy_spc (SetParams *pparams);
|
||||
void get_set_mean_cov_stats (pairset *prob_set, SetParams *pparams);
|
||||
void some_other_set_possibilities (pairset *pair_set,
|
||||
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||
void sets_by_SimulatedAnealing (pairset *pair_set,
|
||||
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||
void sets_by_TabuSearch (pairset *pair_set,
|
||||
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||
pairset * sets_by_BruteForce (ppair_t * sortedpairs,
|
||||
int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||
pairset * extend_set_randomly (pairset *pair_set, SetParams *params, int extend_to_cnt);
|
||||
void build_and_print_sets (ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||
int32_t get_next_option_increasing_cov (pairset *pair_set, SetParams *pparams);
|
||||
void reset_set_props (pairset *pair_set, SetParams *pparams);
|
||||
void primers_graph_graphviz (ppair_t * sortedpairs,
|
||||
int32_t sorted_count, poptions_t options);
|
||||
size_t primers_changeSortedArray (ppair_t ** pairs,
|
||||
size_t sorted_count, poptions_t options);
|
||||
size_t primers_filterWithGivenLinks (ppair_t ** pairs,
|
||||
size_t sorted_count, poptions_t options);
|
||||
#endif
|
||||
|
479
src/libecoprimer/ahocorasick.c
Executable file
479
src/libecoprimer/ahocorasick.c
Executable file
@ -0,0 +1,479 @@
|
||||
/*
|
||||
* ahocorasick.c
|
||||
*
|
||||
* Created on: 26 march 2011
|
||||
* Author: tiayyba
|
||||
*/
|
||||
#include <inttypes.h>
|
||||
#include "hashencoder.h"
|
||||
#include "ahocorasick.h"
|
||||
|
||||
void ahoc_graphKeywordTree (aho_state *root);
|
||||
aho_state *groot = NULL; //just for graph testing
|
||||
|
||||
#define BASEATINDEX(w, l, i) (uint8_t)((((w)&(0x3LLU<<(((l)-(i))*2)))>>(((l)-(i))*2)) & 0x3LLU)
|
||||
|
||||
/*
 * Append the output element (idx, isdirect) to the output set of a state.
 * The set is allocated on the first element and grown by one slot for each
 * following one. No duplicate check is done here. A NULL node is ignored.
 */
void ahoc_addOutputElement (aho_state *node, bool_t isdirect, uint32_t idx)
{
	aho_output *slot;

	if (node == NULL)
		return;

	if (node->output.count != 0)
		node->output.out_set = ECOREALLOC(node->output.out_set, (node->output.count+1)*sizeof(aho_output),
				"Cannot allocate memory for aho-corasick state output element");
	else
		node->output.out_set = ECOMALLOC(sizeof(aho_output),
				"Cannot allocate memory for aho-corasick state output element");

	slot = &node->output.out_set[node->output.count];
	slot->wordidx = idx;
	slot->isdirect = isdirect;
	node->output.count++;
}
|
||||
|
||||
//is the passed output element in the set
|
||||
bool_t ahoc_isOutputIn (aho_state *node, aho_output ot)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
for (i=0; i<node->output.count; i++)
|
||||
if (node->output.out_set[i].isdirect == ot.isdirect && node->output.out_set[i].wordidx == ot.wordidx) return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
//take union of output of the two nodes and put in node1
|
||||
void ahoc_unionOutputElements (aho_state *node1, aho_state *node2)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
for (i=0; i<node2->output.count; i++)
|
||||
if (ahoc_isOutputIn (node1, node2->output.out_set[i]) == FALSE)
|
||||
ahoc_addOutputElement (node1, node2->output.out_set[i].isdirect, node2->output.out_set[i].wordidx);
|
||||
}
|
||||
|
||||
/*
 * Insert one encoded word in the keyword tree rooted at root.
 *
 * The word is read base by base with BASEATINDEX, for indices 1 to
 * options->primer_length. Missing states are created along the path; each
 * new state receives the next value of a counter shared by all calls as its
 * id. The state reached after the last base receives the output element
 * (idx, isdirect).
 */
void ahoc_addKeyword (aho_state *root, word_t w, bool_t isdirect, uint32_t idx, poptions_t options)
{
	static uint32_t state_id = 0;
	aho_state *cursor = root;
	uint32_t depth;

	for (depth=1; depth<=options->primer_length; depth++)
	{
		uint8_t symbol = BASEATINDEX (w, options->primer_length, depth);
		aho_state *child = cursor->next[symbol];

		if (child == NULL)
		{
			/* no edge for this base yet: create and initialise the state */
			child = ECOMALLOC(sizeof(aho_state),
					"Cannot allocate memory for aho-corasick state");
			cursor->next[symbol] = child;
			child->id = ++state_id;
			child->next[0]=child->next[1]=child->next[2]=child->next[3]=NULL;
			child->fail = NULL;
			child->output.count = 0;
		}

		cursor = child;
	}

	/* the whole word has been spelled, the word ends on this state */
	ahoc_addOutputElement (cursor, isdirect, idx);
}
|
||||
|
||||
/*
 * Build the keyword tree of all strict words.
 *
 * root is initialised as state 0. Every word of the table is then inserted
 * twice under the same word index: as it is (direct, isdirect TRUE) and
 * after ecoComplementWord (isdirect FALSE). Finally each base without an
 * outgoing edge from the root is made to loop on the root. A NULL root is
 * ignored.
 */
void ahoc_buildKeywordTree (aho_state *root, pwordcount_t words, poptions_t options)
{
	uint32_t rank;

	if (root == NULL)
		return;

	root->id = 0;
	for (rank=0; rank<4; rank++)
		root->next[rank] = NULL;
	root->fail = NULL;
	root->output.count = 0;

	for (rank=0; rank<words->size; rank++)
	{
		word_t direct = WORD(words->words[rank]);
		word_t reverse = ecoComplementWord(direct,options->primer_length);

		ahoc_addKeyword (root, direct, TRUE, rank, options);
		ahoc_addKeyword (root, reverse, FALSE, rank, options);
	}

	/* a base that starts no word keeps the automaton on the root */
	for (rank=0; rank<4; rank++)
	{
		if (root->next[rank] == NULL)
			root->next[rank] = root;
	}
}
|
||||
|
||||
/*
 * Append a state at the tail of the FIFO queue. A NULL state is not queued.
 */
void ahoc_enqueue (aho_queue *ahoqueue, aho_state *node)
{
	queue_node *cell;

	if (node == NULL)
		return;

	cell = ECOMALLOC(sizeof(queue_node),
			"Cannot allocate memory for aho-corasick queue node");
	cell->state_node = node;
	cell->next = NULL;

	if (ahoqueue->first != NULL)
		ahoqueue->last->next = cell;
	else
		ahoqueue->first = cell;

	ahoqueue->last = cell;
}
|
||||
|
||||
/*
 * Remove the head of the FIFO queue and return the state it carries.
 * Returns NULL when the queue is empty. The queue cell itself is freed.
 */
aho_state *ahoc_dequeue (aho_queue *ahoqueue)
{
	aho_state *node;
	queue_node *head = ahoqueue->first;

	if (head == NULL)
		return NULL;

	ahoqueue->first = head->next;
	/* the queue just became empty: do not keep a pointer to the freed cell */
	if (ahoqueue->first == NULL)
		ahoqueue->last = NULL;

	node = head->state_node;
	ECOFREE (head, "Cannot free memory for aho-corasick queue node");

	return node;
}
|
||||
|
||||
//set fail links and output sets for the keyword tree
|
||||
void ahoc_updateForFailAndOutput (aho_state *root)
|
||||
{
|
||||
int32_t i;
|
||||
aho_queue Q;
|
||||
aho_state *node_r;
|
||||
aho_state *node_u;
|
||||
aho_state *node_v;
|
||||
|
||||
//empty queue
|
||||
Q.first = NULL;
|
||||
Q.last = NULL;
|
||||
|
||||
//for us alphabet has 4 elements, A=0, C=1, G=2 and T=3
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
if (root->next[i] != root && root->next[i] != NULL)
|
||||
{
|
||||
root->next[i]->fail = root;
|
||||
ahoc_enqueue (&Q, root->next[i]);
|
||||
}
|
||||
}
|
||||
|
||||
//while queue not empty
|
||||
while (Q.first != NULL)
|
||||
{
|
||||
node_r = ahoc_dequeue (&Q);
|
||||
for (i=0; i<4; i++)
|
||||
{
|
||||
if (node_r->next[i] != NULL)
|
||||
{
|
||||
node_u = node_r->next[i];
|
||||
ahoc_enqueue (&Q, node_u);
|
||||
node_v = node_r->fail;
|
||||
while (node_v->next[i] == NULL)
|
||||
node_v = node_v->fail;
|
||||
node_u->fail = node_v->next[i];
|
||||
ahoc_unionOutputElements (node_u, node_u->fail);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Free a state and, recursively, every state reachable through its next[]
 * edges, together with their output sets. Children are released before the
 * state itself.
 */
void ahoc_freeKeywordTree (aho_state *node)
{
	int symbol = 0;

	while (symbol < 4)
	{
		if (node->next[symbol] != NULL)
			ahoc_freeKeywordTree (node->next[symbol]);
		symbol++;
	}

	if (node->output.count > 0)
		ECOFREE (node->output.out_set, "Free failed for node output");

	ECOFREE (node, "Free failed for node");
}
|
||||
|
||||
pprimercount_t ahoc_lookforStrictPrimers (pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||
pwordcount_t words,poptions_t options)
|
||||
{
|
||||
aho_state automaton_root;
|
||||
aho_state *curr_state;
|
||||
//uint32_t inSequenceQuorum;
|
||||
uint32_t outSequenceQuorum;
|
||||
pprimer_t data;
|
||||
pprimercount_t primers;
|
||||
uint32_t i, j, k;
|
||||
int32_t pos;
|
||||
uint32_t lmax;
|
||||
char *base;
|
||||
int8_t code;
|
||||
uint32_t goodPrimers=0;
|
||||
static int iii=0;
|
||||
|
||||
|
||||
//inSequenceQuorum = (uint32_t)floor((float)exampleCount * options->sensitivity_quorum);
|
||||
outSequenceQuorum = (uint32_t)floor((float)(seqdbsize-exampleCount) * options->false_positive_quorum);
|
||||
|
||||
//fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",inSequenceQuorum,exampleCount);
|
||||
fprintf(stderr," Primers should not be present in more than %d/%d counterexample sequences\n",outSequenceQuorum,(seqdbsize-exampleCount));
|
||||
|
||||
data = ECOMALLOC(words->size * sizeof(primer_t),
|
||||
"Cannot allocate memory for fuzzy matching results");
|
||||
for (i=0; i < words->size; i++)
|
||||
{
|
||||
data[i].word=WORD(words->words[i]);
|
||||
data[i].inexample = 0;
|
||||
data[i].outexample= 0;
|
||||
|
||||
data[i].directCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[i].directPos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[i].reverseCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[i].reversePos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
}
|
||||
|
||||
//build keywords automaton
|
||||
ahoc_buildKeywordTree (&automaton_root, words, options);
|
||||
//set fail links and output sets
|
||||
ahoc_updateForFailAndOutput (&automaton_root);
|
||||
|
||||
//debug; print keywordtree in a gv file
|
||||
//ahoc_graphKeywordTree (&automaton_root);
|
||||
|
||||
//loop on each sequence for its each base and find words
|
||||
for (i=0; i < seqdbsize; i++)
|
||||
{
|
||||
if(database[i]->SQ_length <= options->primer_length) continue;
|
||||
|
||||
lmax = database[i]->SQ_length;
|
||||
if (!options->circular)
|
||||
lmax += options->primer_length-1;
|
||||
curr_state = &automaton_root;
|
||||
|
||||
for (j=0,base=database[i]->SQ; j<lmax; j++,base++)
|
||||
{
|
||||
if (i==(uint32_t)database[i]->SQ_length) base=database[i]->SQ;
|
||||
|
||||
//code = encoder[(*base) - 'A'];
|
||||
code = *base;
|
||||
//if (iii++ < 30)
|
||||
// fprintf (stderr, "%d:%d,", *base, code);
|
||||
if (code < 0 || code > 3)
|
||||
{
|
||||
//if error char, start from root for next character
|
||||
//+forget any incomplete words
|
||||
curr_state = &automaton_root;
|
||||
continue;
|
||||
}
|
||||
while (curr_state->next[code] == NULL) curr_state = curr_state->fail;
|
||||
curr_state = curr_state->next[code];
|
||||
|
||||
//start position of primer is options->primer_length-1 chars back
|
||||
pos = j-options->primer_length+1;
|
||||
if (pos < 0) pos = database[i]->SQ_length+pos;
|
||||
|
||||
//set output, if there is some output on this state then
|
||||
//+all words in the output set complete here, so increment their
|
||||
//+found properties for current sequence
|
||||
for (k=0; k<curr_state->output.count; k++)
|
||||
{
|
||||
if (curr_state->output.out_set[k].isdirect)
|
||||
data[curr_state->output.out_set[k].wordidx].directCount[i]++;
|
||||
else
|
||||
data[curr_state->output.out_set[k].wordidx].reverseCount[i]++;
|
||||
|
||||
if (options->no_multi_match)
|
||||
{
|
||||
if ((data[curr_state->output.out_set[k].wordidx].directCount[i] +
|
||||
data[curr_state->output.out_set[k].wordidx].reverseCount[i]) > 1)
|
||||
//since multimach not allowd, set an indication on 1st seq position that
|
||||
//+ a multimatch was found, so that this word will be filtered out
|
||||
//+ and because of first postion we wont have to search the whole array
|
||||
//+ to find if it voilated nomultimatch constraint for some seq
|
||||
data[curr_state->output.out_set[k].wordidx].directCount[0] = 2;
|
||||
else
|
||||
{
|
||||
if (curr_state->output.out_set[k].isdirect)
|
||||
//direct word found on jth position of ith sequence
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].value = (uint32_t)pos;
|
||||
else
|
||||
//reverse word found on jth position of ith sequence
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].value = (uint32_t)pos;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//okay multi match allowed
|
||||
if (curr_state->output.out_set[k].isdirect)
|
||||
{
|
||||
if (data[curr_state->output.out_set[k].wordidx].directCount[i] == 1)
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].value = (uint32_t)pos;
|
||||
else
|
||||
{
|
||||
//need to create or extend the positions list
|
||||
if (data[curr_state->output.out_set[k].wordidx].directCount[i] == 2)
|
||||
{
|
||||
//for second element, first was put in .value, so dont forget to copy that in the array too
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer = ECOMALLOC(2 * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[0] = data[curr_state->output.out_set[k].wordidx].directPos[i].value;
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[1] = (uint32_t)pos;
|
||||
}
|
||||
else
|
||||
{
|
||||
//for third or greater element
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer = ECOREALLOC(data[curr_state->output.out_set[k].wordidx].directPos[i].pointer,
|
||||
data[curr_state->output.out_set[k].wordidx].directCount[i] * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[data[curr_state->output.out_set[k].wordidx].directCount[i]-1] = (uint32_t)pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (data[curr_state->output.out_set[k].wordidx].reverseCount[i] == 1)
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].value = (uint32_t)pos;
|
||||
else
|
||||
{
|
||||
//need to create or extend the positions list
|
||||
if (data[curr_state->output.out_set[k].wordidx].reverseCount[i] == 2)
|
||||
{
|
||||
//for second element, first was put in .value, so dont forget to copy that in the array too
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer = ECOMALLOC(2 * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[0] = data[curr_state->output.out_set[k].wordidx].reversePos[i].value;
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[1] = (uint32_t)pos;
|
||||
}
|
||||
else
|
||||
{
|
||||
//for third or greater element
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer = ECOREALLOC(data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer,
|
||||
data[curr_state->output.out_set[k].wordidx].reverseCount[i] * sizeof(uint32_t),
|
||||
"Cannot allocate memory for primer position");
|
||||
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[data[curr_state->output.out_set[k].wordidx].reverseCount[i]-1] = (uint32_t)pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//dont forget to increment inexample or outexample count, but only once for a sequence
|
||||
if ((data[curr_state->output.out_set[k].wordidx].directCount[i] +
|
||||
data[curr_state->output.out_set[k].wordidx].reverseCount[i]) == 1)
|
||||
{
|
||||
if (database[i]->isexample)
|
||||
data[curr_state->output.out_set[k].wordidx].inexample++;
|
||||
else
|
||||
data[curr_state->output.out_set[k].wordidx].outexample++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Only thing that remains is to remove the failed words
|
||||
for (i=0,j=0; i<words->size; i++)
|
||||
{
|
||||
fprintf(stderr,"Primers %5d/%lld analyzed => sequence : %s in %d example and %d counterexample sequences \r",
|
||||
i+1,words->size,ecoUnhashWord(data[i].word,options->primer_length),
|
||||
data[i].inexample,data[i].outexample);
|
||||
|
||||
//if (data[i].inexample < inSequenceQuorum || (data[i].directCount[0] == 2 && options->no_multi_match))
|
||||
if (data[i].directCount[0] == 2 && options->no_multi_match)
|
||||
{
|
||||
//bad word, delete from the array
|
||||
for (k=0; k<seqdbsize; k++)
|
||||
{
|
||||
if (data[i].directCount[k] > 1)
|
||||
ECOFREE (data[i].directPos[k].pointer, "Cannot free position pointer.");
|
||||
if (data[i].reverseCount[k] > 1)
|
||||
ECOFREE (data[i].reversePos[k].pointer, "Cannot free position pointer.");
|
||||
}
|
||||
ECOFREE (data[i].directCount, "Cannot free position pointer.");
|
||||
ECOFREE (data[i].directPos, "Cannot free position pointer.");
|
||||
ECOFREE (data[i].reverseCount, "Cannot free position pointer.");
|
||||
ECOFREE (data[i].reversePos, "Cannot free position pointer.");
|
||||
}
|
||||
else
|
||||
{
|
||||
//data[i].good = data[i].inexample >= inSequenceQuorum && data[i].outexample <= outSequenceQuorum;
|
||||
data[i].good = data[i].outexample <= outSequenceQuorum;
|
||||
goodPrimers+=data[i].good? 1:0;
|
||||
if (j < i)
|
||||
data[j] = data[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
fprintf(stderr,"\n\nOn %lld analyzed primers %d respect quorum conditions\n",words->size,goodPrimers);
|
||||
fprintf(stderr,"Conserved primers for further analysis : %d/%lld\n",j,words->size);
|
||||
|
||||
primers = ECOMALLOC(sizeof(primercount_t),"Cannot allocate memory for primer table");
|
||||
primers->primers=ECOREALLOC(data,
|
||||
j * sizeof(primer_t),
|
||||
"Cannot reallocate memory for fuzzy matching results");
|
||||
primers->size=j;
|
||||
|
||||
//free memory of keyword table
|
||||
for (i=0; i<4; i++)
|
||||
if (automaton_root.next[i] != &automaton_root)
|
||||
ahoc_freeKeywordTree (automaton_root.next[i]);
|
||||
|
||||
return primers;
|
||||
}
|
||||
|
||||
/*
 * Write the graphviz declaration of a state, then recursively of all the
 * states below it. The label shows the state id followed by its output
 * elements, each as the word index and 'd' (direct) or 'r' (reverse).
 * Edges looping on the state itself are not followed.
 */
void ahoc_graphPrintNodesInfo (aho_state *node, FILE* gfile)
{
	uint32_t rank;

	fprintf (gfile, "\"%d\"[\n", node->id);
	fprintf (gfile, "label=\"%d\\n", node->id);
	for (rank=0; rank<node->output.count; rank++)
	{
		aho_output *element = &node->output.out_set[rank];

		fprintf (gfile, "%d%c,", element->wordidx, element->isdirect?'d':'r');
	}
	fprintf (gfile, "\"\n];\n");

	for (rank=0; rank<4; rank++)
	{
		aho_state *child = node->next[rank];

		if (child == NULL || child == node)
			continue;
		ahoc_graphPrintNodesInfo (child, gfile);
	}
}
|
||||
|
||||
/*
 * Write the graphviz edges of a state, then recursively of all the states
 * below it. Tree edges are labelled with their base (A, C, G or T). Fail
 * links are drawn in red, but only for the first 40 states visited and only
 * when they do not lead to the root recorded in groot. Edges looping on the
 * state itself are skipped.
 */
void ahoc_graphPrintNodesLinks (aho_state *node, FILE* gfile)
{
	static int visited=0;
	uint32_t rank;

	for (rank=0; rank<4; rank++)
	{
		aho_state *child = node->next[rank];

		if (child == NULL || child == node)
			continue;

		fprintf (gfile, "\"%d\" -> \"%d\" [\n", node->id, child->id);
		fprintf (gfile, "label=\"%c\"\n];\n", "ACGT"[rank]);
	}

	/* the counter is incremented on every visit, drawn or not */
	if (visited++ < 40 && node->fail != NULL && node->fail != groot)
	{
		fprintf (gfile, "\"%d\" -> \"%d\" [\n", node->id, node->fail->id);
		fprintf (gfile, "color= \"red\"\n];\n");
	}

	for (rank=0; rank<4; rank++)
	{
		aho_state *child = node->next[rank];

		if (child == NULL || child == node)
			continue;
		ahoc_graphPrintNodesLinks (child, gfile);
	}
}
|
||||
|
||||
/*
 * Debugging aid: dump the keyword tree as a graphviz digraph in the file
 * "keywordtree.gv" of the current directory. The root is remembered in
 * groot for the link printer. If the file cannot be opened a message is
 * written on stderr and nothing is dumped.
 */
void ahoc_graphKeywordTree (aho_state *root)
{
	FILE *gfile;

	groot=root;
	gfile = fopen ("keywordtree.gv", "w");
	if (gfile == NULL)
	{
		/* writing through a NULL stream would crash the program */
		fprintf (stderr, "Cannot open keywordtree.gv for writing\n");
		return;
	}

	fprintf (gfile, "digraph keywordtree {\n");
	ahoc_graphPrintNodesInfo (root, gfile);
	ahoc_graphPrintNodesLinks (root, gfile);
	fprintf (gfile, "}\n");
	fclose(gfile);
}
|
||||
|
43
src/libecoprimer/ahocorasick.h
Executable file
43
src/libecoprimer/ahocorasick.h
Executable file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* ahocorasick.h
|
||||
*
|
||||
* Created on: 26 march 2011
|
||||
* Author: tiayyba
|
||||
*/
|
||||
|
||||
#ifndef H_ahocorasick
|
||||
#define H_ahocorasick
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
/* One word recognised at an automaton state: its index and its strand. */
typedef struct aho_output_t{
|
||||
uint32_t wordidx; //index of the strict word in the word table (the word value itself is not stored here)
|
||||
bool_t isdirect; //both the direct and the reverse words are searched, so we must know which one this is
|
||||
}aho_output;
|
||||
|
||||
/* Output set of a state: a counted array of output elements. */
typedef struct aho_output_count_t{
|
||||
uint32_t count; //number of elements in out_set
|
||||
aho_output *out_set; //the elements; only meaningful when count > 0
|
||||
}aho_output_count;
|
||||
|
||||
/* One state of the Aho-Corasick automaton. */
typedef struct aho_state_t{
|
||||
int32_t id; //state number, 0 for the root
|
||||
struct aho_state_t *next[4]; //for labels A=0,C=1,G=2 and T=3
|
||||
struct aho_state_t *fail; //state to fall back to when no edge exists for the current base
|
||||
aho_output_count output; //words that end on this state
|
||||
}aho_state;
|
||||
|
||||
/* Cell of the singly linked queue of states. */
typedef struct queue_node_t {
|
||||
aho_state *state_node; //the queued state
|
||||
struct queue_node_t *next; //following cell, NULL for the last one
|
||||
}queue_node;
|
||||
|
||||
/* FIFO queue of states; empty when first is NULL. */
typedef struct{
|
||||
queue_node *first; //head, where states are removed
|
||||
queue_node *last; //tail, where states are appended
|
||||
}aho_queue;
|
||||
|
||||
pprimercount_t ahoc_lookforStrictPrimers (pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||
pwordcount_t words,poptions_t options);
|
||||
#endif /* H_ahocorasick */
|
||||
|
@ -176,6 +176,7 @@ typedef struct {
|
||||
int *wellIdentifiedSeqs; //< an array having elements equal to total seqs
|
||||
// values are either 0 or 1, if seq is well identified
|
||||
// its 1 else 0
|
||||
int *coveredSeqs; //< an array having elements equal to total seqs, 1 if seq is covered else 0
|
||||
|
||||
// these statistics are relative to inexample sequences
|
||||
|
||||
@ -291,6 +292,9 @@ typedef struct {
|
||||
PNNParams pnparm;
|
||||
bool_t print_sets_of_primers;
|
||||
float specificity_threshold;
|
||||
int links_cnt;
|
||||
float max_links_percent;
|
||||
bool_t filter_on_links;
|
||||
} options_t, *poptions_t;
|
||||
|
||||
typedef ecoseq_t **pecodnadb_t;
|
||||
@ -350,7 +354,7 @@ int32_t getrankdbstats(pecodnadb_t seqdb,
|
||||
uint32_t seqdbsize,
|
||||
ecotaxonomy_t *taxonomy,
|
||||
poptions_t options);
|
||||
float taxonomycoverage(ppair_t pair, poptions_t options);
|
||||
float taxonomycoverage(ppair_t pair, poptions_t options, pecodnadb_t seqdb,uint32_t seqdbsize);
|
||||
char ecoComplementChar(char base);
|
||||
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize);
|
||||
|
||||
|
@ -114,6 +114,8 @@ static int32_t *ecoFilteringHashSequence(int32_t *dest,
|
||||
error<<= 1;
|
||||
error&=ERRORMASK(FWORDSIZE);
|
||||
|
||||
//code = -1;
|
||||
//if((*base) >= 'A' && (*base) <= 'Z')
|
||||
code = encoder[(*base) - 'A'];
|
||||
if (code <0)
|
||||
{
|
||||
@ -154,7 +156,7 @@ int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
|
||||
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
if (database[i]->isexample)
|
||||
if (database[i]->isexample && database[i]->SQ_length > options->primer_length)
|
||||
{
|
||||
j++;
|
||||
wordscount=ecoFilteringHashSequence(wordscount,
|
||||
|
@ -179,7 +179,7 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
||||
uint32_t i,j,k;
|
||||
uint32_t matchcount=0;
|
||||
pprimermatch_t matches = NULL;
|
||||
primermatchcount_t seqmatchcount;
|
||||
//primermatchcount_t seqmatchcount;
|
||||
ppair_t pcurrent;
|
||||
pair_t current;
|
||||
pprimer_t wswp;
|
||||
@ -189,9 +189,9 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
||||
//char prmr[50];
|
||||
//float mtemp;
|
||||
word_t w1, w1a, omask = (0x1L << (options->strict_three_prime*2)) -1;
|
||||
word_t w2, w2a, wtmp;
|
||||
word_t w2, w2a;//, wtmp;
|
||||
uint32_t bp1,bp2;
|
||||
|
||||
|
||||
//prmr[options->primer_length] = '\0';
|
||||
|
||||
for (i=0;i < primers->size; i++)
|
||||
@ -252,16 +252,17 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
||||
{
|
||||
// For all primers matching the sequence
|
||||
|
||||
//for(j=i+1;
|
||||
// (j<matchcount)
|
||||
// && ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
|
||||
// j++
|
||||
// )
|
||||
/*for(j=i+1;
|
||||
(j<matchcount)
|
||||
&& ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
|
||||
j++
|
||||
)//*/
|
||||
for (j=i+1; j<matchcount; j++)
|
||||
{
|
||||
if (matches[j].position - matches[i].position <= options->primer_length) continue;
|
||||
distance = matches[j].position - matches[i].position - options->primer_length;
|
||||
if (distance >= options->lmax) break;
|
||||
|
||||
|
||||
// For all not too far primers
|
||||
|
||||
@ -269,9 +270,7 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
||||
&& (distance > options->lmin)
|
||||
)
|
||||
{
|
||||
|
||||
// If possible primer pair
|
||||
|
||||
current.p1 = matches[i].primer;
|
||||
current.asdirect1=matches[i].strand;
|
||||
current.p2 = matches[j].primer;
|
||||
@ -456,7 +455,6 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pairs->count=paircount;
|
||||
|
||||
}
|
||||
|
@ -108,10 +108,11 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
|
||||
|
||||
table->inseqcount++;
|
||||
|
||||
|
||||
table->strictcount = ECOREALLOC(table->strictcount,buffersize*sizeof(uint32_t),
|
||||
//fprintf (stderr, "\nOldAddress: %x", table->strictcount);
|
||||
table->strictcount = ECOREALLOC(table->strictcount,(buffersize+5000)*sizeof(uint32_t),
|
||||
"Cannot allocate memory to extend example word count table");
|
||||
|
||||
//fprintf (stderr, " NewAddress: %x\n", table->strictcount);
|
||||
|
||||
for (i=table->size; i < buffersize; i++)
|
||||
table->strictcount[i]=1;
|
||||
|
||||
@ -172,7 +173,7 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
|
||||
|
||||
for (i=0;i<seqdbsize;i++)
|
||||
{
|
||||
if (database[i]->isexample)
|
||||
if (database[i]->isexample && database[i]->SQ_length > options->primer_length)
|
||||
{
|
||||
|
||||
if (first)
|
||||
|
@ -6,10 +6,46 @@
|
||||
*/
|
||||
|
||||
#include <search.h>
|
||||
//void tdestroy (void *root, void (*free_node)(void *nodep));
|
||||
|
||||
#include "ecoprimer.h"
|
||||
|
||||
static int cmptaxon(const void *t1, const void* t2);
|
||||
|
||||
void **tree_root = NULL;
|
||||
int delete_passes = 0;
|
||||
|
||||
/*
 * twalk() action callback used by free_tree_nodes().
 *
 * Every invocation increments the global delete_passes counter, whatever
 * the visit kind. On a leaf visit, and only when the global tree_root is
 * non-NULL, it additionally calls tdelete() on that tree using cmptaxon.
 *
 * node  : pointer supplied by twalk() for the visited node
 * order : visit kind (preorder, postorder, endorder or leaf)
 * level : depth supplied by twalk(); not used here
 *
 * NOTE(review): `node` itself is passed to tdelete() as the key. twalk()
 * presumably supplies a pointer to the tree node rather than the stored
 * key, so the lookup would not be expected to match - confirm against the
 * tsearch documentation.
 * NOTE(review): the tree is modified from inside a twalk() callback;
 * verify that the C library in use permits this.
 */
void delete_twalkaction (const void *node, VISIT order, int level)
|
||||
{
|
||||
switch (order)
|
||||
{
|
||||
case preorder:
|
||||
delete_passes++;
|
||||
break;
|
||||
case postorder:
|
||||
delete_passes++;
|
||||
break;
|
||||
case endorder:
|
||||
delete_passes++;
|
||||
break;
|
||||
case leaf:
|
||||
/* Only attempt a deletion while a teardown has published its root. */
if (tree_root)
|
||||
tdelete (node, tree_root,cmptaxon);
|
||||
delete_passes++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void free_tree_nodes (void *tree)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
delete_passes = 0;
|
||||
twalk (tree, delete_twalkaction);
|
||||
if (delete_passes <= 1) break;
|
||||
}
|
||||
}
|
||||
|
||||
static int cmptaxon(const void *t1, const void* t2)
|
||||
{
|
||||
const size_t taxid1=(size_t)t1;
|
||||
@ -35,7 +71,12 @@ int32_t counttaxon(int32_t taxid)
|
||||
if (taxid==-1)
|
||||
{
|
||||
if (taxontree)
|
||||
{
|
||||
tree_root = (void **)&taxontree;
|
||||
//free_tree_nodes (taxontree);
|
||||
ECOFREE(taxontree,"Free taxon tree");
|
||||
tree_root = NULL;
|
||||
}
|
||||
taxontree=NULL;
|
||||
taxoncount=0;
|
||||
return 0;
|
||||
@ -97,22 +138,30 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
|
||||
}
|
||||
|
||||
|
||||
float taxonomycoverage(ppair_t pair, poptions_t options)
|
||||
float taxonomycoverage(ppair_t pair, poptions_t options, pecodnadb_t seqdb,uint32_t seqdbsize)
|
||||
{
|
||||
int32_t seqcount;
|
||||
int32_t i;
|
||||
int32_t incount=0;
|
||||
int32_t outcount=0;
|
||||
uint32_t j;
|
||||
|
||||
|
||||
memset (pair->coveredSeqs, 0, seqdbsize*sizeof (int));
|
||||
seqcount=pair->pcr.ampcount;
|
||||
|
||||
counttaxon(-1);
|
||||
for (i=0; i < seqcount; i++)
|
||||
if (pair->pcr.amplifias[i].sequence->isexample
|
||||
&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0 )
|
||||
{
|
||||
incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
|
||||
|
||||
for (j=0; j<seqdbsize; j++)
|
||||
if (pair->pcr.amplifias[i].sequence == seqdb[j])
|
||||
{pair->coveredSeqs[j] = 1; break;}
|
||||
}
|
||||
|
||||
counttaxon(-1);
|
||||
for (i=0; i < seqcount; i++)
|
||||
if (!pair->pcr.amplifias[i].sequence->isexample
|
||||
@ -145,12 +194,14 @@ static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
{
|
||||
incr = -1;
|
||||
j = pampf1->length - 1;
|
||||
|
||||
if (pampf2->strand)
|
||||
{
|
||||
pampf1 = (pamptotaxon_t) ampf2;
|
||||
pampf2 = (pamptotaxon_t) ampf1;
|
||||
chd = 1;
|
||||
}
|
||||
//j = pampf2->length - 1; should have been here and pampf2 instead of pampf1?
|
||||
}
|
||||
|
||||
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
|
||||
@ -173,6 +224,7 @@ static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
return 0;
|
||||
}*/
|
||||
|
||||
|
||||
static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
{
|
||||
int i;
|
||||
@ -183,10 +235,10 @@ static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
char *ch2;
|
||||
int incr1;
|
||||
int incr2;
|
||||
|
||||
|
||||
pamptotaxon_t pampf1 = (pamptotaxon_t) ampf1;
|
||||
pamptotaxon_t pampf2 = (pamptotaxon_t) ampf2;
|
||||
|
||||
|
||||
ch1 = pampf1->amplifia;
|
||||
ch2 = pampf2->amplifia;
|
||||
|
||||
@ -218,7 +270,7 @@ static int cmpamp(const void *ampf1, const void* ampf2)
|
||||
|
||||
if (pampf1->length > pampf2->length) return 1;
|
||||
if (pampf2->length > pampf1->length) return -1;
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -242,6 +294,8 @@ void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
||||
uint32_t i, j;
|
||||
uint32_t ampfindex = 0;
|
||||
int32_t taxid;
|
||||
uint32_t wellidentifiedcount;
|
||||
|
||||
void *ampftree = NULL;
|
||||
pamptotaxon_t pcurrentampf;
|
||||
pamptotaxon_t *ptmp;
|
||||
@ -278,11 +332,14 @@ void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
||||
}
|
||||
|
||||
memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int));
|
||||
counttaxon(-1);
|
||||
//counttaxon(-1);
|
||||
for (i = 0; i < ampfindex; i++)
|
||||
{
|
||||
if (ampfwithtaxtree[i].taxoncount > 1)
|
||||
twalk(ampfwithtaxtree[i].taxontree, twalkaction);
|
||||
{
|
||||
//printf ("\nampfwithtaxtree[i].taxoncount: %d\n", ampfwithtaxtree[i].taxoncount);
|
||||
//twalk(ampfwithtaxtree[i].taxontree, twalkaction);
|
||||
}
|
||||
//TR 5/9/10 - added code for well identified seqs
|
||||
else if(ampfwithtaxtree[i].taxoncount == 1) /*well identified*/
|
||||
{
|
||||
@ -293,6 +350,7 @@ void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
||||
{
|
||||
for (j = 0; j < seqdbsize; j++)
|
||||
if (seqdb[j]->ranktaxonid == gtxid
|
||||
&& seqdb[j]->isexample
|
||||
&&(pair->p1->directCount[j] > 0
|
||||
|| pair->p1->reverseCount[j] > 0)
|
||||
&& (pair->p2->directCount[j] > 0
|
||||
@ -303,10 +361,18 @@ void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pair->notwellidentifiedtaxa = counttaxon(-2);
|
||||
pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa;
|
||||
|
||||
//printf ("\n");
|
||||
counttaxon(-1);
|
||||
wellidentifiedcount = 0;
|
||||
for (j = 0; j < seqdbsize; j++)
|
||||
if (pair->wellIdentifiedSeqs[j] == 1)
|
||||
counttaxon(seqdb[j]->ranktaxonid);
|
||||
wellidentifiedcount = counttaxon(-2);
|
||||
//pair->notwellidentifiedtaxa = counttaxon(-2);
|
||||
pair->notwellidentifiedtaxa = (pair->intaxa-wellidentifiedcount); //counttaxon(-2);
|
||||
//pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa;
|
||||
pair->bs = ((float)wellidentifiedcount) / (float)pair->intaxa;
|
||||
|
||||
ECOFREE (ampfwithtaxtree, "Free amplifia table");
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user