My complete changes on my laptop, with specificity bug fix + ahocorasick + sets
git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPrimers/trunk@393 60f365c0-8329-0410-b2a4-ec073aeeaa1d
This commit is contained in:
364
.cproject
364
.cproject
@ -1,151 +1,221 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
<?fileVersion 4.0.0?>
|
<?fileVersion 4.0.0?>
|
||||||
|
|
||||||
<cproject>
|
<cproject>
|
||||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||||
<cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396">
|
<cconfiguration id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396">
|
||||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC">
|
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" moduleId="org.eclipse.cdt.core.settings" name="MacOSX GCC">
|
||||||
<externalSettings/>
|
<externalSettings/>
|
||||||
<extensions>
|
<extensions>
|
||||||
<extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/>
|
<extension id="org.eclipse.cdt.core.MachO" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||||
<extension id="org.eclipse.cdt.core.MakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||||
</extensions>
|
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||||
</storageModule>
|
</extensions>
|
||||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
</storageModule>
|
||||||
<configuration artifactName="ecoPrimers" buildProperties="" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg">
|
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||||
<folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077" name="/" resourcePath="">
|
<configuration artifactName="ecoPrimers" buildProperties="" description="" id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396" name="MacOSX GCC" parent="org.eclipse.cdt.build.core.emptycfg">
|
||||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.766054112" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base">
|
<folderInfo id="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077" name="/" resourcePath="">
|
||||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.2057035265" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/>
|
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.base.766054112" name="cdt.managedbuild.toolchain.gnu.macosx.base" superClass="cdt.managedbuild.toolchain.gnu.macosx.base">
|
||||||
<builder id="cdt.managedbuild.target.gnu.builder.macosx.base.783726363" managedBuildOn="false" name="Gnu Make Builder.MacOSX GCC" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/>
|
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.MachO" id="cdt.managedbuild.target.gnu.platform.macosx.base.2057035265" name="Debug Platform" osList="macosx" superClass="cdt.managedbuild.target.gnu.platform.macosx.base"/>
|
||||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.914103467" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base">
|
<builder id="cdt.managedbuild.target.gnu.builder.macosx.base.783726363" keepEnvironmentInBuildfile="false" managedBuildOn="false" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.base"/>
|
||||||
<inputType id="cdt.managedbuild.tool.macosx.c.linker.input.62980206" superClass="cdt.managedbuild.tool.macosx.c.linker.input">
|
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.base.914103467" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.base">
|
||||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
<inputType id="cdt.managedbuild.tool.macosx.c.linker.input.62980206" superClass="cdt.managedbuild.tool.macosx.c.linker.input">
|
||||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||||
</inputType>
|
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||||
</tool>
|
</inputType>
|
||||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.691108439" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/>
|
</tool>
|
||||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.695639877" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base">
|
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base.691108439" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.base"/>
|
||||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1507665054" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.base.695639877" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.base">
|
||||||
</tool>
|
<option id="gnu.both.asm.option.include.paths.1544375094" name="Include paths (-I)" superClass="gnu.both.asm.option.include.paths" valueType="includePath"/>
|
||||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1786370580" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1507665054" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.454329831" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/>
|
</tool>
|
||||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base">
|
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1786370580" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.330854350" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base.454329831" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.base"/>
|
||||||
</tool>
|
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.base">
|
||||||
</toolChain>
|
<option id="gnu.c.compiler.option.include.paths.823251305" superClass="gnu.c.compiler.option.include.paths" valueType="includePath">
|
||||||
</folderInfo>
|
<listOptionValue builtIn="false" value="/usr/include"/>
|
||||||
</configuration>
|
</option>
|
||||||
</storageModule>
|
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.330854350" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||||
<storageModule moduleId="scannerConfiguration">
|
</tool>
|
||||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
|
</toolChain>
|
||||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
|
</folderInfo>
|
||||||
<buildOutputProvider>
|
</configuration>
|
||||||
<openAction enabled="true" filePath=""/>
|
</storageModule>
|
||||||
<parser enabled="true"/>
|
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||||
</buildOutputProvider>
|
<storageModule moduleId="org.eclipse.cdt.core.language.mapping"/>
|
||||||
<scannerInfoProvider id="specsFile">
|
<storageModule moduleId="scannerConfiguration">
|
||||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile"/>
|
||||||
<parser enabled="true"/>
|
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
|
||||||
</scannerInfoProvider>
|
<buildOutputProvider>
|
||||||
</profile>
|
<openAction enabled="true" filePath=""/>
|
||||||
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
|
<parser enabled="true"/>
|
||||||
<buildOutputProvider>
|
</buildOutputProvider>
|
||||||
<openAction enabled="true" filePath=""/>
|
<scannerInfoProvider id="specsFile">
|
||||||
<parser enabled="true"/>
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||||
</buildOutputProvider>
|
<parser enabled="true"/>
|
||||||
<scannerInfoProvider id="makefileGenerator">
|
</scannerInfoProvider>
|
||||||
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
|
</profile>
|
||||||
<parser enabled="true"/>
|
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
|
||||||
</scannerInfoProvider>
|
<buildOutputProvider>
|
||||||
</profile>
|
<openAction enabled="true" filePath=""/>
|
||||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
|
<parser enabled="true"/>
|
||||||
<buildOutputProvider>
|
</buildOutputProvider>
|
||||||
<openAction enabled="true" filePath=""/>
|
<scannerInfoProvider id="makefileGenerator">
|
||||||
<parser enabled="true"/>
|
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
|
||||||
</buildOutputProvider>
|
<parser enabled="true"/>
|
||||||
<scannerInfoProvider id="specsFile">
|
</scannerInfoProvider>
|
||||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
</profile>
|
||||||
<parser enabled="true"/>
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
|
||||||
</scannerInfoProvider>
|
<buildOutputProvider>
|
||||||
</profile>
|
<openAction enabled="true" filePath=""/>
|
||||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
|
<parser enabled="true"/>
|
||||||
<buildOutputProvider>
|
</buildOutputProvider>
|
||||||
<openAction enabled="true" filePath=""/>
|
<scannerInfoProvider id="specsFile">
|
||||||
<parser enabled="true"/>
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||||
</buildOutputProvider>
|
<parser enabled="true"/>
|
||||||
<scannerInfoProvider id="specsFile">
|
</scannerInfoProvider>
|
||||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
</profile>
|
||||||
<parser enabled="true"/>
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
|
||||||
</scannerInfoProvider>
|
<buildOutputProvider>
|
||||||
</profile>
|
<openAction enabled="true" filePath=""/>
|
||||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
|
<parser enabled="true"/>
|
||||||
<buildOutputProvider>
|
</buildOutputProvider>
|
||||||
<openAction enabled="true" filePath=""/>
|
<scannerInfoProvider id="specsFile">
|
||||||
<parser enabled="true"/>
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||||
</buildOutputProvider>
|
<parser enabled="true"/>
|
||||||
<scannerInfoProvider id="specsFile">
|
</scannerInfoProvider>
|
||||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
</profile>
|
||||||
<parser enabled="true"/>
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
|
||||||
</scannerInfoProvider>
|
<buildOutputProvider>
|
||||||
</profile>
|
<openAction enabled="true" filePath=""/>
|
||||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
|
<parser enabled="true"/>
|
||||||
<buildOutputProvider>
|
</buildOutputProvider>
|
||||||
<openAction enabled="true" filePath=""/>
|
<scannerInfoProvider id="specsFile">
|
||||||
<parser enabled="true"/>
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||||
</buildOutputProvider>
|
<parser enabled="true"/>
|
||||||
<scannerInfoProvider id="specsFile">
|
</scannerInfoProvider>
|
||||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
</profile>
|
||||||
<parser enabled="true"/>
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
|
||||||
</scannerInfoProvider>
|
<buildOutputProvider>
|
||||||
</profile>
|
<openAction enabled="true" filePath=""/>
|
||||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
|
<parser enabled="true"/>
|
||||||
<buildOutputProvider>
|
</buildOutputProvider>
|
||||||
<openAction enabled="true" filePath=""/>
|
<scannerInfoProvider id="specsFile">
|
||||||
<parser enabled="true"/>
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||||
</buildOutputProvider>
|
<parser enabled="true"/>
|
||||||
<scannerInfoProvider id="specsFile">
|
</scannerInfoProvider>
|
||||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
</profile>
|
||||||
<parser enabled="true"/>
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
|
||||||
</scannerInfoProvider>
|
<buildOutputProvider>
|
||||||
</profile>
|
<openAction enabled="true" filePath=""/>
|
||||||
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
|
<parser enabled="true"/>
|
||||||
<buildOutputProvider>
|
</buildOutputProvider>
|
||||||
<openAction enabled="true" filePath=""/>
|
<scannerInfoProvider id="specsFile">
|
||||||
<parser enabled="true"/>
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||||
</buildOutputProvider>
|
<parser enabled="true"/>
|
||||||
<scannerInfoProvider id="specsFile">
|
</scannerInfoProvider>
|
||||||
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
</profile>
|
||||||
<parser enabled="true"/>
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
|
||||||
</scannerInfoProvider>
|
<buildOutputProvider>
|
||||||
</profile>
|
<openAction enabled="true" filePath=""/>
|
||||||
<profile id="org.eclipse.cdt.managedbuilder.xlc.core.XLCManagedMakePerProjectProfile">
|
<parser enabled="true"/>
|
||||||
<buildOutputProvider>
|
</buildOutputProvider>
|
||||||
<openAction enabled="false" filePath=""/>
|
<scannerInfoProvider id="specsFile">
|
||||||
<parser enabled="false"/>
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||||
</buildOutputProvider>
|
<parser enabled="true"/>
|
||||||
<scannerInfoProvider id="specsFile">
|
</scannerInfoProvider>
|
||||||
<runAction arguments="-E -v ${plugin_state_location}/${specs_file}" command="${XL_compilerRoot}/xlc" useDefault="true"/>
|
</profile>
|
||||||
<parser enabled="true"/>
|
<scannerConfigBuildInfo instanceId="cdt.managedbuild.toolchain.gnu.macosx.base.2134184396;cdt.managedbuild.toolchain.gnu.macosx.base.2134184396.1840911077;cdt.managedbuild.tool.gnu.c.compiler.macosx.base.1928774909;cdt.managedbuild.tool.gnu.c.compiler.input.330854350">
|
||||||
</scannerInfoProvider>
|
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||||
</profile>
|
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerProjectProfile">
|
||||||
<profile id="org.eclipse.cdt.managedbuilder.xlc.core.XLCManagedMakePerProjectProfileCPP">
|
<buildOutputProvider>
|
||||||
<buildOutputProvider>
|
<openAction enabled="true" filePath=""/>
|
||||||
<openAction enabled="false" filePath=""/>
|
<parser enabled="true"/>
|
||||||
<parser enabled="false"/>
|
</buildOutputProvider>
|
||||||
</buildOutputProvider>
|
<scannerInfoProvider id="specsFile">
|
||||||
<scannerInfoProvider id="specsFile">
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||||
<runAction arguments="-E -v ${plugin_state_location}/${specs_file}" command="${XL_compilerRoot}/xlC" useDefault="true"/>
|
<parser enabled="true"/>
|
||||||
<parser enabled="true"/>
|
</scannerInfoProvider>
|
||||||
</scannerInfoProvider>
|
</profile>
|
||||||
</profile>
|
<profile id="org.eclipse.cdt.make.core.GCCStandardMakePerFileProfile">
|
||||||
</storageModule>
|
<buildOutputProvider>
|
||||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
<openAction enabled="true" filePath=""/>
|
||||||
</cconfiguration>
|
<parser enabled="true"/>
|
||||||
</storageModule>
|
</buildOutputProvider>
|
||||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
<scannerInfoProvider id="makefileGenerator">
|
||||||
<project id="ecoPrimers.null.1292969001" name="ecoPrimers"/>
|
<runAction arguments="-f ${project_name}_scd.mk" command="make" useDefault="true"/>
|
||||||
</storageModule>
|
<parser enabled="true"/>
|
||||||
|
</scannerInfoProvider>
|
||||||
|
</profile>
|
||||||
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfile">
|
||||||
|
<buildOutputProvider>
|
||||||
|
<openAction enabled="true" filePath=""/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</buildOutputProvider>
|
||||||
|
<scannerInfoProvider id="specsFile">
|
||||||
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</scannerInfoProvider>
|
||||||
|
</profile>
|
||||||
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP">
|
||||||
|
<buildOutputProvider>
|
||||||
|
<openAction enabled="true" filePath=""/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</buildOutputProvider>
|
||||||
|
<scannerInfoProvider id="specsFile">
|
||||||
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</scannerInfoProvider>
|
||||||
|
</profile>
|
||||||
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC">
|
||||||
|
<buildOutputProvider>
|
||||||
|
<openAction enabled="true" filePath=""/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</buildOutputProvider>
|
||||||
|
<scannerInfoProvider id="specsFile">
|
||||||
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</scannerInfoProvider>
|
||||||
|
</profile>
|
||||||
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfile">
|
||||||
|
<buildOutputProvider>
|
||||||
|
<openAction enabled="true" filePath=""/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</buildOutputProvider>
|
||||||
|
<scannerInfoProvider id="specsFile">
|
||||||
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/${specs_file}" command="gcc" useDefault="true"/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</scannerInfoProvider>
|
||||||
|
</profile>
|
||||||
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileCPP">
|
||||||
|
<buildOutputProvider>
|
||||||
|
<openAction enabled="true" filePath=""/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</buildOutputProvider>
|
||||||
|
<scannerInfoProvider id="specsFile">
|
||||||
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.cpp" command="g++" useDefault="true"/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</scannerInfoProvider>
|
||||||
|
</profile>
|
||||||
|
<profile id="org.eclipse.cdt.managedbuilder.core.GCCWinManagedMakePerProjectProfileC">
|
||||||
|
<buildOutputProvider>
|
||||||
|
<openAction enabled="true" filePath=""/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</buildOutputProvider>
|
||||||
|
<scannerInfoProvider id="specsFile">
|
||||||
|
<runAction arguments="-E -P -v -dD ${plugin_state_location}/specs.c" command="gcc" useDefault="true"/>
|
||||||
|
<parser enabled="true"/>
|
||||||
|
</scannerInfoProvider>
|
||||||
|
</profile>
|
||||||
|
</scannerConfigBuildInfo>
|
||||||
|
</storageModule>
|
||||||
|
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
|
||||||
|
</cconfiguration>
|
||||||
|
</storageModule>
|
||||||
|
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||||
|
<project id="ecoPrimers.null.1292969001" name="ecoPrimers"/>
|
||||||
|
</storageModule>
|
||||||
</cproject>
|
</cproject>
|
||||||
|
212
src/ecoprimer.c
212
src/ecoprimer.c
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#include "libecoprimer/ecoprimer.h"
|
#include "libecoprimer/ecoprimer.h"
|
||||||
#include "libecoprimer/PrimerSets.h"
|
#include "libecoprimer/PrimerSets.h"
|
||||||
|
#include "libecoprimer/ahocorasick.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
@ -25,6 +26,8 @@
|
|||||||
|
|
||||||
static int cmpprintedpairs(const void* p1,const void* p2);
|
static int cmpprintedpairs(const void* p1,const void* p2);
|
||||||
//float _Z27calculateMeltingTemperature_ (char * seq1, char * seq2);
|
//float _Z27calculateMeltingTemperature_ (char * seq1, char * seq2);
|
||||||
|
pwordcount_t reduce_words_to_debug (pwordcount_t words, poptions_t options);
|
||||||
|
void print_wordwith_positions (primer_t prm, uint32_t seqdbsize, poptions_t options);
|
||||||
|
|
||||||
void* lib_handle = NULL;
|
void* lib_handle = NULL;
|
||||||
float (*calcMelTemp)(char*, char*);
|
float (*calcMelTemp)(char*, char*);
|
||||||
@ -71,12 +74,12 @@ static void PrintHelp()
|
|||||||
PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1 or OWCZARZY:2, default=1)\n\n");
|
PP "-m : Salt correction method for Tm computation (SANTALUCIA : 1 or OWCZARZY:2, default=1)\n\n");
|
||||||
PP "-a : Salt contentration in M for Tm computation (default 0.05 M)\n\n");
|
PP "-a : Salt contentration in M for Tm computation (default 0.05 M)\n\n");
|
||||||
PP "-U : No multi match\n\n");
|
PP "-U : No multi match\n\n");
|
||||||
PP "-U : Define the [R]eference sequence identifier (must be part of example set)\n\n");
|
PP "-R : Define the [R]eference sequence identifier (must be part of example set)\n\n");
|
||||||
PP "-A : Print the list of all identifier of sequences present in the database\n\n");
|
PP "-A : Print the list of all identifier of sequences present in the database\n\n");
|
||||||
PP "-f : Remove data mining step during strict primer identification\n\n");
|
PP "-f : Remove data mining step during strict primer identification\n\n");
|
||||||
PP "-v : Store statistic file about memory usage during strict primer identification\n\n");
|
PP "-v : Store statistic file about memory usage during strict primer identification\n\n");
|
||||||
PP "-p : Print sets of primers\n\n");
|
PP "-p : Print sets of primers (may take several minutes after primers have been designed!)\n\n");
|
||||||
PP "-T : Ignore pairs having specificity below this Threshold\n\n");
|
PP "-T : Ignore pairs having specificity below this Threshold\n\n");
|
||||||
PP "\n");
|
PP "\n");
|
||||||
PP "------------------------------------------\n");
|
PP "------------------------------------------\n");
|
||||||
PP "Table result description : \n");
|
PP "Table result description : \n");
|
||||||
@ -151,6 +154,9 @@ void initoptions(poptions_t options)
|
|||||||
options->printAC=FALSE;
|
options->printAC=FALSE;
|
||||||
options->print_sets_of_primers = FALSE;
|
options->print_sets_of_primers = FALSE;
|
||||||
options->specificity_threshold = 0.6;
|
options->specificity_threshold = 0.6;
|
||||||
|
options->links_cnt = 1;
|
||||||
|
options->max_links_percent = -1; /*graph only those primers having maximum 15% links*/
|
||||||
|
options->filter_on_links = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void printapair(int32_t index,ppair_t pair, poptions_t options)
|
void printapair(int32_t index,ppair_t pair, poptions_t options)
|
||||||
@ -165,7 +171,7 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
|
|||||||
bool_t good2=pair->p2->good;
|
bool_t good2=pair->p2->good;
|
||||||
bool_t goodtmp;
|
bool_t goodtmp;
|
||||||
bool_t strand;
|
bool_t strand;
|
||||||
uint32_t i;
|
uint32_t i, j;
|
||||||
float temp;
|
float temp;
|
||||||
CNNParams nnparams;
|
CNNParams nnparams;
|
||||||
|
|
||||||
@ -296,6 +302,12 @@ void printapair(int32_t index,ppair_t pair, poptions_t options)
|
|||||||
else
|
else
|
||||||
printf("\t\t");
|
printf("\t\t");
|
||||||
|
|
||||||
|
/* j=0;
|
||||||
|
for (i=0; i<options->dbsize; i++)
|
||||||
|
if (pair->wellIdentifiedSeqs[i] == 1)
|
||||||
|
j++;
|
||||||
|
printf("%d", j);*/
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -335,6 +347,7 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
|||||||
else qfp=0.0;
|
else qfp=0.0;
|
||||||
|
|
||||||
sortedpairs[i]->wellIdentifiedSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets
|
sortedpairs[i]->wellIdentifiedSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets
|
||||||
|
sortedpairs[i]->coveredSeqs = NULL; //TR 05/09/10 - wellIdentified needed for primer sets
|
||||||
sortedpairs[i]->quorumin = q;
|
sortedpairs[i]->quorumin = q;
|
||||||
sortedpairs[i]->quorumout = qfp;
|
sortedpairs[i]->quorumout = qfp;
|
||||||
sortedpairs[i]->yule = q - qfp;
|
sortedpairs[i]->yule = q - qfp;
|
||||||
@ -345,13 +358,13 @@ uint32_t filterandsortpairs(ppair_t* sortedpairs,uint32_t count, poptions_t opti
|
|||||||
{
|
{
|
||||||
//TR 05/09/10 - wellIdentified needed for primer sets
|
//TR 05/09/10 - wellIdentified needed for primer sets
|
||||||
sortedpairs[j]->wellIdentifiedSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array");
|
sortedpairs[j]->wellIdentifiedSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array");
|
||||||
(void)taxonomycoverage(sortedpairs[j],options);
|
sortedpairs[j]->coveredSeqs = ECOMALLOC(options->dbsize * sizeof(int),"Cannot allocate well_identified_array");
|
||||||
|
(void)taxonomycoverage(sortedpairs[j],options, seqdb, options->dbsize);
|
||||||
taxonomyspecificity(sortedpairs[j], seqdb, options->dbsize);
|
taxonomyspecificity(sortedpairs[j], seqdb, options->dbsize);
|
||||||
//j++;
|
//j++;
|
||||||
//if specificity is less than the user-provided threshold (default 60%) then ignore this pair
|
//if specificity is less than the user-provided threshold (default 60%) then ignore this pair
|
||||||
if (sortedpairs[j]->bs >= options->specificity_threshold)
|
if (sortedpairs[j]->bs >= options->specificity_threshold)
|
||||||
j++;
|
j++;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -369,7 +382,8 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy,
|
|||||||
size_t count;
|
size_t count;
|
||||||
char *taxon[]={"taxon","taxa"};
|
char *taxon[]={"taxon","taxa"};
|
||||||
ecotx_t *current_taxon;
|
ecotx_t *current_taxon;
|
||||||
pairset pair_sets;
|
//pairset pair_sets;
|
||||||
|
pairset *pset = NULL;
|
||||||
|
|
||||||
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
|
//printf("Index\tPrimer1\tPrimer2\tGB\tInexampleCount\tOutexampleCount\tYule\tIntaxaCount\tOuttaxaCount\tCoverage\tSpecificity\tMinAmplifiedLength\tMaxAmplifiedLength\tAvgAmplifiedLength\n");
|
||||||
|
|
||||||
@ -452,14 +466,54 @@ void printpairs (ppairtree_t pairs, poptions_t options,ecotaxonomy_t *taxonomy,
|
|||||||
printf("# Pairs having specificity less than %0.2f will be ignored\n", options->specificity_threshold);
|
printf("# Pairs having specificity less than %0.2f will be ignored\n", options->specificity_threshold);
|
||||||
printf("#\n");
|
printf("#\n");
|
||||||
|
|
||||||
|
|
||||||
for (i=0;i < count;i++)
|
for (i=0;i < count;i++)
|
||||||
printapair(i,sortedpairs[i],options);
|
printapair(i,sortedpairs[i],options);
|
||||||
|
|
||||||
|
if (options->filter_on_links)
|
||||||
|
{
|
||||||
|
fprintf (stderr, "Old size: %d, ", count);
|
||||||
|
count = primers_changeSortedArray (&sortedpairs, count, options);
|
||||||
|
//count = primers_filterWithGivenLinks (&sortedpairs, count, options);
|
||||||
|
fprintf (stderr, "New size: %d\n", count);
|
||||||
|
|
||||||
|
if (count == 0)
|
||||||
|
{
|
||||||
|
fprintf (stderr, "No pairs passed the links constraints.\n");
|
||||||
|
printf ("No pairs passed the links constraints.\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i=0;i < count;i++)
|
||||||
|
printapair(i,sortedpairs[i],options);
|
||||||
|
}
|
||||||
|
|
||||||
if (options->print_sets_of_primers == TRUE)
|
if (options->print_sets_of_primers == TRUE)
|
||||||
{
|
{
|
||||||
pair_sets = build_primers_set (sortedpairs, count, seqdb, options);
|
/*pair_sets = build_primers_set (sortedpairs, count, seqdb, options);
|
||||||
|
printf("Results from Greedy Algorithm and some other possibilities:\n");
|
||||||
some_other_set_possibilities (&pair_sets, sortedpairs, count, seqdb, options);
|
some_other_set_possibilities (&pair_sets, sortedpairs, count, seqdb, options);
|
||||||
|
printf("Results from simulated Anealing:\n");
|
||||||
|
sets_by_SimulatedAnealing (&pair_sets, sortedpairs, count, seqdb, options);
|
||||||
|
printf("Results from Tabu Search:\n");
|
||||||
|
sets_by_TabuSearch (&pair_sets, sortedpairs, count, seqdb, options);*/
|
||||||
|
//pset = sets_by_BruteForce (sortedpairs, count, seqdb, options);
|
||||||
|
//if (pset)
|
||||||
|
/*/{
|
||||||
|
printf("Results from simulated Anealing:\n");
|
||||||
|
sets_by_SimulatedAnealing (pset, sortedpairs, count, seqdb, options);
|
||||||
|
printf("Results from Tabu Search:\n");
|
||||||
|
sets_by_TabuSearch (pset, sortedpairs, count, seqdb, options);
|
||||||
|
|
||||||
|
if (pset)
|
||||||
|
{
|
||||||
|
ECOFREE (pset->set_wellIdentifiedTaxa, "Could not free memory for pair set wi");
|
||||||
|
ECOFREE (pset, "Could not free memory for pair");
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
build_and_print_sets (sortedpairs, count, seqdb, options);
|
||||||
}
|
}
|
||||||
|
//primers_graph_graphviz (sortedpairs, count, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -545,7 +599,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
initoptions(&options);
|
initoptions(&options);
|
||||||
|
|
||||||
while ((carg = getopt(argc, argv, "hAfvcUDSpE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:T:")) != -1) {
|
while ((carg = getopt(argc, argv, "hAfvcUDSpbE:d:l:L:e:i:r:R:q:3:s:x:t:O:m:a:T:k:M:")) != -1) {
|
||||||
|
|
||||||
switch (carg) {
|
switch (carg) {
|
||||||
/* ---------------------------- */
|
/* ---------------------------- */
|
||||||
@ -711,13 +765,29 @@ int main(int argc, char **argv)
|
|||||||
/* -------------------- */
|
/* -------------------- */
|
||||||
case 'p': /* print sets of primers */
|
case 'p': /* print sets of primers */
|
||||||
/* --------------------------------- */
|
/* --------------------------------- */
|
||||||
options.print_sets_of_primers = TRUE;
|
//options.print_sets_of_primers = TRUE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* --------------------------------- */
|
||||||
|
case 'T': /* Ignore pairs having specificity below this Threshold */
|
||||||
/* --------------------------------- */
|
/* --------------------------------- */
|
||||||
case 'T': /* Ignore pairs having specificity below this Threshold */
|
sscanf(optarg,"%f",&(options.specificity_threshold));
|
||||||
|
break;
|
||||||
|
|
||||||
/* --------------------------------- */
|
/* --------------------------------- */
|
||||||
sscanf(optarg,"%f",&(options.specificity_threshold));
|
case 'M': /* Max link percentage for graph */
|
||||||
|
/* --------------------------------- */
|
||||||
|
sscanf(optarg,"%f",&(options.max_links_percent));
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* --------------------------------- */
|
||||||
|
case 'k': /* links count */
|
||||||
|
/* --------------------------------- */
|
||||||
|
sscanf(optarg,"%d",&(options.links_cnt));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'b':
|
||||||
|
options.filter_on_links = TRUE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '?': /* bad option */
|
case '?': /* bad option */
|
||||||
@ -780,6 +850,10 @@ int main(int argc, char **argv)
|
|||||||
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
|
words = lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
|
||||||
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
|
fprintf(stderr,"\n Strict primer count : %d\n",words->size);
|
||||||
|
|
||||||
|
/*/TR Testing
|
||||||
|
fprintf(stderr,"\nReducing for debugging\n");
|
||||||
|
words = reduce_words_to_debug (words, &options);
|
||||||
|
///*/
|
||||||
// options.filtering=FALSE;
|
// options.filtering=FALSE;
|
||||||
// words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
|
// words2= lookforStrictPrimer(seqdb,seqdbsize,insamples,&options);
|
||||||
// fprintf(stderr,"\n Strict primer count : %d\n",words2->size);
|
// fprintf(stderr,"\n Strict primer count : %d\n",words2->size);
|
||||||
@ -802,7 +876,6 @@ int main(int argc, char **argv)
|
|||||||
for (i=0; i<MINI(10,words->size); i++)
|
for (i=0; i<MINI(10,words->size); i++)
|
||||||
fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);
|
fprintf(stderr," + Primer : %s sequence count : %d\n",ecoUnhashWord(words->words[i],options.primer_length),words->strictcount[i]);
|
||||||
|
|
||||||
|
|
||||||
fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n");
|
fprintf(stderr,"\nEncoding sequences for fuzzy pattern matching...\n");
|
||||||
for (i=0;i<seqdbsize;i++)
|
for (i=0;i<seqdbsize;i++)
|
||||||
{
|
{
|
||||||
@ -812,7 +885,13 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
ECOFREE(words->strictcount,"Free strict primer count table");
|
ECOFREE(words->strictcount,"Free strict primer count table");
|
||||||
|
|
||||||
primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);
|
if (options.error_max == 0)//aho, if(options.error_max == 0 && 0) old
|
||||||
|
primers = ahoc_lookforStrictPrimers (seqdb,seqdbsize,insamples,words,&options);
|
||||||
|
else
|
||||||
|
primers = lookforAproxPrimer(seqdb,seqdbsize,insamples,words,&options);
|
||||||
|
|
||||||
|
//for (i=0; i<primers->size; i++)
|
||||||
|
// print_wordwith_positions (primers->primers[i], seqdbsize, &options);
|
||||||
|
|
||||||
ECOFREE(words->words,"Free strict primer table");
|
ECOFREE(words->words,"Free strict primer table");
|
||||||
ECOFREE(words,"Free strict primer structure");
|
ECOFREE(words,"Free strict primer structure");
|
||||||
@ -833,3 +912,108 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define DEBUG_WORDS_CNT 14
|
||||||
|
/*
 * Debugging helper: shrinks a strict word table down to the hard-coded
 * primers listed in strict_words[].
 *
 * Each word of `words` is decoded in its direct and complemented form and
 * compared with the list. The first occurrence of every listed primer is
 * copied, together with its strict count, into a newly allocated table.
 * The number of hits per listed primer is reported on stderr.
 *
 * The input table (structure, word array and count array) is released
 * before returning; the caller owns the returned table.
 *
 * Changes over the previous version:
 *  - the returned size is the number of primers really found, and the kept
 *    words are packed at the front of the table. Before, size was always
 *    DEBUG_WORDS_CNT, so slots of primers that were never met (including
 *    the duplicated entries of strict_words[], which can never match) were
 *    handed to the caller without ever being filled in;
 *  - the decoded word is copied with a bounded snprintf instead of an
 *    unbounded strcpy into a 20 byte buffer.
 */
pwordcount_t reduce_words_to_debug (pwordcount_t words, poptions_t options)
{
	uint32_t     i, k;
	uint32_t     kept = 0;        /* number of listed primers found so far */
	pwordcount_t new_words;
	char         *rwrd;
	char         dwrd[64];
	/*char *strict_words[DEBUG_WORDS_CNT] = {"GAGTCTCTGCACCTATCC", "GCAATCCTGAGCCAAATC", "ACCCCTAACCACAACTCA",
			"TCCGAACCGACTGATGTT", "GAAGCTTGGGTGAAACTA", "GGAGAACCAGCTAGCTCT", "GCTGGTTCTCCCCGAAAT",
			"TCGATTTGGTACCGCTCT", "AAAGGAGAGAGAGGGATT", "GGATTGCTAATCCGTTGT", "CCCCCATCGTCTCACTGG",
			"TGAGGCGCAGCAGTTGAC", "GCGCTACGGCGCTGAAGT", "TTTCCTGGGAGTATGGCA"};*/
	char *strict_words[DEBUG_WORDS_CNT] = {"CTCCGGTCTGAACTCAGA", "TGTTGGATCAGGACATCC", "TAGATAGAAACCGACCTG",
			"TGGTGCAGCCGCTATTAA", "AGATAGAAACTGACCTGG", "TGGTGCAGCCGCTATTAA", "CTAATGGTGCAGCCGCTA",
			"TAGAAACTGACCTGGATT", "AGATAGAAACCGACCTGG", "ATGGTGCAGCCGCTATTA", "ATAGATAGAAACCGACCT",
			"GCCGCTATTAAGGGTTCG", "GGTGCAGCCGCTATTAAG", "TAGAAACTGACCTGGATT"};
	int word_seen[DEBUG_WORDS_CNT];

	new_words = ECOMALLOC(sizeof(wordcount_t),"Cannot allocate memory for word count structure");
	new_words->inseqcount = words->inseqcount;
	new_words->outseqcount = words->outseqcount;
	/* room for the worst case: every listed primer found once */
	new_words->strictcount = ECOMALLOC((DEBUG_WORDS_CNT*sizeof(uint32_t)), "Cannot allocate memory for word count table");
	new_words->words = ECOMALLOC(DEBUG_WORDS_CNT*sizeof(word_t), "I cannot allocate memory for debug words");

	for (k = 0; k < DEBUG_WORDS_CNT; k++)
		word_seen[k] = 0;

	for (i=0; i < words->size; i++)
	{
		/* The direct form is copied aside before decoding the complement;
		 * presumably ecoUnhashWord reuses one buffer between calls
		 * (the original code made the same copy) -- TODO confirm. */
		rwrd = ecoUnhashWord(words->words[i],options->primer_length);
		snprintf (dwrd, sizeof dwrd, "%s", rwrd);
		rwrd = ecoUnhashWord(ecoComplementWord(words->words[i],options->primer_length),options->primer_length);

		for (k = 0; k < DEBUG_WORDS_CNT; k++)
		{
			if (strcmp (dwrd, strict_words[k]) == 0) break;
			if (strcmp (rwrd, strict_words[k]) == 0) break;
		}

		if (k < DEBUG_WORDS_CNT)
		{
			if (word_seen[k] == 0)
			{
				/* first hit for this primer: append it to the packed table */
				new_words->words[kept] = words->words[i];
				new_words->strictcount[kept] = words->strictcount[i];
				kept++;
			}
			word_seen[k]++;
		}
	}

	/* only the slots that were really filled are exposed to the caller */
	new_words->size = kept;

	fprintf (stderr, "Debug Words Info:\n");
	for (k = 0; k < DEBUG_WORDS_CNT; k++)
		fprintf (stderr, "%s:%d\n", strict_words[k], word_seen[k]);

	/* release the input words */
	ECOFREE(words->words,"Clean word table");
	ECOFREE(words->strictcount,"Clean word count table");
	ECOFREE(words,"Clean word structure");

	return new_words;
}
|
||||||
|
|
||||||
|
/*
 * Debugging helper: when the primer decodes to the hard-coded target word,
 * prints on stdout, for every sequence where it was found, the sequence
 * index followed by the recorded position(s) -- one line for the direct
 * matches, one line for the reverse matches. Any other primer prints nothing.
 */
void print_wordwith_positions (primer_t prm, uint32_t seqdbsize, poptions_t options)
{
	char     *target = "GCCTGTTTACCAAAAACA";
	char     *decoded = ecoUnhashWord(prm.word,options->primer_length);
	uint32_t seq;
	uint32_t hit;

	if (strcmp (target, decoded) != 0)
		return;

	printf ("Positions for Word: %s\n", decoded);

	/* direct strand */
	for (seq = 0; seq < seqdbsize; seq++)
	{
		if (!(prm.directCount[seq] > 0))
			continue;

		printf ("%d:", seq);
		if (prm.directCount[seq] == 1)
		{
			/* a single hit is stored inline */
			printf ("%d", prm.directPos[seq].value);
		}
		else
		{
			for (hit = 0; hit < prm.directCount[seq]; hit++)
				printf ("%d,", prm.directPos[seq].pointer[hit]);
		}
		printf (" ");
	}
	printf ("\n");

	/* reverse strand */
	for (seq = 0; seq < seqdbsize; seq++)
	{
		if (!(prm.reverseCount[seq] > 0))
			continue;

		printf ("%d:", seq);
		if (prm.reverseCount[seq] == 1)
		{
			printf ("%d", prm.reversePos[seq].value);
		}
		else
		{
			for (hit = 0; hit < prm.reverseCount[seq]; hit++)
				printf ("%d,", prm.reversePos[seq].pointer[hit]);
		}
		printf (" ");
	}
	printf ("\n");
}
|
||||||
|
@ -3,7 +3,8 @@ LIBPATH= -Llibapat -LlibecoPCR -Llibecoprimer -Llibthermo
|
|||||||
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
|
MAKEDEPEND = gcc -D$(MACHINE) -M $(CPPFLAGS) -o $*.d $<
|
||||||
|
|
||||||
CC=gcc
|
CC=gcc
|
||||||
CFLAGS= -W -Wall -O5 -m64
|
CFLAGS= -W -Wall -m64 -g
|
||||||
|
#CFLAGS= -W -Wall -O5 -m64 -g
|
||||||
#CFLAGS= -W -Wall -O0 -m64 -g
|
#CFLAGS= -W -Wall -O0 -m64 -g
|
||||||
#CFLAGS= -W -Wall -O5 -fast -g
|
#CFLAGS= -W -Wall -O5 -fast -g
|
||||||
|
|
||||||
|
@ -15,7 +15,8 @@ SOURCES = goodtaxon.c \
|
|||||||
taxstats.c \
|
taxstats.c \
|
||||||
apat_search.c \
|
apat_search.c \
|
||||||
filtering.c \
|
filtering.c \
|
||||||
PrimerSets.c
|
PrimerSets.c \
|
||||||
|
ahocorasick.c
|
||||||
|
|
||||||
SRCS=$(SOURCES)
|
SRCS=$(SOURCES)
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -13,6 +13,8 @@ typedef struct {
|
|||||||
float set_lmean;
|
float set_lmean;
|
||||||
float set_lcov;
|
float set_lcov;
|
||||||
float set_score;
|
float set_score;
|
||||||
|
int32_t set_intaxa;
|
||||||
|
int32_t set_wi_cnt;
|
||||||
}pairset;
|
}pairset;
|
||||||
|
|
||||||
typedef struct{
|
typedef struct{
|
||||||
@ -33,9 +35,24 @@ typedef struct{
|
|||||||
void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams);
|
void add_pair_in_set (pairset *pair_set, int32_t pset_idx, int32_t prb_idx, SetParams *pparams);
|
||||||
void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams);
|
void get_next_pair_options (int *pair_wi_count_sorted_ids, pairset *pair_set, SetParams *pparams);
|
||||||
float get_links_distribution (int prb_idx, pairset *prob_set, SetParams *pparams);
|
float get_links_distribution (int prb_idx, pairset *prob_set, SetParams *pparams);
|
||||||
pairset build_primers_set (ppair_t* sortedpairs, int32_t sorted_count, pecodnadb_t seqdb,
|
pairset build_primers_set_greedy_spc (SetParams *pparams);
|
||||||
poptions_t options);
|
|
||||||
void get_set_mean_cov_stats (pairset *prob_set, SetParams *pparams);
|
void get_set_mean_cov_stats (pairset *prob_set, SetParams *pparams);
|
||||||
void some_other_set_possibilities (pairset *pair_set,
|
void some_other_set_possibilities (pairset *pair_set,
|
||||||
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||||
|
void sets_by_SimulatedAnealing (pairset *pair_set,
|
||||||
|
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||||
|
void sets_by_TabuSearch (pairset *pair_set,
|
||||||
|
ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||||
|
pairset * sets_by_BruteForce (ppair_t * sortedpairs,
|
||||||
|
int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||||
|
pairset * extend_set_randomly (pairset *pair_set, SetParams *params, int extend_to_cnt);
|
||||||
|
void build_and_print_sets (ppair_t * sortedpairs, int32_t sorted_count, pecodnadb_t seqdb, poptions_t options);
|
||||||
|
int32_t get_next_option_increasing_cov (pairset *pair_set, SetParams *pparams);
|
||||||
|
void reset_set_props (pairset *pair_set, SetParams *pparams);
|
||||||
|
void primers_graph_graphviz (ppair_t * sortedpairs,
|
||||||
|
int32_t sorted_count, poptions_t options);
|
||||||
|
size_t primers_changeSortedArray (ppair_t ** pairs,
|
||||||
|
size_t sorted_count, poptions_t options);
|
||||||
|
size_t primers_filterWithGivenLinks (ppair_t ** pairs,
|
||||||
|
size_t sorted_count, poptions_t options);
|
||||||
#endif
|
#endif
|
||||||
|
479
src/libecoprimer/ahocorasick.c
Executable file
479
src/libecoprimer/ahocorasick.c
Executable file
@ -0,0 +1,479 @@
|
|||||||
|
/*
|
||||||
|
* ahocorasick.c
|
||||||
|
*
|
||||||
|
* Created on: 26 march 2011
|
||||||
|
* Author: tiayyba
|
||||||
|
*/
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include "hashencoder.h"
|
||||||
|
#include "ahocorasick.h"
|
||||||
|
|
||||||
|
/* Forward declaration: the Graphviz dump of the keyword tree is defined at the end of this file. */
void ahoc_graphKeywordTree (aho_state *root);
|
||||||
|
/* Root of the tree being dumped: set by ahoc_graphKeywordTree and used by
 * ahoc_graphPrintNodesLinks to leave out fail links that point to the root. */
aho_state *groot = NULL; //just for graph testing
|
||||||
|
|
||||||
|
/* Extracts the 2-bit code of base number i (1-based) from the word w made of
 * l bases: base 1 occupies the highest of the 2*l low bits, base l the two
 * lowest bits. */
#define BASEATINDEX(w, l, i) (uint8_t)((((w)&(0x3LLU<<(((l)-(i))*2)))>>(((l)-(i))*2)) & 0x3LLU)
|
||||||
|
|
||||||
|
/*
 * Appends one (word index, strand) entry to the output set of a state.
 * The set is allocated on the first entry and grown by one slot for each
 * following entry. A NULL node is ignored.
 */
void ahoc_addOutputElement (aho_state *node, bool_t isdirect, uint32_t idx)
{
	aho_output *slot;

	if (node == NULL)
		return;

	if (node->output.count != 0)
		node->output.out_set = ECOREALLOC(node->output.out_set, (node->output.count+1)*sizeof(aho_output),
				"Cannot allocate memory for aho-corasick state output element");
	else
		node->output.out_set = ECOMALLOC(sizeof(aho_output),
				"Cannot allocate memory for aho-corasick state output element");

	/* fill the freshly added last slot */
	slot = &node->output.out_set[node->output.count];
	slot->wordidx = idx;
	slot->isdirect = isdirect;
	node->output.count += 1;
}
|
||||||
|
|
||||||
|
//is the passed output element in the set
|
||||||
|
bool_t ahoc_isOutputIn (aho_state *node, aho_output ot)
|
||||||
|
{
|
||||||
|
uint32_t i;
|
||||||
|
|
||||||
|
for (i=0; i<node->output.count; i++)
|
||||||
|
if (node->output.out_set[i].isdirect == ot.isdirect && node->output.out_set[i].wordidx == ot.wordidx) return TRUE;
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
//take union of output of the two nodes and put in node1
|
||||||
|
void ahoc_unionOutputElements (aho_state *node1, aho_state *node2)
|
||||||
|
{
|
||||||
|
uint32_t i;
|
||||||
|
|
||||||
|
for (i=0; i<node2->output.count; i++)
|
||||||
|
if (ahoc_isOutputIn (node1, node2->output.out_set[i]) == FALSE)
|
||||||
|
ahoc_addOutputElement (node1, node2->output.out_set[i].isdirect, node2->output.out_set[i].wordidx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Inserts one hashed word into the keyword tree.
 *
 * The word is walked base by base (options->primer_length bases, first base
 * first); missing states are created on the way and numbered from a counter
 * shared by all calls. The state reached after the last base receives the
 * output entry (idx, isdirect).
 */
void ahoc_addKeyword (aho_state *root, word_t w, bool_t isdirect, uint32_t idx, poptions_t options)
{
	static uint32_t state_id = 0;   /* last state number handed out */
	aho_state *cursor = root;
	aho_state *child;
	uint32_t  depth;
	uint8_t   sym;

	for (depth = 1; depth <= options->primer_length; depth++)
	{
		sym = BASEATINDEX (w, options->primer_length, depth);
		child = cursor->next[sym];

		if (child == NULL)
		{
			/* no edge for this base yet: create and initialise a new state */
			child = ECOMALLOC(sizeof(aho_state),
					"Cannot allocate memory for aho-corasick state");
			cursor->next[sym] = child;
			child->id = ++state_id;
			child->next[0] = NULL;
			child->next[1] = NULL;
			child->next[2] = NULL;
			child->next[3] = NULL;
			child->fail = NULL;
			child->output.count = 0;
		}

		cursor = child;
	}

	/* the whole pattern has been spelled: record it on the final state */
	ahoc_addOutputElement (cursor, isdirect, idx);
}
|
||||||
|
|
||||||
|
/*
 * Initialises `root` and builds the keyword tree of all strict words.
 *
 * Each word is inserted twice under the same index: once as is (flagged
 * TRUE) and once complemented with ecoComplementWord (flagged FALSE).
 * Finally every base without an outgoing edge at the root is made to loop
 * back onto the root. A NULL root is ignored.
 */
void ahoc_buildKeywordTree (aho_state *root, pwordcount_t words, poptions_t options)
{
	uint32_t n;
	word_t   direct;
	word_t   reverse;

	if (root == NULL)
		return;

	/* empty root state */
	root->id = 0;
	root->fail = NULL;
	root->output.count = 0;
	for (n = 0; n < 4; n++)
		root->next[n] = NULL;

	/* one pattern per word and per strand */
	for (n = 0; n < words->size; n++)
	{
		direct = WORD(words->words[n]);
		ahoc_addKeyword (root, direct, TRUE, n, options);

		reverse = ecoComplementWord(direct,options->primer_length);
		ahoc_addKeyword (root, reverse, FALSE, n, options);
	}

	/* bases that start no pattern keep the automaton on the root */
	for (n = 0; n < 4; n++)
	{
		if (!root->next[n])
			root->next[n] = root;
	}
}
|
||||||
|
|
||||||
|
/*
 * Appends a state at the tail of the queue. A NULL state is ignored.
 */
void ahoc_enqueue (aho_queue *ahoqueue, aho_state *node)
{
	queue_node *cell;

	if (!node)
		return;

	cell = ECOMALLOC(sizeof(queue_node),
			"Cannot allocate memory for aho-corasick queue node");
	cell->next = NULL;
	cell->state_node = node;

	/* link after the current tail, or start the list when it is empty */
	if (ahoqueue->first != NULL)
		ahoqueue->last->next = cell;
	else
		ahoqueue->first = cell;

	ahoqueue->last = cell;
}
|
||||||
|
|
||||||
|
/*
 * Removes the state at the head of the queue and returns it.
 * Returns NULL when the queue is empty. The queue cell is released.
 */
aho_state *ahoc_dequeue (aho_queue *ahoqueue)
{
	queue_node *head = ahoqueue->first;
	aho_state  *state;

	if (head == NULL)
		return NULL;

	state = head->state_node;
	ahoqueue->first = head->next;
	ECOFREE (head, "Cannot free memory for aho-corasick queue node");

	return state;
}
|
||||||
|
|
||||||
|
//set fail links and output sets for the keyword tree
|
||||||
|
void ahoc_updateForFailAndOutput (aho_state *root)
|
||||||
|
{
|
||||||
|
int32_t i;
|
||||||
|
aho_queue Q;
|
||||||
|
aho_state *node_r;
|
||||||
|
aho_state *node_u;
|
||||||
|
aho_state *node_v;
|
||||||
|
|
||||||
|
//empty queue
|
||||||
|
Q.first = NULL;
|
||||||
|
Q.last = NULL;
|
||||||
|
|
||||||
|
//for us alphabet has 4 elements, A=0, C=1, G=2 and T=3
|
||||||
|
for (i=0; i<4; i++)
|
||||||
|
{
|
||||||
|
if (root->next[i] != root && root->next[i] != NULL)
|
||||||
|
{
|
||||||
|
root->next[i]->fail = root;
|
||||||
|
ahoc_enqueue (&Q, root->next[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//while queue not empty
|
||||||
|
while (Q.first != NULL)
|
||||||
|
{
|
||||||
|
node_r = ahoc_dequeue (&Q);
|
||||||
|
for (i=0; i<4; i++)
|
||||||
|
{
|
||||||
|
if (node_r->next[i] != NULL)
|
||||||
|
{
|
||||||
|
node_u = node_r->next[i];
|
||||||
|
ahoc_enqueue (&Q, node_u);
|
||||||
|
node_v = node_r->fail;
|
||||||
|
while (node_v->next[i] == NULL)
|
||||||
|
node_v = node_v->fail;
|
||||||
|
node_u->fail = node_v->next[i];
|
||||||
|
ahoc_unionOutputElements (node_u, node_u->fail);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Releases a state, its output set and, recursively, every state reachable
 * through its outgoing edges.
 */
void ahoc_freeKeywordTree (aho_state *node)
{
	int branch = 0;

	/* children first */
	while (branch < 4)
	{
		if (node->next[branch] != NULL)
			ahoc_freeKeywordTree (node->next[branch]);
		branch++;
	}

	/* the output set only exists once an entry has been added */
	if (node->output.count > 0)
		ECOFREE (node->output.out_set, "Free failed for node output");

	ECOFREE (node, "Free failed for node");
}
|
||||||
|
|
||||||
|
pprimercount_t ahoc_lookforStrictPrimers (pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||||
|
pwordcount_t words,poptions_t options)
|
||||||
|
{
|
||||||
|
aho_state automaton_root;
|
||||||
|
aho_state *curr_state;
|
||||||
|
//uint32_t inSequenceQuorum;
|
||||||
|
uint32_t outSequenceQuorum;
|
||||||
|
pprimer_t data;
|
||||||
|
pprimercount_t primers;
|
||||||
|
uint32_t i, j, k;
|
||||||
|
int32_t pos;
|
||||||
|
uint32_t lmax;
|
||||||
|
char *base;
|
||||||
|
int8_t code;
|
||||||
|
uint32_t goodPrimers=0;
|
||||||
|
static int iii=0;
|
||||||
|
|
||||||
|
|
||||||
|
//inSequenceQuorum = (uint32_t)floor((float)exampleCount * options->sensitivity_quorum);
|
||||||
|
outSequenceQuorum = (uint32_t)floor((float)(seqdbsize-exampleCount) * options->false_positive_quorum);
|
||||||
|
|
||||||
|
//fprintf(stderr," Primers should be at least present in %d/%d example sequences\n",inSequenceQuorum,exampleCount);
|
||||||
|
fprintf(stderr," Primers should not be present in more than %d/%d counterexample sequences\n",outSequenceQuorum,(seqdbsize-exampleCount));
|
||||||
|
|
||||||
|
data = ECOMALLOC(words->size * sizeof(primer_t),
|
||||||
|
"Cannot allocate memory for fuzzy matching results");
|
||||||
|
for (i=0; i < words->size; i++)
|
||||||
|
{
|
||||||
|
data[i].word=WORD(words->words[i]);
|
||||||
|
data[i].inexample = 0;
|
||||||
|
data[i].outexample= 0;
|
||||||
|
|
||||||
|
data[i].directCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||||
|
"Cannot allocate memory for primer position");
|
||||||
|
data[i].directPos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||||
|
"Cannot allocate memory for primer position");
|
||||||
|
data[i].reverseCount=ECOMALLOC(seqdbsize * sizeof(uint32_t),
|
||||||
|
"Cannot allocate memory for primer position");
|
||||||
|
data[i].reversePos = ECOMALLOC(seqdbsize * sizeof(poslist_t),
|
||||||
|
"Cannot allocate memory for primer position");
|
||||||
|
}
|
||||||
|
|
||||||
|
//build keywords automaton
|
||||||
|
ahoc_buildKeywordTree (&automaton_root, words, options);
|
||||||
|
//set fail links and output sets
|
||||||
|
ahoc_updateForFailAndOutput (&automaton_root);
|
||||||
|
|
||||||
|
//debug; print keywordtree in a gv file
|
||||||
|
//ahoc_graphKeywordTree (&automaton_root);
|
||||||
|
|
||||||
|
//loop on each sequence for its each base and find words
|
||||||
|
for (i=0; i < seqdbsize; i++)
|
||||||
|
{
|
||||||
|
if(database[i]->SQ_length <= options->primer_length) continue;
|
||||||
|
|
||||||
|
lmax = database[i]->SQ_length;
|
||||||
|
if (!options->circular)
|
||||||
|
lmax += options->primer_length-1;
|
||||||
|
curr_state = &automaton_root;
|
||||||
|
|
||||||
|
for (j=0,base=database[i]->SQ; j<lmax; j++,base++)
|
||||||
|
{
|
||||||
|
if (i==(uint32_t)database[i]->SQ_length) base=database[i]->SQ;
|
||||||
|
|
||||||
|
//code = encoder[(*base) - 'A'];
|
||||||
|
code = *base;
|
||||||
|
//if (iii++ < 30)
|
||||||
|
// fprintf (stderr, "%d:%d,", *base, code);
|
||||||
|
if (code < 0 || code > 3)
|
||||||
|
{
|
||||||
|
//if error char, start from root for next character
|
||||||
|
//+forget any incomplete words
|
||||||
|
curr_state = &automaton_root;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
while (curr_state->next[code] == NULL) curr_state = curr_state->fail;
|
||||||
|
curr_state = curr_state->next[code];
|
||||||
|
|
||||||
|
//start position of primer is options->primer_length-1 chars back
|
||||||
|
pos = j-options->primer_length+1;
|
||||||
|
if (pos < 0) pos = database[i]->SQ_length+pos;
|
||||||
|
|
||||||
|
//set output, if there is some output on this state then
|
||||||
|
//+all words in the output set complete here, so increment their
|
||||||
|
//+found properties for current sequence
|
||||||
|
for (k=0; k<curr_state->output.count; k++)
|
||||||
|
{
|
||||||
|
if (curr_state->output.out_set[k].isdirect)
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directCount[i]++;
|
||||||
|
else
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reverseCount[i]++;
|
||||||
|
|
||||||
|
if (options->no_multi_match)
|
||||||
|
{
|
||||||
|
if ((data[curr_state->output.out_set[k].wordidx].directCount[i] +
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reverseCount[i]) > 1)
|
||||||
|
//since multimach not allowd, set an indication on 1st seq position that
|
||||||
|
//+ a multimatch was found, so that this word will be filtered out
|
||||||
|
//+ and because of first postion we wont have to search the whole array
|
||||||
|
//+ to find if it voilated nomultimatch constraint for some seq
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directCount[0] = 2;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (curr_state->output.out_set[k].isdirect)
|
||||||
|
//direct word found on jth position of ith sequence
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directPos[i].value = (uint32_t)pos;
|
||||||
|
else
|
||||||
|
//reverse word found on jth position of ith sequence
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reversePos[i].value = (uint32_t)pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//okay multi match allowed
|
||||||
|
if (curr_state->output.out_set[k].isdirect)
|
||||||
|
{
|
||||||
|
if (data[curr_state->output.out_set[k].wordidx].directCount[i] == 1)
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directPos[i].value = (uint32_t)pos;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//need to create or extend the positions list
|
||||||
|
if (data[curr_state->output.out_set[k].wordidx].directCount[i] == 2)
|
||||||
|
{
|
||||||
|
//for second element, first was put in .value, so dont forget to copy that in the array too
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer = ECOMALLOC(2 * sizeof(uint32_t),
|
||||||
|
"Cannot allocate memory for primer position");
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[0] = data[curr_state->output.out_set[k].wordidx].directPos[i].value;
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[1] = (uint32_t)pos;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//for third or greater element
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer = ECOREALLOC(data[curr_state->output.out_set[k].wordidx].directPos[i].pointer,
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directCount[i] * sizeof(uint32_t),
|
||||||
|
"Cannot allocate memory for primer position");
|
||||||
|
data[curr_state->output.out_set[k].wordidx].directPos[i].pointer[data[curr_state->output.out_set[k].wordidx].directCount[i]-1] = (uint32_t)pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (data[curr_state->output.out_set[k].wordidx].reverseCount[i] == 1)
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reversePos[i].value = (uint32_t)pos;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//need to create or extend the positions list
|
||||||
|
if (data[curr_state->output.out_set[k].wordidx].reverseCount[i] == 2)
|
||||||
|
{
|
||||||
|
//for second element, first was put in .value, so dont forget to copy that in the array too
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer = ECOMALLOC(2 * sizeof(uint32_t),
|
||||||
|
"Cannot allocate memory for primer position");
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[0] = data[curr_state->output.out_set[k].wordidx].reversePos[i].value;
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[1] = (uint32_t)pos;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//for third or greater element
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer = ECOREALLOC(data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer,
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reverseCount[i] * sizeof(uint32_t),
|
||||||
|
"Cannot allocate memory for primer position");
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reversePos[i].pointer[data[curr_state->output.out_set[k].wordidx].reverseCount[i]-1] = (uint32_t)pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//dont forget to increment inexample or outexample count, but only once for a sequence
|
||||||
|
if ((data[curr_state->output.out_set[k].wordidx].directCount[i] +
|
||||||
|
data[curr_state->output.out_set[k].wordidx].reverseCount[i]) == 1)
|
||||||
|
{
|
||||||
|
if (database[i]->isexample)
|
||||||
|
data[curr_state->output.out_set[k].wordidx].inexample++;
|
||||||
|
else
|
||||||
|
data[curr_state->output.out_set[k].wordidx].outexample++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Only thing that remains is to remove the failed words
|
||||||
|
for (i=0,j=0; i<words->size; i++)
|
||||||
|
{
|
||||||
|
fprintf(stderr,"Primers %5d/%lld analyzed => sequence : %s in %d example and %d counterexample sequences \r",
|
||||||
|
i+1,words->size,ecoUnhashWord(data[i].word,options->primer_length),
|
||||||
|
data[i].inexample,data[i].outexample);
|
||||||
|
|
||||||
|
//if (data[i].inexample < inSequenceQuorum || (data[i].directCount[0] == 2 && options->no_multi_match))
|
||||||
|
if (data[i].directCount[0] == 2 && options->no_multi_match)
|
||||||
|
{
|
||||||
|
//bad word, delete from the array
|
||||||
|
for (k=0; k<seqdbsize; k++)
|
||||||
|
{
|
||||||
|
if (data[i].directCount[k] > 1)
|
||||||
|
ECOFREE (data[i].directPos[k].pointer, "Cannot free position pointer.");
|
||||||
|
if (data[i].reverseCount[k] > 1)
|
||||||
|
ECOFREE (data[i].reversePos[k].pointer, "Cannot free position pointer.");
|
||||||
|
}
|
||||||
|
ECOFREE (data[i].directCount, "Cannot free position pointer.");
|
||||||
|
ECOFREE (data[i].directPos, "Cannot free position pointer.");
|
||||||
|
ECOFREE (data[i].reverseCount, "Cannot free position pointer.");
|
||||||
|
ECOFREE (data[i].reversePos, "Cannot free position pointer.");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//data[i].good = data[i].inexample >= inSequenceQuorum && data[i].outexample <= outSequenceQuorum;
|
||||||
|
data[i].good = data[i].outexample <= outSequenceQuorum;
|
||||||
|
goodPrimers+=data[i].good? 1:0;
|
||||||
|
if (j < i)
|
||||||
|
data[j] = data[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fprintf(stderr,"\n\nOn %lld analyzed primers %d respect quorum conditions\n",words->size,goodPrimers);
|
||||||
|
fprintf(stderr,"Conserved primers for further analysis : %d/%lld\n",j,words->size);
|
||||||
|
|
||||||
|
primers = ECOMALLOC(sizeof(primercount_t),"Cannot allocate memory for primer table");
|
||||||
|
primers->primers=ECOREALLOC(data,
|
||||||
|
j * sizeof(primer_t),
|
||||||
|
"Cannot reallocate memory for fuzzy matching results");
|
||||||
|
primers->size=j;
|
||||||
|
|
||||||
|
//free memory of keyword table
|
||||||
|
for (i=0; i<4; i++)
|
||||||
|
if (automaton_root.next[i] != &automaton_root)
|
||||||
|
ahoc_freeKeywordTree (automaton_root.next[i]);
|
||||||
|
|
||||||
|
return primers;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Writes the Graphviz node statement of a state -- its id as name, and a
 * label made of the id followed by its output entries (word index plus 'd'
 * for direct or 'r' for reverse) -- then does the same for every state below
 * it. Edges looping on the state itself are not followed.
 */
void ahoc_graphPrintNodesInfo (aho_state *node, FILE* gfile)
{
	uint32_t   n;
	aho_output *entry;
	aho_state  *child;

	fprintf (gfile, "\"%d\"[\n", node->id);
	fprintf (gfile, "label=\"%d\\n", node->id);

	n = 0;
	while (n < node->output.count)
	{
		entry = &node->output.out_set[n];
		fprintf (gfile, "%d%c,", entry->wordidx, entry->isdirect?'d':'r');
		n++;
	}
	fprintf (gfile, "\"\n];\n");

	for (n = 0; n < 4; n++)
	{
		child = node->next[n];
		if (child == NULL || child == node)
			continue;
		ahoc_graphPrintNodesInfo (child, gfile);
	}
}
|
||||||
|
|
||||||
|
/*
 * Writes the Graphviz edges of a state and of every state below it.
 *
 * Each outgoing edge is labelled with its base letter. The fail link is
 * drawn in red, but only during the first 40 calls of this function and only
 * when it does not point to the root being dumped (groot). Edges looping on
 * the state itself are neither printed nor followed.
 */
void ahoc_graphPrintNodesLinks (aho_state *node, FILE* gfile)
{
	static int visits=0;   /* number of calls so far, all states together */
	uint32_t   sym;
	aho_state  *child;

	for (sym = 0; sym < 4; sym++)
	{
		child = node->next[sym];
		if (child == NULL || child == node)
			continue;

		fprintf (gfile, "\"%d\" -> \"%d\" [\n", node->id, child->id);
		fprintf (gfile, "label=\"%c\"\n];\n", "ACGT"[sym]);
	}

	/* the counter is incremented on every call, whatever the fail link is */
	if (visits++ < 40 && node->fail != NULL && node->fail != groot)
	{
		fprintf (gfile, "\"%d\" -> \"%d\" [\n", node->id, node->fail->id);
		fprintf (gfile, "color= \"red\"\n];\n");
	}

	for (sym = 0; sym < 4; sym++)
	{
		child = node->next[sym];
		if (child == NULL || child == node)
			continue;

		ahoc_graphPrintNodesLinks (child, gfile);
	}
}
|
||||||
|
|
||||||
|
/*
 * Dump the keyword tree rooted at `root` as a directed graph into the file
 * "keywordtree.gv" in the current working directory (the file is created
 * or truncated).
 *
 * The file-level `groot` is set to `root` first, because
 * ahoc_graphPrintNodesLinks() compares fail pointers against it.
 *
 * If the file cannot be opened, a message is written to stderr and the
 * function returns without producing any output; previously the NULL
 * stream was passed straight to fprintf()/fclose().
 *
 * root : root state of the tree; must not be NULL.
 */
void ahoc_graphKeywordTree (aho_state *root)
{
	FILE *gfile;

	groot=root;

	gfile = fopen ("keywordtree.gv", "w");
	if (gfile == NULL)
	{
		fprintf (stderr, "Cannot open keywordtree.gv for writing\n");
		return;
	}

	fprintf (gfile, "digraph keywordtree {\n");
	ahoc_graphPrintNodesInfo (root, gfile);
	ahoc_graphPrintNodesLinks (root, gfile);
	fprintf (gfile, "}\n");

	/* fclose() flushes buffered output, so a failure here means a truncated file. */
	if (fclose(gfile) != 0)
		fprintf (stderr, "Error while closing keywordtree.gv\n");
}
|
||||||
|
|
43
src/libecoprimer/ahocorasick.h
Executable file
43
src/libecoprimer/ahocorasick.h
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
/*
|
||||||
|
* ahocorasick.h
|
||||||
|
*
|
||||||
|
* Created on: 26 march 2011
|
||||||
|
* Author: tiayyba
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef H_ahocorasick
|
||||||
|
#define H_ahocorasick
|
||||||
|
|
||||||
|
#include "ecoprimer.h"
|
||||||
|
|
||||||
|
/* One word recognised when the automaton reaches a given state. */
typedef struct aho_output_t{
|
||||||
|
uint32_t wordidx; // index of the strict word; the index is stored instead of the word itself
|
||||||
|
bool_t isdirect; // non-zero for a direct word, zero for a reverse one; both are searched, so the kind must be recorded
|
||||||
|
}aho_output;
|
||||||
|
|
||||||
|
/* Counted array of aho_output entries attached to a state. */
typedef struct aho_output_count_t{
|
||||||
|
uint32_t count; // number of entries in out_set
|
||||||
|
aho_output *out_set; // the entries themselves, indexed 0..count-1
|
||||||
|
}aho_output_count;
|
||||||
|
|
||||||
|
/* One state (node) of the keyword tree / automaton. */
typedef struct aho_state_t{
|
||||||
|
int32_t id; // identifier of the state; used as the node name in the graph dump
|
||||||
|
struct aho_state_t *next[4]; // transitions, one per nucleotide: A=0, C=1, G=2 and T=3
|
||||||
|
struct aho_state_t *fail; // fail link of the state (may be NULL); drawn as a red edge in the graph dump
|
||||||
|
aho_output_count output; // words reported when this state is reached
|
||||||
|
}aho_state;
|
||||||
|
|
||||||
|
/* Element of a singly linked list of automaton states. */
typedef struct queue_node_t {
|
||||||
|
aho_state *state_node; // state carried by this element
|
||||||
|
struct queue_node_t *next; // following element in the list
|
||||||
|
}queue_node;
|
||||||
|
|
||||||
|
/*
 * Queue of automaton states, kept as pointers to both ends of a queue_node list.
 * NOTE(review): presumably used for a breadth-first pass over the states when
 * the fail links are computed - confirm against ahocorasick.c.
 */
typedef struct{
|
||||||
|
queue_node *first; // head of the list
|
||||||
|
queue_node *last; // tail of the list
|
||||||
|
}aho_queue;
|
||||||
|
|
||||||
|
/*
 * Entry point of the Aho-Corasick based strict primer search.
 * NOTE(review): judging by the parameter names, `database` holds `seqdbsize`
 * sequences, `exampleCount` is the number of example sequences and `words`
 * is the candidate word table - confirm against the definition in
 * ahocorasick.c. Ownership of the returned table is not visible from here.
 */
pprimercount_t ahoc_lookforStrictPrimers (pecodnadb_t database, uint32_t seqdbsize,uint32_t exampleCount,
|
||||||
|
pwordcount_t words,poptions_t options);
|
||||||
|
#endif /* H_ahocorasick */
|
||||||
|
|
@ -176,6 +176,7 @@ typedef struct {
|
|||||||
int *wellIdentifiedSeqs; //< an array having elements equla to total seqs
|
int *wellIdentifiedSeqs; //< an array having elements equla to total seqs
|
||||||
// values are either 0 or 1, if seq is well identified
|
// values are either 0 or 1, if seq is well identified
|
||||||
// its 1 else 0
|
// its 1 else 0
|
||||||
|
int *coveredSeqs; //< an array having elements equal to total seqs, 1 if seq is covered else 0
|
||||||
|
|
||||||
// these statistics are relative to inexample sequences
|
// these statistics are relative to inexample sequences
|
||||||
|
|
||||||
@ -291,6 +292,9 @@ typedef struct {
|
|||||||
PNNParams pnparm;
|
PNNParams pnparm;
|
||||||
bool_t print_sets_of_primers;
|
bool_t print_sets_of_primers;
|
||||||
float specificity_threshold;
|
float specificity_threshold;
|
||||||
|
int links_cnt;
|
||||||
|
float max_links_percent;
|
||||||
|
bool_t filter_on_links;
|
||||||
} options_t, *poptions_t;
|
} options_t, *poptions_t;
|
||||||
|
|
||||||
typedef ecoseq_t **pecodnadb_t;
|
typedef ecoseq_t **pecodnadb_t;
|
||||||
@ -350,7 +354,7 @@ int32_t getrankdbstats(pecodnadb_t seqdb,
|
|||||||
uint32_t seqdbsize,
|
uint32_t seqdbsize,
|
||||||
ecotaxonomy_t *taxonomy,
|
ecotaxonomy_t *taxonomy,
|
||||||
poptions_t options);
|
poptions_t options);
|
||||||
float taxonomycoverage(ppair_t pair, poptions_t options);
|
float taxonomycoverage(ppair_t pair, poptions_t options, pecodnadb_t seqdb,uint32_t seqdbsize);
|
||||||
char ecoComplementChar(char base);
|
char ecoComplementChar(char base);
|
||||||
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize);
|
void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize);
|
||||||
|
|
||||||
|
@ -114,6 +114,8 @@ static int32_t *ecoFilteringHashSequence(int32_t *dest,
|
|||||||
error<<= 1;
|
error<<= 1;
|
||||||
error&=ERRORMASK(FWORDSIZE);
|
error&=ERRORMASK(FWORDSIZE);
|
||||||
|
|
||||||
|
//code = -1;
|
||||||
|
//if((*base) >= 'A' && (*base) <= 'Z')
|
||||||
code = encoder[(*base) - 'A'];
|
code = encoder[(*base) - 'A'];
|
||||||
if (code <0)
|
if (code <0)
|
||||||
{
|
{
|
||||||
@ -154,7 +156,7 @@ int32_t *filteringSeq(pecodnadb_t database, uint32_t seqdbsize,
|
|||||||
|
|
||||||
for (i=0;i<seqdbsize;i++)
|
for (i=0;i<seqdbsize;i++)
|
||||||
{
|
{
|
||||||
if (database[i]->isexample)
|
if (database[i]->isexample && database[i]->SQ_length > options->primer_length)
|
||||||
{
|
{
|
||||||
j++;
|
j++;
|
||||||
wordscount=ecoFilteringHashSequence(wordscount,
|
wordscount=ecoFilteringHashSequence(wordscount,
|
||||||
|
@ -179,7 +179,7 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
|||||||
uint32_t i,j,k;
|
uint32_t i,j,k;
|
||||||
uint32_t matchcount=0;
|
uint32_t matchcount=0;
|
||||||
pprimermatch_t matches = NULL;
|
pprimermatch_t matches = NULL;
|
||||||
primermatchcount_t seqmatchcount;
|
//primermatchcount_t seqmatchcount;
|
||||||
ppair_t pcurrent;
|
ppair_t pcurrent;
|
||||||
pair_t current;
|
pair_t current;
|
||||||
pprimer_t wswp;
|
pprimer_t wswp;
|
||||||
@ -189,7 +189,7 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
|||||||
//char prmr[50];
|
//char prmr[50];
|
||||||
//float mtemp;
|
//float mtemp;
|
||||||
word_t w1, w1a, omask = (0x1L << (options->strict_three_prime*2)) -1;
|
word_t w1, w1a, omask = (0x1L << (options->strict_three_prime*2)) -1;
|
||||||
word_t w2, w2a, wtmp;
|
word_t w2, w2a;//, wtmp;
|
||||||
uint32_t bp1,bp2;
|
uint32_t bp1,bp2;
|
||||||
|
|
||||||
//prmr[options->primer_length] = '\0';
|
//prmr[options->primer_length] = '\0';
|
||||||
@ -252,26 +252,25 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
|||||||
{
|
{
|
||||||
// For all primers matching the sequence
|
// For all primers matching the sequence
|
||||||
|
|
||||||
//for(j=i+1;
|
/*for(j=i+1;
|
||||||
// (j<matchcount)
|
(j<matchcount)
|
||||||
// && ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
|
&& ((distance=matches[j].position - matches[i].position - options->primer_length) < options->lmax);
|
||||||
// j++
|
j++
|
||||||
// )
|
)//*/
|
||||||
for (j=i+1; j<matchcount; j++)
|
for (j=i+1; j<matchcount; j++)
|
||||||
{
|
{
|
||||||
if (matches[j].position - matches[i].position <= options->primer_length) continue;
|
if (matches[j].position - matches[i].position <= options->primer_length) continue;
|
||||||
distance = matches[j].position - matches[i].position - options->primer_length;
|
distance = matches[j].position - matches[i].position - options->primer_length;
|
||||||
if (distance >= options->lmax) break;
|
if (distance >= options->lmax) break;
|
||||||
|
|
||||||
|
|
||||||
// For all not too far primers
|
// For all not too far primers
|
||||||
|
|
||||||
if ( (matches[i].primer->good || matches[j].primer->good)
|
if ( (matches[i].primer->good || matches[j].primer->good)
|
||||||
&& (distance > options->lmin)
|
&& (distance > options->lmin)
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
|
||||||
// If possible primer pair
|
// If possible primer pair
|
||||||
|
|
||||||
current.p1 = matches[i].primer;
|
current.p1 = matches[i].primer;
|
||||||
current.asdirect1=matches[i].strand;
|
current.asdirect1=matches[i].strand;
|
||||||
current.p2 = matches[j].primer;
|
current.p2 = matches[j].primer;
|
||||||
@ -456,7 +455,6 @@ static void buildPrimerPairsForOneSeq(uint32_t seqid,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pairs->count=paircount;
|
pairs->count=paircount;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -108,9 +108,10 @@ void addSeqToWordCountTable(pwordcount_t table, uint32_t wordsize, uint32_t circ
|
|||||||
|
|
||||||
table->inseqcount++;
|
table->inseqcount++;
|
||||||
|
|
||||||
|
//fprintf (stderr, "\nOldAddress: %x", table->strictcount);
|
||||||
table->strictcount = ECOREALLOC(table->strictcount,buffersize*sizeof(uint32_t),
|
table->strictcount = ECOREALLOC(table->strictcount,(buffersize+5000)*sizeof(uint32_t),
|
||||||
"Cannot allocate memory to extend example word count table");
|
"Cannot allocate memory to extend example word count table");
|
||||||
|
//fprintf (stderr, " NewAddress: %x\n", table->strictcount);
|
||||||
|
|
||||||
for (i=table->size; i < buffersize; i++)
|
for (i=table->size; i < buffersize; i++)
|
||||||
table->strictcount[i]=1;
|
table->strictcount[i]=1;
|
||||||
@ -172,7 +173,7 @@ pwordcount_t lookforStrictPrimer(pecodnadb_t database, uint32_t seqdbsize,
|
|||||||
|
|
||||||
for (i=0;i<seqdbsize;i++)
|
for (i=0;i<seqdbsize;i++)
|
||||||
{
|
{
|
||||||
if (database[i]->isexample)
|
if (database[i]->isexample && database[i]->SQ_length > options->primer_length)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (first)
|
if (first)
|
||||||
|
@ -6,10 +6,46 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <search.h>
|
#include <search.h>
|
||||||
|
//void tdestroy (void *root, void (*free_node)(void *nodep));
|
||||||
|
|
||||||
#include "ecoprimer.h"
|
#include "ecoprimer.h"
|
||||||
|
|
||||||
static int cmptaxon(const void *t1, const void* t2);
|
static int cmptaxon(const void *t1, const void* t2);
|
||||||
|
|
||||||
|
/* Address of the tree root pointer passed to tdelete() by delete_twalkaction(); while NULL, no deletion is attempted. */
void **tree_root = NULL;
|
||||||
|
/* Number of callbacks delete_twalkaction() has received since free_tree_nodes() last reset it to 0. */
int delete_passes = 0;
|
||||||
|
|
||||||
|
/*
 * twalk() callback used by free_tree_nodes().
 *
 * Every invocation increments delete_passes, whatever the visit kind.
 * On a leaf visit, and only when tree_root is set, tdelete() is also called
 * with `node`, tree_root and cmptaxon.
 *
 * node  : pointer handed over by twalk() for the visited node.
 * order : visit kind (preorder, postorder, endorder or leaf).
 * level : depth reported by twalk(); not used.
 *
 * NOTE(review): this deletes from the tree while twalk() is still traversing
 * it, which the tsearch family does not appear to allow - confirm before
 * re-enabling the call site.
 */
void delete_twalkaction (const void *node, VISIT order, int level)
|
||||||
|
{
|
||||||
|
switch (order)
|
||||||
|
{
|
||||||
|
case preorder:
|
||||||
|
delete_passes++;
|
||||||
|
break;
|
||||||
|
case postorder:
|
||||||
|
delete_passes++;
|
||||||
|
break;
|
||||||
|
case endorder:
|
||||||
|
delete_passes++;
|
||||||
|
break;
|
||||||
|
case leaf:
|
||||||
|
if (tree_root)
|
||||||
|
/* NOTE(review): tdelete() expects the key as first argument, while `node` is the
|
||||||
|
   twalk() node pointer (the key would be *(void * const *)node) - verify. */
tdelete (node, tree_root,cmptaxon);
|
||||||
|
delete_passes++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void free_tree_nodes (void *tree)
|
||||||
|
{
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
delete_passes = 0;
|
||||||
|
twalk (tree, delete_twalkaction);
|
||||||
|
if (delete_passes <= 1) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int cmptaxon(const void *t1, const void* t2)
|
static int cmptaxon(const void *t1, const void* t2)
|
||||||
{
|
{
|
||||||
const size_t taxid1=(size_t)t1;
|
const size_t taxid1=(size_t)t1;
|
||||||
@ -35,7 +71,12 @@ int32_t counttaxon(int32_t taxid)
|
|||||||
if (taxid==-1)
|
if (taxid==-1)
|
||||||
{
|
{
|
||||||
if (taxontree)
|
if (taxontree)
|
||||||
|
{
|
||||||
|
tree_root = (void **)&taxontree;
|
||||||
|
//free_tree_nodes (taxontree);
|
||||||
ECOFREE(taxontree,"Free taxon tree");
|
ECOFREE(taxontree,"Free taxon tree");
|
||||||
|
tree_root = NULL;
|
||||||
|
}
|
||||||
taxontree=NULL;
|
taxontree=NULL;
|
||||||
taxoncount=0;
|
taxoncount=0;
|
||||||
return 0;
|
return 0;
|
||||||
@ -97,22 +138,30 @@ int32_t getrankdbstats(pecodnadb_t seqdb, uint32_t seqdbsize, ecotaxonomy_t *tax
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
float taxonomycoverage(ppair_t pair, poptions_t options)
|
float taxonomycoverage(ppair_t pair, poptions_t options, pecodnadb_t seqdb,uint32_t seqdbsize)
|
||||||
{
|
{
|
||||||
int32_t seqcount;
|
int32_t seqcount;
|
||||||
int32_t i;
|
int32_t i;
|
||||||
int32_t incount=0;
|
int32_t incount=0;
|
||||||
int32_t outcount=0;
|
int32_t outcount=0;
|
||||||
|
uint32_t j;
|
||||||
|
|
||||||
|
|
||||||
|
memset (pair->coveredSeqs, 0, seqdbsize*sizeof (int));
|
||||||
seqcount=pair->pcr.ampcount;
|
seqcount=pair->pcr.ampcount;
|
||||||
|
|
||||||
counttaxon(-1);
|
counttaxon(-1);
|
||||||
for (i=0; i < seqcount; i++)
|
for (i=0; i < seqcount; i++)
|
||||||
if (pair->pcr.amplifias[i].sequence->isexample
|
if (pair->pcr.amplifias[i].sequence->isexample
|
||||||
&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0 )
|
&& pair->pcr.amplifias[i].sequence->ranktaxonid > 0 )
|
||||||
|
{
|
||||||
incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
|
incount = counttaxon(pair->pcr.amplifias[i].sequence->ranktaxonid);
|
||||||
|
|
||||||
|
for (j=0; j<seqdbsize; j++)
|
||||||
|
if (pair->pcr.amplifias[i].sequence == seqdb[j])
|
||||||
|
{pair->coveredSeqs[j] = 1; break;}
|
||||||
|
}
|
||||||
|
|
||||||
counttaxon(-1);
|
counttaxon(-1);
|
||||||
for (i=0; i < seqcount; i++)
|
for (i=0; i < seqcount; i++)
|
||||||
if (!pair->pcr.amplifias[i].sequence->isexample
|
if (!pair->pcr.amplifias[i].sequence->isexample
|
||||||
@ -145,12 +194,14 @@ static int cmpamp(const void *ampf1, const void* ampf2)
|
|||||||
{
|
{
|
||||||
incr = -1;
|
incr = -1;
|
||||||
j = pampf1->length - 1;
|
j = pampf1->length - 1;
|
||||||
|
|
||||||
if (pampf2->strand)
|
if (pampf2->strand)
|
||||||
{
|
{
|
||||||
pampf1 = (pamptotaxon_t) ampf2;
|
pampf1 = (pamptotaxon_t) ampf2;
|
||||||
pampf2 = (pamptotaxon_t) ampf1;
|
pampf2 = (pamptotaxon_t) ampf1;
|
||||||
chd = 1;
|
chd = 1;
|
||||||
}
|
}
|
||||||
|
//j = pampf2->length - 1; should have been here and pampf2 instead of pampf1?
|
||||||
}
|
}
|
||||||
|
|
||||||
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
|
len = (pampf1->length <= pampf2->length)? pampf1->length: pampf2->length;
|
||||||
@ -173,6 +224,7 @@ static int cmpamp(const void *ampf1, const void* ampf2)
|
|||||||
return 0;
|
return 0;
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
|
|
||||||
static int cmpamp(const void *ampf1, const void* ampf2)
|
static int cmpamp(const void *ampf1, const void* ampf2)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
@ -242,6 +294,8 @@ void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
|||||||
uint32_t i, j;
|
uint32_t i, j;
|
||||||
uint32_t ampfindex = 0;
|
uint32_t ampfindex = 0;
|
||||||
int32_t taxid;
|
int32_t taxid;
|
||||||
|
uint32_t wellidentifiedcount;
|
||||||
|
|
||||||
void *ampftree = NULL;
|
void *ampftree = NULL;
|
||||||
pamptotaxon_t pcurrentampf;
|
pamptotaxon_t pcurrentampf;
|
||||||
pamptotaxon_t *ptmp;
|
pamptotaxon_t *ptmp;
|
||||||
@ -278,11 +332,14 @@ void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
|||||||
}
|
}
|
||||||
|
|
||||||
memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int));
|
memset (pair->wellIdentifiedSeqs, 0, seqdbsize*sizeof (int));
|
||||||
counttaxon(-1);
|
//counttaxon(-1);
|
||||||
for (i = 0; i < ampfindex; i++)
|
for (i = 0; i < ampfindex; i++)
|
||||||
{
|
{
|
||||||
if (ampfwithtaxtree[i].taxoncount > 1)
|
if (ampfwithtaxtree[i].taxoncount > 1)
|
||||||
twalk(ampfwithtaxtree[i].taxontree, twalkaction);
|
{
|
||||||
|
//printf ("\nampfwithtaxtree[i].taxoncount: %d\n", ampfwithtaxtree[i].taxoncount);
|
||||||
|
//twalk(ampfwithtaxtree[i].taxontree, twalkaction);
|
||||||
|
}
|
||||||
//TR 5/9/10 - added code for well identified seqs
|
//TR 5/9/10 - added code for well identified seqs
|
||||||
else if(ampfwithtaxtree[i].taxoncount == 1) /*well identified*/
|
else if(ampfwithtaxtree[i].taxoncount == 1) /*well identified*/
|
||||||
{
|
{
|
||||||
@ -293,6 +350,7 @@ void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
|||||||
{
|
{
|
||||||
for (j = 0; j < seqdbsize; j++)
|
for (j = 0; j < seqdbsize; j++)
|
||||||
if (seqdb[j]->ranktaxonid == gtxid
|
if (seqdb[j]->ranktaxonid == gtxid
|
||||||
|
&& seqdb[j]->isexample
|
||||||
&&(pair->p1->directCount[j] > 0
|
&&(pair->p1->directCount[j] > 0
|
||||||
|| pair->p1->reverseCount[j] > 0)
|
|| pair->p1->reverseCount[j] > 0)
|
||||||
&& (pair->p2->directCount[j] > 0
|
&& (pair->p2->directCount[j] > 0
|
||||||
@ -303,9 +361,17 @@ void taxonomyspecificity (ppair_t pair, pecodnadb_t seqdb,uint32_t seqdbsize)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
//printf ("\n");
|
||||||
pair->notwellidentifiedtaxa = counttaxon(-2);
|
counttaxon(-1);
|
||||||
pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa;
|
wellidentifiedcount = 0;
|
||||||
|
for (j = 0; j < seqdbsize; j++)
|
||||||
|
if (pair->wellIdentifiedSeqs[j] == 1)
|
||||||
|
counttaxon(seqdb[j]->ranktaxonid);
|
||||||
|
wellidentifiedcount = counttaxon(-2);
|
||||||
|
//pair->notwellidentifiedtaxa = counttaxon(-2);
|
||||||
|
pair->notwellidentifiedtaxa = (pair->intaxa-wellidentifiedcount); //counttaxon(-2);
|
||||||
|
//pair->bs = ((float)pair->intaxa - (float)pair->notwellidentifiedtaxa) / pair->intaxa;
|
||||||
|
pair->bs = ((float)wellidentifiedcount) / (float)pair->intaxa;
|
||||||
|
|
||||||
ECOFREE (ampfwithtaxtree, "Free amplifia table");
|
ECOFREE (ampfwithtaxtree, "Free amplifia table");
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user