diff --git a/Debug/.gdb_history b/Debug/.gdb_history new file mode 100644 index 0000000..942574c --- /dev/null +++ b/Debug/.gdb_history @@ -0,0 +1,62 @@ +r -1acghtgd -2gctcagctagcta /Users/coissac/encours/ecoPCR/tools/ecodb +print put +print out +f ecoPCR +r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +bt +print label1 +up +print label1 +print (char*)label1 +r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +bt +up 3 +l +b 38 +r +r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +print label +print ranks +print *ranks +print ranks->label +1 +print *ranks->label +1 +print *(ranks->label +1) +print *(ranks->label +2) +b ecotax.c:147 +r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +print current_taxon ; +print current_taxon +n +print current_taxon ; +print current_taxon +print *current_taxon +print taxonomy->taxons[11] +print taxonomy->taxons[2952] +print taxonomy->taxons->taxon[2952] +print taxonomy->ranks->rank[11] +print taxonomy->ranks->label[11] +print taxonomy->ranks->label[3] +b ecotax.c:147 +r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +print current_taxon +print *current_taxon +n +print *current_taxon +b ecotax.c:147 +r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +n +print *current_taxon +r -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +n +print *current_taxon +b ecodna.c:70 +r -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +*sb +print *sb +print sb +print str +b 52 +r -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +print str +r -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb +bt diff --git a/Debug/makefile b/Debug/makefile new file mode 100644 index 0000000..2c69725 --- /dev/null +++ b/Debug/makefile @@ -0,0 +1,46 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +-include ../makefile.init + +RM := rm -rf + +# All of the sources participating in the build are defined here +-include sources.mk +-include subdir.mk +-include src/libecoPCR/subdir.mk +-include src/libapat/subdir.mk +-include src/subdir.mk +-include objects.mk + +ifneq ($(MAKECMDGOALS),clean) +ifneq ($(strip $(C_DEPS)),) +-include $(C_DEPS) +endif +endif + +-include ../makefile.defs + +# Add inputs and outputs from these tool invocations to the build variables + +# All Target +all: ecoPCR + +# Tool invocations +ecoPCR: $(OBJS) $(USER_OBJS) + @echo 'Building target: $@' + @echo 'Invoking: MacOS X C Linker' + gcc -o "ecoPCR" $(OBJS) $(USER_OBJS) $(LIBS) + @echo 'Finished building target: $@' + @echo ' ' + +# Other Targets +clean: + -$(RM) $(OBJS)$(EXECUTABLES)$(C_DEPS) ecoPCR + -@echo ' ' + +.PHONY: all clean dependents +.SECONDARY: + +-include ../makefile.targets diff --git a/Debug/objects.mk b/Debug/objects.mk new file mode 100644 index 0000000..899fd62 --- /dev/null +++ b/Debug/objects.mk @@ -0,0 +1,7 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +LIBS := -lz + +USER_OBJS := \ No newline at end of file diff --git a/Debug/sources.mk b/Debug/sources.mk new file mode 100644 index 0000000..eb1bf94 --- /dev/null +++ b/Debug/sources.mk @@ -0,0 +1,19 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +C_SRCS := +O_SRCS := +ASM_SRCS := +S_SRCS := +OBJ_SRCS := +OBJS := +EXECUTABLES := +C_DEPS := + +# Every subdirectory with source files must be described here +SUBDIRS := \ +src/libecoPCR \ +src/libapat \ +src \ + diff --git a/Debug/src/libapat/CODES/subdir.mk b/Debug/src/libapat/CODES/subdir.mk new file mode 100644 index 0000000..a769d35 --- /dev/null +++ b/Debug/src/libapat/CODES/subdir.mk @@ -0,0 +1,24 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +C_SRCS += \ +../src/libapat/CODES/gendual.c + +OBJS += \ +./src/libapat/CODES/gendual.o + +C_DEPS += \ +./src/libapat/CODES/gendual.d + + +# Each subdirectory must supply rules for building sources it contributes +src/libapat/CODES/%.o: ../src/libapat/CODES/%.c + @echo 'Building file: $<' + @echo 'Invoking: GCC C Compiler' + gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + + diff --git a/Debug/src/libapat/subdir.mk b/Debug/src/libapat/subdir.mk new file mode 100644 index 0000000..1ba3c9a --- /dev/null +++ b/Debug/src/libapat/subdir.mk @@ -0,0 +1,30 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +C_SRCS += \ +../src/libapat/apat_parse.c \ +../src/libapat/apat_search.c \ +../src/libapat/libstki.c + +OBJS += \ +./src/libapat/apat_parse.o \ +./src/libapat/apat_search.o \ +./src/libapat/libstki.o + +C_DEPS += \ +./src/libapat/apat_parse.d \ +./src/libapat/apat_search.d \ +./src/libapat/libstki.d + + +# Each subdirectory must supply rules for building sources it contributes +src/libapat/%.o: ../src/libapat/%.c + @echo 'Building file: $<' + @echo 'Invoking: GCC C Compiler' + gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + + diff --git a/Debug/src/libecoPCR/subdir.mk b/Debug/src/libecoPCR/subdir.mk new file mode 100644 index 0000000..35eef17 --- /dev/null +++ b/Debug/src/libecoPCR/subdir.mk @@ -0,0 +1,45 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +C_SRCS += \ +../src/libecoPCR/ecoError.c \ +../src/libecoPCR/ecoIOUtils.c \ +../src/libecoPCR/ecoMalloc.c \ +../src/libecoPCR/ecoapat.c \ +../src/libecoPCR/ecodna.c \ +../src/libecoPCR/ecorank.c \ +../src/libecoPCR/ecoseq.c \ +../src/libecoPCR/ecotax.c + +OBJS += \ +./src/libecoPCR/ecoError.o \ +./src/libecoPCR/ecoIOUtils.o \ +./src/libecoPCR/ecoMalloc.o \ +./src/libecoPCR/ecoapat.o \ +./src/libecoPCR/ecodna.o \ +./src/libecoPCR/ecorank.o \ +./src/libecoPCR/ecoseq.o \ +./src/libecoPCR/ecotax.o + +C_DEPS += \ +./src/libecoPCR/ecoError.d \ +./src/libecoPCR/ecoIOUtils.d \ +./src/libecoPCR/ecoMalloc.d \ +./src/libecoPCR/ecoapat.d \ +./src/libecoPCR/ecodna.d \ +./src/libecoPCR/ecorank.d \ +./src/libecoPCR/ecoseq.d \ +./src/libecoPCR/ecotax.d + + +# Each subdirectory must supply rules for building sources it contributes +src/libecoPCR/%.o: ../src/libecoPCR/%.c + @echo 'Building file: $<' + @echo 'Invoking: GCC C Compiler' + gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + + diff --git a/Debug/src/subdir.mk b/Debug/src/subdir.mk new file mode 100644 index 0000000..8a80215 --- /dev/null +++ b/Debug/src/subdir.mk @@ -0,0 +1,24 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +C_SRCS += \ +../src/ecopcr.c + +OBJS += \ +./src/ecopcr.o + +C_DEPS += \ +./src/ecopcr.d + + +# Each subdirectory must supply rules for building sources it contributes +src/%.o: ../src/%.c + @echo 'Building file: $<' + @echo 'Invoking: GCC C Compiler' + gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + + diff --git a/Release/makefile b/Release/makefile new file mode 100644 index 0000000..2c69725 --- /dev/null +++ b/Release/makefile @@ -0,0 +1,46 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +-include ../makefile.init + +RM := rm -rf + +# All of the sources participating in the build are defined here +-include sources.mk +-include subdir.mk +-include src/libecoPCR/subdir.mk +-include src/libapat/subdir.mk +-include src/subdir.mk +-include objects.mk + +ifneq ($(MAKECMDGOALS),clean) +ifneq ($(strip $(C_DEPS)),) +-include $(C_DEPS) +endif +endif + +-include ../makefile.defs + +# Add inputs and outputs from these tool invocations to the build variables + +# All Target +all: ecoPCR + +# Tool invocations +ecoPCR: $(OBJS) $(USER_OBJS) + @echo 'Building target: $@' + @echo 'Invoking: MacOS X C Linker' + gcc -o "ecoPCR" $(OBJS) $(USER_OBJS) $(LIBS) + @echo 'Finished building target: $@' + @echo ' ' + +# Other Targets +clean: + -$(RM) $(OBJS)$(EXECUTABLES)$(C_DEPS) ecoPCR + -@echo ' ' + +.PHONY: all clean dependents +.SECONDARY: + +-include ../makefile.targets diff --git a/Release/objects.mk b/Release/objects.mk new file mode 100644 index 0000000..899fd62 --- /dev/null +++ b/Release/objects.mk @@ -0,0 +1,7 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +LIBS := -lz + +USER_OBJS := \ No newline at end of file diff --git a/Release/sources.mk b/Release/sources.mk new file mode 100644 index 0000000..eb1bf94 --- /dev/null +++ b/Release/sources.mk @@ -0,0 +1,19 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +C_SRCS := +O_SRCS := +ASM_SRCS := +S_SRCS := +OBJ_SRCS := +OBJS := +EXECUTABLES := +C_DEPS := + +# Every subdirectory with source files must be described here +SUBDIRS := \ +src/libecoPCR \ +src/libapat \ +src \ + diff --git a/Release/src/libapat/subdir.mk b/Release/src/libapat/subdir.mk new file mode 100644 index 0000000..44cfe8f --- /dev/null +++ b/Release/src/libapat/subdir.mk @@ -0,0 +1,30 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +C_SRCS += \ +../src/libapat/apat_parse.c \ +../src/libapat/apat_search.c \ +../src/libapat/libstki.c + +OBJS += \ +./src/libapat/apat_parse.o \ +./src/libapat/apat_search.o \ +./src/libapat/libstki.o + +C_DEPS += \ +./src/libapat/apat_parse.d \ +./src/libapat/apat_search.d \ +./src/libapat/libstki.d + + +# Each subdirectory must supply rules for building sources it contributes +src/libapat/%.o: ../src/libapat/%.c + @echo 'Building file: $<' + @echo 'Invoking: GCC C Compiler' + gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + + diff --git a/Release/src/libecoPCR/subdir.mk b/Release/src/libecoPCR/subdir.mk new file mode 100644 index 0000000..59b89c3 --- /dev/null +++ b/Release/src/libecoPCR/subdir.mk @@ -0,0 +1,45 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +C_SRCS += \ +../src/libecoPCR/ecoError.c \ +../src/libecoPCR/ecoIOUtils.c \ +../src/libecoPCR/ecoMalloc.c \ +../src/libecoPCR/ecoapat.c \ +../src/libecoPCR/ecodna.c \ +../src/libecoPCR/ecorank.c \ +../src/libecoPCR/ecoseq.c \ +../src/libecoPCR/ecotax.c + +OBJS += \ +./src/libecoPCR/ecoError.o \ +./src/libecoPCR/ecoIOUtils.o \ +./src/libecoPCR/ecoMalloc.o \ +./src/libecoPCR/ecoapat.o \ +./src/libecoPCR/ecodna.o \ +./src/libecoPCR/ecorank.o \ +./src/libecoPCR/ecoseq.o \ +./src/libecoPCR/ecotax.o + +C_DEPS += \ +./src/libecoPCR/ecoError.d \ +./src/libecoPCR/ecoIOUtils.d \ +./src/libecoPCR/ecoMalloc.d \ +./src/libecoPCR/ecoapat.d \ +./src/libecoPCR/ecodna.d \ +./src/libecoPCR/ecorank.d \ +./src/libecoPCR/ecoseq.d \ +./src/libecoPCR/ecotax.d + + +# Each subdirectory must supply rules for building sources it contributes +src/libecoPCR/%.o: ../src/libecoPCR/%.c + @echo 'Building file: $<' + @echo 'Invoking: GCC C Compiler' + gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + + diff --git a/Release/src/subdir.mk b/Release/src/subdir.mk new file mode 100644 index 0000000..f317992 --- /dev/null +++ b/Release/src/subdir.mk @@ -0,0 +1,24 @@ +################################################################################ +# Automatically-generated file. Do not edit! +################################################################################ + +# Add inputs and outputs from these tool invocations to the build variables +C_SRCS += \ +../src/ecopcr.c + +OBJS += \ +./src/ecopcr.o + +C_DEPS += \ +./src/ecopcr.d + + +# Each subdirectory must supply rules for building sources it contributes +src/%.o: ../src/%.c + @echo 'Building file: $<' + @echo 'Invoking: GCC C Compiler' + gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<" + @echo 'Finished building: $<' + @echo ' ' + + diff --git a/doxyfile b/doxyfile new file mode 100644 index 0000000..72047f4 --- /dev/null +++ b/doxyfile @@ -0,0 +1,1237 @@ +# Doxyfile 1.4.6 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = ecoPCR + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = 0.1 + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc/api + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = YES + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, +# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese, +# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian, +# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, +# Swedish, and Ukrainian. + +OUTPUT_LANGUAGE = English + +# This tag can be used to specify the encoding used in the generated output. +# The encoding is not always determined by the language that is chosen, +# but also whether or not the output is meant for Windows or non-Windows users. +# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES +# forces the Windows encoding (this is the default for the Windows binary), +# whereas setting the tag to NO uses a Unix-style encoding (the default for +# all platforms other than Windows). + +USE_WINDOWS_ENCODING = NO + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful is your file systems +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explicit @brief command for a brief description. + +JAVADOC_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. + +DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for Java. +# For instance, namespaces will be presented as packages, qualified scopes +# will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to +# include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST = YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from the +# version control system). Doxygen will invoke the program by executing (via +# popen()) the command , where is the value of +# the FILE_VERSION_FILTER tag, and is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be abled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command , where +# is the value of the INPUT_FILTER tag, and is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, +# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are +# probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# in the INCLUDE_PATH (see below) will be search if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will +# generate a call dependency graph for every global function or class method. +# Note that enabling this option will significantly increase the time of a run. +# So in most cases it will be better to enable call graphs for selected +# functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_WIDTH = 1024 + +# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allows height +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_HEIGHT = 1024 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that a graph may be further truncated if the graph's +# image dimensions are not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH +# and MAX_DOT_GRAPH_HEIGHT). If 0 is used for the depth value (the default), +# the graph is not depth-constrained. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, which results in a white background. +# Warning: Depending on the platform used, enabling this option may lead to +# badly anti-aliased labels on the edges of a graph (i.e. they become hard to +# read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO diff --git a/src/ecopcr.c b/src/ecopcr.c new file mode 100644 index 0000000..f0dba6d --- /dev/null +++ b/src/ecopcr.c @@ -0,0 +1,512 @@ +#include "libecoPCR/ecoPCR.h" +#include +#include +#include +#include + + +#define VERSION "0.1" + +/* ----------------------------------------------- */ +/* printout help */ /* ----------------------------------------------- */ + +#define PP fprintf(stdout, + +static void PrintHelp() +{ + PP "------------------------------------------\n"); + PP " Apat Version %s\n", VERSION); + PP "------------------------------------------\n"); + PP "synopsis : pattern(s) searching program\n"); + PP "usage: apat [options] patfile datafile\n"); + PP "------------------------------------------\n"); + PP "options:\n"); + PP "-a code : [A]lphabet encoding for pattern\n"); + PP " code is one of : \n"); + PP " dna: use IUPAC equivalences for dna/rna\n"); + PP " prot: use IUPAC equivalences for proteins\n"); + PP " alpha: no equivalences, just treat plain symbols\n"); + PP " note: the equivalences are used in pattern only\n"); + PP " *not* in sequence(s) (see note (4) below)\n"); + PP " dft: alpha\n"); + PP "-c : [C]ooccurences\n"); + PP " print patterns cooccurence matrix \n"); + PP " dft: off\n"); + PP "-h : [H]elp - print help\n"); + PP "-m : [M]ultiple occurences\n"); + PP " see -q option \n"); + PP " dft: off\n"); + PP "-o file : [O]utput sequences\n"); + PP " additionaly output sequence(s) that match into\n"); + PP " 'file' in fasta format\n"); + PP " dft: off\n"); + PP "-p : no [Print] - don't printout hits\n"); + PP " when just counts are needed\n"); + PP " dft: off\n"); + PP "-q nn : [Quorum]\n"); + PP " printout result if at least nn\n"); + PP " different patterns are found on the sequence\n"); + PP " (with -m : at least nn different )\n"); + PP " dft: # of patterns read\n"); + PP "-s : no [Sort] - don't sort hits before printing\n"); + PP " usually hits are printed by increasing position\n"); + PP " this option will list them by pattern\n"); + PP " dft: off\n"); + PP "-t : [T]est sequence\n"); + PP " additionnaly check if sequences are uppercase\n"); + PP " this is mostly used for testing\n"); + PP " dft: off\n"); + PP "-u : [U]pper\n"); + PP " force lower->upper sequence conversion\n"); + PP " without this option lowercase symbols in sequence\n"); + PP " will not be considered to as matches\n"); + PP " dft: off\n"); + PP "-v : [V]erbose\n"); + PP " just display a kind of progress clock on stderr\n"); + PP " (this is only useful if you redirect stdout)\n"); + PP "\n"); + PP "patfile : pattern file (see below)\n"); + PP "datafile : database file (see below)\n"); + PP "------------------------------------------\n"); + PP "pattern file format :\n"); + PP " one pattern/line\n"); + PP " format : #errors\n"); + PP " := pattern\n"); + PP " or !pattern\n"); + PP " or pattern#\n"); + PP " or !pattern#\n"); + PP " := \n"); + PP " or [....]\n"); + PP " := uppercase letter (A-Z)\n"); + PP " := a positive number indicates max number of mismatches\n"); + PP " a negative number indicates max number of mismatches or indels\n"); + PP " # means that no error is allowed at this position\n"); + PP " ! complement the \n"); + PP " [...] means that all symbols within [] are allowed\n"); + PP " in addition IUPAC equivalences may be used as symbols\n"); + PP " with the -a option\n"); + PP "\n"); + PP "example: G[DE]S#[GIV]!HP![DE]# 1\n"); + PP "\n"); + PP "------------------------------------------\n"); + PP "datafile contains one or more sequences in\n"); + PP "Fasta format, with *uppercase* symbols \n"); + PP "\n"); + PP "------------------------------------------\n"); + PP "note (1): the maximum number of patterns is %d\n", MAX_PATTERN); + PP "\n"); + PP "note (2): the maximum length for one pattern is %d\n", MAX_PAT_LEN); + PP "\n"); + PP "note (3): indels are still experimental and are :\n"); + PP " not handled gracefully with the # syntax\n"); + PP " and hits are not printed very nicely\n"); + PP "\n"); + PP "note (4): the IUPAC equivalences (-a option) are used\n"); + PP " in pattern only *not* in sequence(s).\n"); + PP " for instance GATN (with option -a dna) is equivalent to GAT[ACGT]\n"); + PP " and will match GATA/GATC/GATG/GATC but will not match GATN\n"); + PP " (nor NNNN) in sequence.\n"); + PP "\n"); + +} + +#undef PP + +/* ----------------------------------------------- */ +/* printout usage and exit */ +/* ----------------------------------------------- */ + +#define PP fprintf(stderr, + +static void ExitUsage(stat) + int stat; +{ + PP "usage: apat [-a dna|prot] [-c] [-h] [-m] [-o file] [-p]\n"); + PP " [-q nn] [-t] [-u] [-v]\n"); + PP " patfile datafile\n"); + PP "type \"apat -h\" for help\n"); + + if (stat) + exit(stat); +} + +#undef PP + +void printRepeat(ecoseq_t *seq, + PatternPtr o1, PatternPtr o2, + char strand, + int32_t pos1, int32_t pos2, + int32_t err1, int32_t err2, + ecotaxonomy_t *taxonomy) +{ + char *AC; + int32_t seqlength; + int32_t taxid; + int32_t species_taxid; + int32_t genus_taxid; + int32_t family_taxid; + int32_t superkingdom_taxid; + char *rank; + char *scientificName; + char *genus_name; + char *family_name; + char *superkingdom_name; + + ecotx_t *taxon; + ecotx_t *main_taxon; + + char oligo1[MAX_PAT_LEN+1]; + char oligo2[MAX_PAT_LEN+1]; + + int32_t error1; + int32_t error2; + + char *amplifia = NULL; + int32_t amplength; + + AC = seq->AC; + seqlength = seq->SQ_length; + + main_taxon = &taxonomy->taxons->taxon[seq->taxid]; + taxid = main_taxon->taxid; + scientificName= main_taxon->name; + rank = taxonomy->ranks->label[main_taxon->rank]; + taxon = eco_getspecies(main_taxon,taxonomy); + if (taxon) + { + species_taxid = taxon->taxid; + scientificName= taxon->name; + } + else + species_taxid = -1; + + taxon = eco_getgenus((taxon) ? taxon:main_taxon,taxonomy); + if (taxon) + { + genus_taxid = taxon->taxid; + genus_name= taxon->name; + } + else + { + genus_taxid = -1; + genus_name = "###"; + } + + taxon = eco_getfamily((taxon) ? taxon:main_taxon,taxonomy); + if (taxon) + { + family_taxid = taxon->taxid; + family_name= taxon->name; + } + else + { + family_taxid = -1; + family_name = "###"; + } + + taxon = eco_getsuperkingdom((taxon) ? taxon:main_taxon,taxonomy); + if (taxon) + { + superkingdom_taxid = taxon->taxid; + superkingdom_name= taxon->name; + } + else + { + superkingdom_taxid = -1; + superkingdom_name = "###"; + } + + amplength = pos2-pos1; + + amplifia = getSubSequence(seq->SQ,pos1,pos2); + + + if (strand=='R') + { + ecoComplementSequence(amplifia); + strncpy(oligo1,amplifia,o2->patlen); + oligo1[o2->patlen]=0; + error1=err2; + + strncpy(oligo2,amplifia + amplength - o1->patlen,o1->patlen); + oligo2[o1->patlen]=0; + error2=err1; + + amplifia+=o2->patlen; + } + else + { + strncpy(oligo1,amplifia,o1->patlen); + oligo1[o1->patlen]=0; + error1=err1; + + strncpy(oligo2,amplifia + amplength - o2->patlen,o2->patlen); + oligo2[o2->patlen]=0; + error2=err2; + + amplifia+=o1->patlen; + + } + + + + ecoComplementSequence(oligo2); + amplifia[amplength - o2->patlen - o1->patlen]=0; + + printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %-32s | %2d | %5d | %s | %s\n", + AC, + seqlength, + taxid, + rank, + species_taxid, + scientificName, + genus_taxid, + genus_name, + family_taxid, + family_name, + superkingdom_taxid, + superkingdom_name, + strand, + oligo1, + error1, + oligo2, + error2, + amplength, + amplifia, + seq->DE + ); +} + +int main(int argc, char **argv) +{ + ecoseq_t *seq; + ecotaxonomy_t *taxonomy; + char *scname; + char head[11]; + char tail[11]; + + int carg; + + char *oligo1=NULL; + char *oligo2=NULL; + + PatternPtr o1; + PatternPtr o2; + PatternPtr o1c; + PatternPtr o2c; + + int32_t lmin=0; + int32_t lmax=0; + int32_t error_max=0; + int32_t errflag=0; + + char *prefix; + + int32_t checkedSequence = 0; + int32_t positiveSequence= 0; + int32_t amplifiatCount = 0; + + int32_t o1Hits; + int32_t o2Hits; + int32_t o1cHits; + int32_t o2cHits; + + int32_t begin; + int32_t length; + + SeqPtr apatseq=NULL; + StackiPtr stktmp; + + int32_t i; + int32_t j; + int32_t posi; + int32_t posj; + int32_t erri; + int32_t errj; + + + while ((carg = getopt(argc, argv, "h1:2:l:L:e:")) != -1) { + + switch (carg) { + /* -------------------- */ + case '1': /* prenier oligo */ + /* -------------------- */ + oligo1 = ECOMALLOC(strlen(optarg)+1, + "Error on oligo 1 allocation"); + strcpy(oligo1,optarg); + break; + + /* -------------------- */ + case '2': /* coocurence option */ + /* -------------------- */ + oligo2 = ECOMALLOC(strlen(optarg)+1, + "Error on oligo 1 allocation"); + strcpy(oligo2,optarg); + break; + + /* -------------------- */ + case 'h': /* help */ + /* -------------------- */ + + PrintHelp(); + exit(0); + break; + + /* -------------------- */ + case 'l': /* lmin amplification */ + /* -------------------- */ + sscanf(optarg,"%d",&lmin); + break; + + /* -------------------- */ + case 'L': /* lmax amplification */ + /* -------------------- */ + sscanf(optarg,"%d",&lmax); + break; + + /* -------------------- */ + case 'e': /* error max */ + /* -------------------- */ + sscanf(optarg,"%d",&error_max); + break; + + /* -------------------- */ + case '?': /* bad option */ + /* -------------------- */ + errflag++; + } + + } + + if ((argc -= optind) != 1) + errflag++; + + if (!oligo1 || !oligo2) + errflag++; + + if (errflag) + ExitUsage(errflag); + + prefix = argv[optind]; + + o1 = buildPattern(oligo1,error_max); + o2 = buildPattern(oligo2,error_max); + + o1c = complementPattern(o1); + o2c = complementPattern(o2); + + + + printf("#\n"); + printf("# ecoPCR version %s\n",VERSION); + printf("# direct strand oligo1 : %-32s ; oligo2c : %32s\n", o1->cpat,o2c->cpat); + printf("# reverse strand oligo2 : %-32s ; oligo1c : %32s\n", o2->cpat,o1c->cpat); + printf("# max error count by oligonucleotide : %d\n",error_max); + printf("# database : %s\n",prefix); + if (lmin && lmax) + printf("# amplifiat length between [%d,%d] bp\n",lmin,lmax); + else if (lmin) + printf("# amplifiat length larger than %d bp\n",lmin); + else if (lmax) + printf("# amplifiat length smaller than %d bp\n",lmax); + printf("#\n"); + + taxonomy = read_taxonomy(prefix); + + seq = ecoseq_iterator(prefix); + + + checkedSequence = 0; + positiveSequence= 0; + amplifiatCount = 0; + + while(seq) + { + checkedSequence++; + + scname = taxonomy->taxons->taxon[seq->taxid].name; + strncpy(head,seq->SQ,10); + head[10]=0; + strncpy(tail,seq->SQ+seq->SQ_length-10,10); + tail[10]=0; + + + apatseq=ecoseq2apatseq(seq,apatseq); + + o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen); + o2cHits= 0; + + + if (o1Hits) + { + stktmp = apatseq->hitpos[0]; + begin = stktmp->val[0] + o1->patlen; + + if (lmax) + length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen; + else + length= apatseq->seqlen - begin; + + o2cHits = ManberAll(apatseq,o2c,1,begin,length); + + if (o2cHits) + for (i=0; i < o1Hits;i++) + { + posi = apatseq->hitpos[0]->val[i]; + erri = apatseq->hiterr[0]->val[i]; + for (j=0; j < o2cHits; j++) + { + posj =apatseq->hitpos[1]->val[j] + o2c->patlen; + errj =apatseq->hiterr[1]->val[j]; + length=posj - posi + 1 - o1->patlen - o2->patlen; + + if ((!lmin || (length >= lmin)) && + (!lmax || (length <= lmax))) + printRepeat(seq,o1,o2c,'D',posi,posj,erri,errj,taxonomy); + //printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname); + } + } + } + + o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen); + o1cHits= 0; + if (o2Hits) + { + stktmp = apatseq->hitpos[2]; + begin = stktmp->val[0] + o2->patlen; + + if (lmax) + length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen; + else + length= apatseq->seqlen - begin; + + o1cHits = ManberAll(apatseq,o1c,3,begin,length); + + if (o1cHits) + for (i=0; i < o2Hits;i++) + { + posi = apatseq->hitpos[2]->val[i]; + erri = apatseq->hiterr[2]->val[i]; + for (j=0; j < o1cHits; j++) + { + posj=apatseq->hitpos[3]->val[j] + o1c->patlen; + errj=apatseq->hiterr[3]->val[j]; + length=posj - posi + 1 - o1->patlen - o2->patlen; + + if ((!lmin || (length >= lmin)) && + (!lmax || (length <= lmax))) + printRepeat(seq,o2,o1c,'R',posi,posj,erri,errj,taxonomy); + //printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname); + } + } + } + + + + delete_ecoseq(seq); + + seq = ecoseq_iterator(NULL); + } + + return 0; +} diff --git a/src/libapat/CODES/dft_code.h b/src/libapat/CODES/dft_code.h new file mode 100644 index 0000000..b9caf28 --- /dev/null +++ b/src/libapat/CODES/dft_code.h @@ -0,0 +1,14 @@ +/* ----------------------------------------------- */ +/* dft_pat_seq_code.h */ +/* default alphabet encoding for alpha */ +/* ----------------------------------------------- */ + + 0x00000001 /* A */, 0x00000002 /* B */, 0x00000004 /* C */, + 0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */, + 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */, + 0x00000200 /* J */, 0x00000400 /* K */, 0x00000800 /* L */, + 0x00001000 /* M */, 0x00002000 /* N */, 0x00004000 /* O */, + 0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */, + 0x00040000 /* S */, 0x00080000 /* T */, 0x00100000 /* U */, + 0x00200000 /* V */, 0x00400000 /* W */, 0x00800000 /* X */, + 0x01000000 /* Y */, 0x02000000 /* Z */ diff --git a/src/libapat/CODES/dna_code.h b/src/libapat/CODES/dna_code.h new file mode 100644 index 0000000..0febf41 --- /dev/null +++ b/src/libapat/CODES/dna_code.h @@ -0,0 +1,71 @@ +/* ----------------------------------------------- */ +/* dna_code.h */ +/* alphabet encoding for dna/rna */ +/* ----------------------------------------- */ +/* IUPAC encoding */ +/* ----------------------------------------- */ +/* G/A/T/C */ +/* U=T */ +/* R=AG */ +/* Y=CT */ +/* M=AC */ +/* K=GT */ +/* S=CG */ +/* W=AT */ +/* H=ACT */ +/* B=CGT */ +/* V=ACG */ +/* D=AGT */ +/* N=ACGT */ +/* X=ACGT */ +/* EFIJLOPQZ not recognized */ +/* ----------------------------------------- */ +/* dual encoding */ +/* ----------------------------------------- */ +/* A=ADHMNRVW */ +/* B=BCDGHKMNRSTUVWY */ +/* C=BCHMNSVY */ +/* D=ABDGHKMNRSTUVWY */ +/* G=BDGKNRSV */ +/* H=ABCDHKMNRSTUVWY */ +/* K=BDGHKNRSTUVWY */ +/* M=ABCDHMNRSVWY */ +/* N=ABCDGHKMNRSTUVWY */ +/* R=ABDGHKMNRSVW */ +/* S=BCDGHKMNRSVY */ +/* T=BDHKNTUWY */ +/* U=BDHKNTUWY */ +/* V=ABCDGHKMNRSVWY */ +/* W=ABDHKMNRTUVWY */ +/* X=ABCDGHKMNRSTUVWY */ +/* Y=BCDHKMNSTUVWY */ +/* EFIJLOPQZ not recognized */ +/* ----------------------------------------------- */ + +#ifndef USE_DUAL + + /* IUPAC */ + + 0x00000001 /* A */, 0x00080044 /* B */, 0x00000004 /* C */, + 0x00080041 /* D */, 0x00000000 /* E */, 0x00000000 /* F */, + 0x00000040 /* G */, 0x00080005 /* H */, 0x00000000 /* I */, + 0x00000000 /* J */, 0x00080040 /* K */, 0x00000000 /* L */, + 0x00000005 /* M */, 0x00080045 /* N */, 0x00000000 /* O */, + 0x00000000 /* P */, 0x00000000 /* Q */, 0x00000041 /* R */, + 0x00000044 /* S */, 0x00080000 /* T */, 0x00080000 /* U */, + 0x00000045 /* V */, 0x00080001 /* W */, 0x00080045 /* X */, + 0x00080004 /* Y */, 0x00000000 /* Z */ + +#else + /* DUAL */ + + 0x00623089 /* A */, 0x017e34ce /* B */, 0x01243086 /* C */, + 0x017e34cb /* D */, 0x00000000 /* E */, 0x00000000 /* F */, + 0x0026244a /* G */, 0x017e348f /* H */, 0x00000000 /* I */, + 0x00000000 /* J */, 0x017e24ca /* K */, 0x00000000 /* L */, + 0x0166308f /* M */, 0x017e34cf /* N */, 0x00000000 /* O */, + 0x00000000 /* P */, 0x00000000 /* Q */, 0x006634cb /* R */, + 0x012634ce /* S */, 0x0158248a /* T */, 0x0158248a /* U */, + 0x016634cf /* V */, 0x017a348b /* W */, 0x017e34cf /* X */, + 0x017c348e /* Y */, 0x00000000 /* Z */ +#endif diff --git a/src/libapat/CODES/prot_code.h b/src/libapat/CODES/prot_code.h new file mode 100644 index 0000000..edcdfc1 --- /dev/null +++ b/src/libapat/CODES/prot_code.h @@ -0,0 +1,51 @@ +/* ----------------------------------------------- */ +/* prot_code.h */ +/* alphabet encoding for proteins */ +/* ----------------------------------------- */ +/* IUPAC encoding */ +/* ----------------------------------------- */ +/* B=DN */ +/* Z=EQ */ +/* X=any - {X} */ +/* JOU not recognized */ +/* ----------------------------------------- */ +/* dual encoding */ +/* ----------------------------------------- */ +/* B=BDN */ +/* D=BD */ +/* E=EZ */ +/* N=BN */ +/* Q=QZ */ +/* X=any - {X} */ +/* Z=EQZ */ +/* JOU not recognized */ +/* ----------------------------------------------- */ + +#ifndef USE_DUAL + + /* IUPAC */ + + 0x00000001 /* A */, 0x00002008 /* B */, 0x00000004 /* C */, + 0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */, + 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */, + 0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */, + 0x00001000 /* M */, 0x00002000 /* N */, 0x00000000 /* O */, + 0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */, + 0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */, + 0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */, + 0x01000000 /* Y */, 0x00010010 /* Z */ + +#else + /* DUAL */ + + 0x00000001 /* A */, 0x0000200a /* B */, 0x00000004 /* C */, + 0x0000000a /* D */, 0x02000010 /* E */, 0x00000020 /* F */, + 0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */, + 0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */, + 0x00001000 /* M */, 0x00002002 /* N */, 0x00000000 /* O */, + 0x00008000 /* P */, 0x02010000 /* Q */, 0x00020000 /* R */, + 0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */, + 0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */, + 0x01000000 /* Y */, 0x02010010 /* Z */ + +#endif diff --git a/src/libapat/Gmach.h b/src/libapat/Gmach.h new file mode 100644 index 0000000..8fb1c69 --- /dev/null +++ b/src/libapat/Gmach.h @@ -0,0 +1,97 @@ +/* ---------------------------------------------------------------- */ +/* Copyright (c) Atelier de BioInformatique */ +/* @file: Gmach.h */ +/* @desc: machine dependant setup */ +/* @+ *should* be included in all ABI softs */ +/* */ +/* @history: */ +/* @+ : Jul 95 : MWC first draft */ +/* @+ : Jan 96 : adapted to Pwg */ +/* @+ : Nov 00 : adapted to Mac_OS_X */ +/* ---------------------------------------------------------------- */ + +#ifndef _H_Gmach + + /* OS names */ + +#define _H_Gmach + + /* Macintosh Classic */ + /* Think C environment */ +#ifdef THINK_C +#define MACINTOSH +#define MAC_OS_C +#endif + + + /* Macintosh Classic */ + /* Code-Warrior */ +#ifdef __MWERKS__ +#define MACINTOSH +#define MAC_OS_C +#endif + + /* Macintosh OS-X */ +#ifdef MAC_OS_X +#define MACINTOSH +#define UNIX +#define UNIX_BSD +#endif + + /* LINUX */ +#ifdef LINUX +#define UNIX +#define UNIX_BSD +#endif + + /* other Unix Boxes */ + /* SunOS / Solaris */ +#ifdef SUN +#define UNIX +#ifdef SOLARIS +#define UNIX_S7 +#else +#define UNIX_BSD +#endif +#endif + + /* SGI Irix */ +#ifdef SGI +#define UNIX +#define UNIX_S7 +#endif + +/* ansi setup */ +/* for unix machines see makefile */ + +#ifndef PROTO +#define PROTO 1 +#endif + +#ifndef ANSI_PROTO +#define ANSI_PROTO PROTO +#endif + +#ifndef ANSI_STR +#define ANSI_STR 1 +#endif + +/* unistd.h header file */ + +#ifdef UNIX +#define HAS_UNISTD_H +#endif + +/* getopt.h header file */ + +#ifdef MAC_OS_C +#define HAS_GETOPT_H "getopt.h" +#endif + +#ifdef SGI +#define HAS_GETOPT_H +#endif + + + +#endif diff --git a/src/libapat/Gtypes.h b/src/libapat/Gtypes.h new file mode 100644 index 0000000..9bf5a93 --- /dev/null +++ b/src/libapat/Gtypes.h @@ -0,0 +1,104 @@ +/* ---------------------------------------------------------------- */ +/* Copyright (c) Atelier de BioInformatique */ +/* @file: Gtypes.h */ +/* @desc: general & machine dependant types */ +/* @+ *should* be included in all ABI softs */ +/* */ +/* @history: */ +/* @+ : Jan 91 : MWC first draft */ +/* @+ : Jul 95 : Gmach addition */ +/* ---------------------------------------------------------------- */ + +#define _H_Gtypes + +#ifndef _H_Gmach +#include "Gmach.h" +#endif + +#ifndef NULL +#include /* is the official NULL here ? */ +#endif + +/* ==================================================== */ +/* constantes */ +/* ==================================================== */ + +#ifndef PROTO +#define PROTO 1 /* prototypes flag */ +#endif + +#ifdef MAC_OS_C +#define Vrai true /* TC boolean values */ +#define Faux false /* */ +#else +#define Vrai 0x1 /* bool values = TRUE */ +#define Faux 0x0 /* = FALSE */ +#endif + +#define Nil NULL /* nil pointer */ + +#define kBigInt16 0x7fff /* plus grand 16 bits signe */ +#define kBigInt32 0x7fffffff /* plus grand 32 bits signe */ +#define kBigUInt16 0xffff /* plus grand 16 bits ~signe */ +#define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */ + +#ifdef MAC_OS_C +/* ==================================================== */ +/* Types (for Macintosh ThinK C || MWerks) */ +/* ==================================================== */ + + /* --- specific sizes --------- */ +typedef long Int32; /* Int32 = 32 bits signe */ +typedef unsigned long UInt32; /* UInt32 = 32 bits ~signe */ +typedef short Int16; /* Int16 = 16 bits signe */ +typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */ +typedef char Int8; /* Int8 = 8 bits signe */ +typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */ + + /* --- default types ---------- */ + +typedef Boolean Bool; /* booleen */ + +typedef long Int; /* 'natural' int (>= 32 bits) */ + +typedef void *Ptr; /* pointeur */ + +#elif ((defined SUN) || (defined SGI) || (defined UNIX)) +/* ==================================================== */ +/* Types (for Sun & Iris - 32 bits machines) */ +/* ==================================================== */ + + /* --- specific sizes --------- */ +typedef int Int32; /* Int32 = 32 bits signe */ +typedef unsigned int UInt32; /* UInt32 = 32 bits ~signe */ +typedef short Int16; /* Int16 = 16 bits signe */ +typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */ +typedef char Int8; /* Int8 = 8 bits signe */ +typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */ + + /* --- default types ---------- */ + +typedef int Bool; /* booleen (int for ANSI) */ + +typedef int Int; /* 'natural' int (>= 32 bits) */ + +typedef void *Ptr; /* pointeur */ + +#else +/* ==================================================== */ +/* Types (for undefined machines) */ +/* ==================================================== */ + +#error undefined MACHINE + +#endif + +/* ==================================================== */ +/* special macro for prototypes */ +/* ==================================================== */ + +#if PROTO +#define P(s) s +#else +#define P(s) () +#endif diff --git a/src/libapat/apat.h b/src/libapat/apat.h new file mode 100644 index 0000000..230276c --- /dev/null +++ b/src/libapat/apat.h @@ -0,0 +1,172 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Dec. 94 */ +/* File: apat.h */ +/* Purpose: pattern scan */ +/* History: */ +/* 28/12/94 : ascan first version */ +/* 14/05/99 : last revision */ +/* ==================================================== */ + +#ifndef _H_Gtypes +#include "Gtypes.h" +#endif + +#ifndef _H_libstki +#include "libstki.h" +#endif + +#define H_apat + +/* ----------------------------------------------- */ +/* constantes */ +/* ----------------------------------------------- */ + +#ifndef BUFSIZ +#define BUFSIZ 1024 /* io buffer size */ +#endif + +#define MAX_NAME_LEN BUFSIZ /* max length of sequence name */ + +#define ALPHA_LEN 26 /* alphabet length */ + /* *DO NOT* modify */ + +#define MAX_PATTERN 4 /* max # of patterns */ + /* *DO NOT* modify */ + +#define MAX_PAT_LEN 32 /* max pattern length */ + /* *DO NOT* modify */ + +#define MAX_PAT_ERR 32 /* max # of errors */ + /* *DO NOT* modify */ + +#define PATMASK 0x3ffffff /* mask for 26 symbols */ + /* *DO NOT* modify */ + +#define OBLIBIT 0x4000000 /* bit 27 to 1 -> oblig. pos */ + /* *DO NOT* modify */ + + /* mask for position */ +#define ONEMASK 0x80000000 /* mask for highest position */ + + /* masks for Levenhstein edit */ +#define OPER_IDT 0x00000000 /* identity */ +#define OPER_INS 0x40000000 /* insertion */ +#define OPER_DEL 0x80000000 /* deletion */ +#define OPER_SUB 0xc0000000 /* substitution */ + +#define OPER_SHFT 30 /* shift */ + + /* Levenhstein Opcodes */ +#define SOPER_IDT 0x0 /* identity */ +#define SOPER_INS 0x1 /* insertion */ +#define SOPER_DEL 0x2 /* deletion */ +#define SOPER_SUB 0x3 /* substitution */ + + /* Levenhstein Opcodes masks */ +#define OPERMASK 0xc0000000 /* mask for Opcodes */ +#define NOPERMASK 0x3fffffff /* negate of previous */ + + /* special chars in pattern */ +#define PATCHARS "[]!#" + + /* 26 letter alphabet */ + /* in alphabetical order */ + +#define ORD_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + + /* protein alphabet */ + +#define PROT_ALPHA "ACDEFGHIKLMNPQRSTVWY" + + /* dna/rna alphabet */ + +#define DNA_ALPHA "ABCDGHKMNRSTUVWXY" + + +/* ----------------------------------------------- */ +/* data structures */ +/* ----------------------------------------------- */ + + /* -------------------- */ +typedef enum { /* data encoding */ + /* -------------------- */ + alpha = 0, /* [A-Z] */ + dna, /* IUPAC DNA */ + protein /* IUPAC proteins */ +} CodType; + + /* -------------------- */ +typedef struct { /* sequence */ + /* -------------------- */ + char *name; /* sequence name */ + Int32 seqlen; /* sequence length */ + Int32 seqsiz; /* sequence buffer size */ + Int32 datsiz; /* data buffer size */ + UInt8 *data; /* data buffer */ + char *cseq; /* sequence buffer */ + StackiPtr hitpos[MAX_PATTERN]; /* stack of hit pos. */ + StackiPtr hiterr[MAX_PATTERN]; /* stack of errors */ +} Seq, *SeqPtr; + + /* -------------------- */ +typedef struct { /* pattern */ + /* -------------------- */ + int patlen; /* pattern length */ + int maxerr; /* max # of errors */ + char *cpat; /* pattern string */ + Int32 *patcode; /* encoded pattern */ + UInt32 *smat; /* S matrix */ + UInt32 omask; /* oblig. bits mask */ + Bool hasIndel; /* are indels allowed */ + Bool ok; /* is pattern ok */ +} Pattern, *PatternPtr; + +/* ----------------------------------------------- */ +/* macros */ +/* ----------------------------------------------- */ + +#ifndef NEW +#define NEW(typ) (typ*)malloc(sizeof(typ)) +#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ)) +#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ)) +#define FREE(ptr) free((void *) ptr) +#endif + +/* ----------------------------------------------- */ +/* prototypes */ +/* ----------------------------------------------- */ + + /* apat_seq.c */ + +SeqPtr FreeSequence (SeqPtr pseq); +SeqPtr NewSequence (void); +int ReadNextSequence (SeqPtr pseq); +int WriteSequence (FILE *filou , SeqPtr pseq); + + /* apat_parse.c */ + +Int32 *GetCode (CodType ctype); +int CheckPattern (Pattern *ppat); +int EncodePattern (Pattern *ppat, CodType ctype); +int ReadPattern (Pattern *ppat); +void PrintDebugPattern (Pattern *ppat); + + /* apat_search.c */ + +int CreateS (Pattern *ppat, Int32 lalpha); +Int32 ManberNoErr (Seq *pseq , Pattern *ppat, int patnum,int begin,int length); +Int32 ManberSub (Seq *pseq , Pattern *ppat, int patnum,int begin,int length); +Int32 ManberIndel (Seq *pseq , Pattern *ppat, int patnum,int begin,int length); +Int32 ManberAll (Seq *pseq , Pattern *ppat, int patnum,int begin,int length); +Int32 NwsPatAlign (Seq *pseq , Pattern *ppat, Int32 nerr , + Int32 *reslen , Int32 *reserr); + + /* apat_sys.c */ + +float UserCpuTime (int reset); +float SysCpuTime (int reset); +char *StrCpuTime (int reset); +void Erreur (char *msg , int stat); +int AccessFile (char *path, char *mode); + diff --git a/src/libapat/apat_parse.c b/src/libapat/apat_parse.c new file mode 100644 index 0000000..43cda48 --- /dev/null +++ b/src/libapat/apat_parse.c @@ -0,0 +1,369 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Mar. 92 */ +/* File: apat_parse.c */ +/* Purpose: Codage du pattern */ +/* History: */ +/* 00/07/94 : first version (stanford) */ +/* 00/11/94 : revised for DNA/PROTEIN */ +/* 30/12/94 : modified EncodePattern */ +/* for manber search */ +/* 14/05/99 : indels added */ +/* ==================================================== */ + +#include +#include +#include +#include + +#include "Gtypes.h" +#include "apat.h" + /* -------------------- */ + /* default char */ + /* encodings */ + /* -------------------- */ + +static Int32 sDftCode[] = { + +#include "CODES/dft_code.h" + +}; + /* -------------------- */ + /* char encodings */ + /* IUPAC */ + /* -------------------- */ + + /* IUPAC Proteins */ +static Int32 sProtCode[] = { + +#include "CODES/prot_code.h" + +}; + /* IUPAC Dna/Rna */ +static Int32 sDnaCode[] = { + +#include "CODES/dna_code.h" + +}; + + +/* -------------------------------------------- */ +/* internal replacement of gets */ +/* -------------------------------------------- */ +static char *sGets(char *buffer, int size) { + + char *ebuf; + + if (! fgets(buffer, size-1, stdin)) + return NULL; + + /* remove trailing line feed */ + + ebuf = buffer + strlen(buffer); + + while (--ebuf >= buffer) { + if ((*ebuf == '\n') || (*ebuf == '\r')) + *ebuf = '\000'; + else + break; + } + + return buffer; +} + +/* -------------------------------------------- */ +/* returns actual code associated to type */ +/* -------------------------------------------- */ + +Int32 *GetCode(CodType ctype) +{ + Int32 *code = sDftCode; + + switch (ctype) { + case dna : code = sDnaCode ; break; + case protein : code = sProtCode ; break; + default : code = sDftCode ; break; + } + + return code; +} + +/* -------------------------------------------- */ + +#define BAD_IF(tst) if (tst) return 0 + +int CheckPattern(Pattern *ppat) +{ + int lev; + char *pat; + + pat = ppat->cpat; + + BAD_IF (*pat == '#'); + + for (lev = 0; *pat ; pat++) + + switch (*pat) { + + case '[' : + BAD_IF (lev); + BAD_IF (*(pat+1) == ']'); + lev++; + break; + + case ']' : + lev--; + BAD_IF (lev); + break; + + case '!' : + BAD_IF (lev); + BAD_IF (! *(pat+1)); + BAD_IF (*(pat+1) == ']'); + break; + + case '#' : + BAD_IF (lev); + BAD_IF (*(pat-1) == '['); + break; + + default : + if (! isupper(*pat)) + return 0; + break; + } + + return (lev ? 0 : 1); +} + +#undef BAD_IF + + +/* -------------------------------------------- */ +static char *skipOblig(char *pat) +{ + return (*(pat+1) == '#' ? pat+1 : pat); +} + +/* -------------------------------------------- */ +static char *splitPattern(char *pat) +{ + switch (*pat) { + + case '[' : + for (; *pat; pat++) + if (*pat == ']') + return skipOblig(pat); + return NULL; + break; + + case '!' : + return splitPattern(pat+1); + break; + + } + + return skipOblig(pat); +} + +/* -------------------------------------------- */ +static Int32 valPattern(char *pat, Int32 *code) +{ + Int32 val; + + switch (*pat) { + + case '[' : + return valPattern(pat+1, code); + break; + + case '!' : + val = valPattern(pat+1, code); + return (~val & PATMASK); + break; + + default : + val = 0x0; + while (isupper(*pat)) { + val |= code[*pat - 'A']; + pat++; + } + return val; + } + + return 0x0; +} + +/* -------------------------------------------- */ +static Int32 obliBitPattern(char *pat) +{ + return (*(pat + strlen(pat) - 1) == '#' ? OBLIBIT : 0x0); +} + + +/* -------------------------------------------- */ +static int lenPattern(char *pat) +{ + int lpat; + + lpat = 0; + + while (*pat) { + + if (! (pat = splitPattern(pat))) + return 0; + + pat++; + + lpat++; + } + + return lpat; +} + +/* -------------------------------------------- */ +/* Interface */ +/* -------------------------------------------- */ + +/* -------------------------------------------- */ +/* encode un pattern */ +/* -------------------------------------------- */ +int EncodePattern(Pattern *ppat, CodType ctype) +{ + int pos, lpat; + Int32 *code; + char *pp, *pa, c; + + ppat->ok = Faux; + + code = GetCode(ctype); + + ppat->patlen = lpat = lenPattern(ppat->cpat); + + if (lpat <= 0) + return 0; + + if (! (ppat->patcode = NEWN(Int32, lpat))) + return 0; + + pa = pp = ppat->cpat; + + pos = 0; + + while (*pa) { + + pp = splitPattern(pa); + + c = *++pp; + + *pp = '\000'; + + ppat->patcode[pos++] = valPattern(pa, code) | obliBitPattern(pa); + + *pp = c; + + pa = pp; + } + + ppat->ok = Vrai; + + return lpat; +} + +/* -------------------------------------------- */ +/* remove blanks */ +/* -------------------------------------------- */ +static char *RemBlanks(char *s) +{ + char *sb, *sc; + + for (sb = sc = s ; *sb ; sb++) + if (! isspace(*sb)) + *sc++ = *sb; + + return s; +} + +/* -------------------------------------------- */ +/* count non blanks */ +/* -------------------------------------------- */ +static Int32 CountAlpha(char *s) +{ + Int32 n; + + for (n = 0 ; *s ; s++) + if (! isspace(*s)) + n++; + + return n; +} + + +/* -------------------------------------------- */ +/* lit un pattern */ +/* #mis */ +/* ligne starting with '/' are comments */ +/* -------------------------------------------- */ +int ReadPattern(Pattern *ppat) +{ + int val; + char *spac; + char buffer[BUFSIZ]; + + ppat->ok = Vrai; + + if (! sGets(buffer, sizeof(buffer))) + return 0; + + if (*buffer == '/') + return ReadPattern(ppat); + + if (! CountAlpha(buffer)) + return ReadPattern(ppat); + + for (spac = buffer ; *spac ; spac++) + if ((*spac == ' ') || (*spac == '\t')) + break; + + ppat->ok = Faux; + + if (! *spac) + return 0; + + if (sscanf(spac, "%d", &val) != 1) + return 0; + + ppat->hasIndel = (val < 0); + + ppat->maxerr = ((val >= 0) ? val : -val); + + *spac = '\000'; + + (void) RemBlanks(buffer); + + if ((ppat->cpat = NEWN(char, strlen(buffer)+1))) + strcpy(ppat->cpat, buffer); + + ppat->ok = (ppat->cpat != NULL); + + return (ppat->ok ? 1 : 0); +} + +/* -------------------------------------------- */ +/* ecrit un pattern - Debug - */ +/* -------------------------------------------- */ +void PrintDebugPattern(Pattern *ppat) +{ + int i; + + printf("Pattern : %s\n", ppat->cpat); + printf("Encoding : \n\t"); + + for (i = 0 ; i < ppat->patlen ; i++) { + printf("0x%8.8x ", ppat->patcode[i]); + if (i%4 == 3) + printf("\n\t"); + } + printf("\n"); +} + diff --git a/src/libapat/apat_search.c b/src/libapat/apat_search.c new file mode 100644 index 0000000..11353d5 --- /dev/null +++ b/src/libapat/apat_search.c @@ -0,0 +1,339 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Dec. 94 */ +/* File: apat_search.c */ +/* Purpose: recherche du pattern */ +/* algorithme de Baeza-Yates/Gonnet */ +/* Manber (agrep) */ +/* History: */ +/* 07/12/94 : first version */ +/* 28/12/94 : revised version */ +/* 14/05/99 : last revision */ +/* ==================================================== */ + +#if 0 +#ifndef THINK_C +#include +#endif +#endif + +#include +#include +#include + +#include "Gtypes.h" +#include "libstki.h" +#include "apat.h" + +#define POP PopiOut +#define PUSH PushiIn +#define TOPCURS CursiToTop +#define DOWNREAD ReadiDown + +#define KRONECK(x, msk) ((~x & msk) ? 0 : 1) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +/* -------------------------------------------- */ +/* Construction de la matrice S */ +/* -------------------------------------------- */ + +int CreateS(Pattern *ppat, Int32 lalpha) +{ + Int32 i, j, indx; + UInt32 pindx, amask, omask, *smat; + + ppat->ok = Faux; + + omask = 0x0L; + + if (! (smat = NEWN(UInt32, lalpha))) + return 0; + + for (i = 0 ; i < lalpha ; i++) + smat[i] = 0x0; + + for (i = ppat->patlen - 1, amask = 0x1L ; i >= 0 ; i--, amask <<= 1) { + + indx = ppat->patcode[i]; + + if (ppat->patcode[i] & OBLIBIT) + omask |= amask; + + for (j = 0, pindx = 0x1L ; j < lalpha ; j++, pindx <<= 1) + if (indx & pindx) + smat[j] |= amask; + } + + ppat->smat = smat; + + ppat->omask = omask; + + ppat->ok = Vrai; + + return 1; + +} + +/* -------------------------------------------- */ +/* Baeza-Yates/Manber algorithm */ +/* NoError */ +/* -------------------------------------------- */ +Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) +{ + Int32 pos; + UInt32 smask, r; + UInt8 *data; + StackiPtr *stkpos, *stkerr; + UInt32 end; + + end = begin + length; + end = (end <= pseq->seqlen) ? end:pseq->seqlen; + + + /* create local masks */ + + smask = r = 0x1L << ppat->patlen; + + /* init. scan */ + data = pseq->data + begin; + stkpos = pseq->hitpos + patnum; + stkerr = pseq->hiterr + patnum; + + /* loop on text data */ + + for (pos = begin ; pos < end ; pos++) { + + r = (r >> 1) & ppat->smat[*data++]; + + if (r & 0x1L) { + PUSH(stkpos, pos - ppat->patlen + 1); + PUSH(stkerr, 0); + } + + r |= smask; + } + + return (*stkpos)->top; /* aka # of hits */ +} + +/* -------------------------------------------- */ +/* Baeza-Yates/Manber algorithm */ +/* Substitution only */ +/* */ +/* Note : r array is stored as : */ +/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */ +/* */ +/* -------------------------------------------- */ +Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) +{ + int e, emax, found; + Int32 pos; + UInt32 smask, cmask, sindx; + UInt32 *pr, r[2 * MAX_PAT_ERR + 2]; + UInt8 *data; + StackiPtr *stkpos, *stkerr; + UInt32 end; + + end = begin + length; + end = (end <= pseq->seqlen) ? end:pseq->seqlen; + + /* create local masks */ + emax = ppat->maxerr; + + r[0] = r[1] = 0x0; + + cmask = smask = 0x1L << ppat->patlen; + + for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2) + *pr = cmask; + + cmask = ~ ppat->omask; + + /* init. scan */ + data = pseq->data + begin; + stkpos = pseq->hitpos + patnum; + stkerr = pseq->hiterr + patnum; + + /* loop on text data */ + + for (pos = begin ; pos < end ; pos++) { + + sindx = ppat->smat[*data++]; + + for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) { + + pr[2] = pr[3] | smask; + + pr[3] = ((pr[0] >> 1) & cmask) /* sub */ + | ((pr[2] >> 1) & sindx); /* ident */ + + if (pr[3] & 0x1L) { /* found */ + if (! found) { + PUSH(stkpos, pos - ppat->patlen + 1); + PUSH(stkerr, e); + } + found++; + } + } + } + + return (*stkpos)->top; /* aka # of hits */ +} + +/* -------------------------------------------- */ +/* Baeza-Yates/Manber algorithm */ +/* Substitution + Indels */ +/* */ +/* Note : r array is stored as : */ +/* 0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ... */ +/* */ +/* Warning: may return shifted pos. */ +/* */ +/* -------------------------------------------- */ +Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) +{ + int e, emax, found; + Int32 pos; + UInt32 smask, cmask, sindx; + UInt32 *pr, r[2 * MAX_PAT_ERR + 2]; + UInt8 *data; + StackiPtr *stkpos, *stkerr; + UInt32 end; + + end = begin + length; + end = (end <= pseq->seqlen) ? end:pseq->seqlen; + + /* create local masks */ + emax = ppat->maxerr; + + r[0] = r[1] = 0x0; + + cmask = smask = 0x1L << ppat->patlen; + + for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2) { + *pr = cmask; + cmask = (cmask >> 1) | smask; + } + + cmask = ~ ppat->omask; + + /* init. scan */ + data = pseq->data + begin; + stkpos = pseq->hitpos + patnum; + stkerr = pseq->hiterr + patnum; + + /* loop on text data */ + + for (pos = begin ; pos < end ; pos++) { + + sindx = ppat->smat[*data++]; + + for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) { + + pr[2] = pr[3] | smask; + + pr[3] = (( pr[0] /* ins */ + | (pr[0] >> 1) /* sub */ + | (pr[1] >> 1)) /* del */ + & cmask) + | ((pr[2] >> 1) & sindx); /* ident */ + + if (pr[3] & 0x1L) { /* found */ + if (! found) { + PUSH(stkpos, pos - ppat->patlen + 1); + PUSH(stkerr, e); + } + found++; + } + + } + } + + return (*stkpos)->top; /* aka # of hits */ +} + +/* -------------------------------------------- */ +/* Baeza-Yates/Manber algorithm */ +/* API call to previous functions */ +/* -------------------------------------------- */ +Int32 ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length) +{ + if (ppat->maxerr == 0) + return ManberNoErr(pseq, ppat, patnum, begin, length); + else if (ppat->hasIndel) + return ManberIndel(pseq, ppat, patnum, begin, length); + else + return ManberSub(pseq, ppat, patnum, begin, length); +} + + +/* -------------------------------------------- */ +/* Alignement NWS */ +/* pour edition des hits */ +/* (avec substitution obligatoire aux bords) */ +/* -------------------------------------------- */ + +Int32 NwsPatAlign(pseq, ppat, nerr, reslen, reserr) + Seq *pseq; + Pattern *ppat; + Int32 nerr, *reslen, *reserr; +{ + UInt8 *sseq, *px; + Int32 i, j, lseq, lpat, npos, dindel, dsub, + *pc, *pi, *pd, *ps; + UInt32 amask; + + static Int32 sTab[(MAX_PAT_LEN+MAX_PAT_ERR+1) * (MAX_PAT_LEN+1)]; + + lseq = pseq->seqlen; + + pc = sTab; /* |----|----| --> i */ + pi = pc - 1; /* | ps | pd | | */ + pd = pi - lseq; /* |----|----| | */ + ps = pd - 1; /* | pi | pc | v j */ + /* |---------| */ + + lseq = pseq->seqlen; + lpat = ppat->patlen; + + sseq = pseq->data - 1; + + amask = ONEMASK >> lpat; + + for (j = 0 ; j <= lpat ; j++) { + + for (i = 0 , px = sseq ; i <= lseq ; i++, px++) { + + if (i && j) { + dindel = MIN(*pi, *pd) + 1; + dsub = *ps + KRONECK(ppat->smat[*px], amask); + *pc = MIN(dindel, dsub); + } + else if (i) /* j == 0 */ + *pc = *pi + 1; + else if (j) /* i == 0 */ + *pc = *pd + 1; + else /* root */ + *pc = 0; + + pc++; + pi++; + pd++; + ps++; + } + + amask <<= 1; + } + + pc--; + + for (i = lseq, npos = 0 ; i >= 0 ; i--, pc--) { + if (*pc <= nerr) { + *reslen++ = i; + *reserr++ = *pc; + npos++; + } + } + + return npos; +} diff --git a/src/libapat/libstki.c b/src/libapat/libstki.c new file mode 100644 index 0000000..1ca9868 --- /dev/null +++ b/src/libapat/libstki.c @@ -0,0 +1,379 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Mar. 92 */ +/* File: libstki.c */ +/* Purpose: A library to deal with 'stacks' of */ +/* integers */ +/* Note: 'stacks' are dynamic (i.e. size is */ +/* automatically readjusted when needed) */ +/* History: */ +/* 00/03/92 : first draft */ +/* 15/08/93 : revised version */ +/* 14/05/99 : last revision */ +/* ==================================================== */ + +#include +#include +#include + +#include "Gtypes.h" +#include "libstki.h" + + +/* ============================ */ +/* Constantes et Macros locales */ +/* ============================ */ + +#define ExpandStack(stkh) ResizeStacki((stkh), (*stkh)->size << 1) + +#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1) + + +static Int16 sStkiLastError = kStkiNoErr; + +/* -------------------------------------------- */ +/* gestion des erreurs */ +/* get/reset erreur flag */ +/* */ +/* @function: StkiError */ +/* -------------------------------------------- */ + +Int16 StkiError(Bool reset) +{ + Int16 err; + + err = sStkiLastError; + + if (reset) + sStkiLastError = kStkiNoErr; + + return err; + +} /* end of StkiError */ + +/* -------------------------------------------- */ +/* creation d'un stack */ +/* */ +/* @function: NewStacki */ +/* -------------------------------------------- */ + +StackiPtr NewStacki(Int32 size) +{ + StackiPtr stki; + + if (! (stki = NEW(Stacki))) + return NULL; + + stki->size = size; + stki->top = 0; + stki->cursor = 0; + + if ( ! (stki->val = NEWN(Int32, size))) { + sStkiLastError = kStkiMemErr; + return FreeStacki(stki); + } + + return stki; + +} /* end of NewStacki */ + + +/* -------------------------------------------- */ +/* liberation d'un stack */ +/* */ +/* @function: FreeStacki */ +/* -------------------------------------------- */ + +StackiPtr FreeStacki(StackiPtr stki) +{ + if (stki) { + if (stki->val) + FREE(stki->val); + FREE(stki); + } + + return NULL; + +} /* end of FreeStacki */ + +/* -------------------------------------------- */ +/* creation d'un vecteur de stacks */ +/* */ +/* @function: NewStackiVector */ +/* -------------------------------------------- */ + +StackiHdle NewStackiVector(Int32 vectSize, Int32 stackSize) +{ + Int32 i; + StackiHdle stkh; + + if (! (stkh = NEWN(StackiPtr, vectSize))) { + sStkiLastError = kStkiMemErr; + return NULL; + } + + for (i = 0 ; i < vectSize ; i++) + if (! (stkh[i] = NewStacki(stackSize))) + return FreeStackiVector(stkh, i); + + return stkh; + +} /* end of NewStackiVector */ + + +/* -------------------------------------------- */ +/* liberation d'un vecteur de stacks */ +/* */ +/* @function: FreeStackiVector */ +/* -------------------------------------------- */ + +StackiHdle FreeStackiVector(StackiHdle stkh, Int32 vectSize) +{ + Int32 i; + + if (stkh) { + for (i = 0 ; i < vectSize ; i++) + (void) FreeStacki(stkh[i]); + FREE(stkh); + } + + return NULL; + +} /* end of FreeStackiVector */ + +/* -------------------------------------------- */ +/* resize d'un stack */ +/* */ +/* @function: ResizeStacki */ +/* -------------------------------------------- */ + +Int32 ResizeStacki(StackiHdle stkh, Int32 size) +{ + Int32 resize = 0; /* assume error */ + Int32 *val; + + if ((val = REALLOC(Int32, (*stkh)->val, size))) { + (*stkh)->size = resize = size; + (*stkh)->val = val; + } + + if (! resize) + sStkiLastError = kStkiMemErr; + + return resize; + +} /* end of ResizeStacki */ + +/* -------------------------------------------- */ +/* empilage(/lement) */ +/* */ +/* @function: PushiIn */ +/* -------------------------------------------- */ + +Bool PushiIn(StackiHdle stkh, Int32 val) +{ + if (((*stkh)->top >= (*stkh)->size) && (! ExpandStack(stkh))) + return Faux; + + (*stkh)->val[((*stkh)->top)++] = val; + + return Vrai; + +} /* end of PushiIn */ + +/* -------------------------------------------- */ +/* depilage(/lement) */ +/* */ +/* @function: PopiOut */ +/* -------------------------------------------- */ + +Bool PopiOut(StackiHdle stkh, Int32 *val) +{ + if ((*stkh)->top <= 0) + return Faux; + + *val = (*stkh)->val[--((*stkh)->top)]; + + if ( ((*stkh)->top < ((*stkh)->size >> 1)) + && ((*stkh)->top > kMinStackiSize)) + + (void) ShrinkStack(stkh); + + return Vrai; + +} /* end of PopiOut */ + +/* -------------------------------------------- */ +/* lecture descendante */ +/* */ +/* @function: ReadiDown */ +/* -------------------------------------------- */ + +Bool ReadiDown(StackiPtr stki, Int32 *val) +{ + if (stki->cursor <= 0) + return Faux; + + *val = stki->val[--(stki->cursor)]; + + return Vrai; + +} /* end of ReadiDown */ + +/* -------------------------------------------- */ +/* lecture ascendante */ +/* */ +/* @function: ReadiUp */ +/* -------------------------------------------- */ + +Bool ReadiUp(StackiPtr stki, Int32 *val) +{ + if (stki->cursor >= stki->top) + return Faux; + + *val = stki->val[(stki->cursor)++]; + + return Vrai; + +} /* end of ReadiUp */ + +/* -------------------------------------------- */ +/* remontee/descente du curseur */ +/* */ +/* @function: CursiToTop */ +/* @function: CursiToBottom */ +/* -------------------------------------------- */ + +void CursiToTop(StackiPtr stki) +{ + stki->cursor = stki->top; + +} /* end of CursiToTop */ + +void CursiToBottom(stki) + StackiPtr stki; +{ + stki->cursor = 0; + +} /* end of CursiToBottom */ + +/* -------------------------------------------- */ +/* echange des valeurs cursor <-> (top - 1) */ +/* */ +/* @function: CursiSwap */ +/* -------------------------------------------- */ + +void CursiSwap(StackiPtr stki) +{ + Int32 tmp; + + if ((stki->top <= 0) || (stki->cursor < 0)) + return; + + tmp = stki->val[stki->cursor]; + stki->val[stki->cursor] = stki->val[stki->top - 1]; + stki->val[stki->top - 1] = tmp; + +} /* end of CursiSwap */ + +/* -------------------------------------------- */ +/* Recherche d'une valeur en stack a partir du */ +/* curseur courant en descendant. */ +/* on laisse le curseur a l'endroit trouve */ +/* */ +/* @function: SearchDownStacki */ +/* -------------------------------------------- */ + +Bool SearchDownStacki(StackiPtr stki, Int32 sval) +{ + Int32 val; + Bool more; + + while ((more = ReadiDown(stki, &val))) + if (val == sval) + break; + + return more; + +} /* end of SearchDownStacki */ + +/* -------------------------------------------- */ +/* Recherche dichotomique d'une valeur en stack */ +/* le stack est suppose trie par valeurs */ +/* croissantes. */ +/* on place le curseur a l'endroit trouve */ +/* */ +/* @function: BinSearchStacki */ +/* -------------------------------------------- */ + +Bool BinSearchStacki(StackiPtr stki, Int32 sval) +{ + Int32 midd, low, high, span; + + low = 0; + high = stki->top - 1; + + while (high >= low) { + + midd = (high + low) / 2; + + span = stki->val[midd] - sval; + + if (span == 0) { + stki->cursor = midd; + return Vrai; + } + + if (span > 0) + high = midd - 1; + else + low = midd + 1; + } + + return Faux; + +} /* end of BinSearchStacki */ + +/* -------------------------------------------- */ +/* teste l'egalite *physique* de deux stacks */ +/* */ +/* @function: SameStacki */ +/* -------------------------------------------- */ + +Bool SameStacki(StackiPtr stki1, StackiPtr stki2) +{ + if (stki1->top != stki2->top) + return Faux; + + return ((memcmp(stki1->val, stki2->val, + stki1->top * sizeof(Int32)) == 0) ? Vrai : Faux); + +} /* end of SameStacki */ + + +/* -------------------------------------------- */ +/* inverse l'ordre des elements dans un stack */ +/* */ +/* @function: ReverseStacki */ +/* -------------------------------------------- */ + +Bool ReverseStacki(StackiPtr stki) +{ + Int32 *t, *b, swp; + + if (stki->top <= 0) + return Faux; + + b = stki->val; + t = b + stki->top - 1; + + while (t > b) { + swp = *t; + *t-- = *b; + *b++ = swp; + } + + return Vrai; + +} /* end of ReverseStacki */ + diff --git a/src/libapat/libstki.h b/src/libapat/libstki.h new file mode 100644 index 0000000..6331ae7 --- /dev/null +++ b/src/libapat/libstki.h @@ -0,0 +1,87 @@ +/* ==================================================== */ +/* Copyright (c) Atelier de BioInformatique */ +/* Mar. 92 */ +/* File: libstki.h */ +/* Purpose: library of dynamic stacks holding */ +/* integer values */ +/* History: */ +/* 00/03/92 : first draft */ +/* 07/07/93 : complete revision */ +/* 10/03/94 : added xxxVector funcs */ +/* 14/05/99 : last revision */ +/* ==================================================== */ + +#ifndef _H_Gtypes +#include "Gtypes.h" +#endif + +#define _H_libstki + +/* ==================================================== */ +/* Constantes de dimensionnement */ +/* ==================================================== */ + +#ifndef kMinStackiSize +#define kMinStackiSize 2 /* taille mini stack */ +#endif + + +#define kStkiNoErr 0 /* ok */ +#define kStkiMemErr 1 /* not enough memory */ + +#define kStkiReset Vrai +#define kStkiGet Faux + +/* ==================================================== */ +/* Macros standards */ +/* ==================================================== */ + +#ifndef NEW +#define NEW(typ) (typ*)malloc(sizeof(typ)) +#define NEWN(typ, dim) (typ*)malloc((unsigned long)(dim) * sizeof(typ)) +#define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ)) +#define FREE(ptr) free((Ptr) ptr) +#endif + + +/* ==================================================== */ +/* Types & Structures de donnees */ +/* ==================================================== */ + + /* -------------------- */ + /* structure : pile */ + /* -------------------- */ +typedef struct Stacki { + /* ---------------------*/ + Int32 size; /* stack size */ + Int32 top; /* current free pos. */ + Int32 cursor; /* current cursor */ + Int32 *val; /* values */ + /* ---------------------*/ +} Stacki, *StackiPtr, **StackiHdle; + + + +/* ==================================================== */ +/* Prototypes (generated by mproto) */ +/* ==================================================== */ + + /* libstki.c */ + +Int16 StkiError (Bool reset ); +StackiPtr NewStacki (Int32 size ); +StackiPtr FreeStacki (StackiPtr stki ); +StackiHdle NewStackiVector (Int32 vectSize, Int32 stackSize ); +StackiHdle FreeStackiVector (StackiHdle stkh, Int32 vectSize ); +Int32 ResizeStacki (StackiHdle stkh , Int32 size ); +Bool PushiIn (StackiHdle stkh , Int32 val ); +Bool PopiOut (StackiHdle stkh , Int32 *val ); +Bool ReadiDown (StackiPtr stki , Int32 *val ); +Bool ReadiUp (StackiPtr stki , Int32 *val ); +void CursiToTop (StackiPtr stki ); +void CursiToBottom (StackiPtr stki ); +void CursiSwap (StackiPtr stki ); +Bool SearchDownStacki (StackiPtr stki , Int32 sval ); +Bool BinSearchStacki (StackiPtr stki , Int32 sval ); +Bool SameStacki (StackiPtr stki1 , StackiPtr stki2 ); +Bool ReverseStacki (StackiPtr stki ); diff --git a/src/libecoPCR/ecoError.c b/src/libecoPCR/ecoError.c new file mode 100644 index 0000000..a52b9e9 --- /dev/null +++ b/src/libecoPCR/ecoError.c @@ -0,0 +1,18 @@ +#include "ecoPCR.h" +#include +#include + + +void ecoError(int32_t error, + const char* message, + const char * filename, + int linenumber) +{ + fprintf(stderr,"Error %d in file %s line %d : %s\n", + error, + filename, + linenumber, + message); + + abort(); +} diff --git a/src/libecoPCR/ecoIOUtils.c b/src/libecoPCR/ecoIOUtils.c new file mode 100644 index 0000000..0f8cebc --- /dev/null +++ b/src/libecoPCR/ecoIOUtils.c @@ -0,0 +1,119 @@ +#include "ecoPCR.h" +#include +#include + +#define SWAPINT32(x) ((((x) << 24) & 0xFF000000) | (((x) << 8) & 0xFF0000) | \ + (((x) >> 8) & 0xFF00) | (((x) >> 24) & 0xFF)) + + +int32_t is_big_endian() +{ + int32_t i=1; + + return (int32_t)((char*)&i)[0]; +} + + + + + + + +int32_t swap_int32_t(int32_t i) +{ + return SWAPINT32(i); +} + + + + + + + +void *read_ecorecord(FILE *f,int32_t *recordSize) +{ + static void *buffer =NULL; + int32_t buffersize=0; + int32_t read; + + if (!recordSize) + ECOERROR(ECO_ASSERT_ERROR, + "recordSize cannot be NULL"); + + read = fread(recordSize, + 1, + sizeof(int32_t), + f); + + if (feof(f)) + return NULL; + + if (read != sizeof(int32_t)) + ECOERROR(ECO_IO_ERROR,"Reading record size error"); + + if (is_big_endian()) + *recordSize=swap_int32_t(*recordSize); + + if (buffersize < *recordSize) + { + if (buffer) + buffer = ECOREALLOC(buffer,*recordSize, + "Increase size of record buffer"); + else + buffer = ECOMALLOC(*recordSize, + "Allocate record buffer"); + } + + read = fread(buffer, + 1, + *recordSize, + f); + + if (read != *recordSize) + ECOERROR(ECO_IO_ERROR,"Reading record data error"); + + return buffer; +}; + + + + + + + + + +FILE *open_ecorecorddb(const char *filename, + int32_t *sequencecount, + int32_t abort_on_open_error) +{ + FILE *f; + int32_t read; + + f = fopen(filename,"rb"); + + if (!f) + { + if (abort_on_open_error) + ECOERROR(ECO_IO_ERROR,"Cannot open file"); + else + { + *sequencecount=0; + return NULL; + } + } + + read = fread(sequencecount, + 1, + sizeof(int32_t), + f); + + if (read != sizeof(int32_t)) + ECOERROR(ECO_IO_ERROR,"Reading record size error"); + + if (is_big_endian()) + *sequencecount=swap_int32_t(*sequencecount); + + return f; +} + diff --git a/src/libecoPCR/ecoMalloc.c b/src/libecoPCR/ecoMalloc.c new file mode 100644 index 0000000..0ea8d3b --- /dev/null +++ b/src/libecoPCR/ecoMalloc.c @@ -0,0 +1,79 @@ +#include "ecoPCR.h" +#include + +static int eco_log_malloc = 0; + +void eco_trace_memory_allocation() +{ + eco_log_malloc=1; +} + +void eco_untrace_memory_allocation() +{ + eco_log_malloc=0; +} + + +void *eco_malloc(int32_t chunksize, + const char *error_message, + const char *filename, + int32_t line) +{ + void * chunk; + + chunk = calloc(1,chunksize); + + if (!chunk) + ecoError(ECO_MEM_ERROR,error_message,filename,line); + + if (eco_log_malloc) + fprintf(stderr, + "Memory segment located at %p of size %d is allocated (file : %s [%d])", + chunk, + chunksize, + filename, + line); + + return chunk; +} + +void *eco_realloc(void *chunk, + int32_t newsize, + const char *error_message, + const char *filename, + int32_t line) +{ + void *newchunk; + + newchunk = realloc(chunk,newsize); + + if (!newchunk) + ecoError(ECO_MEM_ERROR,error_message,filename,line); + + if (eco_log_malloc) + fprintf(stderr, + "Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])", + chunk, + newchunk, + newsize, + filename, + line); + + return newchunk; +} + +void eco_free(void *chunk, + const char *error_message, + const char *filename, + int32_t line) +{ + free(chunk); + + if (eco_log_malloc) + fprintf(stderr, + "Memory segment %p is released => %s (file : %s [%d])", + chunk, + error_message, + filename, + line); +} diff --git a/src/libecoPCR/ecoPCR.h b/src/libecoPCR/ecoPCR.h new file mode 100644 index 0000000..424f339 --- /dev/null +++ b/src/libecoPCR/ecoPCR.h @@ -0,0 +1,229 @@ +#ifndef ECOPCR_H_ +#define ECOPCR_H_ + +#include +#include + +#ifndef H_apat +#include "../libapat/apat.h" +#endif + +/***************************************************** + * + * Data type declarations + * + *****************************************************/ + +/* + * + * Sequence types + * + */ + +typedef struct { + + int32_t taxid; + char AC[20]; + int32_t DE_length; + int32_t SQ_length; + int32_t CSQ_length; + + char data[1]; + +} ecoseqformat_t; + +typedef struct { + int32_t taxid; + int32_t SQ_length; + char *AC; + char *DE; + char *SQ; +} ecoseq_t; + +/* + * + * Taxonomy taxon types + * + */ + + +typedef struct { + int32_t taxid; + int32_t rank; + int32_t parent; + int32_t namelength; + char name[1]; + +} ecotxformat_t; + +typedef struct { + int32_t taxid; + int32_t rank; + int32_t parent; + char *name; +} ecotx_t; + +typedef struct { + int32_t count; + ecotx_t taxon[1]; +} ecotxidx_t; + + +/* + * + * Taxonomy rank types + * + */ + +typedef struct { + int32_t count; + char* label[1]; +} ecorankidx_t; + +typedef struct { + ecorankidx_t *ranks; + ecotxidx_t *taxons; +} ecotaxonomy_t; + + +/***************************************************** + * + * Function declarations + * + *****************************************************/ + +/* + * + * Low level system functions + * + */ + +int32_t is_big_endian(); +int32_t swap_int32_t(int32_t); + +void *eco_malloc(int32_t chunksize, + const char *error_message, + const char *filename, + int32_t line); + + +void *eco_realloc(void *chunk, + int32_t chunksize, + const char *error_message, + const char *filename, + int32_t line); + +void eco_free(void *chunk, + const char *error_message, + const char *filename, + int32_t line); + +void eco_trace_memory_allocation(); +void eco_untrace_memory_allocation(); + +#define ECOMALLOC(size,error_message) \ + eco_malloc((size),(error_message),__FILE__,__LINE__) + +#define ECOREALLOC(chunk,size,error_message) \ + eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__) + +#define ECOFREE(chunk,error_message) \ + eco_free((chunk),(error_message),__FILE__,__LINE__) + + + + +/* + * + * Error managment + * + */ + + +void ecoError(int32_t,const char*,const char *,int); + +#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__) + +#define ECO_IO_ERROR (1) +#define ECO_MEM_ERROR (2) +#define ECO_ASSERT_ERROR (3) +#define ECO_NOTFOUND_ERROR (4) + + +/* + * + * Low level Disk access functions + * + */ + +FILE *open_ecorecorddb(const char *filename, + int32_t *sequencecount, + int32_t abort_on_open_error); + +void *read_ecorecord(FILE *,int32_t *recordSize); + + + +/* + * Read function in internal binary format + */ + +FILE *open_ecoseqdb(const char *filename, + int32_t *sequencecount); + +ecoseq_t *readnext_ecoseq(FILE *); + +ecorankidx_t *read_rankidx(const char *filename); + + + /** + * Read taxonomy data as formated by the ecoPCRFormat.py script. + * + * This function is normaly uses internaly by the read_taxonomy + * function and should not be called directly. + * + * @arg filename path to the *.tdx file of the reformated db + * + * @return pointer to a taxonomy index structure + */ + +ecotxidx_t *read_taxonomyidx(const char *filename); + +ecotaxonomy_t *read_taxonomy(const char *prefix); + + +ecoseq_t *ecoseq_iterator(const char *prefix); + + + +ecoseq_t *new_ecoseq(); +int32_t delete_ecoseq(ecoseq_t *); +ecoseq_t *new_ecoseq_with_data( char *AC, + char *DE, + char *SQ, + int32_t taxid + ); + + +int32_t delete_taxon(ecotx_t *taxon); +int32_t delete_taxonomy(ecotxidx_t *index); + + +int32_t rank_index(const char* label,ecorankidx_t* ranks); + +int32_t delete_apatseq(SeqPtr pseq); +PatternPtr buildPattern(const char *pat, int32_t error_max); +PatternPtr complementPattern(PatternPtr pat); + +SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out); + +char *ecoComplementPattern(char *nucAcSeq); +char *ecoComplementSequence(char *nucAcSeq); +char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end); + +ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy); +ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy); + +#endif /*ECOPCR_H_*/ diff --git a/src/libecoPCR/ecoapat.c b/src/libecoPCR/ecoapat.c new file mode 100644 index 0000000..05d8bf4 --- /dev/null +++ b/src/libecoPCR/ecoapat.c @@ -0,0 +1,194 @@ +#include "../libapat/libstki.h" +#include "../libapat/apat.h" + +#include "ecoPCR.h" + +#include + +static void EncodeSequence(SeqPtr seq); +static void UpperSequence(char *seq); + +/* -------------------------------------------- */ +/* uppercase sequence */ +/* -------------------------------------------- */ + +#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z')) +#define TO_UPPER(c) ((c) - 'a' + 'A') + +void UpperSequence(char *seq) +{ + char *cseq; + + for (cseq = seq ; *cseq ; cseq++) + if (IS_LOWER(*cseq)) + *cseq = TO_UPPER(*cseq); +} + +#undef IS_LOWER +#undef TO_UPPER + + + + +/* -------------------------------------------- */ +/* encode sequence */ +/* IS_UPPER is slightly faster than isupper */ +/* -------------------------------------------- */ + +#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z')) + + + +void EncodeSequence(SeqPtr seq) +{ + int i; + UInt8 *data; + char *cseq; + + data = seq->data; + cseq = seq->cseq; + + while (*cseq) { + + *data++ = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0); + + cseq++; + } + + for (i = 0 ; i < MAX_PATTERN ; i++) + seq->hitpos[i]->top = seq->hiterr[i]->top = 0; + +} + +#undef IS_UPPER + + +SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out) +{ + int i; + + if (!out) + { + out = ECOMALLOC(sizeof(Seq), + "Error in Allocation of a new Seq structure"); + + for (i = 0 ; i < MAX_PATTERN ; i++) + { + + if (! (out->hitpos[i] = NewStacki(kMinStackiSize))) + ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation"); + + if (! (out->hiterr[i] = NewStacki(kMinStackiSize))) + ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation"); + } + } + + out->name = in->AC; + out->seqsiz = out->seqlen = in->SQ_length; + + if (!out->data) + { + out->data = ECOMALLOC(out->seqlen *sizeof(UInt8), + "Error in Allocation of a new Seq data member"); + out->datsiz= out->seqlen; + } + else if (out->seqlen >= out->datsiz) + { + out->data = ECOREALLOC(out->data,out->seqlen, + "Error during Seq data buffer realloc"); + out->datsiz= out->seqlen; + } + + out->cseq = in->SQ; + + EncodeSequence(out); + + return out; +} + +int32_t delete_apatseq(SeqPtr pseq) +{ + int i; + + if (pseq) { + + if (pseq->data) + ECOFREE(pseq->data,"Freeing sequence data buffer"); + + for (i = 0 ; i < MAX_PATTERN ; i++) { + if (pseq->hitpos[i]) FreeStacki(pseq->hitpos[i]); + if (pseq->hiterr[i]) FreeStacki(pseq->hiterr[i]); + } + + ECOFREE(pseq,"Freeing apat sequence structure"); + + return 0; + } + + return 1; +} + +PatternPtr buildPattern(const char *pat, int32_t error_max) +{ + PatternPtr pattern; + int32_t patlen; + + pattern = ECOMALLOC(sizeof(Pattern), + "Error in pattern allocation"); + + pattern->ok = Vrai; + pattern->hasIndel= Faux; + pattern->maxerr = error_max; + patlen = strlen(pat); + + pattern->cpat = ECOMALLOC(sizeof(char)*patlen+1, + "Error in sequence pattern allocation"); + + strncpy(pattern->cpat,pat,patlen); + pattern->cpat[patlen]=0; + UpperSequence(pattern->cpat); + + if (!CheckPattern(pattern)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking"); + + if (! EncodePattern(pattern, dna)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding"); + + if (! CreateS(pattern, ALPHA_LEN)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling"); + + return pattern; + +} + +PatternPtr complementPattern(PatternPtr pat) +{ + PatternPtr pattern; + + pattern = ECOMALLOC(sizeof(Pattern), + "Error in pattern allocation"); + + pattern->ok = Vrai; + pattern->hasIndel= pat->hasIndel; + pattern->maxerr = pat->maxerr; + pattern->patlen = pat->patlen; + + pattern->cpat = ECOMALLOC(sizeof(char)*(strlen(pat->cpat)+1), + "Error in sequence pattern allocation"); + + strcpy(pattern->cpat,pat->cpat); + + ecoComplementPattern(pattern->cpat); + + if (!CheckPattern(pattern)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking"); + + if (! EncodePattern(pattern, dna)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding"); + + if (! CreateS(pattern, ALPHA_LEN)) + ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling"); + + return pattern; + +} \ No newline at end of file diff --git a/src/libecoPCR/ecodna.c b/src/libecoPCR/ecodna.c new file mode 100644 index 0000000..8fc420e --- /dev/null +++ b/src/libecoPCR/ecodna.c @@ -0,0 +1,131 @@ +#include +#include "ecoPCR.h" + +/* + * @doc: DNA alphabet (IUPAC) + */ +#define LX_BIO_DNA_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]" + +/* + * @doc: complementary DNA alphabet (IUPAC) + */ +#define LX_BIO_CDNA_ALPHA "TVGHEFCDIJMLKNOPQYSAABWXRZ#!][" + + +static char sNuc[] = LX_BIO_DNA_ALPHA; +static char sAnuc[] = LX_BIO_CDNA_ALPHA; + +static char LXBioBaseComplement(char nucAc); +static char *LXBioSeqComplement(char *nucAcSeq); +static char *reverseSequence(char *str,char isPattern); + + +/* ---------------------------- */ + +char LXBioBaseComplement(char nucAc) +{ + char *c; + + if ((c = strchr(sNuc, nucAc))) + return sAnuc[(c - sNuc)]; + else + return nucAc; +} + +/* ---------------------------- */ + +char *LXBioSeqComplement(char *nucAcSeq) +{ + char *s; + + for (s = nucAcSeq ; *s ; s++) + *s = LXBioBaseComplement(*s); + + return nucAcSeq; +} + + +char *reverseSequence(char *str,char isPattern) +{ + char *sb, *se, c; + + if (! str) + return str; + + sb = str; + se = str + strlen(str) - 1; + + while(sb <= se) { + c = *sb; + *sb++ = *se; + *se-- = c; + } + + sb = str; + se = str + strlen(str) - 1; + + if (isPattern) + for (;sb < se; sb++) + { + if (*sb=='#') + { + if (((se - sb) > 2) && (*(sb+2)=='!')) + { + *sb='!'; + sb+=2; + *sb='#'; + } + else + { + *sb=*(sb+1); + sb++; + *sb='#'; + } + } + else if (*sb=='!') + { + *sb=*(sb-1); + *(sb-1)='!'; + } + } + + return str; +} + +char *ecoComplementPattern(char *nucAcSeq) +{ + return reverseSequence(LXBioSeqComplement(nucAcSeq),1); +} + +char *ecoComplementSequence(char *nucAcSeq) +{ + return reverseSequence(LXBioSeqComplement(nucAcSeq),0); +} + + +char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end) +{ + static char *buffer = NULL; + static int32_t buffSize= 0; + int32_t length; + + length = end - begin; + + if (length >= buffSize) + { + buffSize = length+1; + if (buffer) + buffer=ECOREALLOC(buffer,buffSize, + "Error in reallocating sub sequence buffer"); + else + buffer=ECOMALLOC(buffSize, + "Error in allocating sub sequence buffer"); + + } + + strncpy(buffer,nucAcSeq + begin,length); + buffer[length]=0; + + return buffer; +} + diff --git a/src/libecoPCR/ecorank.c b/src/libecoPCR/ecorank.c new file mode 100644 index 0000000..4796088 --- /dev/null +++ b/src/libecoPCR/ecorank.c @@ -0,0 +1,52 @@ +#include "ecoPCR.h" +#include +#include + +static int compareRankLabel(const void *label1, const void *label2); + +ecorankidx_t *read_rankidx(const char *filename) +{ + int32_t count; + FILE *f; + ecorankidx_t *index; + int32_t i; + int32_t rs; + char *buffer; + + f = open_ecorecorddb(filename,&count,1); + + index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1), + "Allocate rank index"); + + index->count=count; + + for (i=0; i < count; i++) + { + buffer = read_ecorecord(f,&rs); + index->label[i]=(char*) ECOMALLOC(rs+1, + "Allocate rank label"); + strncpy(index->label[i],buffer,rs); + } + + return index; +} + +int32_t rank_index(const char* label,ecorankidx_t* ranks) +{ + char **rep; + + rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel); + + if (rep) + return rep-ranks->label; + else + ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found"); + + return -1; +} + + +int compareRankLabel(const void *label1, const void *label2) +{ + return strcmp((const char*)label1,*(const char**)label2); +} diff --git a/src/libecoPCR/ecoseq.c b/src/libecoPCR/ecoseq.c new file mode 100644 index 0000000..2b5b56a --- /dev/null +++ b/src/libecoPCR/ecoseq.c @@ -0,0 +1,205 @@ +#include "ecoPCR.h" +#include +#include +#include +#include +#include + +static FILE *open_seqfile(const char *prefix,int32_t index); + + +ecoseq_t *new_ecoseq() +{ + void *tmp; + + tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure"); + + return tmp; +} + +int32_t delete_ecoseq(ecoseq_t * seq) +{ + + if (seq) + { + if (seq->AC) + ECOFREE(seq->AC,"Free sequence AC"); + + if (seq->DE) + ECOFREE(seq->DE,"Free sequence DE"); + + if (seq->SQ) + ECOFREE(seq->SQ,"Free sequence SQ"); + + ECOFREE(seq,"Free sequence structure"); + + return 0; + + } + + return 1; +} + +ecoseq_t *new_ecoseq_with_data( char *AC, + char *DE, + char *SQ, + int32_t taxid_idx + ) +{ + ecoseq_t *tmp; + int32_t lstr; + tmp = new_ecoseq(); + + tmp->taxid=taxid_idx; + + if (AC) + { + lstr =strlen(AC); + tmp->AC=ECOMALLOC((lstr+1) * sizeof(char), + "Allocate sequence accession"); + strcpy(tmp->AC,AC); + } + + if (DE) + { + lstr =strlen(DE); + tmp->DE=ECOMALLOC((lstr+1) * sizeof(char), + "Allocate sequence definition"); + strcpy(tmp->DE,DE); + } + + if (SQ) + { + lstr =strlen(SQ); + tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char), + "Allocate sequence data"); + strcpy(tmp->SQ,SQ); + } + return tmp; + +} + + +FILE *open_ecoseqdb(const char *filename, + int32_t *sequencecount) +{ + return open_ecorecorddb(filename,sequencecount,1); +} + +ecoseq_t *readnext_ecoseq(FILE *f) +{ + char *compressed=NULL; + + ecoseqformat_t *raw; + ecoseq_t *seq; + int32_t comp_status; + unsigned long int seqlength; + int32_t rs; + + raw = read_ecorecord(f,&rs); + + if (!raw) + return NULL; + + if (is_big_endian()) + { + raw->CSQ_length = swap_int32_t(raw->CSQ_length); + raw->DE_length = swap_int32_t(raw->DE_length); + raw->SQ_length = swap_int32_t(raw->SQ_length); + raw->taxid = swap_int32_t(raw->taxid); + } + + seq = new_ecoseq(); + + seq->taxid = raw->taxid; + + seq->AC = ECOMALLOC(strlen(raw->AC) +1, + "Allocate Sequence Accesion number"); + strncpy(seq->AC,raw->AC,strlen(raw->AC)); + + + seq->DE = ECOMALLOC(raw->DE_length+1, + "Allocate Sequence definition"); + strncpy(seq->DE,raw->data,raw->DE_length); + + seqlength = seq->SQ_length = raw->SQ_length; + + compressed = raw->data + raw->DE_length; + + seq->SQ = ECOMALLOC(seqlength+1, + "Allocate sequence buffer"); + + comp_status = uncompress((unsigned char*)seq->SQ, + &seqlength, + (unsigned char*)compressed, + raw->CSQ_length); + + if (comp_status != Z_OK) + ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data"); + + return seq; +} + +FILE *open_seqfile(const char *prefix,int32_t index) +{ + char filename_buffer[1024]; + int32_t filename_length; + FILE *input; + int32_t seqcount; + + filename_length = snprintf(filename_buffer, + 1023, + "%s_%03d.sdx", + prefix, + index); + + if (filename_length >= 1024) + ECOERROR(ECO_ASSERT_ERROR,"file name is too long"); + + filename_buffer[filename_length]=0; + + input=open_ecorecorddb(filename_buffer,&seqcount,0); + + if (input) + fprintf(stderr,"Reading file %s containing %d sequences...\n", + filename_buffer, + seqcount); + + return input; +} + +ecoseq_t *ecoseq_iterator(const char *prefix) +{ + static FILE *current_seq_file= NULL; + static int32_t current_file_idx = 1; + ecoseq_t *seq; + + if (prefix) + { + current_file_idx = 1; + + if (current_seq_file) + fclose(current_seq_file); + + current_seq_file = open_seqfile(prefix, + current_file_idx); + + if (!current_seq_file) + return NULL; + + } + + seq = readnext_ecoseq(current_seq_file); + + if (!seq && feof(current_seq_file)) + { + current_file_idx++; + fclose(current_seq_file); + current_seq_file = open_seqfile(prefix, + current_file_idx); + if (current_seq_file) + seq = readnext_ecoseq(current_seq_file); + } + + return seq; +} \ No newline at end of file diff --git a/src/libecoPCR/ecotax.c b/src/libecoPCR/ecotax.c new file mode 100644 index 0000000..2d3c8d8 --- /dev/null +++ b/src/libecoPCR/ecotax.c @@ -0,0 +1,234 @@ +#include "ecoPCR.h" +#include +#include +#include + +static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon); + + +ecotxidx_t *read_taxonomyidx(const char *filename) +{ + int32_t count; + FILE *f; + ecotxidx_t *index; + int32_t i; + + f = open_ecorecorddb(filename,&count,1); + + index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1), + "Allocate taxonomy"); + + index->count=count; + + for (i=0; i < count; i++) + readnext_ecotaxon(f,&(index->taxon[i])); + + return index; +} + +int32_t delete_taxonomy(ecotxidx_t *index) +{ + int32_t i; + + if (index) + { + for (i=0; i< index->count; i++) + if (index->taxon[i].name) + ECOFREE(index->taxon[i].name,"Free scientific name"); + + ECOFREE(index,"Free Taxonomy"); + + return 0; + } + + return 1; +} + + + +int32_t delete_taxon(ecotx_t *taxon) +{ + if (taxon) + { + if (taxon->name) + ECOFREE(taxon->name,"Free scientific name"); + + ECOFREE(taxon,"Free Taxon"); + + return 0; + } + + return 1; +} + +ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon) +{ + + ecotxformat_t *raw; + int32_t rs; + + raw = read_ecorecord(f,&rs); + + if (!raw) + return NULL; + + if (is_big_endian()) + { + raw->namelength = swap_int32_t(raw->namelength); + raw->parent = swap_int32_t(raw->parent); + raw->rank = swap_int32_t(raw->rank); + raw->taxid = swap_int32_t(raw->taxid); + } + + taxon->parent = raw->parent; + taxon->taxid = raw->taxid; + taxon->rank = raw->rank; + + taxon->name = ECOMALLOC((raw->namelength+1) * sizeof(char), + "Allocate taxon scientific name"); + + strncpy(taxon->name,raw->name,raw->namelength); + + return taxon; +} + + +ecotaxonomy_t *read_taxonomy(const char *prefix) +{ + ecotaxonomy_t *tax; + char *filename; + int buffsize; + + tax = ECOMALLOC(sizeof(ecotaxonomy_t), + "Allocate taxonomy structure"); + + buffsize = strlen(prefix)+10; + + filename = ECOMALLOC(buffsize, + "Allocate filename"); + + snprintf(filename,buffsize,"%s.rdx",prefix); + + tax->ranks = read_rankidx(filename); + + snprintf(filename,buffsize,"%s.tdx",prefix); + + tax->taxons = read_taxonomyidx(filename); + + return tax; + +} + +int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy) +{ + if (taxonomy) + { + if (taxonomy->ranks) + ECOFREE(taxonomy->ranks,"Free rank index"); + + if (taxonomy->taxons) + ECOFREE(taxonomy->taxons,"Free taxon index"); + + ECOFREE(taxonomy,"Free taxonomy structure"); + + return 0; + } + + return 1; +} + +ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, + int32_t rankidx, + ecotaxonomy_t *taxonomy) +{ + ecotx_t *current_taxon; + ecotx_t *next_taxon; + + current_taxon = taxon; + next_taxon = &(taxonomy->taxons->taxon[current_taxon->parent]); + + while ((current_taxon!=next_taxon) && + (current_taxon->rank!=rankidx)) + { + current_taxon = next_taxon; + next_taxon = &(taxonomy->taxons->taxon[current_taxon->parent]); + } + + if (current_taxon->rank==rankidx) + return current_taxon; + else + return NULL; +} + +ecotx_t *eco_getspecies(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("species",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex,tax); +} + +ecotx_t *eco_getgenus(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("genus",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex,tax); +} + + +ecotx_t *eco_getfamily(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("family",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex,tax); +} + +ecotx_t *eco_getsuperkingdom(ecotx_t *taxon, + ecotaxonomy_t *taxonomy) +{ + static ecotaxonomy_t *tax=NULL; + static int32_t rankindex=-1; + + if (taxonomy && tax!=taxonomy) + { + rankindex = rank_index("superkingdom",taxonomy->ranks); + tax=taxonomy; + } + + if (!tax || rankindex < 0) + ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined"); + + return eco_findtaxonatrank(taxon,rankindex,tax); +} \ No newline at end of file diff --git a/tests/ecodb.rdx b/tests/ecodb.rdx new file mode 100644 index 0000000..db10ce6 Binary files /dev/null and b/tests/ecodb.rdx differ diff --git a/tests/ecodb.tdx b/tests/ecodb.tdx new file mode 100644 index 0000000..9cd8879 Binary files /dev/null and b/tests/ecodb.tdx differ diff --git a/tests/ecodb_001.sdx b/tests/ecodb_001.sdx new file mode 100644 index 0000000..7f73e97 Binary files /dev/null and b/tests/ecodb_001.sdx differ diff --git a/tools/ecoPCRFormat.py b/tools/ecoPCRFormat.py new file mode 100755 index 0000000..1de51e8 --- /dev/null +++ b/tools/ecoPCRFormat.py @@ -0,0 +1,447 @@ +#!/usr/bin/env python + +import re +import gzip +import psycopg2 +import struct +import sys +import time +import getopt + +##### +# +# +# Generic file function +# +# +##### + +def universalOpen(file): + if isinstance(file,str): + if file[-3:] == '.gz': + rep = gzip.open(file) + else: + rep = open(file) + else: + rep = file + return rep + +def universalTell(file): + if isinstance(file, gzip.GzipFile): + file=file.myfileobj + return file.tell() + +def fileSize(file): + if isinstance(file, gzip.GzipFile): + file=file.myfileobj + pos = file.tell() + file.seek(0,2) + length = file.tell() + file.seek(pos,0) + return length + +def progressBar(pos,max,reset=False,delta=[]): + if reset: + del delta[:] + if not delta: + delta.append(time.time()) + delta.append(time.time()) + + delta[1]=time.time() + elapsed = delta[1]-delta[0] + percent = float(pos)/max * 100 + remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent))) + bar = '#' * int(percent/2) + bar+= '|/-\\-'[pos % 5] + bar+= ' ' * (50 - int(percent/2)) + sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain)) + +##### +# +# +# NCBI Dump Taxonomy reader +# +# +##### + +def endLessIterator(endedlist): + for x in endedlist: + yield x + while(1): + yield endedlist[-1] + + +class ColumnFile(object): + + def __init__(self,stream,sep=None,strip=True,types=None): + if isinstance(stream,str): + self._stream = open(stream) + elif hasattr(stream,'next'): + self._stream = stream + else: + raise ValueError,'stream must be string or an iterator' + self._delimiter=sep + self._strip=strip + if types: + self._types=[x for x in types] + for i in xrange(len(self._types)): + if self._types[i] is bool: + self._types[i]=ColumnFile.str2bool + else: + self._types=None + + def str2bool(x): + return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False})) + + str2bool = staticmethod(str2bool) + + + def __iter__(self): + return self + + def next(self): + ligne = self._stream.next() + data = ligne.split(self._delimiter) + if self._strip or self._types: + data = [x.strip() for x in data] + if self._types: + it = endLessIterator(self._types) + data = [x[1](x[0]) for x in ((y,it.next()) for y in data)] + return data + +def taxonCmp(t1,t2): + if t1[0] < t2[0]: + return -1 + elif t1[0] > t2[0]: + return +1 + return 0 + +def bsearchTaxon(taxonomy,taxid): + taxCount = len(taxonomy) + begin = 0 + end = taxCount + oldcheck=taxCount + check = begin + end / 2 + while check != oldcheck and taxonomy[check][0]!=taxid : + if taxonomy[check][0] < taxid: + begin=check + else: + end=check + oldcheck=check + check = (begin + end) / 2 + + + if taxonomy[check][0]==taxid: + return check + else: + return None + + + +def readNodeTable(file): + + file = universalOpen(file) + + nodes = ColumnFile(file, + sep='|', + types=(int,int,str, + str,str,bool, + int,bool,int, + bool,bool,bool,str)) + print >>sys.stderr,"Reading taxonomy dump file..." + taxonomy=[[n[0],n[2],n[1]] for n in nodes] + + print >>sys.stderr,"List all taxonomy rank..." + ranks =list(set(x[1] for x in taxonomy)) + ranks.sort() + ranks = dict(map(None,ranks,xrange(len(ranks)))) + + print >>sys.stderr,"Sorting taxons..." + taxonomy.sort(taxonCmp) + + print >>sys.stderr,"Indexing taxonomy..." + index = {} + for t in taxonomy: + index[t[0]]=bsearchTaxon(taxonomy, t[0]) + + print >>sys.stderr,"Indexing parent and rank..." + for t in taxonomy: + t[1]=ranks[t[1]] + t[2]=index[t[2]] + + + return taxonomy,ranks,index + +def scientificNameIterator(file): + file = universalOpen(file) + names = ColumnFile(file, + sep='|', + types=(int,str, + str,str)) + for taxid,name,unique,classname,white in names: + if classname == 'scientific name': + yield taxid,name + +def mergedNodeIterator(file): + file = universalOpen(file) + merged = ColumnFile(file, + sep='|', + types=(int,int,str)) + for taxid,current,white in merged: + yield taxid,current + +def deletedNodeIterator(file): + file = universalOpen(file) + deleted = ColumnFile(file, + sep='|', + types=(int,str)) + for taxid,white in deleted: + yield taxid + +def readTaxonomyDump(taxdir): + taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir) + + print >>sys.stderr,"Adding scientific name..." + for taxid,name in scientificNameIterator('%s/names.dmp' % taxdir): + taxonomy[index[taxid]].append(name) + + print >>sys.stderr,"Adding taxid alias..." + for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir): + index[taxid]=index[current] + + print >>sys.stderr,"Adding deleted taxid..." + for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir): + index[taxid]=None + + return taxonomy,ranks,index + + +##### +# +# +# Genbank/EMBL sequence reader +# +# +##### + +def entryIterator(file): + file = universalOpen(file) + rep =[] + for ligne in file: + rep.append(ligne) + if ligne == '//\n': + rep = ''.join(rep) + yield rep + rep = [] + +_cleanSeq = re.compile('[ \n0-9]+') + +def cleanSeq(seq): + return _cleanSeq.sub('',seq) + + +_gbParseID = re.compile('(?<=^LOCUS {7})[^ ]+(?= )',re.MULTILINE) +_gbParseDE = re.compile('(?<=^DEFINITION {2}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL) +_gbParseSQ = re.compile('(?<=^ORIGIN).+?(?=^//$)',re.MULTILINE+re.DOTALL) +_gbParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")') + +def genbankParser(entry): + Id = _gbParseID.findall(entry)[0] + De = ' '.join(_gbParseDE.findall(entry)[0].split()) + Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper()) + Tx = int(_gbParseTX.findall(entry)[0]) + return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq} + +def sequenceIterator(file,parser): + for entry in entryIterator(file): + yield parser(entry) + +def taxonomyInfo(entry,connection): + taxid = entry['taxid'] + curseur = connection.cursor() + curseur.execute(""" + select taxid,species,genus,family, + taxonomy.scientificName(taxid) as sn, + taxonomy.scientificName(species) as species_sn, + taxonomy.scientificName(genus) as genus_sn, + taxonomy.scientificName(family) as family_sn + from + ( + select alias as taxid, + taxonomy.getSpecies(alias) as species, + taxonomy.getGenus(alias) as genus, + taxonomy.getFamily(alias) as family + from taxonomy.aliases + where id=%d ) as tax + """ % taxid) + rep = curseur.fetchone() + entry['current_taxid']=rep[0] + entry['species']=rep[1] + entry['genus']=rep[2] + entry['family']=rep[3] + entry['scientific_name']=rep[4] + entry['species_sn']=rep[5] + entry['genus_sn']=rep[6] + entry['family_sn']=rep[7] + return entry + +##### +# +# +# Binary writer +# +# +##### + +def ecoSeqPacker(sq): + + compactseq = gzip.zlib.compress(sq['sequence'],9) + cptseqlength = len(compactseq) + delength = len(sq['definition']) + + totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength + + packed = struct.pack('> I I 20s I I I %ds %ds' % (delength,cptseqlength), + totalSize, + sq['taxid'], + sq['id'], + delength, + len(sq['sequence']), + cptseqlength, + sq['definition'], + compactseq) + + assert len(packed) == totalSize+4, "error in sequence packing" + + return packed + +def ecoTaxPacker(tx): + + namelength = len(tx[3]) + + totalSize = 4 + 4 + 4 + 4 + namelength + + packed = struct.pack('> I I I I I %ds' % namelength, + totalSize, + tx[0], + tx[1], + tx[2], + namelength, + tx[3]) + + return packed + +def ecoRankPacker(rank): + + namelength = len(rank) + + packed = struct.pack('> I %ds' % namelength, + namelength, + rank) + + return packed + + +def ecoSeqWriter(file,input,taxindex,parser=genbankParser): + output = open(file,'wb') + input = universalOpen(input) + inputsize = fileSize(input) + entries = sequenceIterator(input, parser) + seqcount=0 + skipped = [] + + output.write(struct.pack('> I',seqcount)) + + progressBar(1, inputsize,reset=True) + for entry in entries: + entry['taxid']=taxindex[entry['taxid']] + if entry['taxid'] is not None: + seqcount+=1 + output.write(ecoSeqPacker(entry)) + else: + skipped.append[entry['id']] + where = universalTell(input) + progressBar(where, inputsize) + print >>sys.stderr," Read sequences : %d " % seqcount, + + print >>sys.stderr + output.seek(0,0) + output.write(struct.pack('> I',seqcount)) + + output.close() + return skipped + + +def ecoTaxWriter(file,taxonomy): + output = open(file,'wb') + output.write(struct.pack('> I',len(taxonomy))) + + for tx in taxonomy: + output.write(ecoTaxPacker(tx)) + + output.close() + +def ecoRankWriter(file,ranks): + output = open(file,'wb') + output.write(struct.pack('> I',len(ranks))) + + rankNames = ranks.keys() + rankNames.sort() + + for rank in rankNames: + output.write(ecoRankPacker(rank)) + + output.close() + +def ecoDBWriter(prefix,taxonomy,seqFileNames,parser=genbankParser): + + ecoRankWriter('%s.rdx' % prefix, taxonomy[1]) + ecoTaxWriter('%s.tdx' % prefix, taxonomy[0]) + + filecount = 0 + for filename in seqFileNames: + filecount+=1 + sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount), + filename, + taxonomy[2], + parser) + if sk: + print >>sys.stderr,"Skipped entry :" + print >>sys.stderr,sk + +def ecoParseOptions(arguments): + opt = { + 'prefix' : 'ecodb', + 'taxdir' : 'taxdump', + 'parser' : genbankParser + } + + o,filenames = getopt.getopt(arguments, + 'ht:n:g', + ['help', + 'taxonomy=', + 'name=', + 'genbank']) + + for name,value in o: + if name in ('-h','--help'): + pass + elif name in ('-t','--taxonomy'): + opt['taxdir']=value + elif name in ('-n','--name'): + opt['prefix']=value + elif name in ('-g','--genbank'): + opt['parser']=genbankParser + else: + raise ValueError,'Unknown option %s' % name + + return opt,filenames + +if __name__ == '__main__': + + opt,filenames = ecoParseOptions(sys.argv[1:]) + + taxonomy = readTaxonomyDump(opt['taxdir']) + + ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser']) +