Initial version of ecoPCR

git-svn-id: https://www.grenoble.prabi.fr/svn/LECASofts/ecoPCR/trunk@5 60f365c0-8329-0410-b2a4-ec073aeeaa1d
2007-02-15 13:33:32 +00:00
parent f3d2273d8d
commit ebe8c0b516
39 changed files with 5568 additions and 0 deletions
--- a/Debug/.gdb_history
+++ b/Debug/.gdb_history
@ -0,0 +1,62 @@
+r  -1acghtgd -2gctcagctagcta /Users/coissac/encours/ecoPCR/tools/ecodb 
+print put
+print out
+f ecoPCR
+r  -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+bt
+print label1
+up
+print label1
+print (char*)label1
+r  -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+bt
+up 3
+l
+b 38
+r
+r  -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+print label
+print ranks
+print *ranks
+print ranks->label +1
+print *ranks->label +1
+print *(ranks->label +1)
+print *(ranks->label +2)
+b ecotax.c:147
+r  -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+print current_taxon ;
+print current_taxon 
+n
+print current_taxon ;
+print current_taxon 
+print *current_taxon 
+print taxonomy->taxons[11]
+print taxonomy->taxons[2952]
+print taxonomy->taxons->taxon[2952]
+print taxonomy->ranks->rank[11]
+print taxonomy->ranks->label[11]
+print taxonomy->ranks->label[3]
+b ecotax.c:147
+r  -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+print current_taxon 
+print *current_taxon 
+n
+print *current_taxon 
+b ecotax.c:147
+r  -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+n
+print *current_taxon 
+r  -1GGGCAATCCTGAGCCAA -2CCATTGAGTCTCTGCACCTATC -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+n
+print *current_taxon 
+b ecodna.c:70
+r  -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+*sb
+print *sb
+print sb
+print str
+b 52
+r  -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+print str
+r  -1GGGCAATCCTGAGCC#A#A# -2CCATTGAGTCTCTGCACCTA#T#C# -l5 -L200 -e2 /Users/coissac/encours/ecoPCR/tools/ecodb
+bt
--- a/Debug/makefile
+++ b/Debug/makefile
@ -0,0 +1,46 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+-include ../makefile.init
+
+RM := rm -rf
+
+# All of the sources participating in the build are defined here
+-include sources.mk
+-include subdir.mk
+-include src/libecoPCR/subdir.mk
+-include src/libapat/subdir.mk
+-include src/subdir.mk
+-include objects.mk
+
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(strip $(C_DEPS)),)
+-include $(C_DEPS)
+endif
+endif
+
+-include ../makefile.defs
+
+# Add inputs and outputs from these tool invocations to the build variables 
+
+# All Target
+all: ecoPCR
+
+# Tool invocations
+ecoPCR: $(OBJS) $(USER_OBJS)
+	@echo 'Building target: $@'
+	@echo 'Invoking: MacOS X C Linker'
+	gcc  -o "ecoPCR" $(OBJS) $(USER_OBJS) $(LIBS)
+	@echo 'Finished building target: $@'
+	@echo ' '
+
+# Other Targets
+clean:
+	-$(RM) $(OBJS)$(EXECUTABLES)$(C_DEPS) ecoPCR
+	-@echo ' '
+
+.PHONY: all clean dependents
+.SECONDARY:
+
+-include ../makefile.targets
--- a/Debug/objects.mk
+++ b/Debug/objects.mk
@ -0,0 +1,7 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+LIBS := -lz
+
+USER_OBJS :=
--- a/Debug/sources.mk
+++ b/Debug/sources.mk
@ -0,0 +1,19 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+C_SRCS := 
+O_SRCS := 
+ASM_SRCS := 
+S_SRCS := 
+OBJ_SRCS := 
+OBJS := 
+EXECUTABLES := 
+C_DEPS := 
+
+# Every subdirectory with source files must be described here
+SUBDIRS := \
+src/libecoPCR \
+src/libapat \
+src \
+
--- a/Debug/src/libapat/CODES/subdir.mk
+++ b/Debug/src/libapat/CODES/subdir.mk
@ -0,0 +1,24 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables 
+C_SRCS += \
+../src/libapat/CODES/gendual.c 
+
+OBJS += \
+./src/libapat/CODES/gendual.o 
+
+C_DEPS += \
+./src/libapat/CODES/gendual.d 
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/libapat/CODES/%.o: ../src/libapat/CODES/%.c
+	@echo 'Building file: $<'
+	@echo 'Invoking: GCC C Compiler'
+	gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+	@echo 'Finished building: $<'
+	@echo ' '
+
+
--- a/Debug/src/libapat/subdir.mk
+++ b/Debug/src/libapat/subdir.mk
@ -0,0 +1,30 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables 
+C_SRCS += \
+../src/libapat/apat_parse.c \
+../src/libapat/apat_search.c \
+../src/libapat/libstki.c 
+
+OBJS += \
+./src/libapat/apat_parse.o \
+./src/libapat/apat_search.o \
+./src/libapat/libstki.o 
+
+C_DEPS += \
+./src/libapat/apat_parse.d \
+./src/libapat/apat_search.d \
+./src/libapat/libstki.d 
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/libapat/%.o: ../src/libapat/%.c
+	@echo 'Building file: $<'
+	@echo 'Invoking: GCC C Compiler'
+	gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+	@echo 'Finished building: $<'
+	@echo ' '
+
+
--- a/Debug/src/libecoPCR/subdir.mk
+++ b/Debug/src/libecoPCR/subdir.mk
@ -0,0 +1,45 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables 
+C_SRCS += \
+../src/libecoPCR/ecoError.c \
+../src/libecoPCR/ecoIOUtils.c \
+../src/libecoPCR/ecoMalloc.c \
+../src/libecoPCR/ecoapat.c \
+../src/libecoPCR/ecodna.c \
+../src/libecoPCR/ecorank.c \
+../src/libecoPCR/ecoseq.c \
+../src/libecoPCR/ecotax.c 
+
+OBJS += \
+./src/libecoPCR/ecoError.o \
+./src/libecoPCR/ecoIOUtils.o \
+./src/libecoPCR/ecoMalloc.o \
+./src/libecoPCR/ecoapat.o \
+./src/libecoPCR/ecodna.o \
+./src/libecoPCR/ecorank.o \
+./src/libecoPCR/ecoseq.o \
+./src/libecoPCR/ecotax.o 
+
+C_DEPS += \
+./src/libecoPCR/ecoError.d \
+./src/libecoPCR/ecoIOUtils.d \
+./src/libecoPCR/ecoMalloc.d \
+./src/libecoPCR/ecoapat.d \
+./src/libecoPCR/ecodna.d \
+./src/libecoPCR/ecorank.d \
+./src/libecoPCR/ecoseq.d \
+./src/libecoPCR/ecotax.d 
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/libecoPCR/%.o: ../src/libecoPCR/%.c
+	@echo 'Building file: $<'
+	@echo 'Invoking: GCC C Compiler'
+	gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+	@echo 'Finished building: $<'
+	@echo ' '
+
+
--- a/Debug/src/subdir.mk
+++ b/Debug/src/subdir.mk
@ -0,0 +1,24 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables 
+C_SRCS += \
+../src/ecopcr.c 
+
+OBJS += \
+./src/ecopcr.o 
+
+C_DEPS += \
+./src/ecopcr.d 
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/%.o: ../src/%.c
+	@echo 'Building file: $<'
+	@echo 'Invoking: GCC C Compiler'
+	gcc -DMAC_OS_X -O0 -g3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+	@echo 'Finished building: $<'
+	@echo ' '
+
+
--- a/Release/makefile
+++ b/Release/makefile
@ -0,0 +1,46 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+-include ../makefile.init
+
+RM := rm -rf
+
+# All of the sources participating in the build are defined here
+-include sources.mk
+-include subdir.mk
+-include src/libecoPCR/subdir.mk
+-include src/libapat/subdir.mk
+-include src/subdir.mk
+-include objects.mk
+
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(strip $(C_DEPS)),)
+-include $(C_DEPS)
+endif
+endif
+
+-include ../makefile.defs
+
+# Add inputs and outputs from these tool invocations to the build variables 
+
+# All Target
+all: ecoPCR
+
+# Tool invocations
+ecoPCR: $(OBJS) $(USER_OBJS)
+	@echo 'Building target: $@'
+	@echo 'Invoking: MacOS X C Linker'
+	gcc  -o "ecoPCR" $(OBJS) $(USER_OBJS) $(LIBS)
+	@echo 'Finished building target: $@'
+	@echo ' '
+
+# Other Targets
+clean:
+	-$(RM) $(OBJS)$(EXECUTABLES)$(C_DEPS) ecoPCR
+	-@echo ' '
+
+.PHONY: all clean dependents
+.SECONDARY:
+
+-include ../makefile.targets
--- a/Release/objects.mk
+++ b/Release/objects.mk
@ -0,0 +1,7 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+LIBS := -lz
+
+USER_OBJS :=
--- a/Release/sources.mk
+++ b/Release/sources.mk
@ -0,0 +1,19 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+C_SRCS := 
+O_SRCS := 
+ASM_SRCS := 
+S_SRCS := 
+OBJ_SRCS := 
+OBJS := 
+EXECUTABLES := 
+C_DEPS := 
+
+# Every subdirectory with source files must be described here
+SUBDIRS := \
+src/libecoPCR \
+src/libapat \
+src \
+
--- a/Release/src/libapat/subdir.mk
+++ b/Release/src/libapat/subdir.mk
@ -0,0 +1,30 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables 
+C_SRCS += \
+../src/libapat/apat_parse.c \
+../src/libapat/apat_search.c \
+../src/libapat/libstki.c 
+
+OBJS += \
+./src/libapat/apat_parse.o \
+./src/libapat/apat_search.o \
+./src/libapat/libstki.o 
+
+C_DEPS += \
+./src/libapat/apat_parse.d \
+./src/libapat/apat_search.d \
+./src/libapat/libstki.d 
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/libapat/%.o: ../src/libapat/%.c
+	@echo 'Building file: $<'
+	@echo 'Invoking: GCC C Compiler'
+	gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+	@echo 'Finished building: $<'
+	@echo ' '
+
+
--- a/Release/src/libecoPCR/subdir.mk
+++ b/Release/src/libecoPCR/subdir.mk
@ -0,0 +1,45 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables 
+C_SRCS += \
+../src/libecoPCR/ecoError.c \
+../src/libecoPCR/ecoIOUtils.c \
+../src/libecoPCR/ecoMalloc.c \
+../src/libecoPCR/ecoapat.c \
+../src/libecoPCR/ecodna.c \
+../src/libecoPCR/ecorank.c \
+../src/libecoPCR/ecoseq.c \
+../src/libecoPCR/ecotax.c 
+
+OBJS += \
+./src/libecoPCR/ecoError.o \
+./src/libecoPCR/ecoIOUtils.o \
+./src/libecoPCR/ecoMalloc.o \
+./src/libecoPCR/ecoapat.o \
+./src/libecoPCR/ecodna.o \
+./src/libecoPCR/ecorank.o \
+./src/libecoPCR/ecoseq.o \
+./src/libecoPCR/ecotax.o 
+
+C_DEPS += \
+./src/libecoPCR/ecoError.d \
+./src/libecoPCR/ecoIOUtils.d \
+./src/libecoPCR/ecoMalloc.d \
+./src/libecoPCR/ecoapat.d \
+./src/libecoPCR/ecodna.d \
+./src/libecoPCR/ecorank.d \
+./src/libecoPCR/ecoseq.d \
+./src/libecoPCR/ecotax.d 
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/libecoPCR/%.o: ../src/libecoPCR/%.c
+	@echo 'Building file: $<'
+	@echo 'Invoking: GCC C Compiler'
+	gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+	@echo 'Finished building: $<'
+	@echo ' '
+
+
--- a/Release/src/subdir.mk
+++ b/Release/src/subdir.mk
@ -0,0 +1,24 @@
+################################################################################
+# Automatically-generated file. Do not edit!
+################################################################################
+
+# Add inputs and outputs from these tool invocations to the build variables 
+C_SRCS += \
+../src/ecopcr.c 
+
+OBJS += \
+./src/ecopcr.o 
+
+C_DEPS += \
+./src/ecopcr.d 
+
+
+# Each subdirectory must supply rules for building sources it contributes
+src/%.o: ../src/%.c
+	@echo 'Building file: $<'
+	@echo 'Invoking: GCC C Compiler'
+	gcc -DMAC_OS_X -O3 -Wall -c -fmessage-length=0 -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@:%.o=%.d)" -o"$@" "$<"
+	@echo 'Finished building: $<'
+	@echo ' '
+
+
--- a/1237
+++ b/1237
--- a/src/ecopcr.c
+++ b/src/ecopcr.c
@ -0,0 +1,512 @@
+#include "libecoPCR/ecoPCR.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+
+
+#define VERSION "0.1"
+
+/* ----------------------------------------------- */
+/* printout help                                   */                                           /* ----------------------------------------------- */
+
+#define PP fprintf(stdout, 
+
+static void PrintHelp()
+{
+        PP      "------------------------------------------\n");
+        PP      " Apat Version %s\n", VERSION);
+        PP      "------------------------------------------\n");
+        PP      "synopsis : pattern(s) searching program\n");
+        PP      "usage: apat [options] patfile datafile\n");
+        PP      "------------------------------------------\n");
+        PP      "options:\n");
+        PP      "-a code         : [A]lphabet encoding for pattern\n");
+        PP      "                  code is one of : \n");
+        PP      "                  dna:   use IUPAC equivalences for dna/rna\n");
+        PP      "                  prot:  use IUPAC equivalences for proteins\n");
+        PP      "                  alpha: no equivalences, just treat plain symbols\n");
+        PP      "                  note: the equivalences are used in pattern only\n");
+        PP      "                  *not* in sequence(s) (see note (4) below)\n");
+        PP      "                  dft: alpha\n");
+        PP      "-c              : [C]ooccurences\n");
+        PP      "                  print patterns cooccurence matrix \n");     
+        PP      "                  dft: off\n");
+        PP      "-h              : [H]elp - print <this> help\n");
+        PP      "-m              : [M]ultiple occurences\n");
+        PP      "                  see -q option \n");
+        PP      "                  dft: off\n");
+        PP      "-o file         : [O]utput sequences\n");
+        PP      "                  additionaly output sequence(s) that match into\n");     
+        PP      "                  'file' in fasta format\n"); 
+        PP      "                  dft: off\n");
+        PP      "-p              : no [Print] - don't printout hits\n");
+        PP      "                  when just counts are needed\n");
+        PP      "                  dft: off\n");
+        PP      "-q nn           : [Quorum]\n");
+        PP      "                  printout result if at least nn\n");
+        PP      "                  different patterns are found on the sequence\n");
+        PP      "                  (with -m : at least nn different <hits>)\n");
+        PP      "                  dft: # of patterns read\n");
+        PP      "-s              : no [Sort] - don't sort hits before printing\n");
+        PP      "                  usually hits are printed by increasing position\n");
+        PP      "                  this option will list them by pattern\n");
+        PP      "                  dft: off\n");
+        PP      "-t              : [T]est sequence\n");
+        PP      "                  additionnaly check if sequences are uppercase\n");
+        PP      "                  this is mostly used for testing\n");
+        PP      "                  dft: off\n");
+        PP      "-u              : [U]pper\n");
+        PP      "                  force lower->upper sequence conversion\n");
+        PP      "                  without this option lowercase symbols in sequence\n");
+        PP      "                  will not be considered to as matches\n");
+        PP      "                  dft: off\n");
+        PP      "-v              : [V]erbose\n");
+        PP      "                  just display a kind of progress clock on stderr\n");
+        PP      "                  (this is only useful if you redirect stdout)\n");
+        PP      "\n");
+        PP      "patfile  : pattern file  (see below)\n");
+        PP      "datafile : database file (see below)\n");
+        PP      "------------------------------------------\n");
+        PP      "pattern file format :\n");
+        PP      "  one pattern/line\n");
+        PP      "  format : <pattern> <space> #errors\n");
+        PP      "           <pattern> := pattern<symbol>\n");
+        PP      "                 or     !pattern<symbol>\n");
+        PP      "                 or      pattern<symbol>#\n");
+        PP      "                 or     !pattern<symbol>#\n");
+        PP      "           <symbol>  := <letter>\n");
+        PP      "                 or     [<letter>....<letter>]\n");
+        PP      "           <letter>  := uppercase letter (A-Z)\n");
+        PP      "           <number>  := a positive number indicates max number of mismatches\n");
+        PP      "                        a negative number indicates max number of mismatches or indels\n");
+        PP      "  # means that no error is allowed at this position\n");
+        PP      "  ! complement the <symbol>\n");
+        PP      " [...] means that all symbols within [] are allowed\n");
+        PP      " in addition IUPAC equivalences may be used as symbols\n");
+        PP      " with the -a option\n");
+        PP      "\n");
+        PP      "example: G[DE]S#[GIV]!HP![DE]#  1\n");
+        PP      "\n");
+        PP      "------------------------------------------\n");
+        PP      "datafile contains one or more sequences in\n");
+        PP      "Fasta format, with *uppercase* symbols \n");
+        PP      "\n");
+        PP      "------------------------------------------\n");
+        PP      "note (1): the maximum number of patterns is %d\n", MAX_PATTERN);
+        PP      "\n");
+        PP      "note (2): the maximum length for one pattern is %d\n", MAX_PAT_LEN);
+        PP      "\n");
+        PP      "note (3): indels are still experimental and are :\n");
+        PP      "     not handled gracefully with the # syntax\n");
+        PP      "     and hits are not printed very nicely\n");
+        PP      "\n");
+        PP      "note (4): the IUPAC equivalences (-a option) are used\n");
+        PP      "     in pattern only *not* in sequence(s).\n");
+        PP      "     for instance GATN (with option -a dna) is equivalent to GAT[ACGT]\n");
+        PP      "     and will match GATA/GATC/GATG/GATC but will not match GATN\n");
+        PP      "     (nor NNNN) in sequence.\n");
+        PP      "\n");
+
+}
+
+#undef PP
+
+/* ----------------------------------------------- */
+/* printout usage and exit                         */
+/* ----------------------------------------------- */
+
+#define PP fprintf(stderr, 
+
+static void ExitUsage(stat)
+        int stat;
+{
+        PP      "usage: apat [-a dna|prot] [-c] [-h] [-m] [-o file] [-p]\n");
+        PP      "             [-q nn] [-t] [-u] [-v]\n");
+        PP      "       patfile datafile\n");
+        PP      "type \"apat -h\" for help\n");
+
+        if (stat)
+            exit(stat);
+}
+
+#undef  PP
+
+void printRepeat(ecoseq_t *seq,
+                 PatternPtr o1, PatternPtr o2,
+                 char strand, 
+                 int32_t pos1, int32_t pos2,
+                 int32_t err1, int32_t err2,
+                 ecotaxonomy_t *taxonomy)
+{
+	char     *AC;
+	int32_t  seqlength;
+	int32_t	 taxid;
+	int32_t  species_taxid;
+	int32_t  genus_taxid;
+	int32_t  family_taxid;
+	int32_t  superkingdom_taxid;
+	char     *rank;
+	char     *scientificName;
+	char     *genus_name;
+	char     *family_name;
+	char     *superkingdom_name;
+	
+	ecotx_t  *taxon;
+	ecotx_t  *main_taxon;
+	
+	char     oligo1[MAX_PAT_LEN+1];
+	char     oligo2[MAX_PAT_LEN+1];
+	
+	int32_t  error1;
+	int32_t  error2;
+	
+	char     *amplifia = NULL;
+	int32_t  amplength;
+	
+	AC = seq->AC;
+	seqlength = seq->SQ_length;
+	
+	main_taxon    = &taxonomy->taxons->taxon[seq->taxid];
+	taxid         = main_taxon->taxid;
+	scientificName= main_taxon->name;				
+	rank          = taxonomy->ranks->label[main_taxon->rank];
+	taxon         = eco_getspecies(main_taxon,taxonomy);
+	if (taxon)
+		{
+			species_taxid = taxon->taxid;
+			scientificName= taxon->name;				
+		}
+	else 
+		species_taxid = -1;
+		
+	taxon         = eco_getgenus((taxon) ? taxon:main_taxon,taxonomy);
+	if (taxon)
+		{
+			genus_taxid = taxon->taxid;
+			genus_name= taxon->name;				
+		}
+	else 
+		{
+			genus_taxid = -1;
+			genus_name  = "###";
+		}
+	
+	taxon         = eco_getfamily((taxon) ? taxon:main_taxon,taxonomy);
+	if (taxon)
+		{
+			family_taxid = taxon->taxid;
+			family_name= taxon->name;				
+		}
+	else 
+		{
+			family_taxid = -1;
+			family_name  = "###";
+		}
+	
+	taxon         = eco_getsuperkingdom((taxon) ? taxon:main_taxon,taxonomy);
+	if (taxon)
+		{
+			superkingdom_taxid = taxon->taxid;
+			superkingdom_name= taxon->name;				
+		}
+	else 
+		{
+			superkingdom_taxid = -1;
+			superkingdom_name  = "###";
+		}
+	
+	amplength = pos2-pos1;     
+	
+	amplifia = getSubSequence(seq->SQ,pos1,pos2);
+	
+	
+	if (strand=='R')
+	{
+		ecoComplementSequence(amplifia);
+		strncpy(oligo1,amplifia,o2->patlen);
+		oligo1[o2->patlen]=0;
+		error1=err2;
+		
+		strncpy(oligo2,amplifia + amplength - o1->patlen,o1->patlen);
+		oligo2[o1->patlen]=0;
+		error2=err1;
+		
+		amplifia+=o2->patlen;
+	}
+	else
+	{
+		strncpy(oligo1,amplifia,o1->patlen);
+		oligo1[o1->patlen]=0;
+		error1=err1;
+		
+		strncpy(oligo2,amplifia + amplength - o2->patlen,o2->patlen);
+		oligo2[o2->patlen]=0;
+		error2=err2;
+		
+		amplifia+=o1->patlen;
+		
+	}
+	
+
+	
+	ecoComplementSequence(oligo2);
+	amplifia[amplength - o2->patlen - o1->patlen]=0;
+	
+	printf("%-15s | %9d | %8d | %-20s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %8d | %-30s | %c | %-32s | %2d | %-32s | %2d | %5d | %s | %s\n",
+			AC,
+			seqlength,
+			taxid,
+			rank,
+			species_taxid,
+			scientificName,
+			genus_taxid,
+			genus_name,
+			family_taxid,
+			family_name,
+			superkingdom_taxid,
+			superkingdom_name,
+			strand,
+			oligo1,
+			error1,
+			oligo2,
+			error2,
+			amplength,
+			amplifia,
+			seq->DE
+		  );
+}
+
+int main(int argc, char **argv) 
+{
+	ecoseq_t      *seq;
+	ecotaxonomy_t *taxonomy;
+	char          *scname;
+	char          head[11];
+	char          tail[11];
+	
+	int           carg;
+	
+	char          *oligo1=NULL;
+	char          *oligo2=NULL;
+	
+	PatternPtr    o1;
+	PatternPtr    o2;
+	PatternPtr    o1c;
+	PatternPtr 	  o2c;
+	
+	int32_t       lmin=0;
+	int32_t       lmax=0;
+	int32_t       error_max=0;
+	int32_t       errflag=0;
+	
+	char          *prefix;
+	
+	int32_t       checkedSequence = 0;
+	int32_t       positiveSequence= 0;
+	int32_t       amplifiatCount  = 0;
+	
+	int32_t       o1Hits;
+	int32_t       o2Hits;
+	int32_t       o1cHits;
+	int32_t       o2cHits;
+	
+	int32_t		  begin;
+	int32_t       length;
+	
+	SeqPtr        apatseq=NULL;
+	StackiPtr     stktmp;
+	
+	int32_t       i;
+	int32_t       j;
+	int32_t       posi;
+	int32_t		  posj;
+	int32_t       erri;
+	int32_t		  errj;
+	
+	
+    while ((carg = getopt(argc, argv, "h1:2:l:L:e:")) != -1) {
+
+     switch (carg) {
+                                /* -------------------- */
+        case '1':               /* prenier oligo        */
+                                /* -------------------- */
+           oligo1 = ECOMALLOC(strlen(optarg)+1,
+                              "Error on oligo 1 allocation");
+           strcpy(oligo1,optarg);
+           break;
+
+                                /* -------------------- */
+        case '2':               /* coocurence option    */
+                                /* -------------------- */
+           oligo2 = ECOMALLOC(strlen(optarg)+1,
+                              "Error on oligo 1 allocation");
+           strcpy(oligo2,optarg);
+           break;
+
+                                /* -------------------- */
+        case 'h':               /* help                 */
+                                /* -------------------- */
+            
+           PrintHelp();
+           exit(0);
+           break;
+
+                                /* -------------------- */
+        case 'l':               /* lmin amplification   */
+                                /* -------------------- */
+           sscanf(optarg,"%d",&lmin);
+           break;
+
+                                /* -------------------- */
+        case 'L':               /* lmax amplification   */
+                                /* -------------------- */
+          sscanf(optarg,"%d",&lmax);
+          break;
+
+                                /* -------------------- */
+        case 'e':               /* error max            */
+                                /* -------------------- */
+          sscanf(optarg,"%d",&error_max);
+          break;
+          
+                                /* -------------------- */
+        case '?':               /* bad option           */
+                                /* -------------------- */
+            errflag++;
+   		}
+
+	}
+	
+	if ((argc -= optind) != 1)
+            errflag++;
+            
+    if (!oligo1 || !oligo2)
+    		errflag++;
+	
+	if (errflag)
+		ExitUsage(errflag);
+		
+	prefix = argv[optind];
+	
+	o1 = buildPattern(oligo1,error_max);
+	o2 = buildPattern(oligo2,error_max);
+	
+	o1c = complementPattern(o1);
+	o2c = complementPattern(o2);
+	
+	
+	
+	printf("#\n");
+	printf("# ecoPCR version %s\n",VERSION);
+	printf("# direct  strand oligo1 : %-32s ; oligo2c : %32s\n", o1->cpat,o2c->cpat);
+	printf("# reverse strand oligo2 : %-32s ; oligo1c : %32s\n", o2->cpat,o1c->cpat);
+	printf("# max error count by oligonucleotide : %d\n",error_max);
+	printf("# database : %s\n",prefix);
+	if (lmin && lmax)
+		printf("# amplifiat length between [%d,%d] bp\n",lmin,lmax);
+	else if (lmin)
+		printf("# amplifiat length larger than %d bp\n",lmin);
+	else if (lmax)
+		printf("# amplifiat length smaller than %d bp\n",lmax);
+	printf("#\n");
+
+	taxonomy = read_taxonomy(prefix);
+	
+	seq = ecoseq_iterator(prefix);
+	
+	
+	checkedSequence = 0;
+	positiveSequence= 0;
+	amplifiatCount  = 0;
+	
+	while(seq)
+	{
+		checkedSequence++;
+
+		scname = taxonomy->taxons->taxon[seq->taxid].name;
+		strncpy(head,seq->SQ,10);
+		head[10]=0;
+		strncpy(tail,seq->SQ+seq->SQ_length-10,10);
+		tail[10]=0;
+
+		
+		apatseq=ecoseq2apatseq(seq,apatseq);
+		
+		o1Hits = ManberAll(apatseq,o1,0,0,apatseq->seqlen);
+		o2cHits= 0;
+		
+		
+		if (o1Hits)
+		{
+			stktmp = apatseq->hitpos[0];
+			begin = stktmp->val[0] + o1->patlen;
+			
+			if (lmax)
+				length= stktmp->val[stktmp->top-1] + o1->patlen - begin + lmax + o2->patlen;
+			else
+				length= apatseq->seqlen - begin;
+				
+			o2cHits = ManberAll(apatseq,o2c,1,begin,length);
+
+			if (o2cHits)
+				for (i=0; i < o1Hits;i++)
+				{
+					posi = apatseq->hitpos[0]->val[i];
+					erri = apatseq->hiterr[0]->val[i];
+					for (j=0; j < o2cHits; j++)
+					{
+						posj  =apatseq->hitpos[1]->val[j] + o2c->patlen;
+						errj  =apatseq->hiterr[1]->val[j];
+						length=posj - posi + 1  - o1->patlen - o2->patlen;
+						
+						if ((!lmin || (length >= lmin)) &&
+						    (!lmax || (length <= lmax)))
+						    printRepeat(seq,o1,o2c,'D',posi,posj,erri,errj,taxonomy);
+							//printf("%s\tD\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o1Hits,o2cHits,posi,posj,scname);
+					}
+				}
+		}
+			
+		o2Hits = ManberAll(apatseq,o2,2,0,apatseq->seqlen);
+		o1cHits= 0;
+		if (o2Hits)
+		{
+			stktmp = apatseq->hitpos[2];
+			begin = stktmp->val[0] + o2->patlen;
+			
+			if (lmax)
+				length= stktmp->val[stktmp->top-1] + o2->patlen - begin + lmax + o1->patlen;
+			else
+				length= apatseq->seqlen - begin;
+				
+			o1cHits = ManberAll(apatseq,o1c,3,begin,length);
+			
+			if (o1cHits)
+				for (i=0; i < o2Hits;i++)
+				{
+					posi = apatseq->hitpos[2]->val[i];
+					erri = apatseq->hiterr[2]->val[i];
+					for (j=0; j < o1cHits; j++)
+					{
+						posj=apatseq->hitpos[3]->val[j] + o1c->patlen;
+						errj=apatseq->hiterr[3]->val[j];
+						length=posj - posi + 1 - o1->patlen - o2->patlen;
+						
+						if ((!lmin || (length >= lmin)) &&
+						    (!lmax || (length <= lmax)))
+						    printRepeat(seq,o2,o1c,'R',posi,posj,erri,errj,taxonomy);
+							//printf("%s\tR\t%s...%s (%d)\t%d\t%d\t%d\t%d\t%s\n",seq->AC,head,tail,seq->SQ_length,o2Hits,o1cHits,posi,posj,scname);
+					}
+				}
+		}	
+		
+		
+		
+		delete_ecoseq(seq);
+		
+		seq = ecoseq_iterator(NULL);
+	}
+		
+	return 0;
+}
--- a/src/libapat/CODES/dft_code.h
+++ b/src/libapat/CODES/dft_code.h
@ -0,0 +1,14 @@
+/* ----------------------------------------------- */
+/* dft_pat_seq_code.h                              */
+/* default alphabet encoding for alpha             */
+/* ----------------------------------------------- */
+
+        0x00000001 /* A */, 0x00000002 /* B */, 0x00000004 /* C */,
+        0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
+        0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
+        0x00000200 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
+        0x00001000 /* M */, 0x00002000 /* N */, 0x00004000 /* O */,
+        0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
+        0x00040000 /* S */, 0x00080000 /* T */, 0x00100000 /* U */,
+        0x00200000 /* V */, 0x00400000 /* W */, 0x00800000 /* X */,
+        0x01000000 /* Y */, 0x02000000 /* Z */
--- a/src/libapat/CODES/dna_code.h
+++ b/src/libapat/CODES/dna_code.h
@ -0,0 +1,71 @@
+/* ----------------------------------------------- */
+/* dna_code.h                                      */
+/* alphabet encoding for dna/rna                   */
+/* -----------------------------------------       */
+/* IUPAC encoding                                  */
+/* -----------------------------------------       */
+/* G/A/T/C                                         */
+/* U=T                                             */
+/* R=AG                                            */
+/* Y=CT                                            */
+/* M=AC                                            */
+/* K=GT                                            */
+/* S=CG                                            */
+/* W=AT                                            */
+/* H=ACT                                           */
+/* B=CGT                                           */
+/* V=ACG                                           */
+/* D=AGT                                           */
+/* N=ACGT                                          */
+/* X=ACGT                                          */
+/* EFIJLOPQZ  not recognized                       */
+/* -----------------------------------------       */
+/* dual encoding                                   */
+/* -----------------------------------------       */
+/* A=ADHMNRVW                                      */
+/* B=BCDGHKMNRSTUVWY                               */
+/* C=BCHMNSVY                                      */
+/* D=ABDGHKMNRSTUVWY                               */
+/* G=BDGKNRSV                                      */
+/* H=ABCDHKMNRSTUVWY                               */
+/* K=BDGHKNRSTUVWY                                 */
+/* M=ABCDHMNRSVWY                                  */
+/* N=ABCDGHKMNRSTUVWY                              */
+/* R=ABDGHKMNRSVW                                  */
+/* S=BCDGHKMNRSVY                                  */
+/* T=BDHKNTUWY                                     */
+/* U=BDHKNTUWY                                     */
+/* V=ABCDGHKMNRSVWY                                */
+/* W=ABDHKMNRTUVWY                                 */
+/* X=ABCDGHKMNRSTUVWY                              */
+/* Y=BCDHKMNSTUVWY                                 */
+/* EFIJLOPQZ  not recognized                       */
+/* ----------------------------------------------- */
+
+#ifndef USE_DUAL
+
+        /* IUPAC */
+
+        0x00000001 /* A */, 0x00080044 /* B */, 0x00000004 /* C */, 
+        0x00080041 /* D */, 0x00000000 /* E */, 0x00000000 /* F */, 
+        0x00000040 /* G */, 0x00080005 /* H */, 0x00000000 /* I */, 
+        0x00000000 /* J */, 0x00080040 /* K */, 0x00000000 /* L */, 
+        0x00000005 /* M */, 0x00080045 /* N */, 0x00000000 /* O */, 
+        0x00000000 /* P */, 0x00000000 /* Q */, 0x00000041 /* R */, 
+        0x00000044 /* S */, 0x00080000 /* T */, 0x00080000 /* U */, 
+        0x00000045 /* V */, 0x00080001 /* W */, 0x00080045 /* X */, 
+        0x00080004 /* Y */, 0x00000000 /* Z */
+
+#else
+        /* DUAL  */
+
+        0x00623089 /* A */, 0x017e34ce /* B */, 0x01243086 /* C */, 
+        0x017e34cb /* D */, 0x00000000 /* E */, 0x00000000 /* F */, 
+        0x0026244a /* G */, 0x017e348f /* H */, 0x00000000 /* I */, 
+        0x00000000 /* J */, 0x017e24ca /* K */, 0x00000000 /* L */, 
+        0x0166308f /* M */, 0x017e34cf /* N */, 0x00000000 /* O */, 
+        0x00000000 /* P */, 0x00000000 /* Q */, 0x006634cb /* R */, 
+        0x012634ce /* S */, 0x0158248a /* T */, 0x0158248a /* U */, 
+        0x016634cf /* V */, 0x017a348b /* W */, 0x017e34cf /* X */, 
+        0x017c348e /* Y */, 0x00000000 /* Z */
+#endif
--- a/src/libapat/CODES/prot_code.h
+++ b/src/libapat/CODES/prot_code.h
@ -0,0 +1,51 @@
+/* ----------------------------------------------- */
+/* prot_code.h                                     */
+/* alphabet encoding for proteins                  */
+/* -----------------------------------------       */
+/* IUPAC encoding                                  */
+/* -----------------------------------------       */
+/* B=DN                                            */
+/* Z=EQ                                            */
+/* X=any - {X}                                     */
+/* JOU not recognized                              */
+/* -----------------------------------------       */
+/* dual encoding                                   */
+/* -----------------------------------------       */
+/* B=BDN                                           */
+/* D=BD                                            */
+/* E=EZ                                            */
+/* N=BN                                            */
+/* Q=QZ                                            */
+/* X=any - {X}                                     */
+/* Z=EQZ                                           */
+/* JOU not recognized                              */
+/* ----------------------------------------------- */
+
+#ifndef USE_DUAL
+
+        /* IUPAC */
+
+        0x00000001 /* A */, 0x00002008 /* B */, 0x00000004 /* C */,
+        0x00000008 /* D */, 0x00000010 /* E */, 0x00000020 /* F */,
+        0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */,
+        0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */,
+        0x00001000 /* M */, 0x00002000 /* N */, 0x00000000 /* O */,
+        0x00008000 /* P */, 0x00010000 /* Q */, 0x00020000 /* R */,
+        0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */,
+        0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */,
+        0x01000000 /* Y */, 0x00010010 /* Z */
+
+#else
+        /* DUAL  */
+
+        0x00000001 /* A */, 0x0000200a /* B */, 0x00000004 /* C */, 
+        0x0000000a /* D */, 0x02000010 /* E */, 0x00000020 /* F */, 
+        0x00000040 /* G */, 0x00000080 /* H */, 0x00000100 /* I */, 
+        0x00000000 /* J */, 0x00000400 /* K */, 0x00000800 /* L */, 
+        0x00001000 /* M */, 0x00002002 /* N */, 0x00000000 /* O */, 
+        0x00008000 /* P */, 0x02010000 /* Q */, 0x00020000 /* R */, 
+        0x00040000 /* S */, 0x00080000 /* T */, 0x00000000 /* U */, 
+        0x00200000 /* V */, 0x00400000 /* W */, 0x037fffff /* X */, 
+        0x01000000 /* Y */, 0x02010010 /* Z */
+
+#endif
--- a/src/libapat/Gmach.h
+++ b/src/libapat/Gmach.h
@ -0,0 +1,97 @@
+/* ---------------------------------------------------------------- */
+/* Copyright (c) Atelier de BioInformatique                         */
+/* @file: Gmach.h                                                   */
+/* @desc: machine dependant setup                                   */
+/* @+     *should* be included in all ABI softs                     */
+/*                                                                  */
+/* @history:                                                        */
+/* @+       <Gloup> : Jul 95 : MWC first draft                      */
+/* @+       <Gloup> : Jan 96 : adapted to Pwg                       */
+/* @+       <Gloup> : Nov 00 : adapted to Mac_OS_X                  */
+/* ---------------------------------------------------------------- */
+
+#ifndef _H_Gmach
+
+                        /* OS names             */
+
+#define _H_Gmach
+
+                        /* Macintosh Classic    */
+                        /* Think C environment  */
+#ifdef THINK_C 
+#define MACINTOSH
+#define MAC_OS_C
+#endif
+
+
+                        /* Macintosh Classic    */
+                        /* Code-Warrior         */
+#ifdef __MWERKS__ 
+#define MACINTOSH
+#define MAC_OS_C
+#endif
+
+                        /* Macintosh OS-X       */
+#ifdef MAC_OS_X
+#define MACINTOSH
+#define UNIX
+#define UNIX_BSD
+#endif
+
+						/* LINUX				*/
+#ifdef LINUX
+#define UNIX
+#define UNIX_BSD
+#endif
+
+                        /* other Unix Boxes     */
+                        /* SunOS /  Solaris     */      
+#ifdef SUN
+#define UNIX
+#ifdef SOLARIS
+#define UNIX_S7
+#else
+#define UNIX_BSD
+#endif
+#endif
+
+                        /* SGI Irix             */      
+#ifdef SGI
+#define UNIX
+#define UNIX_S7
+#endif
+
+/* ansi setup                           */
+/* for unix machines see makefile       */
+
+#ifndef PROTO
+#define PROTO 1
+#endif
+
+#ifndef ANSI_PROTO
+#define ANSI_PROTO PROTO
+#endif
+
+#ifndef ANSI_STR
+#define ANSI_STR 1
+#endif
+
+/* unistd.h header file */
+
+#ifdef UNIX
+#define HAS_UNISTD_H <unistd.h>
+#endif
+
+/* getopt.h header file */
+
+#ifdef MAC_OS_C
+#define HAS_GETOPT_H "getopt.h"
+#endif
+
+#ifdef SGI
+#define HAS_GETOPT_H <getopt.h>
+#endif
+
+
+
+#endif
--- a/src/libapat/Gtypes.h
+++ b/src/libapat/Gtypes.h
@ -0,0 +1,104 @@
+/* ---------------------------------------------------------------- */
+/* Copyright (c) Atelier de BioInformatique                         */
+/* @file: Gtypes.h                                                  */
+/* @desc: general & machine dependant types                         */
+/* @+     *should* be included in all ABI softs                     */
+/*                                                                  */
+/* @history:                                                        */
+/* @+       <Gloup> : Jan 91 : MWC first draft                      */
+/* @+       <Gloup> : Jul 95 : Gmach addition                       */
+/* ---------------------------------------------------------------- */
+
+#define _H_Gtypes
+
+#ifndef _H_Gmach
+#include "Gmach.h"
+#endif
+
+#ifndef NULL
+#include <stdio.h>                      /* is the official NULL here ?  */
+#endif
+
+/* ==================================================== */
+/* constantes                                           */
+/* ==================================================== */
+
+#ifndef PROTO
+#define PROTO   1                       /* prototypes flag              */
+#endif
+
+#ifdef MAC_OS_C
+#define Vrai    true                    /* TC boolean values            */
+#define Faux    false                   /*                              */
+#else
+#define Vrai    0x1                     /* bool values  = TRUE          */
+#define Faux    0x0                     /*              = FALSE         */
+#endif
+
+#define Nil     NULL                    /* nil pointer                  */
+
+#define kBigInt16       0x7fff          /* plus grand 16 bits signe     */
+#define kBigInt32       0x7fffffff      /* plus grand 32 bits signe     */
+#define kBigUInt16      0xffff          /* plus grand 16 bits ~signe    */
+#define kBigUInt32      0xffffffff      /* plus grand 32 bits ~signe    */
+
+#ifdef MAC_OS_C
+/* ==================================================== */
+/*  Types (for Macintosh ThinK C || MWerks)             */
+/* ==================================================== */
+
+                                        /* --- specific sizes --------- */
+typedef long            Int32;          /* Int32  = 32 bits signe       */
+typedef unsigned long   UInt32;         /* UInt32 = 32 bits ~signe      */
+typedef short           Int16;          /* Int16  = 16 bits signe       */
+typedef unsigned short  UInt16;         /* UInt32 = 16 bits ~signe      */
+typedef char            Int8;           /* Int8   = 8 bits signe        */
+typedef unsigned char   UInt8;          /* UInt8  = 8 bits ~signe       */
+
+                                        /* --- default types ---------- */
+
+typedef Boolean         Bool;           /* booleen                      */
+
+typedef long            Int;            /* 'natural' int (>= 32 bits)   */
+
+typedef void            *Ptr;           /* pointeur                     */
+
+#elif ((defined SUN) || (defined SGI) || (defined UNIX))
+/* ==================================================== */
+/*  Types (for Sun & Iris - 32 bits machines)           */
+/* ==================================================== */
+
+                                        /* --- specific sizes --------- */
+typedef int             Int32;          /* Int32  = 32 bits signe       */
+typedef unsigned int    UInt32;         /* UInt32 = 32 bits ~signe      */
+typedef short           Int16;          /* Int16  = 16 bits signe       */
+typedef unsigned short  UInt16;         /* UInt32 = 16 bits ~signe      */
+typedef char            Int8;           /* Int8   = 8 bits signe        */
+typedef unsigned char   UInt8;          /* UInt8  = 8 bits ~signe       */
+
+                                        /* --- default types ---------- */
+
+typedef int             Bool;           /* booleen  (int for ANSI)      */
+
+typedef int             Int;            /* 'natural' int (>= 32 bits)   */
+
+typedef void            *Ptr;           /* pointeur                     */
+
+#else 
+/* ==================================================== */
+/*  Types (for undefined machines)                      */
+/* ==================================================== */
+
+#error undefined MACHINE <please edit Gmach.h>
+
+#endif
+
+/* ==================================================== */
+/*  special macro for prototypes                        */
+/* ==================================================== */
+
+#if PROTO
+#define         P(s)    s 
+#else
+#define         P(s)    ()
+#endif
--- a/src/libapat/apat.h
+++ b/src/libapat/apat.h
@ -0,0 +1,172 @@
+/* ==================================================== */
+/*      Copyright (c) Atelier de BioInformatique        */
+/*      Dec. 94                                         */
+/*      File: apat.h                                    */
+/*      Purpose: pattern scan                           */
+/*      History:                                        */
+/*      28/12/94 : <Gloup> ascan first version          */
+/*      14/05/99 : <Gloup> last revision                */
+/* ==================================================== */
+
+#ifndef _H_Gtypes
+#include "Gtypes.h"
+#endif
+
+#ifndef _H_libstki
+#include "libstki.h" 
+#endif
+
+#define H_apat
+
+/* ----------------------------------------------- */
+/* constantes                                      */
+/* ----------------------------------------------- */
+
+#ifndef BUFSIZ
+#define BUFSIZ          1024    /* io buffer size               */
+#endif
+
+#define MAX_NAME_LEN    BUFSIZ  /* max length of sequence name  */
+
+#define ALPHA_LEN        26     /* alphabet length              */
+                                /* *DO NOT* modify              */
+
+#define MAX_PATTERN       4     /* max # of patterns            */
+                                /* *DO NOT* modify              */
+
+#define MAX_PAT_LEN      32     /* max pattern length           */
+                                /* *DO NOT* modify              */
+
+#define MAX_PAT_ERR      32     /* max # of errors              */
+                                /* *DO NOT* modify              */
+
+#define PATMASK 0x3ffffff       /* mask for 26 symbols          */
+                                /* *DO NOT* modify              */
+
+#define OBLIBIT 0x4000000       /* bit 27 to 1 -> oblig. pos    */
+                                /* *DO NOT* modify              */
+
+                                /* mask for position            */
+#define ONEMASK 0x80000000      /* mask for highest position    */
+
+                                /* masks for Levenhstein edit   */
+#define OPER_IDT  0x00000000    /* identity                     */
+#define OPER_INS  0x40000000    /* insertion                    */
+#define OPER_DEL  0x80000000    /* deletion                     */
+#define OPER_SUB  0xc0000000    /* substitution                 */
+
+#define OPER_SHFT 30            /* <unused> shift               */
+
+                                /* Levenhstein Opcodes          */
+#define SOPER_IDT 0x0           /* identity                     */
+#define SOPER_INS 0x1           /* insertion                    */
+#define SOPER_DEL 0x2           /* deletion                     */
+#define SOPER_SUB 0x3           /* substitution                 */
+
+                                /* Levenhstein Opcodes masks    */
+#define OPERMASK  0xc0000000    /* mask for Opcodes             */
+#define NOPERMASK 0x3fffffff    /* negate of previous           */
+
+                                /* special chars in pattern     */
+#define PATCHARS  "[]!#"
+
+                                /* 26 letter alphabet           */
+                                /* in alphabetical order        */
+
+#define ORD_ALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+
+                                /* protein alphabet             */
+
+#define PROT_ALPHA "ACDEFGHIKLMNPQRSTVWY"
+
+                                /* dna/rna alphabet             */
+
+#define DNA_ALPHA "ABCDGHKMNRSTUVWXY"
+
+
+/* ----------------------------------------------- */
+/* data structures                                 */
+/* ----------------------------------------------- */
+
+                                        /* -------------------- */
+typedef enum {                          /* data encoding        */
+                                        /* -------------------- */
+        alpha = 0,                      /* [A-Z]                */
+        dna,                            /* IUPAC DNA            */
+        protein                         /* IUPAC proteins       */
+} CodType;
+
+                                        /* -------------------- */
+typedef struct {                        /* sequence             */
+                                        /* -------------------- */
+    char      *name;                    /* sequence name        */
+    Int32     seqlen;                   /* sequence length      */
+    Int32     seqsiz;                   /* sequence buffer size */
+    Int32     datsiz;                   /* data buffer size     */
+    UInt8     *data;                    /* data buffer          */
+    char      *cseq;                    /* sequence buffer      */
+    StackiPtr hitpos[MAX_PATTERN];      /* stack of hit pos.    */
+    StackiPtr hiterr[MAX_PATTERN];      /* stack of errors      */
+} Seq, *SeqPtr;
+
+                                        /* -------------------- */
+typedef struct {                        /* pattern              */
+                                        /* -------------------- */
+    int    patlen;                      /* pattern length       */
+    int    maxerr;                      /* max # of errors      */
+    char   *cpat;                       /* pattern string       */
+    Int32  *patcode;                    /* encoded pattern      */
+    UInt32 *smat;                       /* S matrix             */
+    UInt32 omask;                       /* oblig. bits mask     */
+    Bool   hasIndel;                    /* are indels allowed   */
+    Bool   ok;                          /* is pattern ok        */
+} Pattern, *PatternPtr;
+
+/* ----------------------------------------------- */
+/* macros                                          */
+/* ----------------------------------------------- */
+
+#ifndef NEW
+#define NEW(typ)                (typ*)malloc(sizeof(typ)) 
+#define NEWN(typ, dim)          (typ*)malloc((unsigned long)(dim) * sizeof(typ))
+#define REALLOC(typ, ptr, dim)  (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ))
+#define FREE(ptr)               free((void *) ptr)
+#endif
+
+/* ----------------------------------------------- */
+/* prototypes                                      */
+/* ----------------------------------------------- */
+
+                                     /* apat_seq.c */
+
+SeqPtr  FreeSequence     (SeqPtr pseq);
+SeqPtr  NewSequence      (void);
+int     ReadNextSequence (SeqPtr pseq);
+int     WriteSequence    (FILE *filou , SeqPtr pseq);
+
+                                   /* apat_parse.c      */
+
+Int32   *GetCode          (CodType ctype);
+int     CheckPattern      (Pattern *ppat);
+int     EncodePattern     (Pattern *ppat, CodType ctype);
+int     ReadPattern       (Pattern *ppat);
+void    PrintDebugPattern (Pattern *ppat);
+
+                                /* apat_search.c        */
+
+int     CreateS           (Pattern *ppat, Int32 lalpha);
+Int32   ManberNoErr       (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32   ManberSub         (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32   ManberIndel       (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32   ManberAll         (Seq *pseq , Pattern *ppat, int patnum,int begin,int length);
+Int32   NwsPatAlign       (Seq *pseq , Pattern *ppat, Int32 nerr , 
+                              Int32 *reslen , Int32 *reserr);
+
+                                   /* apat_sys.c   */
+
+float   UserCpuTime     (int reset);
+float   SysCpuTime      (int reset);
+char    *StrCpuTime     (int reset);
+void    Erreur          (char *msg , int stat);
+int     AccessFile      (char *path, char *mode);
+
--- a/src/libapat/apat_parse.c
+++ b/src/libapat/apat_parse.c
@ -0,0 +1,369 @@
+/* ==================================================== */
+/*      Copyright (c) Atelier de BioInformatique        */
+/*      Mar. 92                                         */
+/*      File: apat_parse.c                              */
+/*      Purpose: Codage du pattern                      */
+/*      History:                                        */
+/*      00/07/94 : <Gloup> first version (stanford)     */
+/*      00/11/94 : <Gloup> revised for DNA/PROTEIN      */
+/*      30/12/94 : <Gloup> modified EncodePattern       */
+/*                         for manber search            */
+/*      14/05/99 : <Gloup> indels added                 */
+/* ==================================================== */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "Gtypes.h"
+#include "apat.h"
+                                /* -------------------- */
+                                /* default char         */
+                                /* encodings            */
+                                /* -------------------- */
+
+static Int32 sDftCode[]  =  {
+
+#include "CODES/dft_code.h"
+
+};
+                                /* -------------------- */
+                                /* char encodings       */
+                                /* IUPAC                */
+                                /* -------------------- */
+
+                                /* IUPAC Proteins       */
+static Int32 sProtCode[]  =  {
+
+#include "CODES/prot_code.h"
+
+};
+                                /* IUPAC Dna/Rna        */
+static Int32 sDnaCode[]  =  {
+
+#include "CODES/dna_code.h"
+
+};
+
+
+/* -------------------------------------------- */
+/* internal replacement of gets                 */
+/* -------------------------------------------- */
+static char *sGets(char *buffer, int size) {
+        
+        char *ebuf;
+
+        if (! fgets(buffer, size-1, stdin))
+           return NULL;
+
+        /* remove trailing line feed */
+
+        ebuf = buffer + strlen(buffer); 
+
+        while (--ebuf >= buffer) {
+           if ((*ebuf == '\n') || (*ebuf == '\r'))
+                *ebuf = '\000';
+           else
+                break;
+        }
+
+        return buffer;
+}
+
+/* -------------------------------------------- */
+/* returns actual code associated to type       */
+/* -------------------------------------------- */
+
+Int32 *GetCode(CodType ctype)
+{
+        Int32 *code = sDftCode;
+
+        switch (ctype) {
+           case dna     : code = sDnaCode  ; break;
+           case protein : code = sProtCode ; break;
+           default      : code = sDftCode  ; break;
+        }
+
+        return code;
+}
+
+/* -------------------------------------------- */
+
+#define BAD_IF(tst)   if (tst)  return 0
+
+int CheckPattern(Pattern *ppat)
+{
+        int lev;
+        char *pat;
+
+        pat = ppat->cpat;
+        
+        BAD_IF (*pat == '#');
+
+        for (lev = 0; *pat ; pat++)
+
+            switch (*pat) {
+
+                case '[' :
+                   BAD_IF (lev);
+                   BAD_IF (*(pat+1) == ']');
+                   lev++;
+                   break;
+
+                case ']' :
+                   lev--;
+                   BAD_IF (lev);
+                   break;
+
+                case '!' :
+                   BAD_IF (lev);
+                   BAD_IF (! *(pat+1));
+                   BAD_IF (*(pat+1) == ']');
+                   break;
+
+                case '#' :
+                   BAD_IF (lev);
+                   BAD_IF (*(pat-1) == '[');
+                   break;
+
+                default :
+                   if (! isupper(*pat))
+                        return 0;
+                   break;
+            }
+
+        return (lev ? 0 : 1);
+}
+ 
+#undef BAD_IF
+           
+
+/* -------------------------------------------- */
+static char *skipOblig(char *pat)
+{
+        return (*(pat+1) == '#' ? pat+1 : pat);
+}
+
+/* -------------------------------------------- */
+static char *splitPattern(char *pat)
+{
+        switch (*pat) {
+                   
+                case '[' :
+                   for (; *pat; pat++)
+                        if (*pat == ']')
+                          return skipOblig(pat);
+                   return NULL;
+                   break;
+
+                case '!' :
+                   return splitPattern(pat+1);
+                   break;
+
+        }
+        
+        return skipOblig(pat);                  
+}
+
+/* -------------------------------------------- */
+static Int32 valPattern(char *pat, Int32 *code)
+{
+        Int32 val;
+        
+        switch (*pat) {
+                   
+                case '[' :
+                   return valPattern(pat+1, code);
+                   break;
+
+                case '!' :
+                   val = valPattern(pat+1, code);
+                   return (~val & PATMASK);
+                   break;
+
+                default :
+                   val = 0x0;
+                   while (isupper(*pat)) {
+                       val |= code[*pat - 'A'];
+                       pat++;
+                   }
+                   return val;
+        }
+
+        return 0x0;                     
+}
+
+/* -------------------------------------------- */
+static Int32 obliBitPattern(char *pat)
+{
+        return (*(pat + strlen(pat) - 1) == '#' ? OBLIBIT : 0x0);
+}       
+                
+
+/* -------------------------------------------- */
+static int lenPattern(char *pat)
+{
+        int  lpat;
+
+        lpat = 0;
+        
+        while (*pat) {
+        
+            if (! (pat = splitPattern(pat)))
+                return 0;
+
+            pat++;
+
+            lpat++;
+        }
+
+        return lpat;
+}
+
+/* -------------------------------------------- */
+/* Interface                                    */
+/* -------------------------------------------- */
+
+/* -------------------------------------------- */
+/* encode un pattern                            */
+/* -------------------------------------------- */
+int EncodePattern(Pattern *ppat, CodType ctype)
+{
+        int   pos, lpat;
+        Int32 *code;
+        char  *pp, *pa, c;
+
+        ppat->ok = Faux;
+
+        code = GetCode(ctype);
+
+        ppat->patlen = lpat = lenPattern(ppat->cpat);
+        
+        if (lpat <= 0)
+            return 0;
+        
+        if (! (ppat->patcode = NEWN(Int32, lpat)))
+            return 0;
+
+        pa = pp = ppat->cpat;
+
+        pos = 0;
+        
+        while (*pa) {
+        
+            pp = splitPattern(pa);
+
+            c = *++pp;
+            
+            *pp = '\000';
+                    
+            ppat->patcode[pos++] = valPattern(pa, code) | obliBitPattern(pa);
+            
+            *pp = c;
+            
+            pa = pp;
+        }
+
+        ppat->ok = Vrai;
+
+        return lpat;
+}
+
+/* -------------------------------------------- */
+/* remove blanks                                */
+/* -------------------------------------------- */
+static char *RemBlanks(char *s)
+{
+        char *sb, *sc;
+
+        for (sb = sc = s ; *sb ; sb++)
+           if (! isspace(*sb))
+                *sc++ = *sb;
+
+        return s;
+}
+
+/* -------------------------------------------- */
+/* count non blanks                             */
+/* -------------------------------------------- */
+static Int32 CountAlpha(char *s)
+{
+        Int32 n;
+
+        for (n = 0 ; *s ; s++)
+           if (! isspace(*s))
+                n++;
+
+        return n;
+}
+           
+        
+/* -------------------------------------------- */
+/* lit un pattern                               */
+/* <pattern> #mis                               */
+/* ligne starting with '/' are comments         */
+/* -------------------------------------------- */
+int ReadPattern(Pattern *ppat)
+{
+        int  val;
+        char *spac;
+        char buffer[BUFSIZ];
+
+        ppat->ok = Vrai;
+
+        if (! sGets(buffer, sizeof(buffer)))
+            return 0;
+
+        if (*buffer == '/')
+            return ReadPattern(ppat);
+
+        if (! CountAlpha(buffer))
+            return ReadPattern(ppat);
+
+        for (spac = buffer ; *spac ; spac++)
+            if ((*spac == ' ') || (*spac == '\t'))
+                break;
+
+        ppat->ok = Faux;
+
+        if (! *spac)
+            return 0;
+        
+        if (sscanf(spac, "%d", &val) != 1)
+            return 0;
+
+        ppat->hasIndel = (val < 0);
+        
+        ppat->maxerr = ((val >= 0) ? val : -val);
+
+        *spac = '\000';
+
+        (void) RemBlanks(buffer);
+
+        if ((ppat->cpat = NEWN(char, strlen(buffer)+1)))
+            strcpy(ppat->cpat, buffer);
+        
+        ppat->ok = (ppat->cpat != NULL);
+
+        return (ppat->ok ? 1 : 0);
+}
+
+/* -------------------------------------------- */
+/* ecrit un pattern - Debug -                   */
+/* -------------------------------------------- */
+void PrintDebugPattern(Pattern *ppat)
+{
+        int i;
+
+        printf("Pattern  : %s\n", ppat->cpat);
+        printf("Encoding : \n\t");
+
+        for (i = 0 ; i < ppat->patlen ; i++) {
+            printf("0x%8.8x ", ppat->patcode[i]);
+            if (i%4 == 3)
+                printf("\n\t");
+        }
+        printf("\n");
+}
+
--- a/src/libapat/apat_search.c
+++ b/src/libapat/apat_search.c
@ -0,0 +1,339 @@
+/* ==================================================== */
+/*      Copyright (c) Atelier de BioInformatique        */
+/*      Dec. 94                                         */
+/*      File: apat_search.c                             */
+/*      Purpose: recherche du pattern                   */
+/*               algorithme de Baeza-Yates/Gonnet       */
+/*                             Manber (agrep)           */
+/*      History:                                        */
+/*      07/12/94 : <MFS>   first version                */
+/*      28/12/94 : <Gloup> revised version              */
+/*      14/05/99 : <Gloup> last revision                */
+/* ==================================================== */
+
+#if 0
+#ifndef THINK_C
+#include <sys/types.h>
+#endif
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Gtypes.h"
+#include "libstki.h"
+#include "apat.h"
+
+#define POP             PopiOut
+#define PUSH            PushiIn
+#define TOPCURS         CursiToTop
+#define DOWNREAD        ReadiDown
+
+#define KRONECK(x, msk) ((~x & msk) ? 0 : 1)
+#define MIN(x, y)       ((x) < (y)  ? (x) : (y))
+
+/* -------------------------------------------- */
+/* Construction de la matrice S                 */
+/* -------------------------------------------- */
+
+int CreateS(Pattern *ppat, Int32 lalpha)
+{
+        Int32     i, j, indx; 
+        UInt32    pindx, amask, omask, *smat;
+
+        ppat->ok = Faux;
+
+        omask = 0x0L;
+
+        if (! (smat = NEWN(UInt32, lalpha)))
+            return 0;
+
+        for (i = 0 ; i < lalpha ; i++)
+           smat[i] = 0x0;
+
+        for (i = ppat->patlen - 1, amask = 0x1L ; i >= 0 ; i--, amask <<= 1) {
+
+            indx = ppat->patcode[i];
+
+            if (ppat->patcode[i] & OBLIBIT)
+                omask |= amask;
+
+            for (j = 0, pindx = 0x1L ; j < lalpha ; j++, pindx <<= 1)
+                if (indx & pindx)
+                    smat[j] |= amask;
+        }
+
+        ppat->smat = smat;
+
+        ppat->omask = omask;
+
+        ppat->ok = Vrai;
+
+        return 1;
+
+}
+
+/* -------------------------------------------- */
+/* Baeza-Yates/Manber algorithm                 */
+/* NoError                                      */
+/* -------------------------------------------- */
+Int32 ManberNoErr(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
+{
+        Int32     pos;
+        UInt32    smask, r;
+        UInt8     *data;
+        StackiPtr *stkpos, *stkerr;
+        UInt32    end;
+        
+        end = begin + length;
+        end = (end <= pseq->seqlen) ? end:pseq->seqlen;
+        
+
+                                        /* create local masks   */
+
+        smask = r = 0x1L << ppat->patlen;
+
+                                        /* init. scan           */
+        data   = pseq->data + begin;
+        stkpos = pseq->hitpos + patnum;
+        stkerr = pseq->hiterr + patnum;
+
+                                        /* loop on text data    */
+                                        
+        for (pos = begin ; pos < end ; pos++) {
+
+            r = (r >> 1) & ppat->smat[*data++];
+
+            if (r & 0x1L) {
+                PUSH(stkpos, pos - ppat->patlen + 1);
+                PUSH(stkerr, 0);
+            }
+
+            r |= smask;
+        }
+        
+        return (*stkpos)->top;  /* aka # of hits        */
+}
+
+/* -------------------------------------------- */
+/* Baeza-Yates/Manber algorithm                 */
+/* Substitution only                            */
+/*                                              */
+/* Note : r array is stored as :                */
+/*    0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ...   */
+/*                                              */
+/* -------------------------------------------- */
+Int32 ManberSub(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
+{
+        int       e, emax, found;
+        Int32     pos;
+        UInt32    smask, cmask, sindx;
+        UInt32    *pr, r[2 * MAX_PAT_ERR + 2];
+        UInt8     *data;
+        StackiPtr *stkpos, *stkerr;
+        UInt32    end;
+        
+        end = begin + length;
+        end = (end <= pseq->seqlen) ? end:pseq->seqlen;
+
+                                        /* create local masks   */
+        emax = ppat->maxerr;
+
+        r[0] = r[1] = 0x0;
+
+        cmask = smask = 0x1L << ppat->patlen;
+
+        for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2)
+                *pr = cmask;
+
+        cmask = ~ ppat->omask;
+
+                                        /* init. scan           */
+        data   = pseq->data + begin;
+        stkpos = pseq->hitpos + patnum;
+        stkerr = pseq->hiterr + patnum;
+
+                                        /* loop on text data    */
+                                        
+        for (pos = begin ; pos < end ; pos++) {
+
+            sindx  = ppat->smat[*data++];
+
+            for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {
+                
+                pr[2]  = pr[3] | smask;
+
+                pr[3]  =   ((pr[0] >> 1) & cmask)       /* sub   */
+                         | ((pr[2] >> 1) & sindx);      /* ident */
+
+                if (pr[3] & 0x1L) {                     /* found */
+                    if (! found)  {
+                       PUSH(stkpos, pos - ppat->patlen + 1);
+                       PUSH(stkerr, e);
+                    }
+                    found++;
+                }
+            }
+        }
+
+        return (*stkpos)->top;  /* aka # of hits        */
+}
+
+/* -------------------------------------------- */
+/* Baeza-Yates/Manber algorithm                 */
+/* Substitution + Indels                        */
+/*                                              */
+/* Note : r array is stored as :                */
+/*    0 0 r(0,j) r(0,j+1) r(1,j) r(1,j+1) ...   */
+/*                                              */
+/* Warning: may return shifted pos.             */
+/*                                              */
+/* -------------------------------------------- */
+Int32 ManberIndel(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
+{
+        int       e, emax, found;
+        Int32     pos;
+        UInt32    smask, cmask, sindx;
+        UInt32    *pr, r[2 * MAX_PAT_ERR + 2];
+        UInt8     *data;
+        StackiPtr *stkpos, *stkerr;
+        UInt32    end;
+        
+        end = begin + length;
+        end = (end <= pseq->seqlen) ? end:pseq->seqlen;
+
+                                        /* create local masks   */
+        emax = ppat->maxerr;
+
+        r[0] = r[1] = 0x0;
+
+        cmask = smask = 0x1L << ppat->patlen;
+
+        for (e = 0, pr = r + 3 ; e <= emax ; e++, pr += 2) {
+                *pr = cmask;
+                cmask = (cmask >> 1) | smask;
+        }
+
+        cmask = ~ ppat->omask;
+
+                                        /* init. scan           */
+        data   = pseq->data + begin;
+        stkpos = pseq->hitpos + patnum;
+        stkerr = pseq->hiterr + patnum;
+
+                                        /* loop on text data    */
+                                        
+        for (pos = begin ; pos < end ; pos++) {
+
+            sindx  = ppat->smat[*data++];
+
+            for (e = found = 0, pr = r ; e <= emax ; e++, pr += 2) {
+                
+                pr[2]  = pr[3] | smask;
+
+                pr[3]   =  ((     pr[0]                 /* ins   */
+                              |  (pr[0] >> 1)           /* sub   */
+                              |  (pr[1] >> 1))          /* del   */
+                            & cmask)
+                          | ((pr[2] >> 1) & sindx);     /* ident */
+
+                if (pr[3] & 0x1L) {                     /* found */
+                    if (! found) {
+                        PUSH(stkpos, pos - ppat->patlen + 1);
+                        PUSH(stkerr, e);
+                    }
+                    found++;
+                }
+
+            }
+        }
+
+        return (*stkpos)->top;  /* aka # of hits        */
+}
+
+/* -------------------------------------------- */
+/* Baeza-Yates/Manber algorithm                 */
+/* API call to previous functions               */
+/* -------------------------------------------- */
+Int32 ManberAll(Seq *pseq, Pattern *ppat, int patnum,int begin,int length)
+{
+        if (ppat->maxerr == 0)
+           return ManberNoErr(pseq, ppat, patnum, begin, length);
+        else if (ppat->hasIndel)
+           return ManberIndel(pseq, ppat, patnum, begin, length);
+        else
+           return ManberSub(pseq, ppat, patnum, begin, length);
+}
+
+
+/* -------------------------------------------- */
+/* Alignement NWS                               */
+/* pour edition des hits                        */
+/* (avec substitution obligatoire aux bords)    */
+/* -------------------------------------------- */
+
+Int32 NwsPatAlign(pseq, ppat, nerr, reslen, reserr)
+        Seq     *pseq;
+        Pattern *ppat;
+        Int32   nerr, *reslen, *reserr;
+{
+        UInt8  *sseq, *px;
+        Int32  i, j, lseq, lpat, npos, dindel, dsub,
+               *pc, *pi, *pd, *ps;
+        UInt32 amask;
+
+        static Int32 sTab[(MAX_PAT_LEN+MAX_PAT_ERR+1) * (MAX_PAT_LEN+1)];
+
+        lseq = pseq->seqlen;
+
+        pc = sTab;              /*  |----|----| --> i   */
+        pi = pc - 1;            /*  | ps | pd | |       */
+        pd = pi - lseq;         /*  |----|----| |       */
+        ps = pd - 1;            /*  | pi | pc | v j     */
+                                /*  |---------|         */
+
+        lseq = pseq->seqlen;
+        lpat = ppat->patlen;
+
+        sseq = pseq->data - 1;
+
+        amask = ONEMASK >> lpat;
+
+        for (j = 0 ; j <= lpat ; j++) {
+
+           for (i = 0 , px = sseq ; i <= lseq ; i++, px++) {
+
+              if (i && j) {
+                  dindel = MIN(*pi, *pd) + 1;
+                  dsub   = *ps + KRONECK(ppat->smat[*px], amask);
+                  *pc    = MIN(dindel, dsub);
+              }
+              else if (i)               /* j == 0       */
+                  *pc = *pi + 1;
+              else if (j)               /* i == 0       */
+                  *pc = *pd + 1;
+              else                      /* root         */
+                   *pc = 0;
+
+              pc++;
+              pi++;
+              pd++;
+              ps++;
+           }
+        
+           amask <<= 1;
+        }
+
+        pc--;
+
+        for (i = lseq, npos = 0 ; i >= 0 ; i--, pc--) {
+            if (*pc <= nerr) {
+                *reslen++ =  i;
+                *reserr++ = *pc;
+                npos++;
+             }
+        }
+
+        return npos;
+}
--- a/src/libapat/libstki.c
+++ b/src/libapat/libstki.c
@ -0,0 +1,379 @@
+/* ==================================================== */
+/*      Copyright (c) Atelier de BioInformatique        */
+/*      Mar. 92                                         */
+/*      File: libstki.c                                 */
+/*      Purpose: A library to deal with 'stacks' of     */
+/*               integers                               */
+/*      Note: 'stacks' are dynamic (i.e. size is        */
+/*            automatically readjusted when needed)     */
+/*      History:                                        */
+/*      00/03/92 : <Gloup> first draft                  */
+/*      15/08/93 : <Gloup> revised version              */
+/*      14/05/99 : <Gloup> last revision                */
+/* ==================================================== */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Gtypes.h"
+#include "libstki.h"
+
+
+/* ============================ */
+/* Constantes et Macros locales */
+/* ============================ */
+
+#define ExpandStack(stkh) ResizeStacki((stkh), (*stkh)->size << 1)
+
+#define ShrinkStack(stkh) ResizeStacki((stkh), (*stkh)->size >> 1)
+
+
+static Int16 sStkiLastError = kStkiNoErr;
+
+/* -------------------------------------------- */
+/* gestion des erreurs                          */
+/* get/reset erreur flag                        */
+/*                                              */
+/* @function: StkiError                         */
+/* -------------------------------------------- */
+
+Int16 StkiError(Bool reset)
+{
+        Int16 err;
+        
+        err = sStkiLastError;
+        
+        if (reset)
+           sStkiLastError = kStkiNoErr;
+           
+        return err;
+        
+} /* end of StkiError */
+
+/* -------------------------------------------- */
+/* creation d'un stack                          */
+/*                                              */
+/* @function: NewStacki                         */
+/* -------------------------------------------- */
+
+StackiPtr NewStacki(Int32 size)
+{
+        StackiPtr stki;
+        
+        if (! (stki = NEW(Stacki)))
+                return NULL;
+
+        stki->size    = size;
+        stki->top     = 0;
+        stki->cursor  = 0;
+        
+        if ( ! (stki->val = NEWN(Int32, size))) {
+            sStkiLastError = kStkiMemErr;
+            return FreeStacki(stki);
+        }
+
+        return stki;    
+                        
+} /* end of NewStacki */
+
+
+/* -------------------------------------------- */
+/* liberation d'un stack                        */
+/*                                              */
+/* @function: FreeStacki                        */
+/* -------------------------------------------- */
+
+StackiPtr FreeStacki(StackiPtr stki)
+{
+        if (stki) {
+            if (stki->val)
+                FREE(stki->val);
+            FREE(stki);
+        }
+        
+        return NULL;
+        
+} /* end of FreeStacki */
+
+/* -------------------------------------------- */
+/* creation d'un vecteur de stacks              */
+/*                                              */
+/* @function: NewStackiVector                   */
+/* -------------------------------------------- */
+
+StackiHdle NewStackiVector(Int32 vectSize, Int32 stackSize)
+{
+        Int32           i;
+        StackiHdle      stkh;
+        
+        if (! (stkh = NEWN(StackiPtr, vectSize))) {
+            sStkiLastError = kStkiMemErr;
+            return NULL;
+        }
+        
+        for (i = 0 ; i < vectSize ; i++)
+            if (! (stkh[i] = NewStacki(stackSize)))
+                return FreeStackiVector(stkh, i);
+            
+        return stkh;
+        
+} /* end of NewStackiVector */
+
+
+/* -------------------------------------------- */
+/* liberation d'un vecteur de stacks            */
+/*                                              */
+/* @function: FreeStackiVector                  */
+/* -------------------------------------------- */
+
+StackiHdle FreeStackiVector(StackiHdle stkh, Int32 vectSize)
+{
+        Int32   i;
+        
+        if (stkh) {
+            for (i = 0 ; i < vectSize ; i++)
+                (void) FreeStacki(stkh[i]);
+            FREE(stkh);
+        }
+
+        return NULL;
+                
+} /* end of FreeStackiVector */
+
+/* -------------------------------------------- */
+/* resize d'un stack                            */
+/*                                              */
+/* @function: ResizeStacki                      */
+/* -------------------------------------------- */
+
+Int32 ResizeStacki(StackiHdle stkh, Int32 size)
+{
+        Int32 resize = 0;               /* assume error         */
+        Int32 *val;
+        
+        if ((val = REALLOC(Int32, (*stkh)->val, size))) {
+            (*stkh)->size = resize = size;
+            (*stkh)->val = val;
+        }
+
+        if (! resize)
+            sStkiLastError = kStkiMemErr;
+
+        return resize;
+        
+} /* end of ResizeStacki */
+
+/* -------------------------------------------- */
+/* empilage(/lement)                            */
+/*                                              */
+/* @function: PushiIn                           */
+/* -------------------------------------------- */
+
+Bool PushiIn(StackiHdle stkh, Int32 val)
+{
+        if (((*stkh)->top >= (*stkh)->size) && (! ExpandStack(stkh)))
+            return Faux;
+
+        (*stkh)->val[((*stkh)->top)++] = val;
+
+        return Vrai;
+        
+} /* end of PushiIn */
+
+/* -------------------------------------------- */
+/* depilage(/lement)                            */
+/*                                              */
+/* @function: PopiOut                           */
+/* -------------------------------------------- */
+
+Bool PopiOut(StackiHdle stkh, Int32 *val)
+{
+        if ((*stkh)->top <= 0)
+            return Faux;
+        
+        *val = (*stkh)->val[--((*stkh)->top)];
+
+        if (    ((*stkh)->top < ((*stkh)->size >> 1)) 
+             && ((*stkh)->top > kMinStackiSize))
+
+            (void) ShrinkStack(stkh);
+
+        return Vrai;
+        
+} /* end of PopiOut */
+                
+/* -------------------------------------------- */
+/* lecture descendante                          */
+/*                                              */
+/* @function: ReadiDown                         */
+/* -------------------------------------------- */
+
+Bool ReadiDown(StackiPtr stki, Int32 *val)
+{
+        if (stki->cursor <= 0)
+            return Faux;
+                
+        *val = stki->val[--(stki->cursor)];
+        
+        return Vrai;
+        
+} /* end of ReadiDown */
+
+/* -------------------------------------------- */
+/* lecture ascendante                           */
+/*                                              */
+/* @function: ReadiUp                           */
+/* -------------------------------------------- */
+
+Bool ReadiUp(StackiPtr stki, Int32 *val)
+{
+        if (stki->cursor >= stki->top)
+            return Faux;
+                
+        *val = stki->val[(stki->cursor)++];
+        
+        return Vrai;
+        
+} /* end of ReadiUp */
+
+/* -------------------------------------------- */
+/* remontee/descente du curseur                 */
+/*                                              */
+/* @function: CursiToTop                        */
+/* @function: CursiToBottom                     */
+/* -------------------------------------------- */
+
+void CursiToTop(StackiPtr stki)
+{
+        stki->cursor = stki->top;
+
+} /* end of CursiToTop */
+
+void CursiToBottom(stki)
+        StackiPtr stki;
+{
+        stki->cursor = 0;
+
+} /* end of CursiToBottom */
+
+/* -------------------------------------------- */
+/* echange des valeurs cursor <-> (top - 1)     */
+/*                                              */
+/* @function: CursiSwap                         */
+/* -------------------------------------------- */
+
+void CursiSwap(StackiPtr stki)
+{
+        Int32   tmp;
+        
+        if ((stki->top <= 0) || (stki->cursor < 0))
+            return;
+        
+        tmp = stki->val[stki->cursor];
+        stki->val[stki->cursor] = stki->val[stki->top - 1];
+        stki->val[stki->top - 1] = tmp;
+        
+} /* end of CursiSwap */
+
+/* -------------------------------------------- */
+/* Recherche d'une valeur en stack a partir du  */
+/* curseur courant en descendant.               */
+/* on laisse le curseur a l'endroit trouve      */
+/*                                              */
+/* @function: SearchDownStacki                  */
+/* -------------------------------------------- */
+
+Bool SearchDownStacki(StackiPtr stki, Int32 sval)
+{
+        Int32   val;
+        Bool    more;
+        
+        while ((more = ReadiDown(stki, &val)))
+            if (val == sval) 
+                break;
+        
+        return more;
+        
+} /* end of SearchDownStacki */
+
+/* -------------------------------------------- */
+/* Recherche dichotomique d'une valeur en stack */
+/* le stack est suppose trie par valeurs        */
+/* croissantes.                                 */
+/* on place le curseur a l'endroit trouve       */
+/*                                              */
+/* @function: BinSearchStacki                   */
+/* -------------------------------------------- */
+
+Bool BinSearchStacki(StackiPtr stki, Int32 sval)
+{
+        Int32   midd, low, high, span;
+
+        low  = 0;
+        high = stki->top - 1;
+        
+        while (high >= low) {   
+
+            midd = (high + low) / 2;
+
+            span = stki->val[midd] - sval;
+
+            if (span == 0) {
+                stki->cursor = midd;
+                return Vrai;
+            }
+        
+            if (span > 0)
+                high = midd - 1;
+            else
+                low  = midd + 1;
+        }
+
+        return Faux;
+        
+} /* end of BinSearchStacki */
+
+/* -------------------------------------------- */
+/* teste l'egalite *physique* de deux stacks    */
+/*                                              */
+/* @function: SameStacki                        */
+/* -------------------------------------------- */
+
+Bool SameStacki(StackiPtr stki1, StackiPtr stki2)
+{
+        if (stki1->top != stki2->top) 
+            return Faux;
+        
+        return ((memcmp(stki1->val, stki2->val, 
+                        stki1->top * sizeof(Int32)) == 0) ? Vrai : Faux);
+                        
+} /* end of SameStacki */
+
+
+/* -------------------------------------------- */
+/* inverse l'ordre des elements dans un stack   */
+/*                                              */
+/* @function: ReverseStacki                     */
+/* -------------------------------------------- */
+
+Bool ReverseStacki(StackiPtr stki)
+{
+        Int32   *t, *b, swp;
+
+        if (stki->top <= 0) 
+            return Faux;
+        
+        b = stki->val;
+        t = b + stki->top - 1;
+
+        while (t > b) {
+             swp  = *t;
+             *t-- = *b;
+             *b++ = swp;
+        }
+
+        return Vrai;
+        
+} /* end of ReverseStacki */
+
--- a/src/libapat/libstki.h
+++ b/src/libapat/libstki.h
@ -0,0 +1,87 @@
+/* ==================================================== */
+/*      Copyright (c) Atelier de BioInformatique        */
+/*      Mar. 92                                         */
+/*      File: libstki.h                                 */
+/*      Purpose: library of dynamic stacks holding      */
+/*               integer values                         */
+/*      History:                                        */
+/*      00/03/92 : <Gloup> first draft                  */
+/*      07/07/93 : <Gloup> complete revision            */
+/*      10/03/94 : <Gloup> added xxxVector funcs        */
+/*      14/05/99 : <Gloup> last revision                */
+/* ==================================================== */
+
+#ifndef _H_Gtypes
+#include "Gtypes.h"
+#endif
+
+#define _H_libstki
+
+/* ==================================================== */
+/* Constantes de dimensionnement                        */
+/* ==================================================== */
+
+#ifndef kMinStackiSize
+#define kMinStackiSize          2       /* taille mini stack    */
+#endif
+
+
+#define kStkiNoErr              0       /* ok                   */
+#define kStkiMemErr             1       /* not enough memory    */
+
+#define kStkiReset              Vrai
+#define kStkiGet                Faux
+
+/* ==================================================== */
+/* Macros standards                                     */
+/* ==================================================== */
+
+#ifndef NEW
+#define NEW(typ)                (typ*)malloc(sizeof(typ)) 
+#define NEWN(typ, dim)          (typ*)malloc((unsigned long)(dim) * sizeof(typ))
+#define REALLOC(typ, ptr, dim)  (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ))
+#define FREE(ptr)               free((Ptr) ptr)
+#endif
+
+
+/* ==================================================== */
+/*  Types & Structures de donnees                       */
+/* ==================================================== */
+
+                                                /* -------------------- */
+                                                /* structure : pile     */
+                                                /* -------------------- */
+typedef struct Stacki {
+                                                /* ---------------------*/
+        Int32                   size;           /* stack size           */
+        Int32                   top;            /* current free pos.    */
+        Int32                   cursor;         /* current cursor       */
+        Int32                   *val;           /* values               */
+                                                /* ---------------------*/
+} Stacki, *StackiPtr, **StackiHdle;      
+
+
+
+/* ==================================================== */
+/*  Prototypes (generated by mproto)                    */
+/* ==================================================== */
+
+                                        /* libstki.c    */
+                                        
+Int16           StkiError          (Bool       reset                    );
+StackiPtr       NewStacki          (Int32      size                     );
+StackiPtr       FreeStacki         (StackiPtr  stki                     );
+StackiHdle      NewStackiVector    (Int32 vectSize,   Int32 stackSize   );
+StackiHdle      FreeStackiVector   (StackiHdle stkh,  Int32 vectSize    );
+Int32           ResizeStacki       (StackiHdle stkh , Int32 size        );
+Bool            PushiIn            (StackiHdle stkh , Int32 val         );
+Bool            PopiOut            (StackiHdle stkh , Int32 *val        );
+Bool            ReadiDown          (StackiPtr  stki , Int32 *val        );
+Bool            ReadiUp            (StackiPtr  stki , Int32 *val        );
+void            CursiToTop         (StackiPtr  stki                     );
+void            CursiToBottom      (StackiPtr  stki                     );
+void            CursiSwap          (StackiPtr  stki                     );
+Bool            SearchDownStacki   (StackiPtr  stki  , Int32 sval       );
+Bool            BinSearchStacki    (StackiPtr  stki  , Int32 sval       );
+Bool            SameStacki         (StackiPtr  stki1 , StackiPtr stki2  );
+Bool            ReverseStacki      (StackiPtr  stki                     );
--- a/src/libecoPCR/ecoError.c
+++ b/src/libecoPCR/ecoError.c
@ -0,0 +1,18 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+
+void ecoError(int32_t error,
+              const char* message,
+              const char * filename,
+              int linenumber)
+{
+	fprintf(stderr,"Error %d in file %s line %d : %s\n",
+	               error,
+	               filename,
+	               linenumber,
+	               message);
+	
+	abort();
+}
--- a/src/libecoPCR/ecoIOUtils.c
+++ b/src/libecoPCR/ecoIOUtils.c
@ -0,0 +1,119 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define SWAPINT32(x)     ((((x) << 24) & 0xFF000000) | (((x) <<  8) & 0xFF0000) | \
+                         (((x) >>  8) & 0xFF00)     | (((x) >> 24) & 0xFF))
+
+
+int32_t is_big_endian()
+{
+	int32_t i=1;
+	
+	return (int32_t)((char*)&i)[0];
+}
+
+
+
+
+
+
+
+int32_t swap_int32_t(int32_t i)
+{
+	return SWAPINT32(i);
+}
+
+
+
+
+
+
+
+void *read_ecorecord(FILE *f,int32_t *recordSize)
+{
+	static void *buffer    =NULL;
+	int32_t      buffersize=0;
+	int32_t      read;
+	
+	if (!recordSize)
+		ECOERROR(ECO_ASSERT_ERROR,
+		         "recordSize cannot be NULL");
+		
+	read = fread(recordSize,
+	      		 1,
+	      		 sizeof(int32_t),
+	             f);
+	             
+	if (feof(f))
+		return NULL;
+	             
+	if (read != sizeof(int32_t))
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+		
+	if (is_big_endian())
+		*recordSize=swap_int32_t(*recordSize);
+		
+	if (buffersize < *recordSize)
+	{
+		if (buffer)
+			buffer = ECOREALLOC(buffer,*recordSize,
+			                    "Increase size of record buffer");
+		else
+			buffer = ECOMALLOC(*recordSize,
+			                    "Allocate record buffer");
+	}
+	
+	read = fread(buffer,
+	             1,
+				 *recordSize,
+				 f);
+				 
+	if (read != *recordSize)
+		ECOERROR(ECO_IO_ERROR,"Reading record data error");
+		
+	return buffer;	 
+};
+
+
+
+
+
+
+
+
+
+FILE *open_ecorecorddb(const char *filename,
+                       int32_t    *sequencecount,
+                       int32_t    abort_on_open_error)
+{
+    FILE        *f;
+	int32_t      read;
+	
+	f = fopen(filename,"rb");
+	
+	if (!f)
+		{
+			if (abort_on_open_error)
+		 		ECOERROR(ECO_IO_ERROR,"Cannot open file");
+		 	else
+		 	{
+		 		*sequencecount=0;
+		 		return NULL;
+		 	}
+		}
+		
+	read = fread(sequencecount,
+	      		 1,
+	      		 sizeof(int32_t),
+	      		 f);
+	             
+	if (read != sizeof(int32_t))
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+	if (is_big_endian())
+		*sequencecount=swap_int32_t(*sequencecount);
+		
+	return f;                  
+}
+
--- a/src/libecoPCR/ecoMalloc.c
+++ b/src/libecoPCR/ecoMalloc.c
@ -0,0 +1,79 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+
+static int eco_log_malloc = 0;
+
+void    eco_trace_memory_allocation()
+{
+	eco_log_malloc=1;
+}
+
+void    eco_untrace_memory_allocation()
+{
+	eco_log_malloc=0;
+}
+
+
+void   *eco_malloc(int32_t chunksize,
+                   const char *error_message,
+                   const char *filename,
+                   int32_t    line)
+{
+	void * chunk;
+	
+	chunk = calloc(1,chunksize);
+	
+	if (!chunk)
+		ecoError(ECO_MEM_ERROR,error_message,filename,line);
+		
+	if (eco_log_malloc)
+		fprintf(stderr,
+			    "Memory segment located at %p of size %d is allocated (file : %s [%d])",
+			    chunk,
+			    chunksize,
+			    filename,
+			    line);
+		
+	return chunk;
+}
+
+void   *eco_realloc(void *chunk,
+                    int32_t newsize,
+                    const char *error_message,
+                    const char *filename,
+                    int32_t    line)
+{
+	void *newchunk;
+	
+	newchunk = realloc(chunk,newsize);
+	
+	if (!newchunk)
+		ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+	if (eco_log_malloc)
+		fprintf(stderr,
+			    "Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
+			    chunk,
+			    newchunk,
+			    newsize,
+			    filename,
+			    line);
+		
+	return newchunk;	
+}
+
+void    eco_free(void *chunk,
+                 const char *error_message,
+                 const char *filename,
+                 int32_t    line)
+{
+	free(chunk);
+	
+	if (eco_log_malloc)
+		fprintf(stderr,
+			    "Memory segment %p is released => %s (file : %s [%d])",
+			    chunk,
+			    error_message,
+			    filename,
+			    line);
+}
--- a/src/libecoPCR/ecoPCR.h
+++ b/src/libecoPCR/ecoPCR.h
@ -0,0 +1,229 @@
+#ifndef ECOPCR_H_
+#define ECOPCR_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#ifndef H_apat
+#include "../libapat/apat.h"
+#endif
+
+/*****************************************************
+ * 
+ *  Data type declarations
+ * 
+ *****************************************************/
+
+/*
+ * 
+ *  Sequence types
+ * 
+ */
+
+typedef struct {
+	
+	int32_t  taxid;
+	char     AC[20];
+	int32_t  DE_length;
+	int32_t  SQ_length;
+	int32_t  CSQ_length;
+	
+	char     data[1];
+	
+} ecoseqformat_t;
+
+typedef struct {
+	int32_t taxid;
+	int32_t SQ_length;
+	char    *AC;
+	char    *DE;
+	char    *SQ;
+} ecoseq_t;
+
+/*
+ * 
+ * Taxonomy taxon types
+ * 
+ */
+
+
+typedef struct {
+	int32_t  taxid;
+	int32_t  rank;
+	int32_t	 parent;
+	int32_t  namelength;
+	char     name[1];
+	
+} ecotxformat_t;
+
+typedef struct {
+	int32_t  taxid;
+	int32_t  rank;
+	int32_t  parent;
+	char     *name;
+} ecotx_t;
+
+typedef struct {
+	int32_t count;
+	ecotx_t taxon[1];
+} ecotxidx_t;
+ 
+	
+/*
+ * 
+ * Taxonomy rank types
+ * 
+ */
+	
+typedef struct {
+	int32_t count;
+	char*   label[1];
+} ecorankidx_t;
+
+typedef struct {
+	ecorankidx_t *ranks;
+	ecotxidx_t   *taxons;
+} ecotaxonomy_t;
+
+
+/*****************************************************
+ * 
+ *  Function declarations
+ * 
+ *****************************************************/
+
+/*
+ * 
+ * Low level system functions
+ * 
+ */
+
+int32_t is_big_endian();
+int32_t swap_int32_t(int32_t);
+
+void   *eco_malloc(int32_t chunksize,
+                   const char *error_message,
+                   const char *filename,
+                   int32_t    line);
+                   
+                   
+void   *eco_realloc(void *chunk,
+                    int32_t chunksize,
+                    const char *error_message,
+                    const char *filename,
+                    int32_t    line);
+                    
+void    eco_free(void *chunk,
+                 const char *error_message,
+                 const char *filename,
+                 int32_t    line);
+                 
+void    eco_trace_memory_allocation();
+void    eco_untrace_memory_allocation();
+
+#define ECOMALLOC(size,error_message) \
+	    eco_malloc((size),(error_message),__FILE__,__LINE__)
+	   
+#define ECOREALLOC(chunk,size,error_message) \
+        eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
+        
+#define ECOFREE(chunk,error_message) \
+        eco_free((chunk),(error_message),__FILE__,__LINE__)
+        
+        
+
+
+/*
+ * 
+ * Error managment
+ * 
+ */
+ 
+  
+void ecoError(int32_t,const char*,const char *,int);
+
+#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
+
+#define ECO_IO_ERROR       (1)
+#define ECO_MEM_ERROR      (2)
+#define ECO_ASSERT_ERROR   (3)
+#define ECO_NOTFOUND_ERROR (4)
+
+
+/*
+ * 
+ * Low level Disk access functions
+ * 
+ */
+
+FILE *open_ecorecorddb(const char *filename,
+                       int32_t    *sequencecount,
+                       int32_t    abort_on_open_error);
+                       
+void *read_ecorecord(FILE *,int32_t *recordSize);
+
+
+
+/* 
+ *   Read function in internal binary format
+ */
+
+FILE             *open_ecoseqdb(const char *filename,
+                                int32_t    *sequencecount);
+                                                                
+ecoseq_t         *readnext_ecoseq(FILE *);
+
+ecorankidx_t     *read_rankidx(const char *filename);
+
+
+	/**
+	 * Read taxonomy data as formated by the ecoPCRFormat.py script.
+	 * 
+	 * This function is normaly uses internaly by the read_taxonomy
+	 * function and should not be called directly.
+	 * 
+	 * @arg filename  path to the *.tdx file of the reformated db
+	 * 
+	 * @return pointer to a taxonomy index structure
+	 */
+ 
+ecotxidx_t       *read_taxonomyidx(const char *filename);
+
+ecotaxonomy_t    *read_taxonomy(const char *prefix);
+
+
+ecoseq_t *ecoseq_iterator(const char *prefix);
+
+
+
+ecoseq_t *new_ecoseq();
+int32_t   delete_ecoseq(ecoseq_t *);
+ecoseq_t *new_ecoseq_with_data( char *AC,
+								char *DE,
+								char *SQ,
+								int32_t   taxid
+								);
+
+
+int32_t delete_taxon(ecotx_t *taxon);
+int32_t delete_taxonomy(ecotxidx_t *index);
+
+
+int32_t rank_index(const char* label,ecorankidx_t* ranks);
+
+int32_t  delete_apatseq(SeqPtr pseq);
+PatternPtr buildPattern(const char *pat, int32_t error_max);
+PatternPtr complementPattern(PatternPtr pat);
+
+SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out);
+
+char *ecoComplementPattern(char *nucAcSeq);
+char *ecoComplementSequence(char *nucAcSeq);
+char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
+
+ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+
+#endif /*ECOPCR_H_*/
--- a/src/libecoPCR/ecoapat.c
+++ b/src/libecoPCR/ecoapat.c
@ -0,0 +1,194 @@
+#include "../libapat/libstki.h"
+#include "../libapat/apat.h"
+
+#include "ecoPCR.h"
+
+#include <string.h>
+
+static void EncodeSequence(SeqPtr seq);
+static void UpperSequence(char *seq);
+
+/* -------------------------------------------- */
+/* uppercase sequence                           */
+/* -------------------------------------------- */
+
+#define IS_LOWER(c) (((c) >= 'a') && ((c) <= 'z'))
+#define TO_UPPER(c) ((c) - 'a' + 'A')
+
+void UpperSequence(char *seq)
+{
+        char *cseq;
+
+        for (cseq = seq ; *cseq ; cseq++) 
+            if (IS_LOWER(*cseq))
+                *cseq = TO_UPPER(*cseq);
+}
+ 
+#undef IS_LOWER
+#undef TO_UPPER
+
+
+
+
+/* -------------------------------------------- */
+/* encode sequence                              */
+/* IS_UPPER is slightly faster than isupper     */
+/* -------------------------------------------- */
+
+#define IS_UPPER(c) (((c) >= 'A') && ((c) <= 'Z'))
+
+
+
+void EncodeSequence(SeqPtr seq)
+{
+        int   i;
+        UInt8 *data;
+        char  *cseq;
+
+        data = seq->data;
+        cseq = seq->cseq;
+
+        while (*cseq) {
+
+            *data++ = (IS_UPPER(*cseq) ? *cseq - 'A' : 0x0);
+
+            cseq++;
+        }
+
+        for (i = 0 ; i < MAX_PATTERN ; i++)
+            seq->hitpos[i]->top = seq->hiterr[i]->top = 0;
+
+}
+
+#undef IS_UPPER
+
+
+SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out)
+{
+        int    i;
+
+		if (!out)
+		{
+			out = ECOMALLOC(sizeof(Seq),
+			                "Error in Allocation of a new Seq structure");
+			          
+	        for (i  = 0 ; i < MAX_PATTERN ; i++) 
+	        {
+			   
+	           if (! (out->hitpos[i] = NewStacki(kMinStackiSize)))
+	             	ECOERROR(ECO_MEM_ERROR,"Error in hit stack Allocation");
+	
+	           if (! (out->hiterr[i] = NewStacki(kMinStackiSize)))
+	            	ECOERROR(ECO_MEM_ERROR,"Error in error stack Allocation");
+	        }
+		}
+
+		out->name    = in->AC;
+		out->seqsiz = out->seqlen = in->SQ_length;
+
+		if (!out->data)
+		{
+			out->data = ECOMALLOC(out->seqlen *sizeof(UInt8),
+		    	     			  "Error in Allocation of a new Seq data member");    
+		   	out->datsiz=  out->seqlen;
+		}
+		else if (out->seqlen >= out->datsiz)
+		{
+			out->data = ECOREALLOC(out->data,out->seqlen,
+			                      "Error during Seq data buffer realloc");
+		   	out->datsiz=  out->seqlen;			                      
+		}
+
+		out->cseq = in->SQ;
+		
+		EncodeSequence(out);
+
+        return out;
+}
+
+int32_t delete_apatseq(SeqPtr pseq)
+{
+         int i;
+
+        if (pseq) {
+
+            if (pseq->data) 
+            	ECOFREE(pseq->data,"Freeing sequence data buffer");
+
+            for (i = 0 ; i < MAX_PATTERN ; i++) {
+                if (pseq->hitpos[i]) FreeStacki(pseq->hitpos[i]);
+                if (pseq->hiterr[i]) FreeStacki(pseq->hiterr[i]);
+            }
+
+            ECOFREE(pseq,"Freeing apat sequence structure");
+            
+            return 0;
+        }
+        
+        return 1;
+}
+
+PatternPtr buildPattern(const char *pat, int32_t error_max)
+{
+	PatternPtr pattern;
+	int32_t    patlen;
+	
+	pattern = ECOMALLOC(sizeof(Pattern),
+						"Error in pattern allocation");
+						
+	pattern->ok      = Vrai;
+	pattern->hasIndel= Faux;
+	pattern->maxerr  = error_max;
+	patlen  = strlen(pat);
+	
+	pattern->cpat    = ECOMALLOC(sizeof(char)*patlen+1,
+	                             "Error in sequence pattern allocation");
+	                             
+	strncpy(pattern->cpat,pat,patlen);
+	pattern->cpat[patlen]=0;
+	UpperSequence(pattern->cpat);
+	
+	if (!CheckPattern(pattern))
+		ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
+		
+	if (! EncodePattern(pattern, dna))
+		ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
+
+   	if (! CreateS(pattern, ALPHA_LEN))
+		ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
+	
+	return pattern;
+		
+}
+
+PatternPtr complementPattern(PatternPtr pat)
+{
+	PatternPtr pattern;
+	
+	pattern = ECOMALLOC(sizeof(Pattern),
+						"Error in pattern allocation");
+						
+	pattern->ok      = Vrai;
+	pattern->hasIndel= pat->hasIndel;
+	pattern->maxerr  = pat->maxerr;
+	pattern->patlen  = pat->patlen;
+	
+	pattern->cpat    = ECOMALLOC(sizeof(char)*(strlen(pat->cpat)+1),
+	                             "Error in sequence pattern allocation");
+	                             
+	strcpy(pattern->cpat,pat->cpat);
+	
+	ecoComplementPattern(pattern->cpat);
+	
+	if (!CheckPattern(pattern))
+		ECOERROR(ECO_ASSERT_ERROR,"Error in pattern checking");
+		
+	if (! EncodePattern(pattern, dna))
+		ECOERROR(ECO_ASSERT_ERROR,"Error in pattern encoding");
+
+   	if (! CreateS(pattern, ALPHA_LEN))
+		ECOERROR(ECO_ASSERT_ERROR,"Error in pattern compiling");
+	
+	return pattern;
+		
+}
--- a/src/libecoPCR/ecodna.c
+++ b/src/libecoPCR/ecodna.c
@ -0,0 +1,131 @@
+#include <string.h>
+#include "ecoPCR.h"
+
+/*
+ * @doc: DNA alphabet (IUPAC)
+ */
+#define LX_BIO_DNA_ALPHA   "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
+
+/*
+ * @doc: complementary DNA alphabet (IUPAC)
+ */
+#define LX_BIO_CDNA_ALPHA  "TVGHEFCDIJMLKNOPQYSAABWXRZ#!]["
+
+
+static char sNuc[]     = LX_BIO_DNA_ALPHA;
+static char sAnuc[]    = LX_BIO_CDNA_ALPHA;
+
+static char LXBioBaseComplement(char nucAc);
+static char *LXBioSeqComplement(char *nucAcSeq);
+static char *reverseSequence(char *str,char isPattern);
+
+ 
+/* ---------------------------- */
+
+char LXBioBaseComplement(char nucAc)
+{
+    char *c;
+
+    if ((c = strchr(sNuc, nucAc)))
+        return sAnuc[(c - sNuc)];
+    else
+        return nucAc;
+}
+
+/* ---------------------------- */
+
+char *LXBioSeqComplement(char *nucAcSeq)
+{
+    char *s;
+
+    for (s = nucAcSeq ; *s ; s++)
+        *s = LXBioBaseComplement(*s);
+
+    return nucAcSeq;
+}
+
+
+char *reverseSequence(char *str,char isPattern)
+{
+        char *sb, *se, c;
+
+        if (! str)
+            return str;
+            
+        sb = str;
+        se = str + strlen(str) - 1;
+
+        while(sb <= se) {
+           c    = *sb;
+          *sb++ = *se;
+          *se-- = c;
+        }
+
+		sb = str;
+		se = str + strlen(str) - 1;
+		
+		if (isPattern)
+			for (;sb < se; sb++)
+			{
+				if (*sb=='#')
+				{
+					if (((se - sb) > 2) && (*(sb+2)=='!'))
+					{
+						*sb='!';
+						sb+=2;
+						*sb='#';
+					}
+					else
+					{
+						*sb=*(sb+1);
+						sb++;
+						*sb='#';
+					}
+				}
+				else if (*sb=='!')
+					{
+						*sb=*(sb-1);
+						*(sb-1)='!';
+					}
+			}
+
+        return str;
+}
+
+char *ecoComplementPattern(char *nucAcSeq)
+{
+    return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
+}
+
+char *ecoComplementSequence(char *nucAcSeq)
+{
+    return reverseSequence(LXBioSeqComplement(nucAcSeq),0);
+}
+
+
+char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
+{
+	static char *buffer  = NULL;
+	static int32_t buffSize= 0;
+	int32_t length;
+	
+	length = end - begin;
+	
+	if (length >= buffSize)
+	{
+		buffSize = length+1;
+		if (buffer)
+			buffer=ECOREALLOC(buffer,buffSize,
+					   	      "Error in reallocating sub sequence buffer");
+		else
+			buffer=ECOMALLOC(buffSize,
+			          		 "Error in allocating sub sequence buffer");
+			
+	}
+	
+	strncpy(buffer,nucAcSeq + begin,length);
+	buffer[length]=0;
+	
+	return buffer;
+}
+
--- a/src/libecoPCR/ecorank.c
+++ b/src/libecoPCR/ecorank.c
@ -0,0 +1,52 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+
+static int compareRankLabel(const void *label1, const void *label2);
+
+ecorankidx_t     *read_rankidx(const char *filename)
+{
+	int32_t      count;
+	FILE         *f;
+	ecorankidx_t *index;
+	int32_t      i;
+	int32_t      rs;
+	char         *buffer;
+	
+	f = open_ecorecorddb(filename,&count,1);
+	
+	index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
+	                                  "Allocate rank index");
+	 
+	index->count=count;                                 
+	          
+	for (i=0; i < count; i++)
+		{
+			buffer = read_ecorecord(f,&rs);
+			index->label[i]=(char*) ECOMALLOC(rs+1,
+			                                  "Allocate rank label");
+			strncpy(index->label[i],buffer,rs);
+		}
+		
+	return index;
+}
+
+int32_t rank_index(const char* label,ecorankidx_t* ranks)
+{
+	char **rep;
+	
+	rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
+	
+	if (rep)
+		return rep-ranks->label;
+	else
+		ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
+		
+	return -1;
+}
+
+
+int compareRankLabel(const void *label1, const void *label2)
+{
+	return strcmp((const char*)label1,*(const char**)label2);
+}
--- a/src/libecoPCR/ecoseq.c
+++ b/src/libecoPCR/ecoseq.c
@ -0,0 +1,205 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <string.h>
+#include <stdio.h>
+
+static FILE *open_seqfile(const char *prefix,int32_t index);
+
+
+ecoseq_t *new_ecoseq()
+{
+	void *tmp;
+	
+	tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
+	
+	return tmp;
+}
+
+int32_t delete_ecoseq(ecoseq_t * seq)
+{
+	
+	if (seq)
+	{
+		if (seq->AC)
+			ECOFREE(seq->AC,"Free sequence AC");
+			
+		if (seq->DE)
+			ECOFREE(seq->DE,"Free sequence DE");
+			
+		if (seq->SQ)
+			ECOFREE(seq->SQ,"Free sequence SQ");
+			
+		ECOFREE(seq,"Free sequence structure");
+		
+		return 0;
+			
+	}
+	
+	return 1;
+}
+
+ecoseq_t *new_ecoseq_with_data( char *AC,
+								char *DE,
+								char *SQ,
+								int32_t   taxid_idx
+								)
+{
+	ecoseq_t *tmp;
+	int32_t lstr;
+	tmp = new_ecoseq();
+	
+	tmp->taxid=taxid_idx;
+	
+	if (AC)
+		{
+			lstr =strlen(AC);
+			tmp->AC=ECOMALLOC((lstr+1) * sizeof(char),
+			                  "Allocate sequence accession");
+			strcpy(tmp->AC,AC);
+		}
+
+	if (DE)
+		{
+			lstr =strlen(DE);
+			tmp->DE=ECOMALLOC((lstr+1) * sizeof(char),
+			                  "Allocate sequence definition");
+			strcpy(tmp->DE,DE);
+		}
+
+	if (SQ)
+		{
+			lstr =strlen(SQ);
+			tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char),
+			                  "Allocate sequence data");
+			strcpy(tmp->SQ,SQ);
+		}
+	return tmp;
+
+}
+
+
+FILE *open_ecoseqdb(const char *filename,
+                    int32_t    *sequencecount)
+{
+	return open_ecorecorddb(filename,sequencecount,1);
+}
+
+ecoseq_t *readnext_ecoseq(FILE *f)
+{
+	char     *compressed=NULL;
+
+	ecoseqformat_t *raw;
+	ecoseq_t *seq;
+	int32_t  comp_status;
+	unsigned long int seqlength;
+	int32_t  rs;
+	
+	raw = read_ecorecord(f,&rs);
+	
+	if (!raw)
+		return NULL;
+		
+	if (is_big_endian())
+	{
+		raw->CSQ_length = swap_int32_t(raw->CSQ_length);
+		raw->DE_length  = swap_int32_t(raw->DE_length);
+		raw->SQ_length  = swap_int32_t(raw->SQ_length);
+		raw->taxid      = swap_int32_t(raw->taxid);
+	}
+	
+	seq = new_ecoseq();
+	
+	seq->taxid = raw->taxid;
+	
+    seq->AC    = ECOMALLOC(strlen(raw->AC) +1,
+                           "Allocate Sequence Accesion number");
+    strncpy(seq->AC,raw->AC,strlen(raw->AC));
+
+                           
+    seq->DE    = ECOMALLOC(raw->DE_length+1,
+                           "Allocate Sequence definition");
+    strncpy(seq->DE,raw->data,raw->DE_length);
+    
+	seqlength = seq->SQ_length = raw->SQ_length;
+    
+    compressed = raw->data + raw->DE_length;
+
+    seq->SQ = ECOMALLOC(seqlength+1,
+                        "Allocate sequence buffer");
+                        
+    comp_status = uncompress((unsigned char*)seq->SQ,
+                             &seqlength,
+                             (unsigned char*)compressed,
+                             raw->CSQ_length);
+    
+    if (comp_status != Z_OK)
+    	ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
+    
+	return seq;
+}
+
+FILE *open_seqfile(const char *prefix,int32_t index)
+{
+	char           filename_buffer[1024];
+	int32_t        filename_length;
+	FILE           *input;
+	int32_t        seqcount;
+
+	filename_length = snprintf(filename_buffer,
+								1023,
+	                           "%s_%03d.sdx",
+	                           prefix,
+	                           index);
+
+	if (filename_length >= 1024)
+		ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
+
+	filename_buffer[filename_length]=0;
+		
+	input=open_ecorecorddb(filename_buffer,&seqcount,0);
+	
+	if (input)
+		fprintf(stderr,"Reading file %s containing %d sequences...\n",
+				filename_buffer,
+				seqcount);
+				
+	return input;
+}
+
+ecoseq_t *ecoseq_iterator(const char *prefix)
+{
+	static FILE    *current_seq_file= NULL;
+	static int32_t current_file_idx = 1;
+	ecoseq_t       *seq;
+	
+	if (prefix)
+	{
+		current_file_idx = 1;
+
+		if (current_seq_file)
+			fclose(current_seq_file);
+		
+		current_seq_file = open_seqfile(prefix,
+		 							    current_file_idx);
+		
+		if (!current_seq_file)
+			return NULL;
+			
+	}
+	
+	seq = readnext_ecoseq(current_seq_file);
+	
+	if (!seq && feof(current_seq_file))
+	{
+		current_file_idx++;
+		fclose(current_seq_file);
+		current_seq_file = open_seqfile(prefix,
+		 							    current_file_idx);
+		if (current_seq_file)
+			seq = readnext_ecoseq(current_seq_file);
+	}
+	
+	return seq;
+}
--- a/src/libecoPCR/ecotax.c
+++ b/src/libecoPCR/ecotax.c
@ -0,0 +1,234 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
+
+
+ecotxidx_t     *read_taxonomyidx(const char *filename)
+{
+	int32_t      count;
+	FILE         *f;
+	ecotxidx_t *index;
+	int32_t      i;
+	
+	f = open_ecorecorddb(filename,&count,1);
+	
+	index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count-1),
+	                                  "Allocate taxonomy");
+	 
+	index->count=count;                                 
+	          
+	for (i=0; i < count; i++)
+		readnext_ecotaxon(f,&(index->taxon[i]));
+			
+	return index;
+}
+
+int32_t delete_taxonomy(ecotxidx_t *index)
+{
+	int32_t i;
+	
+	if (index)
+	{
+		for (i=0; i< index->count; i++)
+			if (index->taxon[i].name)
+				ECOFREE(index->taxon[i].name,"Free scientific name");
+				
+		ECOFREE(index,"Free Taxonomy");
+		
+		return 0;
+	}
+	
+	return 1;
+}
+
+
+
+int32_t delete_taxon(ecotx_t *taxon)
+{
+	if (taxon)
+	{
+		if (taxon->name)
+			ECOFREE(taxon->name,"Free scientific name");
+			
+		ECOFREE(taxon,"Free Taxon");
+		
+		return 0;
+	}
+		
+	return 1;
+}
+
+ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
+{
+	
+	ecotxformat_t *raw;
+	int32_t  rs;
+	
+	raw = read_ecorecord(f,&rs);
+	
+	if (!raw)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		raw->namelength = swap_int32_t(raw->namelength);
+		raw->parent     = swap_int32_t(raw->parent);
+		raw->rank       = swap_int32_t(raw->rank);
+		raw->taxid      = swap_int32_t(raw->taxid); 
+	}
+	
+	taxon->parent = raw->parent;
+	taxon->taxid  = raw->taxid;
+	taxon->rank   = raw->rank;
+	
+	taxon->name   = ECOMALLOC((raw->namelength+1) * sizeof(char),
+	                          "Allocate taxon scientific name");
+	                          
+	strncpy(taxon->name,raw->name,raw->namelength);
+	
+	return taxon;
+}
+
+
+ecotaxonomy_t    *read_taxonomy(const char *prefix)
+{
+	ecotaxonomy_t *tax;
+	char          *filename;
+	int           buffsize;
+	
+	tax = ECOMALLOC(sizeof(ecotaxonomy_t),
+	                "Allocate taxonomy structure");
+	
+	buffsize = strlen(prefix)+10;
+	
+	filename = ECOMALLOC(buffsize,
+	                     "Allocate filename");
+	
+	snprintf(filename,buffsize,"%s.rdx",prefix);
+	
+	tax->ranks = read_rankidx(filename);
+	
+	snprintf(filename,buffsize,"%s.tdx",prefix);
+	
+	tax->taxons = read_taxonomyidx(filename);
+	
+	return tax;
+	
+}
+
+int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
+{
+	if (taxonomy)
+	{
+		if (taxonomy->ranks)
+			ECOFREE(taxonomy->ranks,"Free rank index");
+			
+		if (taxonomy->taxons)
+			ECOFREE(taxonomy->taxons,"Free taxon index");
+			
+		ECOFREE(taxonomy,"Free taxonomy structure");
+		
+		return 0;
+	}
+	
+	return 1;
+}
+
+ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
+                                int32_t rankidx,
+                                ecotaxonomy_t *taxonomy)
+{
+	ecotx_t *current_taxon;
+	ecotx_t *next_taxon;
+	
+	current_taxon = taxon;
+	next_taxon    = &(taxonomy->taxons->taxon[current_taxon->parent]);
+	
+	while ((current_taxon!=next_taxon) &&
+		   (current_taxon->rank!=rankidx))
+		   {
+		   	current_taxon = next_taxon;
+		   	next_taxon    = &(taxonomy->taxons->taxon[current_taxon->parent]);
+		   }
+		   
+	if (current_taxon->rank==rankidx)
+		return current_taxon;
+	else
+		return NULL;
+}
+
+ecotx_t *eco_getspecies(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+	
+	if (taxonomy && tax!=taxonomy)
+	{
+		rankindex = rank_index("species",taxonomy->ranks);
+		tax=taxonomy;
+	}
+		
+	if (!tax || rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+		
+	return eco_findtaxonatrank(taxon,rankindex,tax);
+}
+
+ecotx_t *eco_getgenus(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+	
+	if (taxonomy && tax!=taxonomy)
+	{
+		rankindex = rank_index("genus",taxonomy->ranks);
+		tax=taxonomy;
+	}
+		
+	if (!tax || rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+		
+	return eco_findtaxonatrank(taxon,rankindex,tax);
+}
+
+
+ecotx_t *eco_getfamily(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+	
+	if (taxonomy && tax!=taxonomy)
+	{
+		rankindex = rank_index("family",taxonomy->ranks);
+		tax=taxonomy;
+	}
+		
+	if (!tax || rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+		
+	return eco_findtaxonatrank(taxon,rankindex,tax);
+}
+
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+	
+	if (taxonomy && tax!=taxonomy)
+	{
+		rankindex = rank_index("superkingdom",taxonomy->ranks);
+		tax=taxonomy;
+	}
+		
+	if (!tax || rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+		
+	return eco_findtaxonatrank(taxon,rankindex,tax);
+}
--- a/tests/ecodb.rdx
+++ b/tests/ecodb.rdx
--- a/tests/ecodb.tdx
+++ b/tests/ecodb.tdx
--- a/tests/ecodb_001.sdx
+++ b/tests/ecodb_001.sdx
--- a/tools/ecoPCRFormat.py
+++ b/tools/ecoPCRFormat.py
@ -0,0 +1,447 @@
+#!/usr/bin/env python
+
+import re
+import gzip
+import psycopg2
+import struct
+import sys
+import time
+import getopt
+
+#####
+#
+#
+# Generic file function
+#
+#
+#####
+
+def universalOpen(file):
+    if isinstance(file,str):
+        if file[-3:] == '.gz':
+            rep = gzip.open(file)
+        else:
+            rep = open(file)
+    else:
+        rep = file
+    return rep
+
+def universalTell(file):
+    if isinstance(file, gzip.GzipFile):
+        file=file.myfileobj
+    return file.tell()
+
+def fileSize(file):
+    if isinstance(file, gzip.GzipFile):
+        file=file.myfileobj
+    pos = file.tell()
+    file.seek(0,2)
+    length = file.tell()
+    file.seek(pos,0)
+    return length
+
+def progressBar(pos,max,reset=False,delta=[]):
+    if reset:
+        del delta[:]
+    if not delta:
+        delta.append(time.time())
+        delta.append(time.time())
+
+    delta[1]=time.time()
+    elapsed = delta[1]-delta[0]
+    percent = float(pos)/max * 100
+    remain = time.strftime('%H:%M:%S',time.gmtime(elapsed / percent * (100-percent)))
+    bar = '#' * int(percent/2)
+    bar+= '|/-\\-'[pos % 5]
+    bar+= ' ' * (50 - int(percent/2))
+    sys.stderr.write('\r%5.1f %% |%s] remain : %s' %(percent,bar,remain))
+
+#####
+#
+#
+# NCBI Dump Taxonomy reader
+#
+#
+#####
+
+def endLessIterator(endedlist):
+    for x in endedlist:
+        yield x
+    while(1):
+        yield endedlist[-1]
+    
+    
+class ColumnFile(object):
+    
+    def __init__(self,stream,sep=None,strip=True,types=None):
+        if isinstance(stream,str):
+            self._stream = open(stream)
+        elif hasattr(stream,'next'):
+            self._stream = stream
+        else:
+            raise ValueError,'stream must be string or an iterator'
+        self._delimiter=sep
+        self._strip=strip
+        if types:
+            self._types=[x for x in types]
+            for i in xrange(len(self._types)):
+                if self._types[i] is bool:
+                    self._types[i]=ColumnFile.str2bool
+        else:
+            self._types=None
+            
+    def str2bool(x):
+        return bool(eval(x.strip()[0].upper(),{'T':True,'V':True,'F':False}))
+                    
+    str2bool = staticmethod(str2bool)
+            
+        
+    def __iter__(self):
+        return self
+    
+    def next(self):
+        ligne = self._stream.next()
+        data = ligne.split(self._delimiter)
+        if self._strip or self._types:
+            data = [x.strip() for x in data]
+        if self._types:
+            it = endLessIterator(self._types)
+            data = [x[1](x[0]) for x in ((y,it.next()) for y in data)]
+        return data
+    
+def taxonCmp(t1,t2):
+    if t1[0] < t2[0]:
+        return -1
+    elif t1[0] > t2[0]:
+        return +1
+    return 0
+
+def bsearchTaxon(taxonomy,taxid):
+    taxCount = len(taxonomy)
+    begin = 0
+    end   = taxCount 
+    oldcheck=taxCount
+    check = begin + end / 2
+    while check != oldcheck and taxonomy[check][0]!=taxid :
+        if taxonomy[check][0] < taxid:
+            begin=check
+        else:
+            end=check
+        oldcheck=check
+        check = (begin + end) / 2
+        
+        
+    if taxonomy[check][0]==taxid:
+        return check
+    else:
+        return None
+        
+        
+    
+def readNodeTable(file):
+
+    file = universalOpen(file)
+    
+    nodes = ColumnFile(file, 
+                       sep='|', 
+                       types=(int,int,str,
+                              str,str,bool,
+                              int,bool,int,
+                              bool,bool,bool,str))
+    print >>sys.stderr,"Reading taxonomy dump file..."
+    taxonomy=[[n[0],n[2],n[1]] for n in nodes]
+    
+    print >>sys.stderr,"List all taxonomy rank..."    
+    ranks =list(set(x[1] for x in taxonomy))
+    ranks.sort()
+    ranks = dict(map(None,ranks,xrange(len(ranks))))
+    
+    print >>sys.stderr,"Sorting taxons..."
+    taxonomy.sort(taxonCmp)
+
+    print >>sys.stderr,"Indexing taxonomy..."
+    index = {}
+    for t in taxonomy:
+        index[t[0]]=bsearchTaxon(taxonomy, t[0])
+    
+    print >>sys.stderr,"Indexing parent and rank..."
+    for t in taxonomy:
+        t[1]=ranks[t[1]]
+        t[2]=index[t[2]]
+        
+        
+    return taxonomy,ranks,index
+
+def scientificNameIterator(file):
+    file = universalOpen(file)
+    names = ColumnFile(file, 
+                       sep='|', 
+                       types=(int,str,
+                              str,str))
+    for taxid,name,unique,classname,white in names:
+        if classname == 'scientific name':
+            yield taxid,name
+        
+def mergedNodeIterator(file):
+    file = universalOpen(file)
+    merged = ColumnFile(file, 
+                       sep='|', 
+                       types=(int,int,str))
+    for taxid,current,white in merged:
+            yield taxid,current
+    
+def deletedNodeIterator(file):
+    file = universalOpen(file)
+    deleted = ColumnFile(file, 
+                       sep='|', 
+                       types=(int,str))
+    for taxid,white in deleted:
+            yield taxid
+    
+def readTaxonomyDump(taxdir):
+    taxonomy,ranks,index = readNodeTable('%s/nodes.dmp' % taxdir)
+    
+    print >>sys.stderr,"Adding scientific name..."
+    for taxid,name in scientificNameIterator('%s/names.dmp' % taxdir):
+        taxonomy[index[taxid]].append(name)
+        
+    print >>sys.stderr,"Adding taxid alias..."
+    for taxid,current in mergedNodeIterator('%s/merged.dmp' % taxdir):
+        index[taxid]=index[current]
+    
+    print >>sys.stderr,"Adding deleted taxid..."
+    for taxid in deletedNodeIterator('%s/delnodes.dmp' % taxdir):
+        index[taxid]=None
+    
+    return taxonomy,ranks,index
+
+
+#####
+#
+#
+#  Genbank/EMBL sequence reader
+#
+#
+#####
+
+def entryIterator(file):
+    file = universalOpen(file)
+    rep =[]
+    for ligne in file:
+        rep.append(ligne)
+        if ligne == '//\n':
+            rep = ''.join(rep)
+            yield rep
+            rep = []
+        
+_cleanSeq = re.compile('[ \n0-9]+')
+            
+def cleanSeq(seq):
+    return _cleanSeq.sub('',seq)
+    
+    
+_gbParseID = re.compile('(?<=^LOCUS {7})[^ ]+(?= )',re.MULTILINE)   
+_gbParseDE = re.compile('(?<=^DEFINITION {2}).+?\. *$(?=[^ ])',re.MULTILINE+re.DOTALL)   
+_gbParseSQ = re.compile('(?<=^ORIGIN).+?(?=^//$)',re.MULTILINE+re.DOTALL)  
+_gbParseTX = re.compile('(?<= /db_xref="taxon:)[0-9]+(?=")')
+  
+def genbankParser(entry):
+    Id = _gbParseID.findall(entry)[0]
+    De = ' '.join(_gbParseDE.findall(entry)[0].split())
+    Sq = cleanSeq(_gbParseSQ.findall(entry)[0].upper())
+    Tx = int(_gbParseTX.findall(entry)[0])
+    return {'id':Id,'taxid':Tx,'definition':De,'sequence':Sq}
+
+def sequenceIterator(file,parser):
+    for entry in entryIterator(file):
+        yield parser(entry)
+
+def taxonomyInfo(entry,connection):
+    taxid = entry['taxid']
+    curseur = connection.cursor()
+    curseur.execute("""
+                        select taxid,species,genus,family,
+                               taxonomy.scientificName(taxid) as sn,
+                               taxonomy.scientificName(species) as species_sn,
+                               taxonomy.scientificName(genus) as genus_sn,
+                               taxonomy.scientificName(family) as family_sn
+                        from
+                            (   
+                             select alias                      as taxid,
+                               taxonomy.getSpecies(alias) as species,
+                               taxonomy.getGenus(alias)   as genus,
+                               taxonomy.getFamily(alias)  as family
+                                from taxonomy.aliases
+                               where id=%d ) as tax
+                    """ % taxid)
+    rep = curseur.fetchone()
+    entry['current_taxid']=rep[0]
+    entry['species']=rep[1]
+    entry['genus']=rep[2]
+    entry['family']=rep[3]
+    entry['scientific_name']=rep[4]
+    entry['species_sn']=rep[5]
+    entry['genus_sn']=rep[6]
+    entry['family_sn']=rep[7]
+    return entry
+    
+#####
+#
+#
+# Binary writer
+#
+#
+#####
+    
+def ecoSeqPacker(sq):
+    
+    compactseq = gzip.zlib.compress(sq['sequence'],9)
+    cptseqlength  = len(compactseq)
+    delength   = len(sq['definition'])
+    
+    totalSize = 4 + 20 + 4 + 4 + 4 + cptseqlength + delength
+    
+    packed = struct.pack('> I I 20s I I I %ds %ds' % (delength,cptseqlength),
+                         totalSize,
+                         sq['taxid'],
+                         sq['id'],
+                         delength,
+                         len(sq['sequence']),
+                         cptseqlength,
+                         sq['definition'],
+                         compactseq)
+    
+    assert len(packed) == totalSize+4, "error in sequence packing"
+    
+    return packed
+
+def ecoTaxPacker(tx):
+    
+    namelength = len(tx[3])
+    
+    totalSize = 4 + 4 + 4 + 4 + namelength
+    
+    packed = struct.pack('> I I I I I %ds' % namelength, 
+                         totalSize, 
+                         tx[0],
+                         tx[1],
+                         tx[2], 
+                         namelength,
+                         tx[3])
+    
+    return packed
+
+def ecoRankPacker(rank):
+    
+    namelength = len(rank)
+    
+    packed = struct.pack('> I %ds' % namelength,
+                         namelength,
+                         rank)
+    
+    return packed
+                
+    
+def ecoSeqWriter(file,input,taxindex,parser=genbankParser):
+    output = open(file,'wb')
+    input  = universalOpen(input)
+    inputsize = fileSize(input)
+    entries = sequenceIterator(input, parser)
+    seqcount=0
+    skipped = []
+
+    output.write(struct.pack('> I',seqcount))
+    
+    progressBar(1, inputsize,reset=True)
+    for entry in entries:
+        entry['taxid']=taxindex[entry['taxid']]
+        if entry['taxid'] is not None:
+            seqcount+=1
+            output.write(ecoSeqPacker(entry))
+        else:
+            skipped.append[entry['id']]
+        where = universalTell(input)
+        progressBar(where, inputsize)
+        print >>sys.stderr," Read sequences : %d     " % seqcount,
+        
+    print >>sys.stderr
+    output.seek(0,0)
+    output.write(struct.pack('> I',seqcount))
+    
+    output.close()
+    return skipped
+        
+
+def ecoTaxWriter(file,taxonomy):
+    output = open(file,'wb')
+    output.write(struct.pack('> I',len(taxonomy)))
+    
+    for tx in taxonomy:
+        output.write(ecoTaxPacker(tx))
+
+    output.close()
+    
+def ecoRankWriter(file,ranks):
+    output = open(file,'wb')
+    output.write(struct.pack('> I',len(ranks)))
+
+    rankNames = ranks.keys()
+    rankNames.sort()
+    
+    for rank in rankNames:
+        output.write(ecoRankPacker(rank))
+
+    output.close()
+    
+def ecoDBWriter(prefix,taxonomy,seqFileNames,parser=genbankParser):
+    
+    ecoRankWriter('%s.rdx' % prefix, taxonomy[1])
+    ecoTaxWriter('%s.tdx' % prefix, taxonomy[0])
+    
+    filecount = 0
+    for filename in seqFileNames:
+        filecount+=1
+        sk=ecoSeqWriter('%s_%03d.sdx' % (prefix,filecount), 
+                     filename, 
+                     taxonomy[2], 
+                     parser)
+        if sk:
+            print >>sys.stderr,"Skipped entry :"
+            print >>sys.stderr,sk
+        
+def ecoParseOptions(arguments):
+    opt = {
+            'prefix' : 'ecodb',
+            'taxdir' : 'taxdump',
+            'parser' : genbankParser
+           }
+    
+    o,filenames = getopt.getopt(arguments,
+                                'ht:n:g',
+                                ['help',
+                                 'taxonomy=',
+                                 'name=',
+                                 'genbank'])
+    
+    for name,value in o:
+        if name in ('-h','--help'):
+            pass
+        elif name in ('-t','--taxonomy'):
+            opt['taxdir']=value
+        elif name in ('-n','--name'):
+            opt['prefix']=value
+        elif name in ('-g','--genbank'):
+            opt['parser']=genbankParser
+        else:
+            raise ValueError,'Unknown option %s' % name
+
+    return opt,filenames
+
+if __name__ == '__main__':
+    
+    opt,filenames = ecoParseOptions(sys.argv[1:])
+    
+    taxonomy = readTaxonomyDump(opt['taxdir'])
+    
+    ecoDBWriter(opt['prefix'], taxonomy, filenames, opt['parser'])
+