Change the package path

2018-02-20 06:40:29 +11:00
parent 0450ebf427
commit 51f152cca4
48 changed files with 0 additions and 3 deletions
--- a/src/ecoError.c
+++ b/src/ecoError.c
@@ -0,0 +1,26 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Report a fatal error on stderr and terminate the program.
+ *
+ * @param error       numeric error code (e.g. ECO_IO_ERROR)
+ * @param message     text explaining what went wrong
+ * @param filename    source file in which the failure was detected
+ * @param linenumber  source line at which the failure was detected
+ *
+ * filename and linenumber are normally supplied by the ECOERROR macro
+ * through __FILE__ and __LINE__. This function does not return: it ends
+ * with abort().
+ */
+void ecoError(int32_t error,
+              const char* message,
+              const char * filename,
+              int linenumber)
+{
+	fprintf(stderr,
+	        "Error %d in file %s line %d : %s\n",
+	        error, filename, linenumber, message);
+
+	abort();
+}
--- a/src/ecoIOUtils.c
+++ b/src/ecoIOUtils.c
@@ -0,0 +1,122 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Reverse the byte order of a 32-bit value.
+ * The argument is expanded four times, so it must be free of side effects.
+ * NOTE(review): with a signed operand the left shifts can overflow, which is
+ * undefined behaviour in C; prefer passing an unsigned 32-bit value.
+ */
+#define SWAPINT32(x)     ((((x) << 24) & 0xFF000000) | (((x) <<  8) & 0xFF0000) | \
+                         (((x) >>  8) & 0xFF00)     | (((x) >> 24) & 0xFF))
+
+
+/*
+ * Probe the byte order of the host.
+ *
+ * Returns the lowest-addressed byte of an int32_t holding the value 1.
+ * NOTE: despite its name, this yields 1 on a little-endian host and 0 on a
+ * big-endian one. Every caller in this code base byte-swaps the integers it
+ * reads when the result is non-zero; presumably the database files store
+ * big-endian integers, so the net behaviour is intended - confirm before
+ * "fixing" either the name or the returned value.
+ */
+int32_t is_big_endian()
+{
+	const int32_t probe = 1;
+	const char   *first_byte = (const char *)&probe;
+
+	return (int32_t)first_byte[0];
+}
+
+
+
+
+/*
+ * Return i with its four bytes in reverse order.
+ *
+ * The swap is performed on the unsigned representation of i: shifting a
+ * signed 32-bit value left into (or past) the sign bit is undefined
+ * behaviour in C, which is what applying SWAPINT32 directly to an int32_t
+ * did.
+ */
+int32_t swap_int32_t(int32_t i)
+{
+	const uint32_t u = (uint32_t)i;
+	const uint32_t swapped = ((u & 0x000000FFu) << 24) |
+	                         ((u & 0x0000FF00u) <<  8) |
+	                         ((u & 0x00FF0000u) >>  8) |
+	                         ((u & 0xFF000000u) >> 24);
+
+	return (int32_t)swapped;
+}
+
+
+/**
+ * Read the next length-prefixed record of a database file.
+ *
+ * A record is a 32-bit size followed by that many bytes of payload. The size
+ * is byte-swapped when is_big_endian() reports non-zero, like every other
+ * integer read by this module.
+ *
+ * @param	*f	the database, positioned at the start of a record
+ * @param	recordSize receives the payload size in bytes (must not be NULL)
+ *
+ * @return	pointer to the payload, or NULL when the end of file is hit
+ *          while reading the size. The buffer is static and shared by all
+ *          callers: it is overwritten (and may move) on the next call and
+ *          must not be freed by the caller.
+ *
+ * Any other read failure, or a negative size, is fatal (ECOERROR).
+ */
+void *read_ecorecord(FILE *f,int32_t *recordSize)
+{
+	static void    *buffer     = NULL;
+	/* Capacity of buffer. It has to be static, like buffer itself, otherwise
+	 * the buffer is reallocated on every single call. */
+	static int32_t  buffersize = 0;
+	size_t          nread;
+
+	if (!recordSize)
+		ECOERROR(ECO_ASSERT_ERROR,
+		         "recordSize cannot be NULL");
+
+	nread = fread(recordSize, 1, sizeof(int32_t), f);
+
+	if (feof(f))
+		return NULL;
+
+	if (nread != sizeof(int32_t))
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+	if (is_big_endian())
+		*recordSize=swap_int32_t(*recordSize);
+
+	/* A negative size can only come from a corrupted file. */
+	if (*recordSize < 0)
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+	/* Grow the shared buffer only when this record does not fit. */
+	if (buffersize < *recordSize)
+	{
+		if (buffer)
+			buffer = ECOREALLOC(buffer,*recordSize,
+			                    "Increase size of record buffer");
+		else
+			buffer = ECOMALLOC(*recordSize,
+			                    "Allocate record buffer");
+
+		buffersize = *recordSize;
+	}
+
+	nread = fread(buffer, 1, (size_t)*recordSize, f);
+
+	if (nread != (size_t)*recordSize)
+		ECOERROR(ECO_IO_ERROR,"Reading record data error");
+
+	return buffer;
+}
+
+
+
+
+
+/**
+ * Open a record database and read the 32-bit count stored at its start.
+ * @param 	filename 		name of the database (.sdx, .rdx, .tbx)
+ * @param 	sequencecount	receives the count read from the file; set to 0
+ * 							when the file cannot be opened and
+ * 							abort_on_open_error is zero
+ * @param 	abort_on_open_error	non-zero: failing to open the file is fatal;
+ * 							zero: NULL is returned instead
+ * @return 	the open stream, positioned just after the count, or NULL.
+ * 			Failing to read the count is always fatal.
+ **/
+FILE *open_ecorecorddb(const char *filename,
+                       int32_t    *sequencecount,
+                       int32_t    abort_on_open_error)
+{
+	FILE    *db = fopen(filename,"rb");
+	int32_t  got;
+
+	if (db == NULL && !abort_on_open_error)
+	{
+		*sequencecount=0;
+		return NULL;
+	}
+
+	if (db == NULL)
+		ECOERROR(ECO_IO_ERROR,"Cannot open file");
+
+	got = fread(sequencecount, 1, sizeof(int32_t), db);
+
+	if (got != sizeof(int32_t))
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+	/* same byte-order convention as every other integer of the format */
+	if (is_big_endian())
+		*sequencecount=swap_int32_t(*sequencecount);
+
+	return db;
+}
+
--- a/src/ecoMalloc.c
+++ b/src/ecoMalloc.c
@@ -0,0 +1,79 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+
+/* Non-zero while eco_malloc/eco_realloc/eco_free report each call on stderr. */
+static int eco_log_malloc = 0;
+
+/* Switch the allocation trace on. */
+void    eco_trace_memory_allocation()
+{
+	eco_log_malloc=1;
+}
+
+/* Switch the allocation trace off (the initial state). */
+void    eco_untrace_memory_allocation()
+{
+	eco_log_malloc=0;
+}
+
+
+/*
+ * Allocate chunksize bytes of zero-filled memory (the block comes from
+ * calloc).
+ *
+ * When the allocation fails, ecoError() is called with ECO_MEM_ERROR,
+ * error_message and the call site (filename, line). When tracing is enabled
+ * the allocation is reported on stderr.
+ */
+void   *eco_malloc(int32_t chunksize,
+                   const char *error_message,
+                   const char *filename,
+                   int32_t    line)
+{
+	void *block = calloc(1,chunksize);
+
+	if (block == NULL)
+		ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+	if (!eco_log_malloc)
+		return block;
+
+	fprintf(stderr,
+		    "Memory segment located at %p of size %d is allocated (file : %s [%d])",
+		    block, chunksize, filename, line);
+
+	return block;
+}
+
+/*
+ * Resize the block chunk to newsize bytes with realloc().
+ *
+ * When the reallocation fails, ecoError() is called with ECO_MEM_ERROR,
+ * error_message and the call site (filename, line). When tracing is enabled
+ * the old and new addresses are reported on stderr.
+ * Unlike eco_malloc(), any newly added bytes are not zero-filled.
+ */
+void   *eco_realloc(void *chunk,
+                    int32_t newsize,
+                    const char *error_message,
+                    const char *filename,
+                    int32_t    line)
+{
+	void *resized = realloc(chunk,newsize);
+
+	if (resized == NULL)
+		ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+	if (!eco_log_malloc)
+		return resized;
+
+	fprintf(stderr,
+		    "Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
+		    chunk, resized, newsize, filename, line);
+
+	return resized;
+}
+
+/*
+ * Release chunk with free().
+ *
+ * error_message is not an error here: it is only the label printed in the
+ * trace line when tracing is enabled, together with the call site.
+ */
+void    eco_free(void *chunk,
+                 const char *error_message,
+                 const char *filename,
+                 int32_t    line)
+{
+	free(chunk);
+
+	if (!eco_log_malloc)
+		return;
+
+	fprintf(stderr,
+		    "Memory segment %p is released => %s (file : %s [%d])",
+		    chunk, error_message, filename, line);
+}
--- a/src/ecoPCR.h
+++ b/src/ecoPCR.h
@@ -0,0 +1,283 @@
+#ifndef ECOPCR_H_
+#define ECOPCR_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include <R.h>
+#include <Rinternals.h>
+#include <Rdefines.h>
+
+
+//#ifndef H_apat
+//#include "../libapat/apat.h"
+//#endif
+
+/*****************************************************
+ * 
+ *  Data type declarations
+ * 
+ *****************************************************/
+
+/*
+ * 
+ *  Sequence types
+ * 
+ */
+
+/*
+ * Layout of one sequence record as returned by read_ecorecord(); it is
+ * decoded by readnext_ecoseq().
+ */
+typedef struct {
+	
+	int32_t  taxid;
+	char     AC[20];       /* accession; fixed-size field */
+	int32_t  DE_length;    /* number of bytes of definition at the start of data */
+	int32_t  SQ_length;    /* length of the sequence once uncompressed */
+	int32_t  CSQ_length;   /* length of the compressed sequence stored after the definition */
+	
+	char     data[1];      /* variable-length payload: definition, then compressed sequence */
+	
+} ecoseqformat_t;
+
+/*
+ * In-memory sequence. AC, DE and SQ are separately allocated strings that
+ * delete_ecoseq() releases together with the structure.
+ */
+typedef struct {
+	int32_t taxid;
+	int32_t SQ_length;
+	char    *AC;
+	char    *DE;
+	char    *SQ;
+} ecoseq_t;
+
+/*
+ * 
+ * Taxonomy taxon types
+ * 
+ */
+
+
+/*
+ * Layout of one taxon record as returned by read_ecorecord(); it is decoded
+ * by readnext_ecotaxon().
+ */
+typedef struct {
+	int32_t  taxid;
+	int32_t  rank;
+	int32_t	 parent;       /* index of the parent in the taxon array (not a taxid) */
+	int32_t  namelength;   /* number of bytes stored in name */
+	char     name[1];      /* variable-length name, namelength bytes */
+	
+} ecotxformat_t;
+
+/*
+ * In-memory taxon. The traversal code in this package treats a node whose
+ * parent is itself as the root.
+ */
+typedef struct ecotxnode {
+	int32_t           taxid;
+	int32_t           rank;
+	int32_t  		      farest;   /* filled by the "longest branches" pass of read_taxonomyidx() */
+	struct ecotxnode  *parent;
+	char              *name;
+} ecotx_t;
+
+/*
+ * Taxon table. It is allocated as one block holding count entries, which is
+ * why taxon is declared with a single element.
+ */
+typedef struct {
+	int32_t count;
+	int32_t maxtaxid;       /* largest taxid seen while loading */
+  int32_t buffersize;     /* read_taxonomyidx() sets it equal to count */
+	ecotx_t taxon[1];
+} ecotxidx_t;
+ 
+	
+/*
+ * 
+ * Taxonomy rank types
+ * 
+ */
+	
+/*
+ * Table of rank labels, allocated as one block holding count pointers.
+ * rank_index() searches it with bsearch(), so the labels are presumably
+ * stored in strcmp() order - verify against the file writer.
+ */
+typedef struct {
+	int32_t count;
+	char*   label[1];
+} ecorankidx_t;
+
+/*
+ * 
+ * Taxonomy name types
+ * 
+ */
+
+/*
+ * Layout of one name record as returned by read_ecorecord(); it is decoded
+ * by readnext_econame().
+ */
+typedef struct {
+ 	int32_t is_scientificname;
+	int32_t  namelength;    /* bytes of name at the start of names */
+	int32_t  classlength;   /* bytes of class name following the name */
+	int32_t  taxid;         /* used by readnext_econame() as an index in the taxon array */
+	char     names[1];	
+} econameformat_t;
+ 
+ 
+/* In-memory name entry; name and classname are separately allocated strings. */
+ typedef struct {
+ 	char 	*name;
+ 	char 	*classname;
+ 	int32_t is_scientificname;
+ 	struct ecotxnode  *taxon;   /* points into the taxon table, not owned */
+} econame_t;
+
+ 
+/* Name table, allocated as one block holding count entries. */
+typedef struct {
+	int32_t count;
+	econame_t   names[1];
+} econameidx_t;
+
+
+/*
+ * A loaded taxonomy. names stays NULL when read_taxonomy() is asked not to
+ * read the alternative names.
+ */
+ typedef struct {
+	ecorankidx_t *ranks;
+	econameidx_t *names;
+	ecotxidx_t   *taxons;
+} ecotaxonomy_t;
+
+ 
+/*****************************************************
+ * 
+ *  Function declarations
+ * 
+ *****************************************************/
+
+/*
+ * 
+ * Low level system functions
+ * 
+ */
+
+int32_t is_big_endian();
+int32_t swap_int32_t(int32_t);
+
+void   *eco_malloc(int32_t chunksize,
+                   const char *error_message,
+                   const char *filename,
+                   int32_t    line);
+                   
+                   
+void   *eco_realloc(void *chunk,
+                    int32_t chunksize,
+                    const char *error_message,
+                    const char *filename,
+                    int32_t    line);
+                    
+void    eco_free(void *chunk,
+                 const char *error_message,
+                 const char *filename,
+                 int32_t    line);
+                 
+void    eco_trace_memory_allocation();
+void    eco_untrace_memory_allocation();
+
+#define ECOMALLOC(size,error_message) \
+	    eco_malloc((size),(error_message),__FILE__,__LINE__)
+	   
+#define ECOREALLOC(chunk,size,error_message) \
+        eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
+        
+#define ECOFREE(chunk,error_message) \
+        eco_free((chunk),(error_message),__FILE__,__LINE__)
+        
+        
+
+
+/*
+ * 
+ * Error management
+ * 
+ */
+ 
+  
+/* Print the error on stderr and abort(); see ecoError.c. Does not return. */
+void ecoError(int32_t,const char*,const char *,int);
+
+/* Raise a fatal error tagged with the source location of the call. */
+#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
+
+/* Error codes passed as the first argument of ecoError(). */
+#define ECO_IO_ERROR       (1)
+#define ECO_MEM_ERROR      (2)
+#define ECO_ASSERT_ERROR   (3)
+#define ECO_NOTFOUND_ERROR (4)
+
+
+/*
+ * 
+ * Low level Disk access functions
+ * 
+ */
+
+FILE *open_ecorecorddb(const char *filename,
+                       int32_t    *sequencecount,
+                       int32_t    abort_on_open_error);
+                       
+void *read_ecorecord(FILE *,int32_t *recordSize);
+
+
+
+/* 
+ *   Read function in internal binary format
+ */
+
+FILE             *open_ecoseqdb(const char *filename,
+                                int32_t    *sequencecount);
+                                                                
+ecoseq_t         *readnext_ecoseq(FILE *);
+
+ecorankidx_t     *read_rankidx(const char *filename);
+
+econameidx_t     *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
+
+
+
+	/**
+	 * Read taxonomy data as formatted by the ecoPCRFormat.py script.
+	 * 
+	 * This function is normally used internally by the read_taxonomy
+	 * function and should not be called directly.
+	 * 
+	 * @arg filename   path to the *.tdx file of the reformatted db
+	 * @arg filename2  path to the file of local taxa (read_taxonomy passes
+	 *                 the *.ldx file); it may be missing, in which case no
+	 *                 local taxon is loaded
+	 * 
+	 * @return pointer to a taxonomy index structure, or NULL when
+	 *         filename cannot be opened
+	 */
+ 
+ecotxidx_t       *read_taxonomyidx(const char *filename,const char *filename2);
+
+ecotaxonomy_t    *read_taxonomy(const char *prefix,int32_t readAlternativeName);
+
+ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx);
+
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
+
+int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
+
+ecoseq_t *ecoseq_iterator(const char *prefix);
+
+
+
+ecoseq_t *new_ecoseq();
+int32_t   delete_ecoseq(ecoseq_t *);
+ecoseq_t *new_ecoseq_with_data( char *AC,
+								char *DE,
+								char *SQ,
+								int32_t   taxid
+								);
+
+
+int32_t delete_taxon(ecotx_t *taxon);
+int32_t delete_taxonomy(ecotxidx_t *index);
+int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy);
+
+
+int32_t rank_index(const char* label,ecorankidx_t* ranks);
+
+//int32_t  delete_apatseq(SeqPtr pseq);
+//PatternPtr buildPattern(const char *pat, int32_t error_max);
+//PatternPtr complementPattern(PatternPtr pat);
+//
+//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
+
+//char *ecoComplementPattern(char *nucAcSeq);
+//char *ecoComplementSequence(char *nucAcSeq);
+//char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
+
+ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+
+//int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
+//int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
+
+
+ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
+
+#endif /*ECOPCR_H_*/
--- a/src/ecodna.c
+++ b/src/ecodna.c
@@ -0,0 +1,156 @@
+#include <string.h>
+#include "ecoPCR.h"
+
+/*
+ * @doc: DNA alphabet (IUPAC)
+ */
+#define LX_BIO_DNA_ALPHA   "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
+
+/*
+ * @doc: complementary DNA alphabet (IUPAC)
+ */
+#define LX_BIO_CDNA_ALPHA  "TVGHEFCDIJMLKNOPQYSAABWXRZ#!]["
+
+
+static char sNuc[]     = LX_BIO_DNA_ALPHA;
+static char sAnuc[]    = LX_BIO_CDNA_ALPHA;
+
+static char LXBioBaseComplement(char nucAc);
+static char *LXBioSeqComplement(char *nucAcSeq);
+static char *reverseSequence(char *str,char isPattern);
+
+ 
+/* ---------------------------- */
+
+/*
+ * Complement one nucleotide symbol.
+ * The symbol is looked up in sNuc and replaced by the character found at the
+ * same position in sAnuc; a symbol that is not in sNuc (a lower-case letter,
+ * for instance) is returned unchanged.
+ */
+char LXBioBaseComplement(char nucAc)
+{
+    const char *hit = strchr(sNuc, nucAc);
+
+    if (hit == NULL)
+        return nucAc;
+
+    return sAnuc[hit - sNuc];
+}
+
+/* ---------------------------- */
+
+/*
+ * Complement every symbol of a NUL-terminated sequence in place (the order
+ * of the symbols is left untouched) and return the sequence.
+ */
+char *LXBioSeqComplement(char *nucAcSeq)
+{
+    char *cursor = nucAcSeq;
+
+    while (*cursor != '\0') {
+        *cursor = LXBioBaseComplement(*cursor);
+        cursor++;
+    }
+
+    return nucAcSeq;
+}
+
+
+/*
+ * Reverse a NUL-terminated string in place and return it.
+ *
+ * When isPattern is non-zero the string is a pattern in which '#' and '!'
+ * are modifiers attached to a neighbouring symbol. A plain reversal leaves
+ * them on the wrong side of that symbol, so a second pass moves them back:
+ *   "#X!" becomes "!X#", "#X" becomes "X#" and "X!" becomes "!X".
+ *
+ * An empty string is returned untouched (the original code formed the
+ * pointer str - 1 for it), and a '!' found in first position after the
+ * reversal is left alone instead of being swapped with the byte that
+ * precedes the buffer.
+ *
+ * NOTE(review): the second pass never examines the last character, and
+ * the "#X!" form is only recognised when it is not at the very end of the
+ * string. This is kept as in the original code; verify whether
+ * patterns beginning with a modifier are expected to be handled.
+ */
+char *reverseSequence(char *str,char isPattern)
+{
+        char *sb, *se, c;
+        size_t len;
+
+        if (! str)
+            return str;
+
+        len = strlen(str);
+
+        /* nothing to reverse, and nothing to fix up, below two characters */
+        if (len < 2)
+            return str;
+
+        sb = str;
+        se = str + len - 1;
+
+        while(sb < se) {
+           c    = *sb;
+          *sb++ = *se;
+          *se-- = c;
+        }
+
+		sb = str;
+		se = str + len - 1;
+
+		if (isPattern)
+			for (;sb < se; sb++)
+			{
+				if (*sb=='#')
+				{
+					if (((se - sb) > 2) && (*(sb+2)=='!'))
+					{
+						*sb='!';
+						sb+=2;
+						*sb='#';
+					}
+					else
+					{
+						*sb=*(sb+1);
+						sb++;
+						*sb='#';
+					}
+				}
+				else if (*sb=='!' && sb > str)
+					{
+						/* move the '!' back in front of its symbol */
+						*sb=*(sb-1);
+						*(sb-1)='!';
+					}
+			}
+
+        return str;
+}
+
+/*
+ * Reverse-complement a pattern in place: the symbols are complemented, then
+ * the string is reversed with the pattern modifiers put back in place.
+ */
+char *ecoComplementPattern(char *nucAcSeq)
+{
+    char *complemented = LXBioSeqComplement(nucAcSeq);
+
+    return reverseSequence(complemented, 1);
+}
+
+/*
+ * Reverse-complement a plain sequence in place: the symbols are
+ * complemented, then the string is reversed.
+ */
+char *ecoComplementSequence(char *nucAcSeq)
+{
+    char *complemented = LXBioSeqComplement(nucAcSeq);
+
+    return reverseSequence(complemented, 0);
+}
+
+
+/*
+ * Extract the sub-sequence [begin,end[ of nucAcSeq.
+ *
+ * When begin is not lower than end the sequence is read as circular: the
+ * result is the tail starting at begin followed by the first end characters.
+ *
+ * The result lives in a static buffer that is reused, and possibly moved,
+ * by the next call; the caller must not free it.
+ */
+char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
+{
+	static char *buffer  = NULL;
+	static int32_t buffSize= 0;
+	const int wraps = !(begin < end);
+	int32_t length;
+
+	if (wraps)
+		length = end + strlen(nucAcSeq) - begin;
+	else
+		length = end - begin;
+
+	/* make room for length characters plus the terminator */
+	if (length >= buffSize)
+	{
+		buffSize = length+1;
+		buffer = buffer ? ECOREALLOC(buffer,buffSize,
+		                             "Error in reallocating sub sequence buffer")
+		                : ECOMALLOC(buffSize,
+		                            "Error in allocating sub sequence buffer");
+	}
+
+	if (wraps)
+	{
+		/* tail of the sequence first, then its beginning */
+		strncpy(buffer,nucAcSeq+begin,length - end);
+		strncpy(buffer+(length-end),nucAcSeq ,end);
+	}
+	else
+		strncpy(buffer,nucAcSeq + begin,length);
+
+	buffer[length]=0;
+
+	return buffer;
+}
+
--- a/src/ecofilter.c
+++ b/src/ecofilter.c
@@ -0,0 +1,20 @@
+#include "ecoPCR.h"
+
+/*
+ * Tell whether taxid is one of the tab_len taxids of restricted_taxid, or
+ * belongs to a taxon placed under one of them.
+ * Returns 1 in that case, 0 otherwise (also when taxid is unknown to the
+ * taxonomy).
+ */
+int eco_is_taxid_included(	ecotaxonomy_t *taxonomy, 
+							int32_t *restricted_taxid, 
+							int32_t tab_len, 
+							int32_t taxid)
+{
+	ecotx_t *taxon = eco_findtaxonbytaxid(taxonomy, taxid);
+	int      idx;
+
+	if (taxon == NULL)
+		return 0;
+
+	for (idx = 0; idx < tab_len; idx++)
+	{
+		if (taxon->taxid == restricted_taxid[idx])
+			return 1;
+
+		if (eco_isundertaxon(taxon, restricted_taxid[idx]))
+			return 1;
+	}
+
+	return 0;
+}
--- a/src/econame.c
+++ b/src/econame.c
@@ -0,0 +1,64 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+
+static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
+
+/*
+ * Load the name index stored in filename.
+ *
+ * @param filename  path of the name database
+ * @param taxonomy  taxonomy whose taxon table the names refer to; its
+ *                  taxons member must already be loaded
+ *
+ * @return the index, or NULL when the file cannot be opened. The index is
+ *         allocated as one block holding one entry per name.
+ *
+ * The file is closed before returning (it used to be left open).
+ */
+econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
+{
+
+	int32_t      		count;
+	FILE         		*f;
+	econameidx_t		*indexname;
+	int32_t      		i;
+	
+	f = open_ecorecorddb(filename,&count,0);
+	
+	if (f==NULL)
+		return NULL;
+
+	/* econameidx_t already contains one entry, hence count-1 */
+	indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
+	
+	indexname->count=count;
+	                                    
+	for (i=0; i < count; i++){
+		readnext_econame(f,(indexname->names)+i,taxonomy);
+	}
+
+	fclose(f);
+
+	return indexname;
+}
+
+/*
+ * Decode the next name record of f into *name.
+ *
+ * The name and the class name are copied into freshly allocated,
+ * NUL-terminated strings; the taxon pointer is the entry of the taxon table
+ * found at the index stored in the record.
+ *
+ * @return name, or NULL when read_ecorecord() returns no record
+ */
+econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
+{
+	int32_t          record_size;
+	econameformat_t *record = read_ecorecord(f,&record_size);
+
+	if (record == NULL)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		record->is_scientificname = swap_int32_t(record->is_scientificname);
+		record->namelength        = swap_int32_t(record->namelength);
+		record->classlength       = swap_int32_t(record->classlength);
+		record->taxid             = swap_int32_t(record->taxid);
+	}
+
+	name->is_scientificname = record->is_scientificname;
+
+	name->name = ECOMALLOC((record->namelength+1) * sizeof(char),"Allocate name");
+	strncpy(name->name, record->names, record->namelength);
+	name->name[record->namelength] = 0;
+
+	/* the class name follows the name in the record */
+	name->classname = ECOMALLOC((record->classlength+1) * sizeof(char),"Allocate classname");
+	strncpy(name->classname, record->names + record->namelength, record->classlength);
+	name->classname[record->classlength] = 0;
+
+	name->taxon = &taxonomy->taxons->taxon[record->taxid];
+
+	return name;
+}
+
--- a/src/ecorank.c
+++ b/src/ecorank.c
@@ -0,0 +1,55 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+
+static int compareRankLabel(const void *label1, const void *label2);
+
+/*
+ * Load the rank labels stored in filename.
+ *
+ * @param filename  path of the rank database
+ *
+ * @return the rank index, or NULL when the file cannot be opened. Every
+ *         label is a separately allocated NUL-terminated string.
+ *
+ * A file holding fewer labels than it announces is a fatal error
+ * (it used to pass a NULL buffer to strncpy), and the file is closed before
+ * returning (it used to be left open).
+ */
+ecorankidx_t     *read_rankidx(const char *filename)
+{
+	int32_t      count;
+	FILE         *f;
+	ecorankidx_t *index;
+	int32_t      i;
+	int32_t      rs;
+	char         *buffer;
+	
+	f = open_ecorecorddb(filename,&count,0);
+	
+	if (f==NULL)
+		return NULL;
+
+	/* ecorankidx_t already contains one label slot, hence count-1 */
+	index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
+	                                  "Allocate rank index");
+	 
+	index->count=count;                                 
+	          
+	for (i=0; i < count; i++)
+		{
+			buffer = read_ecorecord(f,&rs);
+
+			if (buffer == NULL)
+				ECOERROR(ECO_IO_ERROR,"Cannot read rank label");
+
+			/* ECOMALLOC zero-fills, so the copy below stays terminated */
+			index->label[i]=(char*) ECOMALLOC(rs+1,
+			                                  "Allocate rank label");
+			strncpy(index->label[i],buffer,rs);
+		}
+
+	fclose(f);
+		
+	return index;
+}
+
+/*
+ * Return the position of label in the rank table, or -1 when the label is
+ * not found (an unknown label is not treated as an error).
+ * The lookup is a bsearch() driven by compareRankLabel().
+ */
+int32_t rank_index(const char* label,ecorankidx_t* ranks)
+{
+	char **found = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
+
+	if (found == NULL)
+		return -1;
+
+	return found - ranks->label;
+}
+
+
+/*
+ * bsearch() comparator: label1 is the searched string itself, label2 points
+ * to an element of the label table (a char pointer).
+ */
+int compareRankLabel(const void *label1, const void *label2)
+{
+	const char        *wanted = label1;
+	const char *const *slot   = label2;
+
+	return strcmp(wanted, *slot);
+}
--- a/src/ecoseq.c
+++ b/src/ecoseq.c
@@ -0,0 +1,230 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+static FILE *open_seqfile(const char *prefix,int32_t index);
+
+
+/*
+ * Allocate an empty sequence structure. ECOMALLOC zero-fills the block, so
+ * every member starts at 0 / NULL.
+ */
+ecoseq_t *new_ecoseq()
+{
+	return ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
+}
+
+/*
+ * Release a sequence together with the strings it owns.
+ * Returns 0 when something was released, 1 when seq is NULL.
+ */
+int32_t delete_ecoseq(ecoseq_t * seq)
+{
+	if (seq == NULL)
+		return 1;
+
+	if (seq->AC)
+		ECOFREE(seq->AC,"Free sequence AC");
+
+	if (seq->DE)
+		ECOFREE(seq->DE,"Free sequence DE");
+
+	if (seq->SQ)
+		ECOFREE(seq->SQ,"Free sequence SQ");
+
+	ECOFREE(seq,"Free sequence structure");
+
+	return 0;
+}
+
+/*
+ * Build a sequence holding private copies of the given strings.
+ * A NULL argument leaves the matching member NULL. SQ_length is not set
+ * here and keeps the 0 it gets from new_ecoseq().
+ */
+ecoseq_t *new_ecoseq_with_data( char *AC,
+								char *DE,
+								char *SQ,
+								int32_t   taxid_idx
+								)
+{
+	ecoseq_t *fresh = new_ecoseq();
+
+	fresh->taxid=taxid_idx;
+
+	if (AC)
+	{
+		fresh->AC=ECOMALLOC((strlen(AC)+1) * sizeof(char),
+		                    "Allocate sequence accession");
+		strcpy(fresh->AC,AC);
+	}
+
+	if (DE)
+	{
+		fresh->DE=ECOMALLOC((strlen(DE)+1) * sizeof(char),
+		                    "Allocate sequence definition");
+		strcpy(fresh->DE,DE);
+	}
+
+	if (SQ)
+	{
+		fresh->SQ=ECOMALLOC((strlen(SQ)+1) * sizeof(char),
+		                    "Allocate sequence data");
+		strcpy(fresh->SQ,SQ);
+	}
+
+	return fresh;
+}
+
+/**
+ * Open a sequence database with open_ecorecorddb(), treating a file that
+ * cannot be opened as a fatal error (abort_on_open_error = 1).
+ * sequencecount receives the count read at the start of the file.
+ * NOTE(review): the original comment questioned whether this function is
+ * still used; no caller is visible in this part of the sources.
+ **/
+FILE *open_ecoseqdb(const char *filename,
+                    int32_t    *sequencecount)
+{
+	return open_ecorecorddb(filename,sequencecount,1);
+}
+
+/**
+ * Read the next sequence record of a sequence database.
+ *
+ * @param	f	stream returned by open_ecorecorddb / open_ecoseqdb
+ *
+ * @return	a newly allocated sequence (release it with delete_ecoseq), or
+ *          NULL when read_ecorecord() returns no record
+ *
+ * NOTE(review): the zlib uncompress() call that fills seq->SQ is disabled
+ * in this package. The original code nevertheless tested comp_status, which
+ * was never assigned, so the outcome depended on an indeterminate value.
+ * comp_status now starts as Z_DATA_ERROR, which makes the function fail
+ * with the "cannot uncompress" error in a defined way instead of returning
+ * a sequence whose SQ is empty. Re-enable the call below (and link against
+ * zlib) to get working sequences again.
+ */
+ecoseq_t *readnext_ecoseq(FILE *f)
+{
+	char     *compressed=NULL;
+
+	ecoseqformat_t *raw;
+	ecoseq_t *seq;
+	int32_t  comp_status = Z_DATA_ERROR;
+	unsigned long int seqlength;
+	unsigned long int i;
+	size_t   aclength;
+	int32_t  rs;
+	char *c;
+
+	raw = read_ecorecord(f,&rs);
+
+	if (!raw)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		raw->CSQ_length = swap_int32_t(raw->CSQ_length);
+		raw->DE_length  = swap_int32_t(raw->DE_length);
+		raw->SQ_length  = swap_int32_t(raw->SQ_length);
+		raw->taxid      = swap_int32_t(raw->taxid);
+	}
+
+	seq = new_ecoseq();
+
+	seq->taxid = raw->taxid;
+
+	/* AC is a fixed-size field: it is not NUL-terminated when the accession
+	 * fills it entirely, so its length is measured within the field. */
+	for (aclength = 0;
+	     aclength < sizeof(raw->AC) && raw->AC[aclength] != 0;
+	     aclength++)
+		;
+
+	/* ECOMALLOC zero-fills, which terminates the copies below */
+    seq->AC    = ECOMALLOC(aclength +1,
+                           "Allocate Sequence Accesion number");
+    memcpy(seq->AC,raw->AC,aclength);
+
+
+    seq->DE    = ECOMALLOC(raw->DE_length+1,
+                           "Allocate Sequence definition");
+    strncpy(seq->DE,raw->data,raw->DE_length);
+
+	seqlength = seq->SQ_length = raw->SQ_length;
+
+	/* the compressed sequence follows the definition in the record */
+    compressed = raw->data + raw->DE_length;
+    (void)compressed;
+
+    seq->SQ = ECOMALLOC(seqlength+1,
+                        "Allocate sequence buffer");
+
+//    comp_status = uncompress((unsigned char*)seq->SQ,
+//                             &seqlength,
+//                             (unsigned char*)compressed,
+//                             raw->CSQ_length);
+//
+    if (comp_status != Z_OK)
+    	ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
+
+    for (c=seq->SQ,i=0;i<seqlength;c++,i++)
+    	*c=(char)toupper((unsigned char)*c);
+
+
+	return seq;
+}
+
+/**
+ * Open one file of a sequence database (<prefix>_<index>.sdx, the index
+ * being written with at least three digits).
+ * @param	prefix	name of the database (radical without extension)
+ * @param	index 	number of the file to open
+ *
+ * @return	the open stream, or NULL when the file cannot be opened. When
+ *          the file is opened, its sequence count is reported on stderr.
+ *
+ * A file name that does not fit in the internal buffer is a fatal error.
+ * The length check now matches the size handed to snprintf: it used to let
+ * a name of exactly 1023 characters be truncated silently, and ignored a
+ * negative return value.
+ */
+FILE *open_seqfile(const char *prefix,int32_t index)
+{
+	char           filename_buffer[1024];
+	int32_t        filename_length;
+	FILE           *input;
+	int32_t        seqcount;
+
+	filename_length = snprintf(filename_buffer,
+	                           sizeof(filename_buffer),
+	                           "%s_%03d.sdx",
+	                           prefix,
+	                           index);
+
+	/* snprintf returns the length the full name needs, or a negative value */
+	if (filename_length < 0 ||
+	    (size_t)filename_length >= sizeof(filename_buffer))
+		ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
+
+	input=open_ecorecorddb(filename_buffer,&seqcount,0);
+
+	if (input)
+		fprintf(stderr,"# Reading file %s containing %d sequences...\n",
+				filename_buffer,
+				seqcount);
+
+	return input;
+}
+
+/*
+ * Iterate over all the sequences of a database split in numbered files.
+ *
+ * Call it once with the database prefix to start at file number 1, then
+ * with NULL to get the following sequences. When a file is exhausted the
+ * next numbered file is opened.
+ *
+ * @return the next sequence, or NULL when there is none left (or when the
+ *         first file cannot be opened)
+ *
+ * The state is kept in static variables, so a single iteration can be
+ * active at a time. Calling with NULL before any iteration was started, or
+ * after the last file was consumed, now returns NULL instead of handing a
+ * NULL stream to the reader.
+ */
+ecoseq_t *ecoseq_iterator(const char *prefix)
+{
+	static FILE    *current_seq_file= NULL;
+	static int32_t current_file_idx = 1;
+	static char    current_prefix[1024];
+	ecoseq_t       *seq;
+
+	if (prefix)
+	{
+		current_file_idx = 1;
+
+		if (current_seq_file)
+			fclose(current_seq_file);
+
+		strncpy(current_prefix,prefix,1023);
+		current_prefix[1023]=0;
+
+		current_seq_file = open_seqfile(current_prefix,
+		 							    current_file_idx);
+	}
+
+	/* no iteration in progress: nothing to read from */
+	if (!current_seq_file)
+		return NULL;
+
+	seq = readnext_ecoseq(current_seq_file);
+
+	if (!seq && feof(current_seq_file))
+	{
+		/* current file exhausted: move on to the next numbered file */
+		current_file_idx++;
+		fclose(current_seq_file);
+		current_seq_file = open_seqfile(current_prefix,
+		 							    current_file_idx);
+
+
+		if (current_seq_file)
+			seq = readnext_ecoseq(current_seq_file);
+	}
+
+	return seq;
+}
--- a/src/ecotax.c
+++ b/src/ecotax.c
@@ -0,0 +1,437 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <R.h>
+
+#ifndef MAX
+#define MAX(x,y) (((x)>(y)) ? (x):(y))
+#endif
+
+static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
+
+ /** 
+ * Load the taxon table.
+ * @param	filename	the main taxon database (.tdx file)
+ * @param	filename2	the database of local taxa; when it cannot be
+ *						opened no local taxon is loaded
+ * @return	a ecotxidx_t structure, or NULL when filename cannot be opened
+ *
+ * The taxa of filename2 are appended after those of filename. Parent
+ * indexes read from the files are turned into pointers into the table,
+ * maxtaxid is updated, and a last pass fills the farest member of the nodes
+ * ("longest branches").
+ *
+ * Both files are closed once read (they used to be left open).
+ */
+ecotxidx_t     *read_taxonomyidx(const char *filename,const char *filename2)
+{
+	int32_t      count;
+	int32_t      count2;
+	FILE         *f;
+	FILE         *f2;
+	ecotxidx_t *index;
+	struct ecotxnode  *t;
+	int32_t      i;
+	int32_t      j;
+	
+	f  = open_ecorecorddb(filename,&count,0);
+
+	if (f==NULL) return NULL;
+
+	/* f2 may be NULL; open_ecorecorddb then sets count2 to 0 */
+	f2 = open_ecorecorddb(filename2,&count2,0);
+
+	index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
+	                                  "Allocate taxonomy");
+	 
+	index->count=count+count2;
+  index->buffersize = index->count;
+
+	index->maxtaxid=0;
+	REprintf("Readind %d taxa...\n",count);
+	for (i=0; i < count; i++){
+		readnext_ecotaxon(f,&(index->taxon[i]));
+		/* the record stores the parent as an index in the table */
+		index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
+		index->taxon[i].parent->farest=0;
+		if (index->taxon[i].taxid > index->maxtaxid)
+			index->maxtaxid=index->taxon[i].taxid;
+	}					
+
+	fclose(f);
+
+	if (count2>0)
+		REprintf("Readind %d local taxa...\n",count2);
+	else
+		REprintf("No local taxon\n");
+
+	count = index->count;
+
+	for (; i < count; i++){
+		readnext_ecotaxon(f2,&(index->taxon[i]));
+		index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
+		index->taxon[i].parent->farest=0;
+		if (index->taxon[i].taxid > index->maxtaxid)
+			index->maxtaxid=index->taxon[i].taxid;
+	}
+
+	if (f2)
+		fclose(f2);
+
+	REprintf("Computing longest branches...\n");
+
+	/* Nodes still at -1 were never used as a parent: walk up from each of
+	 * them, raising the farest value of the ancestors as long as the
+	 * current path is longer than the one already recorded. */
+	for (i=0; i < count; i++){
+		t=index->taxon+i;
+		if (t->farest==-1)
+		{
+			t->farest=0;
+            while(t->parent != t)
+            {
+            	j = t->farest + 1;
+            	if (j > t->parent->farest)
+            	{
+            		t->parent->farest = j;
+            		t=t->parent;
+            	}
+            	else
+            		/* jump to the first entry to leave the loop;
+            		 * presumably the root (its own parent) */
+            		t=index->taxon;
+            }
+		}
+	}
+
+	return index;
+}
+
+
+/*
+ * Release a taxon table together with the name of every taxon.
+ * Returns 0 when something was released, 1 when index is NULL.
+ */
+int32_t delete_taxonomy(ecotxidx_t *index)
+{
+	int32_t pos;
+
+	if (index == NULL)
+		return 1;
+
+	for (pos = 0; pos < index->count; pos++)
+	{
+		char *label = index->taxon[pos].name;
+
+		if (label)
+			ECOFREE(label,"Free scientific name");
+	}
+
+	ECOFREE(index,"Free Taxonomy");
+
+	return 0;
+}
+
+
+
+/*
+ * Release one separately allocated taxon and its name.
+ * Returns 0 when something was released, 1 when taxon is NULL.
+ */
+int32_t delete_taxon(ecotx_t *taxon)
+{
+	if (taxon == NULL)
+		return 1;
+
+	if (taxon->name)
+		ECOFREE(taxon->name,"Free scientific name");
+
+	ECOFREE(taxon,"Free Taxon");
+
+	return 0;
+}
+
+
+/**
+ * Decode the next taxon record of a database into a taxon structure.
+ * @param	*f	stream returned by open_ecorecorddb
+ * @param	*taxon	structure to fill
+ * 
+ * @return	taxon, or NULL when read_ecorecord() returns no record
+ *
+ * On return taxon->parent does not hold a pointer yet but the parent index
+ * read from the record, cast to a pointer; read_taxonomyidx() converts it.
+ * farest is initialised to -1 and the name is a freshly allocated string.
+ */
+ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
+{
+	int32_t        record_size;
+	ecotxformat_t *record = read_ecorecord(f,&record_size);
+
+	if (record == NULL)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		record->namelength = swap_int32_t(record->namelength);
+		record->parent     = swap_int32_t(record->parent);
+		record->rank       = swap_int32_t(record->rank);
+		record->taxid      = swap_int32_t(record->taxid);
+	}
+
+	taxon->taxid  = record->taxid;
+	taxon->rank   = record->rank;
+	taxon->farest = -1;
+	taxon->parent = (ecotx_t*)((size_t)record->parent);
+
+	/* ECOMALLOC zero-fills, which terminates the copied name */
+	taxon->name   = ECOMALLOC((record->namelength+1) * sizeof(char),
+	                          "Allocate taxon scientific name");
+	strncpy(taxon->name, record->name, record->namelength);
+
+	return taxon;
+}
+
+
+/*
+ * Load a whole taxonomy from the files <prefix>.rdx (ranks), <prefix>.tdx
+ * and <prefix>.ldx (taxa and local taxa) and, when readAlternativeName is
+ * non-zero, <prefix>.ndx (names).
+ *
+ * @return the taxonomy, or NULL when the ranks or the taxa cannot be read.
+ *         names is left NULL when the names are not requested or their
+ *         file cannot be opened.
+ *
+ * The failure path of the taxon index used to free the first file name
+ * twice and to leak the second one; it now frees each of them once.
+ */
+ecotaxonomy_t    *read_taxonomy(const char *prefix,int32_t readAlternativeName)
+{
+	ecotaxonomy_t *tax;
+	char          *filename;
+	char          *filename2;
+	int           buffsize;
+	
+	tax = ECOMALLOC(sizeof(ecotaxonomy_t),
+	                "Allocate taxonomy structure");
+	
+	tax->ranks =NULL;
+	tax->taxons=NULL;
+	tax->names =NULL;
+
+	/* room for the prefix, a four character extension and the terminator */
+	buffsize = strlen(prefix)+10;
+	
+	filename = ECOMALLOC(buffsize,
+	                     "Allocate filename");
+	filename2= ECOMALLOC(buffsize,
+	                     "Allocate filename");
+	
+	snprintf(filename,buffsize,"%s.rdx",prefix);
+	
+	tax->ranks = read_rankidx(filename);
+
+	if (tax->ranks == NULL)
+	{
+		ECOFREE(filename,"Desallocate filename 1");
+		ECOFREE(filename2,"Desallocate filename 2");
+
+		delete_ecotaxonomy(tax);
+		return NULL;
+	}
+	
+	snprintf(filename,buffsize,"%s.tdx",prefix);
+	snprintf(filename2,buffsize,"%s.ldx",prefix);
+	
+	tax->taxons = read_taxonomyidx(filename,filename2);
+	
+	if (tax->taxons == NULL)
+	{
+		ECOFREE(filename,"Desallocate filename 1");
+		ECOFREE(filename2,"Desallocate filename 2");
+
+		delete_ecotaxonomy(tax);
+		return NULL;
+	}
+
+	if (readAlternativeName)
+	{
+  	   snprintf(filename,buffsize,"%s.ndx",prefix);
+	   tax->names=read_nameidx(filename,tax);
+	}
+	else
+	   tax->names=NULL;
+
+	ECOFREE(filename,"Desallocate filename 1");
+	ECOFREE(filename2,"Desallocate filename 2");
+
+	return tax;
+	
+}
+
+
+
+/*
+ * Release a taxonomy: its rank, name and taxon tables, then the structure.
+ * Returns 0 when something was released, 1 when taxonomy is NULL.
+ *
+ * NOTE(review): only the three tables themselves are freed here. The
+ * strings they point to (rank labels, names, taxon names) are allocated
+ * one by one by the readers and are not released by this function.
+ */
+int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
+{
+	if (taxonomy == NULL)
+		return 1;
+
+	if (taxonomy->ranks)
+		ECOFREE(taxonomy->ranks,"Free rank index");
+
+	if (taxonomy->names)
+		ECOFREE(taxonomy->names,"Free names index");
+
+	if (taxonomy->taxons)
+		ECOFREE(taxonomy->taxons,"Free taxon index");
+
+	ECOFREE(taxonomy,"Free taxonomy structure");
+
+	return 0;
+}
+
+/*
+ * Starting from taxon itself, walk up the parents until a node of rank
+ * rankidx is met. The walk stops at the root, which is the node that is
+ * its own parent.
+ * Returns the node found, or NULL when no node of the lineage has that rank.
+ */
+ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
+                                int32_t rankidx)
+{
+	ecotx_t *node = taxon;
+
+	while (node->rank != rankidx && node->parent != node)
+		node = node->parent;
+
+	return (node->rank == rankidx) ? node : NULL;
+}
+
+/*
+ * bsearch() comparator used by eco_findtaxonbytaxid(). The key is not a
+ * pointer to a taxid: the taxid itself travels in the pointer value.
+ */
+static int bcomptaxon (const void * ptaxid, const void * ptaxon) {
+
+  const int32_t  wanted    = (int32_t)((size_t)ptaxid);
+  const ecotx_t *candidate = (const ecotx_t*)ptaxon;
+
+  return wanted - candidate->taxid;
+}
+
+/**
+ * Find a taxon from its taxonomic id.
+ * @param 	*taxonomy 	the taxonomy database
+ * @param	taxid		the taxonomic id 
+ * 
+ * @result	the matching entry of the taxon table, or NULL when no taxon
+ *          has this id
+ *
+ * The lookup is a bsearch() over the taxon table, which assumes that the
+ * table is sorted by increasing taxid - verify against the file writer.
+ * The taxid is passed to the comparator inside the key pointer itself.
+ **/
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, 
+							  int32_t taxid)
+{
+	ecotxidx_t *table = taxonomy->taxons;
+	const void *key   = (const void *)((size_t)taxid);
+
+	return (ecotx_t*) bsearch(key,
+	                          (const void *)table->taxon,
+	                          table->count,
+	                          sizeof(ecotx_t),
+	                          bcomptaxon);
+}
+
+/**
+ * Find out if a taxon is a descendant of another taxon (identified by its
+ * taxid)
+ * @param	*taxon 			the candidate descendant
+ * @param	other_taxid 	taxonomic id of the other taxon
+ * 
+ * @return 	1 if other_taxid matches one of the ancestors of taxon, else 0.
+ *          The taxon itself is not examined, only its ancestors.
+ *
+ * The walk up the lineage stops at the node named "root", and also at a
+ * node that is its own parent, which is how the rest of this file
+ * recognises the root. Without the second test a taxonomy with no node
+ * named "root" made this loop endless.
+ **/
+int eco_isundertaxon(ecotx_t *taxon, 
+						int other_taxid)
+{
+	ecotx_t *next_parent;
+	
+	next_parent = taxon->parent;	
+	
+	while ( (other_taxid != next_parent->taxid) && 
+			(strcmp(next_parent->name, "root")) &&
+			(next_parent->parent != next_parent) )
+	{
+		next_parent = next_parent->parent;
+	}
+		
+	if (other_taxid == next_parent->taxid)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Common body of the eco_get<rank>() functions below.
+ *
+ * Each of them keeps its own cache (the last taxonomy it was given and the
+ * index of its rank label in that taxonomy) and hands it over through
+ * cached_tax / cached_rankindex. The rank index is looked up again only
+ * when a non-NULL taxonomy different from the cached one is passed.
+ * Having no cached taxonomy, or a label missing from the ranks, is a fatal
+ * error.
+ */
+static ecotx_t *eco_findtaxon_cached(ecotx_t *taxon,
+                                     ecotaxonomy_t *taxonomy,
+                                     const char *label,
+                                     ecotaxonomy_t **cached_tax,
+                                     int32_t *cached_rankindex)
+{
+	if (taxonomy && *cached_tax!=taxonomy)
+	{
+		*cached_rankindex = rank_index(label,taxonomy->ranks);
+		*cached_tax=taxonomy;
+	}
+
+	if (!*cached_tax || *cached_rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+	return eco_findtaxonatrank(taxon,*cached_rankindex);
+}
+
+/*
+ * Return the node of rank "species" in the lineage of taxon (taxon
+ * included), or NULL when there is none. taxonomy may be NULL once a first
+ * call has supplied it.
+ */
+ecotx_t *eco_getspecies(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxon_cached(taxon,taxonomy,"species",&tax,&rankindex);
+}
+
+/* Same as eco_getspecies() for the rank "genus". */
+ecotx_t *eco_getgenus(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxon_cached(taxon,taxonomy,"genus",&tax,&rankindex);
+}
+
+
+/* Same as eco_getspecies() for the rank "family". */
+ecotx_t *eco_getfamily(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxon_cached(taxon,taxonomy,"family",&tax,&rankindex);
+}
+
+/* Same as eco_getspecies() for the rank "kingdom". */
+ecotx_t *eco_getkingdom(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxon_cached(taxon,taxonomy,"kingdom",&tax,&rankindex);
+}
+
+/* Same as eco_getspecies() for the rank "superkingdom". */
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxon_cached(taxon,taxonomy,"superkingdom",&tax,&rankindex);
+}
--- a/src/robitax.c
+++ b/src/robitax.c
@@ -0,0 +1,835 @@
+/*
+ * robitax.c
+ *
+ *  Created on: 17 janv. 2013
+ *      Author: coissac
+ */
+
+#include "robitax.h"
+#include <unistd.h>
+//#include <regex.h>
+#include "slre.h"
+
+/**
+ * Return a pointer to an obitools taxonomy C structure
+ * from an R instance of taxonomy.obitools
+ *
+ * The function checks if the external pointer stored in the R object is
+ * NULL. In this case, and if the object is flagged as saved, the taxonomy
+ * is reloaded from disk: the process temporarily changes its working
+ * directory to the `workingdir` slot, reads the database named by the
+ * `dbname` slot, then restores the previous working directory.
+ *
+ * An R error is raised when the argument is not a taxonomy.obitools
+ * instance, when the working directory cannot be read or changed, when
+ * the database cannot be loaded, or when the pointer is NULL and the
+ * object is not flagged as saved. The function therefore never returns
+ * NULL.
+ *
+ * @param Rtaxonomy an R object
+ * @type  Rtaxonomy SEXP
+ *
+ * @return a pointer to the C structure
+ * @rtype  ecotaxonomy_t *
+ */
+
+ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy)
+{
+	char *pwd;
+	SEXP pointer;
+	SEXP rclass;
+	ecotaxonomy_t *ptax;
+	const char *classname;
+	const char *file;
+	const char *dir;
+	int saved;
+	int restored;
+
+	if (!IS_S4_OBJECT(Rtaxonomy) )
+		error("argument not taxonomy.obitools instance");
+
+	// We get the class name and compare it to "taxonomy.obitools"
+	rclass    = getAttrib(Rtaxonomy, R_ClassSymbol);
+	classname = CHAR(asChar(rclass));
+
+	if (strcmp(classname,"taxonomy.obitools"))
+		error("argument not taxonomy.obitools instance");
+
+	// Slot names are passed as symbols: symbols are never garbage
+	// collected, so no unprotected temporary string is created.
+	pointer = R_do_slot(Rtaxonomy,install("pointer"));
+	saved   = LOGICAL(R_do_slot(Rtaxonomy,install("saved")))[0];
+	ptax    = (ecotaxonomy_t *) R_ExternalPtrAddr(pointer);
+
+	// If the external pointer is set to NULL we have to load
+	// the taxonomy from file
+	if (ptax==NULL && saved)
+	{
+		// Read the slots first: R_do_slot() may raise an R error and
+		// nothing malloc'ed must be pending at that point.
+		file = CHAR(asChar(R_do_slot(Rtaxonomy,install("dbname"))));
+		dir  = CHAR(asChar(R_do_slot(Rtaxonomy,install("workingdir"))));
+
+		pwd = getcwd(NULL,0);
+		if (pwd==NULL)
+			error("Cannot determine the current working directory");
+
+		if (chdir(dir) != 0)
+		{
+			free(pwd);
+			error("Cannot change to the taxonomy working directory");
+		}
+
+		ptax = read_taxonomy(file,1);
+
+		// Always try to come back, whatever the outcome of the load
+		restored = chdir(pwd);
+		free(pwd);
+
+		if (ptax==NULL)
+			error("Cannot open the taxonomy database");
+
+		R_SetExternalPtrAddr(pointer,(void*)ptax);
+
+		if (restored != 0)
+			warning("Cannot restore the previous working directory");
+	}
+
+	if (ptax==NULL && ! saved)
+		error("The taxonomy instance is no more valid and must be rebuilt");
+
+	return ptax;
+}
+
+/**
+ * Release the C taxonomy referenced by an R external pointer.
+ *
+ * The taxonomy is destroyed with delete_ecotaxonomy() and the external
+ * pointer is cleared afterwards, so calling the function a second time on
+ * the same R object finds a NULL address and does nothing.
+ *
+ * @param Rtaxonomy an R external pointer holding an ecotaxonomy_t *
+ *
+ * @return R_NilValue
+ */
+SEXP R_delete_taxonomy(SEXP Rtaxonomy)
+{
+	ecotaxonomy_t *ptax = (ecotaxonomy_t *) R_ExternalPtrAddr(Rtaxonomy);
+
+	// The address is NULL once the pointer has been cleared (or was
+	// never set): there is nothing to free in that case.
+	if (ptax != NULL)
+		(void) delete_ecotaxonomy(ptax);
+
+	// Clear the external pointer
+	R_ClearExternalPtr(Rtaxonomy);
+
+	return R_NilValue;
+}
+
+
+
+/*
+ * Finalizer with the exact signature R expects (void (*)(SEXP)).
+ * It simply forwards to R_delete_taxonomy(), which returns a SEXP and
+ * therefore must not be registered directly through a cast.
+ */
+static void taxonomy_finalizer(SEXP Rtaxonomy)
+{
+	(void) R_delete_taxonomy(Rtaxonomy);
+}
+
+/**
+ * Load a taxonomy database and wrap it into an R external pointer.
+ *
+ * A finalizer is registered on the external pointer (also run on R exit)
+ * so the C structure is released when the R object is collected.
+ *
+ * @param filename   R character vector; its first element is passed to
+ *                   read_taxonomy()
+ * @param altenative R logical vector; its first element is passed as the
+ *                   second argument of read_taxonomy()
+ *
+ * @return an external pointer tagged "ROBITools NCBI Taxonomy pointer";
+ *         an R error is raised on invalid arguments or when the database
+ *         cannot be opened
+ */
+SEXP R_read_taxonomy(SEXP filename, SEXP altenative)
+{
+	int   alt;
+	const char* file;
+	ecotaxonomy_t *taxonomy;
+	SEXP  tag;
+	SEXP  Rtax;
+
+	if (! isString(filename))
+		error("filename not character");
+	if (length(filename) < 1)
+		error("filename is empty");
+	file = CHAR(STRING_ELT(filename, 0));
+
+	if (! isLogical(altenative))
+		error("altenative not logical");
+	if (length(altenative) < 1)
+		error("altenative is empty");
+	alt = LOGICAL(altenative)[0];
+
+	taxonomy = read_taxonomy(file,alt);
+
+	if (! taxonomy)
+		error("Cannot open the taxonomy database");
+
+	// The tag must be protected: R_MakeExternalPtr() allocates and could
+	// otherwise trigger a collection of the freshly built string.
+	tag  = PROTECT(mkString("ROBITools NCBI Taxonomy pointer"));
+	Rtax = PROTECT(R_MakeExternalPtr(taxonomy, tag, R_NilValue));
+	R_RegisterCFinalizerEx(Rtax, taxonomy_finalizer, TRUE);
+
+	UNPROTECT(2);
+
+	return Rtax;
+}
+
+
+/**
+ * Return the name stored for a taxid.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ *
+ * @return a character vector of length one holding the `name` field of
+ *         the taxon, or NA_character_ when the taxid is not found.
+ *         An R error is raised when Rtaxid is not an integer vector, is
+ *         empty, or when its first element is not strictly positive.
+ */
+SEXP R_get_scientific_name(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+	ecotx_t *taxon;
+	ecotaxonomy_t *ptax;
+	int taxid;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	// NA_integer_ is negative, so it is rejected here as well
+	if (! (taxid > 0))
+		error("taxid not positive");
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+		return ScalarString(R_NaString);
+
+	return mkString(taxon->name);
+}
+
+/**
+ * Return the rank label of each taxid of an integer vector.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector of taxids
+ *
+ * @return a character vector of the same length as Rtaxid; an element is
+ *         NA_character_ when the taxid is NA, not strictly positive, or
+ *         not found in the taxonomy. An R error is raised when Rtaxid is
+ *         not an integer vector.
+ */
+SEXP R_get_rank(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+	ecotaxonomy_t *ptax = getTaxPointer(Rtaxonomy);
+	const int *taxids;
+	int count;
+	int idx;
+	SEXP ranks;
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+
+	count  = length(Rtaxid);
+	ranks  = PROTECT(allocVector(STRSXP, count));
+	taxids = INTEGER(Rtaxid);
+
+	for (idx = 0; idx < count; idx++)
+	{
+		// NA unless a valid, known taxid is found below
+		SEXP label = R_NaString;
+
+		if (taxids[idx] != NA_INTEGER && taxids[idx] > 0)
+		{
+			ecotx_t *taxon = eco_findtaxonbytaxid(ptax, taxids[idx]);
+
+			if (taxon)
+				label = mkChar(ptax->ranks->label[taxon->rank]);
+		}
+
+		SET_STRING_ELT(ranks, idx, label);
+	}
+
+	UNPROTECT(1);
+
+	return ranks;
+}
+
+/**
+ * Find the ancestor of a taxon at a rank given by name.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ * @param Rrank     R character vector; its first element is the rank name
+ * @param Rname     R logical vector; when its first element is non-zero
+ *                  the name of the taxon is returned instead of its taxid
+ *
+ * @return a length-one character vector (name) or integer vector (taxid);
+ *         NA of the corresponding type when the taxid is unknown or
+ *         eco_findtaxonatrank() returns NULL. An R error is raised on
+ *         arguments of the wrong type, empty arguments, a non-positive
+ *         taxid or an unknown rank name.
+ */
+SEXP R_findtaxonatrank(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rrank, SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep = NULL;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+	const char *rank;
+	int   rankidx;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (! (taxid > 0))
+		error("taxid not positive");
+
+	if (! isString(Rrank))
+		error("rank not a string");
+	if (length(Rrank) < 1)
+		error("rank is empty");
+
+	rank    = CHAR(STRING_ELT(Rrank,0));
+	rankidx = rank_index(rank,ptax->ranks);
+
+	if (rankidx < 0)
+		error("unkown rank name");
+
+	if (! isLogical(Rname))
+		error("name not logical");
+	if (length(Rname) < 1)
+		error("name is empty");
+	name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (taxon)
+		rep = eco_findtaxonatrank(taxon,rankidx);
+
+	// Unknown taxid or no ancestor at that rank: typed NA
+	if (!rep)
+		return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+	return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+
+/**
+ * Return the species-level ancestor of a taxon (see eco_getspecies()).
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ * @param Rname     R logical vector; when its first element is non-zero
+ *                  the name of the taxon is returned instead of its taxid
+ *
+ * @return a length-one character vector (name) or integer vector (taxid);
+ *         NA of the corresponding type when the taxid is unknown or no
+ *         species is found. An R error is raised on arguments of the
+ *         wrong type, empty arguments or a non-positive taxid.
+ */
+SEXP R_get_species(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep = NULL;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (! (taxid > 0))
+		error("taxid not positive");
+
+	if (! isLogical(Rname))
+		error("name not logical");
+	if (length(Rname) < 1)
+		error("name is empty");
+	name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (taxon)
+		rep = eco_getspecies(taxon,ptax);
+
+	// Unknown taxid or no species above it: typed NA
+	if (!rep)
+		return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+	return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+/**
+ * Return the genus-level ancestor of a taxon (see eco_getgenus()).
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ * @param Rname     R logical vector; when its first element is non-zero
+ *                  the name of the taxon is returned instead of its taxid
+ *
+ * @return a length-one character vector (name) or integer vector (taxid);
+ *         NA of the corresponding type when the taxid is unknown or no
+ *         genus is found. An R error is raised on arguments of the wrong
+ *         type, empty arguments or a non-positive taxid.
+ */
+SEXP R_get_genus(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep = NULL;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (! (taxid > 0))
+		error("taxid not positive");
+
+	if (! isLogical(Rname))
+		error("name not logical");
+	if (length(Rname) < 1)
+		error("name is empty");
+	name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (taxon)
+		rep = eco_getgenus(taxon,ptax);
+
+	// Unknown taxid or no genus above it: typed NA
+	if (!rep)
+		return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+	return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+/**
+ * Return the family-level ancestor of a taxon (see eco_getfamily()).
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ * @param Rname     R logical vector; when its first element is non-zero
+ *                  the name of the taxon is returned instead of its taxid
+ *
+ * @return a length-one character vector (name) or integer vector (taxid);
+ *         NA of the corresponding type when the taxid is unknown or no
+ *         family is found. An R error is raised on arguments of the wrong
+ *         type, empty arguments or a non-positive taxid.
+ */
+SEXP R_get_family(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep = NULL;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (! (taxid > 0))
+		error("taxid not positive");
+
+	if (! isLogical(Rname))
+		error("name not logical");
+	if (length(Rname) < 1)
+		error("name is empty");
+	name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (taxon)
+		rep = eco_getfamily(taxon,ptax);
+
+	// Unknown taxid or no family above it: typed NA
+	if (!rep)
+		return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+	return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+/**
+ * Return the kingdom-level ancestor of a taxon (see eco_getkingdom()).
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ * @param Rname     R logical vector; when its first element is non-zero
+ *                  the name of the taxon is returned instead of its taxid
+ *
+ * @return a length-one character vector (name) or integer vector (taxid);
+ *         NA of the corresponding type when the taxid is unknown or no
+ *         kingdom is found. An R error is raised on arguments of the
+ *         wrong type, empty arguments or a non-positive taxid.
+ */
+SEXP R_get_kingdom(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep = NULL;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (! (taxid > 0))
+		error("taxid not positive");
+
+	if (! isLogical(Rname))
+		error("name not logical");
+	if (length(Rname) < 1)
+		error("name is empty");
+	name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (taxon)
+		rep = eco_getkingdom(taxon,ptax);
+
+	// Unknown taxid or no kingdom above it: typed NA
+	if (!rep)
+		return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+	return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+/**
+ * Return the superkingdom-level ancestor of a taxon
+ * (see eco_getsuperkingdom()).
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ * @param Rname     R logical vector; when its first element is non-zero
+ *                  the name of the taxon is returned instead of its taxid
+ *
+ * @return a length-one character vector (name) or integer vector (taxid);
+ *         NA of the corresponding type when the taxid is unknown or no
+ *         superkingdom is found. An R error is raised on arguments of the
+ *         wrong type, empty arguments or a non-positive taxid.
+ */
+SEXP R_get_superkingdom(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep = NULL;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (! (taxid > 0))
+		error("taxid not positive");
+
+	if (! isLogical(Rname))
+		error("name not logical");
+	if (length(Rname) < 1)
+		error("name is empty");
+	name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (taxon)
+		rep = eco_getsuperkingdom(taxon,ptax);
+
+	// Unknown taxid or no superkingdom above it: typed NA
+	if (!rep)
+		return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+	return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+/**
+ * Return the direct parent of a taxon.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ * @param Rname     R logical vector; when its first element is non-zero
+ *                  the name of the parent is returned instead of its taxid
+ *
+ * @return a length-one character vector (name) or integer vector (taxid);
+ *         NA of the corresponding type when the taxid is unknown, when
+ *         the taxon has no parent, or when the parent carries the same
+ *         taxid as the taxon itself. An R error is raised on arguments of
+ *         the wrong type, empty arguments or a non-positive taxid.
+ */
+SEXP R_get_parent(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep = NULL;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (! (taxid > 0))
+		error("taxid not positive");
+
+	if (! isLogical(Rname))
+		error("name not logical");
+	if (length(Rname) < 1)
+		error("name is empty");
+	name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (taxon)
+		rep = taxon->parent;
+
+	// A parent with the same taxid is treated as "no parent"
+	// (presumably how the root is encoded - TODO confirm); the NULL test
+	// additionally guards against a taxon without any parent link.
+	if (!rep || rep->taxid==taxid)
+		return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+	return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+
+/**
+ * Check that a taxid exists in the taxonomy.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ *
+ * @return a length-one integer vector holding the `taxid` field of the
+ *         taxon found by eco_findtaxonbytaxid(), or NA_integer_ when the
+ *         taxid is not strictly positive or not found. An R error is
+ *         raised when Rtaxid is not an integer vector or is empty.
+ */
+SEXP R_validate_taxid(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+	ecotx_t *taxon;
+	ecotaxonomy_t *ptax;
+	int taxid;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (! (taxid > 0))
+		return ScalarInteger(R_NaInt);
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+		return ScalarInteger(R_NaInt);
+
+	return ScalarInteger(taxon->taxid);
+}
+
+
+/**
+ * Test whether a taxon is located under another taxon.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; its first element is the tested taxid
+ * @param Rparent   R integer vector; its first element is the taxid of
+ *                  the candidate ancestor
+ *
+ * @return a length-one logical vector built from the value returned by
+ *         eco_isundertaxon(). When either taxid is not strictly positive
+ *         or not found, NA is returned as an *integer* vector
+ *         (NA_integer_), which is kept for backward compatibility.
+ *         An R error is raised when an argument is not an integer vector
+ *         or is empty.
+ */
+SEXP R_is_under_taxon(SEXP Rtaxonomy, SEXP Rtaxid, SEXP Rparent)
+{
+	ecotx_t *taxon;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int parent;
+	int rep;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rparent))
+		error("parent not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rparent) < 1)
+		error("parent is empty");
+
+	parent = INTEGER(Rparent)[0];
+
+	if (parent <= 0)
+		return ScalarInteger(R_NaInt);
+
+	// This lookup only validates that the parent taxid exists
+	if (!eco_findtaxonbytaxid(ptax, parent))
+		return ScalarInteger(R_NaInt);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (taxid <= 0)
+		return ScalarInteger(R_NaInt);
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+		return ScalarInteger(R_NaInt);
+
+	rep = eco_isundertaxon(taxon, parent);
+
+	return ScalarLogical(rep);
+}
+
+
+/**
+ * Return the `farest` field stored for a taxon.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ * @param Rtaxid    R integer vector; only its first element is used
+ *
+ * @return a length-one integer vector holding taxon->farest (presumably
+ *         the length of the longest path below the taxon - TODO confirm),
+ *         or NA_integer_ when the taxid is not strictly positive or not
+ *         found. An R error is raised when Rtaxid is not an integer
+ *         vector or is empty.
+ */
+SEXP R_longest_path(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+	ecotx_t *taxon;
+	ecotaxonomy_t *ptax;
+	int taxid;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isInteger(Rtaxid))
+		error("taxid not integer");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rtaxid) < 1)
+		error("taxid is empty");
+
+	taxid = INTEGER(Rtaxid)[0];
+
+	if (taxid <= 0)
+		return ScalarInteger(R_NaInt);
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+		return ScalarInteger(R_NaInt);
+
+	return ScalarInteger(taxon->farest);
+}
+
+/**
+ * Return every rank label of a taxonomy.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ *
+ * @return a character vector with one element per entry of the rank
+ *         table, in table order
+ */
+SEXP R_rank_list(SEXP Rtaxonomy)
+{
+	ecotaxonomy_t *ptax  = getTaxPointer(Rtaxonomy);
+	int            total = ptax->ranks->count;
+	SEXP           labels;
+	int            idx;
+
+	labels = PROTECT(allocVector(STRSXP, total));
+
+	idx = 0;
+	while (idx < total)
+	{
+		SET_STRING_ELT(labels, idx, mkChar(ptax->ranks->label[idx]));
+		idx++;
+	}
+
+	UNPROTECT(1);
+
+	return labels;
+}
+
+/**
+ * Return every taxid of a taxonomy.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ *
+ * @return an integer vector with the `taxid` field of each entry of the
+ *         taxon table, in table order
+ */
+SEXP R_taxid_list(SEXP Rtaxonomy)
+{
+	ecotaxonomy_t *ptax  = getTaxPointer(Rtaxonomy);
+	int            total = ptax->taxons->count;
+	SEXP           ids   = PROTECT(allocVector(INTSXP, total));
+	int            idx;
+
+	idx = 0;
+	while (idx < total)
+	{
+		INTEGER(ids)[idx] = ptax->taxons->taxon[idx].taxid;
+		idx++;
+	}
+
+	UNPROTECT(1);
+
+	return ids;
+}
+
+/**
+ * Return the `maxtaxid` field of the taxon table.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ *
+ * @return a length-one integer vector
+ */
+SEXP R_max_taxid(SEXP Rtaxonomy)
+{
+	ecotaxonomy_t *ptax = getTaxPointer(Rtaxonomy);
+	int            highest = ptax->taxons->maxtaxid;
+
+	return ScalarInteger(highest);
+}
+
+/**
+ * Return the number of entries of the taxon table.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance
+ *
+ * @return a length-one integer vector holding taxons->count
+ */
+SEXP R_length_taxonomy(SEXP Rtaxonomy)
+{
+	ecotaxonomy_t *ptax  = getTaxPointer(Rtaxonomy);
+	int            total = ptax->taxons->count;
+
+	return ScalarInteger(total);
+}
+
+/*
+ * Append one taxid to a growable heap buffer.
+ * The buffer starts empty (*buffer == NULL, *capacity == 0) and doubles
+ * when full. Returns 0 on success and -1 when memory cannot be obtained;
+ * in the latter case the existing buffer is left untouched so the caller
+ * can still free it.
+ */
+static int ecofind_push(int32_t **buffer, size_t *capacity, size_t *count,
+                        int32_t taxid)
+{
+	if (*count == *capacity)
+	{
+		size_t   wanted = (*capacity > 0) ? *capacity * 2 : 100;
+		int32_t *grown  = realloc(*buffer, wanted * sizeof(int32_t));
+
+		if (grown == NULL)
+			return -1;
+
+		*buffer   = grown;
+		*capacity = wanted;
+	}
+
+	(*buffer)[*count] = taxid;
+	(*count)++;
+
+	return 0;
+}
+
+/**
+ * Search the taxonomy for names matching a regular expression.
+ *
+ * The pattern is matched case-insensitively with slre_match(); a name is
+ * retained when the match result is strictly positive.
+ *
+ * @param Rtaxonomy    a taxonomy.obitools instance
+ * @param Rpattern     R character vector; its first element is the pattern
+ * @param Rrank        NULL, or an R character vector whose first element
+ *                     restricts the result to taxa having that rank label
+ * @param Ralternative R logical vector; when its first element is non-zero
+ *                     and the taxonomy has a name table, that table is
+ *                     scanned instead of the taxon table
+ *
+ * @return an integer vector of the taxids of the matching entries, in
+ *         scan order (a taxid may appear several times when several names
+ *         of the same taxon match). An R error is raised on arguments of
+ *         the wrong type, empty arguments or memory exhaustion.
+ */
+SEXP R_ecofind(SEXP Rtaxonomy, SEXP Rpattern, SEXP Rrank, SEXP Ralternative)
+{
+	ecotaxonomy_t *ptax;
+	const char    *pattern;
+	const char    *rankname = NULL;
+	int            alternative;
+	int32_t       *buffer   = NULL;
+	size_t         capacity = 0;
+	size_t         nummatch = 0;
+	size_t         j;
+	SEXP           taxids;
+	int           *ptaxid;
+
+	ptax = getTaxPointer(Rtaxonomy);
+
+	if (! isString(Rpattern))
+		error("pattern not a string");
+	// Reading element 0 of a zero-length vector is out of bounds
+	if (length(Rpattern) < 1)
+		error("pattern is empty");
+
+	pattern = CHAR(STRING_ELT(Rpattern,0));
+
+	if (! isNull(Rrank))
+	{
+		if (! isString(Rrank))
+			error("rank not a string");
+		if (length(Rrank) < 1)
+			error("rank is empty");
+
+		rankname = CHAR(STRING_ELT(Rrank,0));
+	}
+
+	if (! isLogical(Ralternative))
+		error("alternative not a logical");
+	if (length(Ralternative) < 1)
+		error("alternative is empty");
+
+	alternative = LOGICAL(Ralternative)[0];
+
+	if (alternative && ptax->names!=NULL)
+	{
+		econame_t *name = ptax->names->names;
+
+		for (j=0; j < ptax->names->count; j++, name++)
+		{
+			// Cheap rank filter first, regex only when needed
+			if (rankname &&
+			    strcmp(rankname,ptax->ranks->label[name->taxon->rank]) != 0)
+				continue;
+
+			if (slre_match(pattern, name->name,
+			               (int) strlen(name->name),
+			               NULL, 0,
+			               SLRE_IGNORE_CASE) <= 0)
+				continue;
+
+			if (ecofind_push(&buffer, &capacity, &nummatch,
+			                 name->taxon->taxid) != 0)
+			{
+				free(buffer);
+				error("Cannot allocate memory for the taxid list");
+			}
+		}
+	}
+	else
+	{
+		for (j=0; j < ptax->taxons->count; j++)
+		{
+			if (rankname &&
+			    strcmp(rankname,
+			           ptax->ranks->label[ptax->taxons->taxon[j].rank]) != 0)
+				continue;
+
+			if (slre_match(pattern, ptax->taxons->taxon[j].name,
+			               (int) strlen(ptax->taxons->taxon[j].name),
+			               NULL, 0,
+			               SLRE_IGNORE_CASE) <= 0)
+				continue;
+
+			if (ecofind_push(&buffer, &capacity, &nummatch,
+			                 ptax->taxons->taxon[j].taxid) != 0)
+			{
+				free(buffer);
+				error("Cannot allocate memory for the taxid list");
+			}
+		}
+	}
+
+	// NOTE(review): if this allocation fails R unwinds the stack and
+	// `buffer` is leaked; acceptable for an out-of-memory situation.
+	taxids = PROTECT(allocVector(INTSXP, (R_xlen_t) nummatch));
+	ptaxid = INTEGER(taxids);
+
+	for (j=0; j < nummatch; j++)
+		ptaxid[j]=buffer[j];
+
+	free(buffer);
+
+	UNPROTECT(1);
+	return taxids;
+}
--- a/src/robitax.h
+++ b/src/robitax.h
@@ -0,0 +1,6 @@
+#include "ecoPCR.h"
+
+
+/* Return the C taxonomy behind an R taxonomy.obitools instance */
+ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
+/* Free the C taxonomy held by an R external pointer and clear the pointer */
+SEXP R_delete_taxonomy(SEXP Rtaxonomy);
+
--- a/src/slre.c
+++ b/src/slre.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2004-2013 Sergey Lyubka <valenok@gmail.com>
+ * Copyright (c) 2013 Cesanta Software Limited
+ * All rights reserved
+ *
+ * This library is dual-licensed: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. For the terms of this
+ * license, see <http://www.gnu.org/licenses/>.
+ *
+ * You are free to use this library under the terms of the GNU General
+ * Public License, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * Alternatively, you can license this library under a commercial
+ * license, as set out in <http://cesanta.com/products.html>.
+ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "slre.h"
+
+/* Capacity of the branches[] and brackets[] arrays of struct regex_info */
+#define MAX_BRANCHES 100
+#define MAX_BRACKETS 100
+/*
+ * Return error_code from the enclosing function when condition holds.
+ * NOTE(review): expands to a bare `if`, so it must not be used as the
+ * unbraced body of an if/else.
+ */
+#define FAIL_IF(condition, error_code) if (condition) return (error_code)
+
+/* Number of elements of a true array (not of a pointer) */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(ar) (sizeof(ar) / sizeof((ar)[0]))
+#endif
+
+/* Debug trace: DBG((fmt, ...)) prints only when SLRE_DEBUG is defined */
+#ifdef SLRE_DEBUG
+#define DBG(x) printf x
+#else
+#define DBG(x)
+#endif
+
+/*
+ * One parenthesised group of the regular expression. Entry 0 of the
+ * brackets array is a pseudo-group covering the whole expression.
+ */
+struct bracket_pair {
+  const char *ptr;  /* Points to the first char after '(' in regex  */
+  int len;          /* Length of the text between '(' and ')'       */
+                    /* (-1 while the closing ')' has not been seen) */
+  int branches;     /* Index in the branches array for this pair    */
+  int num_branches; /* Number of '|' in this bracket pair           */
+};
+
+/* One alternation operator ('|') found in the regular expression */
+struct branch {
+  int bracket_index;    /* index for 'struct bracket_pair brackets' */
+                        /* array defined below                      */
+  const char *schlong;  /* points to the '|' character in the regex */
+};
+
+/* Parsing and matching state shared by all helpers for one slre_match() call */
+struct regex_info {
+  /*
+   * Describes all bracket pairs in the regular expression.
+   * First entry is always present, and grabs the whole regex.
+   */
+  struct bracket_pair brackets[MAX_BRACKETS];
+  int num_brackets;
+
+  /*
+   * Describes alternations ('|' operators) in the regular expression.
+   * Each branch falls into a specific bracket pair.
+   */
+  struct branch branches[MAX_BRANCHES];
+  int num_branches;
+
+  /* Array of captures provided by the user (may be NULL) */
+  struct slre_cap *caps;
+  int num_caps;
+
+  /* E.g. SLRE_IGNORE_CASE. See the flags enum in slre.h */
+  int flags;
+};
+
+/*
+ * Tell whether the byte pointed to by s may follow a backslash in a
+ * regular expression. Returns 1 when it belongs to the accepted set,
+ * 0 otherwise.
+ */
+static int is_metacharacter(const unsigned char *s) {
+  static const char *escapable = "^$().[]*+?|\\Ssdbfnrtv";
+  const char *hit = strchr(escapable, *s);
+
+  return hit != NULL;
+}
+
+/*
+ * Length in bytes of the regex operator starting at re:
+ * 4 for a "\xHH" escape, 2 for any other backslash escape, 1 otherwise.
+ */
+static int op_len(const char *re) {
+  if (re[0] != '\\') {
+    return 1;
+  }
+  return re[1] == 'x' ? 4 : 2;
+}
+
+/*
+ * Length of a character set body, closing ']' included.
+ * re points just after the opening '[' and re_len is the number of bytes
+ * available from there. Returns -1 when stepping over the operators
+ * overshoots re_len.
+ */
+static int set_len(const char *re, int re_len) {
+  int pos;
+
+  for (pos = 0; pos < re_len && re[pos] != ']'; pos += op_len(re + pos)) {
+    /* skip one operator per iteration */
+  }
+
+  if (pos > re_len) {
+    return -1;
+  }
+  return pos + 1;
+}
+
+/*
+ * Length of the next regex chunk: a whole "[...]" set when re starts
+ * with '[', a single operator otherwise. The result is 0 when set_len()
+ * reports an invalid set (-1).
+ */
+static int get_op_len(const char *re, int re_len) {
+  if (re[0] == '[') {
+    return set_len(re + 1, re_len - 1) + 1;
+  }
+  return op_len(re);
+}
+
+/* Return 1 when re starts with one of the quantifiers '*', '+' or '?' */
+static int is_quantifier(const char *re) {
+  switch (re[0]) {
+    case '*':
+    case '+':
+    case '?':
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/*
+ * Value of one hexadecimal digit given as a lowercase character code.
+ * 'W' is ten positions before 'a' in ASCII, hence 'a' - 'W' == 10.
+ */
+static int toi(int x) {
+  if (isdigit(x)) {
+    return x - '0';
+  }
+  return x - 'W';
+}
+
+/* Convert the two hexadecimal digits at s[0] and s[1] into a byte value */
+static int hextoi(const unsigned char *s) {
+  const int high = toi(tolower(s[0]));
+  const int low  = toi(tolower(s[1]));
+
+  return (high << 4) | low;
+}
+
+/*
+ * Match the single operator at re against the single byte at s.
+ * Returns 1 (one byte consumed) on success, SLRE_NO_MATCH on failure and
+ * SLRE_INTERNAL_ERROR when re points at a '|'.
+ */
+static int match_op(const unsigned char *re, const unsigned char *s,
+                    struct regex_info *info) {
+  int matched;
+
+  switch (*re) {
+    case '\\':
+      /* Metacharacters */
+      switch (re[1]) {
+        case 'S': matched = !isspace(*s);  break;
+        case 's': matched = isspace(*s);   break;
+        case 'd': matched = isdigit(*s);   break;
+        case 'b': matched = (*s == '\b');  break;
+        case 'f': matched = (*s == '\f');  break;
+        case 'n': matched = (*s == '\n');  break;
+        case 'r': matched = (*s == '\r');  break;
+        case 't': matched = (*s == '\t');  break;
+        case 'v': matched = (*s == '\v');  break;
+
+        case 'x':
+          /* Match byte, \xHH where HH is hexadecimal byte representation */
+          matched = (hextoi(re + 2) == *s);
+          break;
+
+        default:
+          /* Escaped literal: the regex pass has already validated it */
+          matched = (re[1] == s[0]);
+          break;
+      }
+      break;
+
+    case '|':
+      /* Alternations are split before reaching this point */
+      return SLRE_INTERNAL_ERROR;
+
+    case '$':
+      return SLRE_NO_MATCH;
+
+    case '.':
+      matched = 1;
+      break;
+
+    default:
+      if (info->flags & SLRE_IGNORE_CASE) {
+        matched = (tolower(*re) == tolower(*s));
+      } else {
+        matched = (*re == *s);
+      }
+      break;
+  }
+
+  return matched ? 1 : SLRE_NO_MATCH;
+}
+
+/*
+ * Match the byte at s against a character set.
+ * re points just after the opening '[' and re_len is the length of the
+ * set body; a leading '^' inverts the set. Elements are tried in turn
+ * until one matches: "a-z" style ranges are compared directly (folding
+ * case when SLRE_IGNORE_CASE is set), anything else goes through
+ * match_op().
+ * Returns 1 when the byte is accepted by the set, -1 otherwise.
+ */
+static int match_set(const char *re, int re_len, const char *s,
+                     struct regex_info *info) {
+  int len = 0, result = -1, invert = re[0] == '^';
+
+  if (invert) re++, re_len--;
+
+  while (len <= re_len && re[len] != ']' && result <= 0) {
+    /* Support character range */
+    if (re[len] != '-' && re[len + 1] == '-' && re[len + 2] != ']' &&
+        re[len + 2] != '\0') {
+      if (info->flags & SLRE_IGNORE_CASE) {
+        /* Cast first: passing a negative char to tolower() is undefined */
+        int c  = tolower((unsigned char) *s);
+        int lo = tolower((unsigned char) re[len]);
+        int hi = tolower((unsigned char) re[len + 2]);
+        result = c >= lo && c <= hi;
+      } else {
+        result = *s >= re[len] && *s <= re[len + 2];
+      }
+      len += 3;
+    } else {
+      result = match_op((unsigned char *) re + len, (unsigned char *) s, info);
+      len += op_len(re + len);
+    }
+  }
+  return (!invert && result > 0) || (invert && result <= 0) ? 1 : -1;
+}
+
+/* Forward declaration: bar() and doh() are mutually recursive */
+static int doh(const char *s, int s_len, struct regex_info *info, int bi);
+
+/*
+ * Match the regex fragment re[0..re_len) (which contains no top-level
+ * alternation for its bracket) against s[0..s_len).
+ * bi is the index of the bracket pair the fragment belongs to.
+ * Returns the number of bytes of s consumed, or a negative SLRE_* code.
+ */
+static int bar(const char *re, int re_len, const char *s, int s_len,
+               struct regex_info *info, int bi) {
+  /* i is offset in re, j is offset in s, bi is brackets index */
+  int i, j, n, step;
+
+  for (i = j = 0; i < re_len && j <= s_len; i += step) {
+
+    /* Handle quantifiers. Get the length of the chunk. */
+    step = re[i] == '(' ? info->brackets[bi + 1].len + 2 :
+      get_op_len(re + i, re_len - i);
+
+    DBG(("%s [%.*s] [%.*s] re_len=%d step=%d i=%d j=%d\n", __func__,
+         re_len - i, re + i, s_len - j, s + j, re_len, step, i, j));
+
+    FAIL_IF(is_quantifier(&re[i]), SLRE_UNEXPECTED_QUANTIFIER);
+    FAIL_IF(step <= 0, SLRE_INVALID_CHARACTER_SET);
+
+    /* The chunk is followed by a quantifier */
+    if (i + step < re_len && is_quantifier(re + i + step)) {
+      DBG(("QUANTIFIER: [%.*s]%c [%.*s]\n", step, re + i,
+           re[i + step], s_len - j, s + j));
+      if (re[i + step] == '?') {
+        /* Optional chunk: consume it when it matches, ignore it otherwise */
+        int result = bar(re + i, step, s + j, s_len - j, info, bi);
+        j += result > 0 ? result : 0;
+        /* Skip the '?' itself (the loop adds step afterwards) */
+        i++;
+      } else if (re[i + step] == '+' || re[i + step] == '*') {
+        int j2 = j, nj = j, n1, n2 = -1, ni, non_greedy = 0;
+
+        /* Points to the regexp code after the quantifier */
+        ni = i + step + 1;
+        if (ni < re_len && re[ni] == '?') {
+          non_greedy = 1;
+          ni++;
+        }
+
+        /*
+         * Repeat the chunk as long as it matches; after each repetition
+         * try the rest of the regex and remember in nj the last position
+         * where the rest matched.
+         */
+        do {
+          if ((n1 = bar(re + i, step, s + j2, s_len - j2, info, bi)) > 0) {
+            j2 += n1;
+          }
+          if (re[i + step] == '+' && n1 < 0) break;
+
+          if (ni >= re_len) {
+            /* After quantifier, there is nothing */
+            nj = j2;
+          } else if ((n2 = bar(re + ni, re_len - ni, s + j2,
+                               s_len - j2, info, bi)) >= 0) {
+            /* Regex after quantifier matched */
+            nj = j2 + n2;
+          }
+          if (nj > j && non_greedy) break;
+        } while (n1 > 0);
+
+        /* '*' with zero repetitions: the rest must match at j */
+        if (n1 < 0 && re[i + step] == '*' &&
+            (n2 = bar(re + ni, re_len - ni, s + j, s_len - j, info, bi)) > 0) {
+          nj = j + n2;
+        }
+
+        DBG(("STAR/PLUS END: %d %d %d %d %d\n", j, nj, re_len - ni, n1, n2));
+        FAIL_IF(re[i + step] == '+' && nj == j, SLRE_NO_MATCH);
+
+        /* If while loop body above was not executed for the * quantifier,  */
+        /* make sure the rest of the regex matches                          */
+        FAIL_IF(nj == j && ni < re_len && n2 < 0, SLRE_NO_MATCH);
+
+        /* Returning here cause we've matched the rest of RE already */
+        return nj;
+      }
+      continue;
+    }
+
+    if (re[i] == '[') {
+      /* Character set: consumes exactly one byte of s on success */
+      n = match_set(re + i + 1, re_len - (i + 2), s + j, info);
+      DBG(("SET %.*s [%.*s] -> %d\n", step, re + i, s_len - j, s + j, n));
+      FAIL_IF(n <= 0, SLRE_NO_MATCH);
+      j += n;
+    } else if (re[i] == '(') {
+      /* Group: descend into the next bracket pair */
+      n = SLRE_NO_MATCH;
+      bi++;
+      FAIL_IF(bi >= info->num_brackets, SLRE_INTERNAL_ERROR);
+      DBG(("CAPTURING [%.*s] [%.*s] [%s]\n",
+           step, re + i, s_len - j, s + j, re + i + step));
+
+      if (re_len - (i + step) <= 0) {
+        /* Nothing follows brackets */
+        n = doh(s + j, s_len - j, info, bi);
+      } else {
+        /*
+         * Something follows: shrink the text offered to the group until
+         * both the group and the remainder of the regex match.
+         */
+        int j2;
+        for (j2 = 0; j2 <= s_len - j; j2++) {
+          if ((n = doh(s + j, s_len - (j + j2), info, bi)) >= 0 &&
+              bar(re + i + step, re_len - (i + step),
+                  s + j + n, s_len - (j + n), info, bi) >= 0) break;
+        }
+      }
+
+      DBG(("CAPTURED [%.*s] [%.*s]:%d\n", step, re + i, s_len - j, s + j, n));
+      FAIL_IF(n < 0, n);
+      /* Record the capture when the caller supplied a caps array */
+      if (info->caps != NULL) {
+        info->caps[bi - 1].ptr = s + j;
+        info->caps[bi - 1].len = n;
+      }
+      j += n;
+    } else if (re[i] == '^') {
+      /* Start anchor: only valid at offset 0 of s */
+      FAIL_IF(j != 0, SLRE_NO_MATCH);
+    } else if (re[i] == '$') {
+      /* End anchor: only valid at the end of s */
+      FAIL_IF(j != s_len, SLRE_NO_MATCH);
+    } else {
+      /* Plain operator: needs at least one remaining byte in s */
+      FAIL_IF(j >= s_len, SLRE_NO_MATCH);
+      n = match_op((unsigned char *) (re + i), (unsigned char *) (s + j), info);
+      FAIL_IF(n <= 0, n);
+      j += n;
+    }
+  }
+
+  return j;
+}
+
+/*
+ * Process branch points: match s[0..s_len) against bracket pair bi by
+ * trying each of its '|'-separated alternatives in order with bar().
+ * Stops at the first alternative giving a strictly positive result and
+ * returns the result of the last alternative tried.
+ */
+static int doh(const char *s, int s_len, struct regex_info *info, int bi) {
+  const struct bracket_pair *b = &info->brackets[bi];
+  int i = 0, len, result;
+  const char *p;
+
+  do {
+    /* p: start of alternative i (group start, or just after a '|') */
+    p = i == 0 ? b->ptr : info->branches[b->branches + i - 1].schlong + 1;
+    /* len: up to the next '|' of this group, or to the end of the group */
+    len = b->num_branches == 0 ? b->len :
+      i == b->num_branches ? (int) (b->ptr + b->len - p) :
+      (int) (info->branches[b->branches + i].schlong - p);
+    DBG(("%s %d %d [%.*s] [%.*s]\n", __func__, bi, i, len, p, s_len, s));
+    result = bar(p, len, s, s_len, info, bi);
+    DBG(("%s <- %d\n", __func__, result));
+  } while (result <= 0 && i++ < b->num_branches);  /* At least 1 iteration */
+
+  return result;
+}
+
+/*
+ * Search s[0..s_len) for the compiled regex, trying every start offset
+ * in turn (only offset 0 when the regex begins with '^').
+ * Returns the offset just past the end of the first match found, or the
+ * negative result of the last attempt.
+ */
+static int baz(const char *s, int s_len, struct regex_info *info) {
+  const int anchored = info->brackets[0].ptr[0] == '^';
+  int start = 0;
+  int outcome = -1;
+
+  while (start <= s_len) {
+    outcome = doh(s + start, s_len - start, info, 0);
+    if (outcome >= 0) {
+      /* Make the result relative to the beginning of s */
+      return outcome + start;
+    }
+    if (anchored) {
+      break;
+    }
+    start++;
+  }
+
+  return outcome;
+}
+
+/*
+ * Group the recorded '|' operators by bracket pair.
+ * After the call, branches[] is ordered by bracket index while keeping,
+ * inside one bracket pair, the left-to-right order in which the '|' were
+ * found; each bracket pair knows the index of its first branch and the
+ * number of branches it owns.
+ */
+static void setup_branch_points(struct regex_info *info) {
+  int i, j;
+  struct branch tmp;
+
+  /*
+   * First, sort branches by bracket index. The sort MUST be stable:
+   * doh() computes the length of each alternative from the distance
+   * between consecutive '|' of the same bracket pair, so their relative
+   * order has to be preserved (e.g. for "(a|b|c)|d"). Insertion sort
+   * only shifts elements that are strictly greater, hence it is stable.
+   */
+  for (i = 1; i < info->num_branches; i++) {
+    tmp = info->branches[i];
+    for (j = i;
+         j > 0 && info->branches[j - 1].bracket_index > tmp.bracket_index;
+         j--) {
+      info->branches[j] = info->branches[j - 1];
+    }
+    info->branches[j] = tmp;
+  }
+
+  /*
+   * For each bracket, set their branch points. This way, for every bracket
+   * (i.e. every chunk of regex) we know all branch points before matching.
+   */
+  for (i = j = 0; i < info->num_brackets; i++) {
+    info->brackets[i].num_branches = 0;
+    info->brackets[i].branches = j;
+    while (j < info->num_branches && info->branches[j].bracket_index == i) {
+      info->brackets[i].num_branches++;
+      j++;
+    }
+  }
+}
+
+/*
+ * Parse the regular expression re[0..re_len) and run the match.
+ * A single pass records every bracket pair and every '|' in info and
+ * validates escapes and bracket balance; the branches are then attached
+ * to their bracket pair and s[0..s_len) is searched with baz().
+ * Returns the result of baz(), or a negative SLRE_* code when the
+ * expression is invalid.
+ */
+static int foo(const char *re, int re_len, const char *s, int s_len,
+               struct regex_info *info) {
+  int i, step, depth = 0;
+
+  /* First bracket captures everything */
+  info->brackets[0].ptr = re;
+  info->brackets[0].len = re_len;
+  info->num_brackets = 1;
+
+  /* Make a single pass over regex string, memorize brackets and branches */
+  for (i = 0; i < re_len; i += step) {
+    step = get_op_len(re + i, re_len - i);
+
+    /*
+     * A malformed set such as "[\" yields a zero step: bail out instead
+     * of looping forever on the same position.
+     */
+    FAIL_IF(step <= 0, SLRE_INVALID_CHARACTER_SET);
+
+    if (re[i] == '|') {
+      FAIL_IF(info->num_branches >= (int) ARRAY_SIZE(info->branches),
+              SLRE_TOO_MANY_BRANCHES);
+      /* Owner: the innermost still-open bracket, else the current depth */
+      info->branches[info->num_branches].bracket_index =
+        info->brackets[info->num_brackets - 1].len == -1 ?
+        info->num_brackets - 1 : depth;
+      info->branches[info->num_branches].schlong = &re[i];
+      info->num_branches++;
+    } else if (re[i] == '\\') {
+      FAIL_IF(i >= re_len - 1, SLRE_INVALID_METACHARACTER);
+      if (re[i + 1] == 'x') {
+        /* Hex digit specification must follow */
+        FAIL_IF(i >= re_len - 3, SLRE_INVALID_METACHARACTER);
+        /* Cast first: passing a negative char to isxdigit() is undefined */
+        FAIL_IF(!(isxdigit((unsigned char) re[i + 2]) &&
+                isxdigit((unsigned char) re[i + 3])),
+                SLRE_INVALID_METACHARACTER);
+      } else {
+        FAIL_IF(!is_metacharacter((unsigned char *) re + i + 1),
+                SLRE_INVALID_METACHARACTER);
+      }
+    } else if (re[i] == '(') {
+      FAIL_IF(info->num_brackets >= (int) ARRAY_SIZE(info->brackets),
+              SLRE_TOO_MANY_BRACKETS);
+      depth++;  /* Order is important here. Depth increments first. */
+      info->brackets[info->num_brackets].ptr = re + i + 1;
+      info->brackets[info->num_brackets].len = -1;
+      info->num_brackets++;
+      FAIL_IF(info->num_caps > 0 && info->num_brackets - 1 > info->num_caps,
+              SLRE_CAPS_ARRAY_TOO_SMALL);
+    } else if (re[i] == ')') {
+      int ind = info->brackets[info->num_brackets - 1].len == -1 ?
+        info->num_brackets - 1 : depth;
+      info->brackets[ind].len = (int) (&re[i] - info->brackets[ind].ptr);
+      DBG(("SETTING BRACKET %d [%.*s]\n",
+           ind, info->brackets[ind].len, info->brackets[ind].ptr));
+      depth--;
+      FAIL_IF(depth < 0, SLRE_UNBALANCED_BRACKETS);
+      /* Empty group "()" is rejected */
+      FAIL_IF(i > 0 && re[i - 1] == '(', SLRE_NO_MATCH);
+    }
+  }
+
+  FAIL_IF(depth != 0, SLRE_UNBALANCED_BRACKETS);
+  setup_branch_points(info);
+
+  return baz(s, s_len, info);
+}
+
+/*
+ * Public entry point: match the NUL-terminated regular expression regexp
+ * against the first s_len bytes of s.
+ * caps/num_caps optionally receive the captured groups and flags is a
+ * combination of the SLRE_* flags. Returns the value computed by foo().
+ */
+int slre_match(const char *regexp, const char *s, int s_len,
+               struct slre_cap *caps, int num_caps, int flags) {
+  struct regex_info info;
+  const int regexp_len = (int) strlen(regexp);
+
+  /* Only the scalar members need a defined value before parsing */
+  info.caps = caps;
+  info.num_caps = num_caps;
+  info.flags = flags;
+  info.num_branches = 0;
+  info.num_brackets = 0;
+
+  DBG(("========================> [%s] [%.*s]\n", regexp, s_len, s));
+  return foo(regexp, regexp_len, s, s_len, &info);
+}
--- a/src/slre.h
+++ b/src/slre.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2004-2013 Sergey Lyubka <valenok@gmail.com>
+ * Copyright (c) 2013 Cesanta Software Limited
+ * All rights reserved
+ *
+ * This library is dual-licensed: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. For the terms of this
+ * license, see <http://www.gnu.org/licenses/>.
+ *
+ * You are free to use this library under the terms of the GNU General
+ * Public License, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * Alternatively, you can license this library under a commercial
+ * license, as set out in <http://cesanta.com/products.html>.
+ */
+
+/*
+ * This is a regular expression library that implements a subset of Perl RE.
+ * Please refer to README.md for a detailed reference.
+ */
+
+#ifndef SLRE_HEADER_DEFINED
+#define SLRE_HEADER_DEFINED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* One captured group: a view into the matched buffer (not NUL-terminated) */
+struct slre_cap {
+  const char *ptr;  /* first byte of the capture inside the buffer */
+  int len;          /* number of bytes captured                    */
+};
+
+
+/*
+ * Match the NUL-terminated regular expression `regexp` against the first
+ * `buf_len` bytes of `buf`. `caps` may be NULL; otherwise it receives the
+ * captured groups. The result is negative (one of the failure codes
+ * below) when there is no match or the expression is invalid.
+ */
+int slre_match(const char *regexp, const char *buf, int buf_len,
+               struct slre_cap *caps, int num_caps, int flags);
+
+/* Possible flags for slre_match() */
+enum { SLRE_IGNORE_CASE = 1 };
+
+
+/* slre_match() failure codes (all strictly negative) */
+#define SLRE_NO_MATCH               -1  /* the text does not match          */
+#define SLRE_UNEXPECTED_QUANTIFIER  -2  /* '*', '+' or '?' with no operand  */
+#define SLRE_UNBALANCED_BRACKETS    -3  /* '(' and ')' do not pair up       */
+#define SLRE_INTERNAL_ERROR         -4  /* inconsistent matcher state       */
+#define SLRE_INVALID_CHARACTER_SET  -5  /* malformed "[...]" set            */
+#define SLRE_INVALID_METACHARACTER  -6  /* unsupported or truncated escape  */
+#define SLRE_CAPS_ARRAY_TOO_SMALL   -7  /* more groups than num_caps        */
+#define SLRE_TOO_MANY_BRANCHES      -8  /* more '|' than the matcher holds  */
+#define SLRE_TOO_MANY_BRACKETS      -9  /* more '(' than the matcher holds  */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* SLRE_HEADER_DEFINED */