initial commit

2016-01-13 10:05:51 +01:00
commit 05efb38fed
42 changed files with 4331 additions and 0 deletions
@@ -0,0 +1,3 @@
+/man/
+/vignettes/
+/Read-and-delete-me
@@ -0,0 +1,20 @@
+Package: ROBITaxonomy
+Type: Package
+Title: Metabarcoding data biodiversity analysis
+Version: 0.1
+Date: 2012-08-23
+Author: LECA - Laboratoire d'ecologie alpine
+Maintainer: LECA OBITools team <obitools@metabarcoding.org>
+Description: More about what it does (maybe more than one line)
+License: CeCILL v2.0
+LazyLoad: yes
+Roxygen: list(wrap = FALSE)
+Collate:
+    'ROBITaxonomy.R'
+    'taxonomy.R'
+    'basic.R'
+    'default.R'
+    'distance.R'
+    'lca.R'
+    'rank.R'
+RoxygenNote: 5.0.1
@@ -0,0 +1,28 @@
+# Generated by roxygen2: do not edit by hand
+
+export(default.taxonomy)
+export(distance.taxonomy)
+export(ecofind)
+export(family)
+export(genus)
+export(is.obitools.taxonomy)
+export(is.subcladeof)
+export(kingdom)
+export(length.obitools.taxonomy)
+export(longest.path)
+export(lowest.common.ancestor)
+export(max.obitools.taxonomy)
+export(parent)
+export(path)
+export(rank.list)
+export(read.taxonomy)
+export(scientificname)
+export(species)
+export(superkingdom)
+export(taxid.list)
+export(taxonatrank)
+export(taxonomicrank)
+export(validate)
+exportClasses(obitools.taxonomy)
+exportClasses(obitools.taxonomyOrNULL)
+useDynLib(ROBITaxonomy)
@@ -0,0 +1,359 @@
+#' @include taxonomy.R
+NULL
+
+#' @export
+setGeneric("scientificname", function(taxonomy,taxid) {
+  return(standardGeneric("scientificname"))
+})
+
+#' Returns the scientific name corresponding to a \emph{NCBI taxid}
+#' 
+#' \code{scientificname} function in package \pkg{\link{ROBITaxonomy}} returns the 
+#' scientific name corresponding to a \emph{NCBI taxid}.
+#' 
+#' @param   taxonomy a \code{\link{obitools.taxonomy}} instance
+#' @param   taxid an integer value or a vector of integer representing NCBI 
+#'          taxonomic identifiers.
+#' @return  The scientific name of the corresponding taxon as a string or a 
+#'          vector of string if the \code{taxid} argument is itself a vector
+#' 
+#' @examples
+#' # load the default taxonomy database include in the ROBITaxonomy library
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # look for the scientific names correponding to these taxids
+#' scientificname(taxo,sp.taxid)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}}
+#'          
+#'          
+#' @docType methods
+#' @rdname scientificname-methods
+#' @aliases scientificname-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' 
+setMethod("scientificname", "obitools.taxonomy",function(taxonomy,taxid) {
+  getscname = function(t)  {	
+    if (is.na(t))
+      return(NA)
+    else
+      return(	.Call('R_get_scientific_name',
+                    taxonomy,
+                    t,
+                    PACKAGE="ROBITaxonomy"))
+  }
+  
+  taxid = as.integer(taxid)
+  sapply(taxid,getscname)
+})
+
+
+######################################################################
+######################################################################
+
+
+#' @export
+setGeneric("parent", function(taxonomy,taxid,name=FALSE) {
+  return(standardGeneric("parent"))
+})
+
+#' Returns the parent taxon corresponding to a \emph{NCBI taxid}
+#' 
+#' \code{parent} function in package \pkg{\link{ROBITaxonomy}} returns the 
+#' parent taxon corresponding to a \emph{NCBI taxid}.
+#' 
+#' @param   taxonomy a \code{\link{obitools.taxonomy}} instance
+#' @param   taxid an integer value or a vector of integer representing NCBI 
+#'          taxonomic identifiers.
+#' @param   name A logical value \code{TRUE} or \code{FALSE} indicating if the 
+#'          method returns a taxid or a scientific name.
+#'                            
+#' @return  \describe{ \item{If \code{name==FALSE}}{the taxid of the 
+#'                            parent taxon as an integer or a vector of 
+#'                            integers if the \code{taxid} argument is itself 
+#'                            a vector} 
+#'                      \item{If \code{name==TRUE}}{the scientific name of the
+#'                            parent taxon as a string or a vector of 
+#'                            string if  the \code{taxid} argument is itself a 
+#'                            vector} }
+#' 
+#' @examples
+#' # load the default taxonomy database include in the ROBITaxonomy library
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # look for the parent taxa correponding to these taxids
+#' parent(taxo,sp.taxid)
+#' 
+#' # same things but scientific names are returned
+#' parent(taxo,sp.taxid,TRUE)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}}
+#'          
+#'          
+#' @docType methods
+#' @rdname parent-methods
+#' @aliases parent-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' 
+setMethod("parent", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
+  getp = function(t)	 {	
+    if (is.na(t))
+      return(NA)
+    else
+      return(.Call('R_get_parent',
+                   taxonomy,
+                   as.integer(t),
+                   name,
+                   PACKAGE="ROBITaxonomy"))
+  }
+  
+  taxid = as.integer(taxid)
+  name  = as.logical(name[1])
+  sapply(taxid,getp)
+})
+
+
+
+######################################################################
+######################################################################
+
+
+
+#' @export
+setGeneric("taxid.list", function(taxonomy) {
+  return(standardGeneric("taxid.list"))
+})
+
+
+#' Returns the list of all taxids belonging the taxonomy.
+#'  
+#' \code{taxid.list} returns the list of all taxids included in the
+#' instance of the class \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
+#' 
+#' @return an \code{integer} vector containing the list of taxids.
+#' 
+#' @examples
+#' # loads the default taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # returns the count of taxa described in the taxonomy
+#' length(taxo)
+#' 
+#' # extracts the list of all valid taxids
+#' good = taxid.list(taxo)
+#' 
+#' # returns the size of the returned list
+#' length(good)
+#' 
+#' @seealso \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' @docType methods
+#' @rdname taxid.list-method
+#' @aliases taxid.list
+#' 
+setMethod("taxid.list", "obitools.taxonomy",
+          function(taxonomy) {
+            return(.Call('R_taxid_list',
+                         taxonomy,
+                         PACKAGE="ROBITaxonomy"))
+          })
+
+######################################################################
+######################################################################
+
+
+
+#' Returns the count of taxa in the taxonomy.
+#'  
+#' \code{length} returns the count of taxa included in the
+#' instance of the class \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @param x the \code{\linkS4class{obitools.taxonomy}} to use.
+#' 
+#' @return an \code{integer} corresponding to the count of taxa.
+#' 
+#' @examples
+#' # loads the default taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # returns the count of taxa described in the taxonomy
+#' length(taxo)
+#' 
+#' @seealso \code{\link{length}}, \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' @export  length.obitools.taxonomy
+#' 
+length.obitools.taxonomy = function(x) 
+{
+  return(.Call('R_length_taxonomy',
+               x,
+               PACKAGE="ROBITaxonomy"))
+}
+
+
+######################################################################
+######################################################################
+
+setGeneric('max')  	
+
+#' Returns the maximum taxid in the taxonomy.
+#'  
+#' \code{length} returns the maximum taxid included in the
+#' instance of the class \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
+#' @param na.rm included for compatibility purpose, this parameter as
+#'              no effect on this implementation of \code{max}
+#' 
+#' @return an \code{integer} corresponding to the count of taxa.
+#' 
+#' @examples
+#' # load the default taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # gets the larger taxid of the database
+#' max(taxo)
+#' 
+#' @seealso \code{\link{max}}, \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' @export  max.obitools.taxonomy
+#' 
+max.obitools.taxonomy=function(taxonomy,na.rm = FALSE) {
+  return(.Call('R_max_taxid',
+               taxonomy,
+               PACKAGE="ROBITaxonomy"))
+}
+
+#' @export
+setGeneric("ecofind", function(taxonomy,patterns,rank=NULL,alternative=FALSE) {
+  return(standardGeneric("ecofind"))
+})
+
+#' Returns taxids associated to the names
+#' 
+#' Return the set of taxids having their name matching the given pattern.
+#' 
+#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
+#' @param patterns one or several regular pattern used to select the the taxa.
+#' @param   rank a \code{character} indicating a taxonomic rank. If not \code{NULL}
+#'               only taxids correponding to this rank are returned.
+#' @param   alternative  A logical value \code{TRUE} or \code{FALSE} indicating 
+#'          if the function must only look for a scientific name.
+#' 
+#' @return if just one pattern is given, an integer vector is returned with the
+#'         corresponding taxids. If a list of patterns is given, the function
+#'         returns a list of integer vectors, each vector containing the taxids
+#'         corresponding to a pattern. The returned list is in the same order 
+#'         than the given patern list.
+#' 
+#' @examples
+#' # load the default taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # retreives the Vertebrata taxid
+#' taxid = ecofind(taxo,"Vertebrata")
+#' 
+#' taxid
+#' scientificname(taxo,taxid)
+#' 
+#' 
+#' taxid = ecofind(taxo,"^Vertebrata$")
+#' 
+#' taxid
+#' scientificname(taxo,taxid)
+#' 
+#'
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' @docType methods
+#' @rdname ecofind-method
+#' @aliases ecofind,obitools.taxonomy
+#' 
+setMethod("ecofind", "obitools.taxonomy",function(taxonomy,patterns,rank=NULL,alternative=FALSE) {
+  getp = function(t)   {	
+    if (is.na(t))
+      return(NA)
+    else
+      return(unique(.Call('R_ecofind',
+                          taxonomy,
+                          t,
+                          rank,
+                          alternative,
+                          PACKAGE="ROBITaxonomy")))
+  }
+  
+  patterns = as.character(patterns)
+  taxid=lapply(patterns,getp)
+  if (length(taxid)==1)
+    taxid=taxid[[1]]
+  
+  return(taxid)
+})
+
+
+
+#' @export
+setGeneric("validate", function(taxonomy,taxid) {
+  return(standardGeneric("validate"))
+})
+
+#' Checks that a \emph{taxid} is really present in taxonomy
+#' 
+#' \code{validate} function in package \pkg{\link{ROBITaxonomy}} checks 
+#' that a \emph{taxid} is declared in the considered taxonomy.
+#' 
+#' @param   taxonomy a \code{\link{obitools.taxonomy}} instance
+#' @param   taxid an integer value or a vector of integer representing NCBI 
+#'          taxonomic identifiers.
+#'                            
+#' @return  The taxid if it exists, NA otherwise. If the input taxid is a 
+#'          vector of integer returns an integer vector composed of validated
+#'          taxids and NA values.
+#' 
+#' @examples
+#' # load the default taxonomy database include in the ROBITaxonomy library
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 101 taxids 
+#' sp.taxid=c(7000:7100)
+#' 
+#' # checks the list of taxids
+#' validate(taxo,sp.taxid)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}}
+#'          
+#'          
+#' @docType methods
+#' @rdname validate-methods
+#' @aliases validate-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' 
+setMethod("validate", "obitools.taxonomy",function(taxonomy,taxid) {
+  getp = function(t)   {  
+    if (is.na(t))
+      return(NA)
+    else
+      return(.Call('R_validate_taxid',
+                   taxonomy,
+                   t,
+                   PACKAGE="ROBITaxonomy"))
+  }
+  
+  taxid = as.integer(taxid)
+  sapply(taxid,getp)
+})
+
@@ -0,0 +1,52 @@
+#' @include taxonomy.R
+NULL
+
+
+#
+#
+# Manage le loading of the default taxonomy
+#
+#
+
+.__default__taxonomy__ = NULL
+
+#' Returns the default taxonomy
+#' 
+#' Returns a \code{\linkS4class{obitools.taxonomy}} instance corresponding
+#' to a NCBI taxonomy included by default in the \pkg{\link{ROBITaxonomy}} package.
+#' 
+#' @return a \code{\linkS4class{obitools.taxonomy}} instance.
+#' 
+#' @examples
+#' 
+#' # Load the default taxonomy
+#' taxo = default.taxonomy()
+#' 
+#' # and use it for requesting a scientific name
+#' scientificname(taxo,7742)
+#' 
+#' @seealso \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' @export
+#' 
+default.taxonomy = function() {
+  if (is.null(get(".__default__taxonomy__",envir = environment())))
+    assign(".__default__taxonomy__", 
+           read.taxonomy(paste(system.file("extdata", 
+                                           package="ROBITaxonomy"),
+                                           'ncbitaxo',
+                                        sep='/')), 
+           envir=globalenv())
+  
+  return(get(".__default__taxonomy__",envir = globalenv()))
+}
+
+
+#' @export
+#' 
+is.obitools.taxonomy = function(taxonomy) {
+  class(t)[1] == "obitools.taxonomy"
+}
+
@@ -0,0 +1,189 @@
+#' @include taxonomy.R
+NULL
+
+#' @export
+setGeneric("longest.path", function(taxonomy,taxid) {
+  return(standardGeneric("longest.path"))
+})
+
+#' Returns the longuest path from a taxon.
+#' 
+#' The method \code{longest.path} returns the length of the
+#' path linking a taxid to the farest leaf belonging this taxid.
+#' 
+#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
+#' 
+#' @param taxid an \code{integer} vector containing the list of taxids.
+#' 
+#' @return an \code{integer} vector containing the list length.
+#' 
+#' @examples
+#' # loads the default taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # returns the longest path in the taxonomy (from the root node)
+#' longest.path(taxo,1)
+#' 
+#' 
+#' @seealso \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' @docType methods
+#' @rdname longest.path-method
+#' @aliases longest.path,obitools.taxonomy
+#' 
+setMethod("longest.path", "obitools.taxonomy",
+          function(taxonomy,taxid) {
+            getp = function(t)   {	
+              if (is.na(t))
+                return(NA)
+              else
+                return(.Call('R_longest_path',
+                             taxonomy,
+                             t,
+                             PACKAGE="ROBITaxonomy"))
+            }
+            
+            taxid = as.integer(taxid)
+            sapply(taxid,getp)
+          })
+
+
+#' @export
+setGeneric("distance.taxonomy", function(taxonomy,taxid1,taxid2=NULL,name=F) {
+  return(standardGeneric("distance.taxonomy"))
+})
+
+
+#' Computes a distance matrix between taxids
+#' 
+#' The method \code{taxonomy.distance} computes a distance matrix between a
+#' set of taxids. The distance between two taxa is based on the topology of
+#' the taxonomomy tree.
+#' 
+#' \deqn{ d(Taxon_A,Taxon_B) = \frac{longest.path(lca(Taxon_A,Taxon_B))}{max(longest.path(Taxon_A),longest.path(Taxon_B))}}
+#'                                  { longest.path(lca(Taxon_A,Taxon_B)) / max(longest.path(Taxon_A),longest.path(Taxon_B)) }
+#' 
+#' 
+#' @param taxonomy the \code{\linkS4class{obitools.taxonomy}} to use.
+#' 
+#' @param taxid1 an \code{integer} vector containing a list of taxids.
+#' 
+#' @param taxid2 an \code{integer} vector containing a list of taxids.
+#'               If \code{taxid2} is set to \code{NULL} (it's default value)
+#'               then the \code{taxid2} list is considered as equal to 
+#'               \code{taxid1} list. 
+#' @param name  A logical value \code{TRUE} or \code{FALSE} indicating 
+#'          if the method return distance matrix annotated by taxids or 
+#'          by scientific names.
+#'          
+#' @return the distance matrix between taxids specified in the \code{taxid1}
+#'         set and the \code{taxid2} set.
+#'         
+#' @examples
+#' # loads the default taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # computes the distance matrix between taxids
+#' distance.taxonomy(taxo,sp.taxid)
+#' 
+#' # Same thing but the matrix is annotated by scientific names
+#' distance.taxonomy(taxo,sp.taxid,name=TRUE)
+#' 
+#' @seealso \code{\link{longest.path}}
+#' 
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' @docType methods
+#' @rdname distance.taxonomy-method
+#' @aliases taxonomy.distance,obitools.taxonomy
+#' 
+setMethod("distance.taxonomy", "obitools.taxonomy",
+          function(taxonomy,taxid1,taxid2=NULL,name=F) {
+            taxdist = function(r)
+            {	
+              t1=r[1]
+              t2=r[2]
+              if (is.na(t1) | is.na(t2))
+                return(NA)
+              
+              p1 = path(taxonomy,t1)
+              p2 = path(taxonomy,t2)
+              
+              minp = min(length(p1),length(p2))
+              common = sum(p1[1:minp] == p2[1:minp])
+              lca = p1[common]
+              lp = longest.path(taxonomy,lca)
+              return(lp/(lp+common))
+            }
+            
+            multitaxdist=function(t1,t2) {
+              apply(data.frame(t1,t2),1,taxdist)
+            }
+            
+            taxid1 = taxid1[! is.na(validate(taxonomy,taxid1))]
+            t1 = path(taxonomy,taxid1)
+            
+            same = is.null(taxid2)
+            
+            if (same)
+            {
+              ntaxon = length(taxid1) 
+              t2 = t1[unlist(sapply(2:ntaxon,
+                                    function(x) x:ntaxon))]
+              t1 = t1[rep(1:(ntaxon-1),(ntaxon-1):1)]
+            }
+            else
+            {
+              taxid2 = taxid2[! is.na(validate(taxonomy,taxid2))]
+              t2 = path(taxonomy,taxid2)
+              nt1 = length(taxid1)
+              nt2 = length(taxid2)
+              t1 = t1[rep(1:nt1,nt2)]
+              t2 = t2[rep(1:nt2,rep(nt1,nt2))]
+            }
+            
+            lmin = mapply(function(a,b) min(length(a),length(b)),
+                          t1,
+                          t2)
+            
+            llca = mapply(function(x,y,l) sum(x[1:l]==y[1:l]),
+                          t1,
+                          t2,
+                          lmin)
+            
+            lb   = longest.path(taxonomy,mapply(function(x,y) x[y],t1,llca))
+            d    = as.double(lb / (lb + llca))
+            
+            if (same) {
+              attr(d, "Size") <- ntaxon
+              if (name)
+                attr(d, "Labels") <- scientificname(taxonomy,taxid1)
+              else
+                attr(d, "Labels") <- as.character(taxid1)
+              attr(d, "Diag") <- FALSE
+              attr(d, "Upper") <- FALSE
+              attr(d, "method") <- NULL
+              attr(d, "call") <- match.call()
+              class(d) <- "dist"
+            }
+            else {
+              if (name)
+                d = matrix(d,nt1,nt2,
+                           dimnames=list(scientificname(taxonomy,taxid1),
+                                         scientificname(taxonomy,taxid2)))
+              else
+                d = matrix(d,nt1,nt2,
+                           dimnames=list(as.character(taxid1),
+                                         as.character(taxid2)))
+              
+            }
+            
+            return(d)
+          })
+
+
@@ -0,0 +1,122 @@
+#' @include taxonomy.R
+NULL
+
+#' @export
+setGeneric("lowest.common.ancestor", function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
+  return(standardGeneric("lowest.common.ancestor"))
+})
+
+#' Computes the lowest common ancestor in the taxonomy tree between a set of taxa
+#' 
+#' The \code{lowest.common.ancestor} function in package \pkg{ROBITaxonomy} computes
+#' the lowest common ancestor of a set of taxids. The lowest common ancestor (LCA)
+#' is the most precise taxonomic group shared by all the considered taxa. Tha
+#' \code{lowest.common.ancestor} function implemented in the \pkg{ROBITaxonomy}
+#' package, considers a fuzzy definition of the LCA as the most precise
+#' taxonomic group shared by a quorum of the considered taxa.
+#' 
+#' @param taxonomy an instance of \code{\linkS4class{obitools.taxonomy}}
+#' @param taxid an integer value or a vector of integer representing NCBI 
+#'   taxonomic identifiers.
+#' @param threshold a numeric value between 0.0 and 1.0 indicating the minimum 
+#' quorum of taxid that must belong the LCA.
+#' @param error an integer value indicating the maximum count of taxids that 
+#' have not to belong the returned taxid. A \code{threshold} below 1.0 have
+#'  priority on the \code{error} parameter.
+#' @param name A logical value \code{TRUE} or \code{FALSE} indicating if the
+#' method return a \emph{taxid} or a scientific name.
+#' 
+#' @return Depending on the value of the \code{name} argument, set by default 
+#'         to \code{FALSE} the method returns :
+#'    \describe{  
+#'           \item{If \code{name==FALSE}}{ the taxid of the taxon corresponding 
+#'             to the LCA as an integer value}
+#'           \item{If \code{name==TRUE}}{ the scientific name of the taxon 
+#'                 corresponding to the LCA as a string}
+#'          }
+#'          
+#' @examples
+#' require(ROBITaxonomy)
+#'
+#' \dontshow{# switch the working directory to the data package directory}
+#' \dontshow{setwd(system.file("extdata", package="ROBITaxonomy"))}
+#'   
+#' # read the taxonomy database
+#'   
+#' taxo=read.taxonomy('ncbitaxo')
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' 
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # look for the lowest common ancestor taxids
+#' 
+#' lowest.common.ancestor(taxo,sp.taxid)
+#' 
+#' # same thing but returns results as a vector of scientific names
+#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE)
+#' 
+#' # If we accept than 2 or 1 taxa do not belong the LCA
+#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=2)
+#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,error=1)
+#' 
+#' # Partial LCA can also be speciefied as the minimal frequency of
+#' # taxa belonging the LCA 
+#' lowest.common.ancestor(taxo,sp.taxid,name=TRUE,threshold=0.8)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}},
+#' and methods \code{\link{path}}, \code{\link{parent}},
+#' 
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' @docType methods
+#' @rdname lowest.common.ancestor-method
+#' @aliases lowest.common.ancestor,obitools.taxonomy
+#' 
+setMethod("lowest.common.ancestor", "obitools.taxonomy",
+          function(taxonomy,taxid,threshold=1.0,error=0,name=FALSE) {
+            
+            if (threshold != 1.0)
+              error=as.integer(floor(length(taxid) * (1-threshold)))
+            
+            
+            #
+            # Remove nod valid taxid
+            #
+            
+            taxid = validate(taxonomy,taxid)
+            if (any(is.na(taxid)))
+              return(NA)
+                      
+            ntaxid=length(taxid)
+            nok = ntaxid - error
+            if (ntaxid==1)
+              return(taxid)
+            
+            allpath  = path(taxonomy,taxid)
+            minlength= min(vapply(allpath,length,0))
+                        
+            lca=NA
+            for (i in 1:minlength) {
+              
+              n = vapply(allpath,function(x) x[i],0)
+              nt = table(n)
+              mt = max(nt)
+              if (mt >= nok) {
+                p = nt[nt==mt]
+                if (length(p)==1)
+                  lca=as.integer(names(p)[1])
+                else
+                  break
+              }
+              else
+                  break
+            }
+            
+            if (name)
+              return(scientificname(taxonomy,lca))
+            else
+              return(lca)
+          })
+
+
@@ -0,0 +1,483 @@
+#' @include taxonomy.R
+NULL
+
+#' @export
+setGeneric("rank.list", function(taxonomy) {
+  return(standardGeneric("rank.list"))
+})
+
+#' Returns the list of taxonomic ranks
+#' 
+#' The \code{rank.list} function returns the list of all taxonomic
+#' ranks described in the taxonomy
+#' 
+#' @param   taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
+#' 
+#' @return a vector of type \code{character} containing the taxonomic rank names
+#'  
+#' @examples
+#' # read the taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # returns the taxonomic rank for all taxid between 1000 and 1020
+#' rank.list(taxo)
+#' 
+#' @docType methods
+#' @rdname rank.list-methods
+#' @aliases rank.list-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' 
+setMethod("rank.list", "obitools.taxonomy",
+          function(taxonomy) {
+            return(.Call('R_rank_list',
+                         taxonomy,
+                         PACKAGE="ROBITaxonomy"))
+          })
+
+
+#' @export
+setGeneric("taxonomicrank", function(taxonomy,taxid) {
+  return(standardGeneric("taxonomicrank"))
+})
+
+#' Returns the taxonomic rank associated to a taxid
+#' 
+#' @param   taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
+#' @param   taxid a vector of taxid to analyse
+#'
+#' @return a vector of type \code{character} containing the taxonomic ranks
+#'  
+#' @examples
+#' # read the taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # returns the taxonomic rank for all taxid between 1000 and 1020
+#' taxonomicrank(taxo,1000:1020)
+#' 
+#' @docType methods
+#' @rdname taxonomicrank-methods
+#' @aliases taxonomicrank-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' 
+setMethod("taxonomicrank", "obitools.taxonomy",function(taxonomy,taxid) {  
+  taxid = as.integer(taxid)
+  return(.Call('R_get_rank',
+               taxonomy,
+               taxid,
+               PACKAGE="ROBITaxonomy"))
+})
+
+#' @export
+setGeneric("taxonatrank", function(taxonomy,taxid,rank,name=FALSE) {
+  return(standardGeneric("taxonatrank"))
+})
+
+#' Extracts the taxid at a specified taxonomic rank.
+#' 
+#' The \code{taxonatrank} method of \code{\linkS4class{obitools.taxonomy}} class
+#' returns the \emph{taxid} or the scientific name corresponding 
+#' to a \emph{taxid}.at a specified taxonomic rank
+#' 
+#' @param   taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
+#' @param   taxid a vector of taxid to analyse
+#' @param   rank a \code{character} indicating the desired rank
+#' @param   name  A logical value \code{TRUE} or \code{FALSE} indicating 
+#'          if the method return a taxid or a scientific name.
+#'          
+#' @return  \describe{
+#'   \item{If \code{name==FALSE}}{the taxid of the corresponding 
+#'                                taxon as an integer or a vector of integers 
+#'                                if the \code{taxid} argument is itself 
+#'                                a vector}
+#'   \item{If \code{name==TRUE}}{the scientific name of the corresponding 
+#'                               taxon as a string or a vector of string 
+#'                               if the \code{taxid} argument is itself 
+#'                               a vector}
+#'  }
+#'  
+#' @examples
+#' # read the taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # look for the subfamily taxids
+#' taxonatrank(taxo,sp.taxid,"subfamily")
+#' 
+#' # same thing but returns results as a vector of scientific names
+#' taxonatrank(taxo,sp.taxid,"subfamily",TRUE)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}},
+#' and methods \code{\link{species}},\code{\link{genus}},
+#' \code{\link{family}},\code{\link{kingdom}},
+#' \code{\link{superkingdom}}
+#' 
+#' @docType methods
+#' @rdname taxonatrank-methods
+#' @aliases taxonatrank-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' 
+setMethod("taxonatrank", "obitools.taxonomy",function(taxonomy,taxid,rank,name=FALSE) {
+  getsp = function(t)   {	
+    if (is.na(t[1]) | is.na(t[2]))
+      return(NA)
+    else
+      return(.Call('R_findtaxonatrank',taxonomy,
+                   as.integer(t[1]),
+                   t[2],
+                   name,
+                   PACKAGE="ROBITaxonomy"))
+  }
+  
+  rank  = as.character(rank)
+  name  = as.logical(name[1])
+  
+  apply(data.frame(taxid,rank),1,getsp)
+})
+
+
+#' @export
+setGeneric("species", function(taxonomy,taxid,name=FALSE) {
+  return(standardGeneric("species"))
+})
+
+#' Extracts the species corresponding to a taxid
+#' 
+#' The \code{species} method of \code{\linkS4class{obitools.taxonomy}} class
+#' returns the \emph{taxid} or the scientific name of the species corresponding 
+#' to a \emph{taxid}.
+#' 
+#' @param   taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
+#' @param   taxid a vector of taxid to analyse
+#' @param   name  A logical value \code{TRUE} or \code{FALSE} indicating 
+#'          if the method return a taxid or a scientific name.
+#'          
+#' @return  \describe{
+#'   \item{If \code{name==FALSE}}{the taxid of the corresponding 
+#'                                taxon as an integer or a vector of integers 
+#'                                if the \code{taxid} argument is itself 
+#'                                a vector}
+#'   \item{If \code{name==TRUE}}{the scientific name of the corresponding 
+#'                               taxon as a string or a vector of string 
+#'                               if the \code{taxid} argument is itself 
+#'                               a vector}
+#'  }
+#'  
+#' @examples
+#' # read the taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # look for the species taxids
+#' species(taxo,sp.taxid)
+#' 
+#' # same thing but returns results as a vector of scientific names
+#' species(taxo,sp.taxid,TRUE)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}},
+#' and methods \code{\link{taxonatrank}},\code{\link{genus}},
+#' \code{\link{family}},\code{\link{kingdom}},
+#' \code{\link{superkingdom}}
+#' 
+#' @docType methods
+#' @rdname species-methods
+#' @aliases species-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' 
+setMethod("species", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
+  getsp = function(t)	 {	
+    if (is.na(t))
+      return(NA)
+    else
+      return(.Call('R_get_species',
+                   taxonomy,
+                   t,
+                   name,
+                   PACKAGE="ROBITaxonomy"))
+  }
+  
+  taxid = as.integer(taxid)
+  name  = as.logical(name[1])
+  sapply(taxid,getsp)
+})
+
+#' @export
+setGeneric("genus", function(taxonomy,taxid,name=FALSE) {
+  return(standardGeneric("genus"))
+})
+
+#' Extracts the genus corresponding to a taxid
+#' 
+#' The \code{genus} method of \code{\linkS4class{obitools.taxonomy}} class
+#' returns the \emph{taxid} or the scientific name of the genus corresponding 
+#' to a \emph{taxid}.
+#' 
+#' @param   taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
+#' @param   taxid a vector of taxid to analyse
+#' @param   name  A logical value \code{TRUE} or \code{FALSE} indicating 
+#'          if the method return a taxid or a scientific name.
+#'          
+#' @return  \describe{
+#'   \item{If \code{name==FALSE}}{the taxid of the corresponding 
+#'                                taxon as an integer or a vector of integers 
+#'                                if the \code{taxid} argument is itself 
+#'                                a vector}
+#'   \item{If \code{name==TRUE}}{the scientific name of the corresponding 
+#'                               taxon as a string or a vector of string 
+#'                               if the \code{taxid} argument is itself 
+#'                               a vector}
+#'  }
+#'  
+#' @examples
+#' # read the taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # look for the genus taxids
+#' genus(taxo,sp.taxid)
+#' 
+#' # same thing but returns results as a vector of scientific names
+#' genus(taxo,sp.taxid,TRUE)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}},
+#' and methods \code{\link{species}},\code{\link{taxonatrank}},
+#' \code{\link{family}},\code{\link{kingdom}},
+#' \code{\link{superkingdom}}
+#' 
+#' @docType methods
+#' @rdname genus-methods
+#' @aliases genus-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' 
+setMethod("genus", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
+  getsp = function(t)	 {	
+    if (is.na(t))
+      return(NA)
+    else
+      return(.Call('R_get_genus',
+                   taxonomy,
+                   t,
+                   name,
+                   PACKAGE="ROBITaxonomy"))
+  }
+  
+  taxid = as.integer(taxid)
+  name  = as.logical(name[1])
+  sapply(taxid,getsp)
+})
+
+#' @export
+setGeneric("family", function(taxonomy,taxid,name=FALSE) {
+  return(standardGeneric("family"))
+})
+
+#' Extracts the family corresponding to a taxid
+#' 
+#' The \code{family} method of \code{\linkS4class{obitools.taxonomy}} class
+#' returns the \emph{taxid} or the scientific name of the family corresponding 
+#' to a \emph{taxid}.
+#' 
+#' @param   taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
+#' @param   taxid a vector of taxid to analyse
+#' @param   name  A logical value \code{TRUE} or \code{FALSE} indicating 
+#'          if the method return a taxid or a scientific name.
+#'          
+#' @return  \describe{
+#'   \item{If \code{name==FALSE}}{the taxid of the corresponding 
+#'                                taxon as an integer or a vector of integers 
+#'                                if the \code{taxid} argument is itself 
+#'                                a vector}
+#'   \item{If \code{name==TRUE}}{the scientific name of the corresponding 
+#'                               taxon as a string or a vector of string 
+#'                               if the \code{taxid} argument is itself 
+#'                               a vector}
+#'  }
+#'  
+#' @examples
+#' # read the taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # look for the family taxids
+#' family(taxo,sp.taxid)
+#' 
+#' # same thing but returns results as a vector of scientific names
+#' family(taxo,sp.taxid,TRUE)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}},
+#' and methods \code{\link{species}},\code{\link{genus}},
+#' \code{\link{taxonatrank}},\code{\link{kingdom}},
+#' \code{\link{superkingdom}}
+#' 
+#' @docType methods
+#' @rdname family-methods
+#' @aliases family-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' 
+setMethod("family", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
+  getsp = function(t)	 {	
+    if (is.na(t))
+      return(NA)
+    else
+      return(.Call('R_get_family',
+                   taxonomy,
+                   t,
+                   name,
+                   PACKAGE="ROBITaxonomy"))
+  }
+  
+  taxid = as.integer(taxid)
+  name  = as.logical(name[1])
+  sapply(taxid,getsp)
+})
+
+#' @export
+setGeneric("kingdom", function(taxonomy,taxid,name=FALSE) {
+  return(standardGeneric("kingdom"))
+})
+
+#' Extracts the kingdom corresponding to a taxid
+#' 
+#' The \code{kingdom} method of \code{\linkS4class{obitools.taxonomy}} class
+#' returns the \emph{taxid} or the scientific name of the kingdom corresponding 
+#' to a \emph{taxid}.
+#' 
+#' @param   taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
+#' @param   taxid a vector of taxid to analyse
+#' @param   name  A logical value \code{TRUE} or \code{FALSE} indicating 
+#'          if the method return a taxid or a scientific name.
+#'          
+#' @return  \describe{
+#'   \item{If \code{name==FALSE}}{the taxid of the corresponding 
+#'                                taxon as an integer or a vector of integers 
+#'                                if the \code{taxid} argument is itself 
+#'                                a vector}
+#'   \item{If \code{name==TRUE}}{the scientific name of the corresponding 
+#'                               taxon as a string or a vector of string 
+#'                               if the \code{taxid} argument is itself 
+#'                               a vector}
+#'  }
+#'  
+#' @examples
+#' # read the taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # look for the kingdom taxids
+#' kingdom(taxo,sp.taxid)
+#' 
+#' # same thing but returns results as a vector of scientific names
+#' kingdom(taxo,sp.taxid,TRUE)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}},
+#' and methods \code{\link{species}},\code{\link{genus}},
+#' \code{\link{family}},\code{\link{taxonatrank}},
+#' \code{\link{superkingdom}}
+#' 
+#' @docType methods
+#' @rdname kingdom-methods
+#' @aliases kingdom-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' 
+setMethod("kingdom", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
+  getsp = function(t)	 {	
+    if (is.na(t))
+      return(NA)
+    else
+      return(.Call('R_get_kingdom',
+                   taxonomy,
+                   t,
+                   name,
+                   PACKAGE="ROBITaxonomy"))
+  }
+  
+  taxid = as.integer(taxid)
+  name  = as.logical(name[1])
+  sapply(taxid,getsp)
+})
+
+#' @export
+setGeneric("superkingdom", function(taxonomy,taxid,name=FALSE) {
+  return(standardGeneric("superkingdom"))
+})
+
+#' Extracts the superkingdom corresponding to a taxid
+#' 
+#' The \code{superkingdom} method of \code{\linkS4class{obitools.taxonomy}} class
+#' returns the \emph{taxid} or the scientific name of the superkingdom corresponding 
+#' to a \emph{taxid}.
+#' 
+#' @param   taxonomy a \code{\linkS4class{obitools.taxonomy}} instance
+#' @param   taxid a vector of taxid to analyse
+#' @param   name  A logical value \code{TRUE} or \code{FALSE} indicating 
+#'          if the method return a taxid or a scientific name.
+#'          
+#' @return  \describe{
+#'   \item{If \code{name==FALSE}}{the taxid of the corresponding 
+#'                                taxon as an integer or a vector of integers 
+#'                                if the \code{taxid} argument is itself 
+#'                                a vector}
+#'   \item{If \code{name==TRUE}}{the scientific name of the corresponding 
+#'                               taxon as a string or a vector of string 
+#'                               if the \code{taxid} argument is itself 
+#'                               a vector}
+#'  }
+#'  
+#' @examples
+#' # read the taxonomy database
+#' taxo=default.taxonomy()
+#' 
+#' # build a vector of 6 taxids corresponding to species
+#' sp.taxid=c(7000,7004,7007,7009,7010,7011)
+#' 
+#' # look for the superkingdom taxids
+#' superkingdom(taxo,sp.taxid)
+#' 
+#' # same thing but returns results as a vector of scientific names
+#' superkingdom(taxo,sp.taxid,TRUE)
+#' 
+#' @seealso class \code{\linkS4class{obitools.taxonomy}},
+#' and methods \code{\link{species}},\code{\link{genus}},
+#' \code{\link{family}},\code{\link{kingdom}},
+#' \code{\link{taxonatrank}}
+#' 
+#' @docType methods
+#' @rdname superkingdom-methods
+#' @aliases superkingdom-methods,obitools.taxonomy
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' 
+setMethod("superkingdom", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
+  getsp = function(t)	 {	
+    if (is.na(t))
+      return(NA)
+    else
+      return(.Call('R_get_superkingdom',
+                   taxonomy,
+                   t,
+                   name,
+                   PACKAGE="ROBITaxonomy"))
+  }
+  
+  taxid = as.integer(taxid)
+  name  = as.logical(name[1])
+  sapply(taxid,getsp)
+})
+
+
@@ -0,0 +1,216 @@
+#' @include ROBITaxonomy.R
+#' @useDynLib ROBITaxonomy
+NULL
+
+#' Gives access to a taxonomy preformated by OBITools
+#' 
+#' A S4 class describing a taxonomy. It allows access to 
+#' taxonomy formated for OBITools.
+#'
+#' @references \describe{
+#'  \item{NCBI Taxonomy : }{\url{http://www.ncbi.nlm.nih.gov/taxonomy}}
+#'  \item{OBITools : }{\url{http://metabarcoding/obitools/doc}}
+#'  }
+#'  
+#' @seealso \code{\link{read.taxonomy}}
+#'  
+#' @name obitools.taxonomy
+#' @rdname obitools-taxonomy-class
+#' @keywords taxonomy
+#' @author Eric Coissac
+#' @exportClass obitools.taxonomy
+#' 
+setClass("obitools.taxonomy",
+         
+         
+         #
+         # Attribute declaration
+         #
+         
+         # data.frame containing the counts of reads per samples
+         #      1 samples per line
+         #      1 sequence per column
+         
+         representation(
+           
+           # An external pointer structure to 
+           # the C taxonomy structure
+           
+           pointer    = "externalptr",
+           
+           # the name of the database on the hard disk
+           
+           dbname     = 'character',
+           
+           # the working directory when the taxonomy
+           # object is created. 
+           # This inforation combined with bname allows
+           # to reload taxonomy from disk
+           
+           workingdir = 'character',
+           
+           # Indicate if the taxonomy is saved in a file
+           # Taxonomy created in R or modified in R are
+           # not saved ==> This have to be take into 
+           # consideration but how ???
+           saved = 'logical'
+         ),
+         
+         #
+         # Check object structure 
+         #
+         
+         validity = function(object) { 			
+           return(TRUE)
+         }
+)
+
+
+
+#' obitools.taxonomy constructor
+#' 
+#' --> this constructor have not to be called directly
+#'     use the read.obitools.taxonomy function to
+#'     create a new instance of taxonomy
+#'
+#' @docType methods
+#' @rdname initialize-methods-obitools.taxonomy
+#' @aliases initialize-methods,obitools.taxonomy
+setMethod("initialize",
+          "obitools.taxonomy",
+          function(.Object, pointer,dbname,workingdir,saved) {
+            .Object@pointer    <- pointer
+            .Object@dbname     <- dbname
+            .Object@workingdir <- workingdir
+            .Object@saved      <- saved
+            
+            validObject(.Object) ## valide l'objet
+            return(.Object)
+          })
+
+
+#' @exportClass obitools.taxonomyOrNULL
+setClassUnion("obitools.taxonomyOrNULL",c("obitools.taxonomy","NULL"))
+
+
+#' @export
+setGeneric("path", function(taxonomy,taxid,name=FALSE) {
+  return(standardGeneric("path"))
+})
+
+
+
+setMethod("path", "obitools.taxonomy",function(taxonomy,taxid,name=FALSE) {
+  getp = function(t)	 {	
+    if (is.na(t))
+      return(NA)
+    else
+    {
+      path=c()
+      
+      t=.Call('R_validate_taxid',
+              taxonomy,
+              as.integer(t),
+              PACKAGE="ROBITaxonomy")
+      
+      if (is.na(t))
+        return(NA)
+      
+      repeat {
+        if (name)
+          path = c(scientificname(taxonomy,t),path)
+        else
+          path = c(t,path)
+        
+        t = .Call('R_get_parent',
+                  taxonomy,
+                  t,
+                  FALSE,
+                  PACKAGE="ROBITaxonomy")
+        if (is.na(t))
+          break
+      }
+      
+      return(path)
+    }
+  }
+  
+  taxid=as.integer(taxid)
+  name=as.logical(name)
+  
+  p = lapply(taxid,getp)
+  d = dim(p)
+  
+  if (!is.null(d))
+    if (d[2]==1)
+      p = as.vector(p)
+  
+  return(p)
+})
+
+
+#' @export
+setGeneric("is.subcladeof", function(taxonomy,taxid,parent) {
+  return(standardGeneric("is.subcladeof"))
+})
+
+setMethod("is.subcladeof", "obitools.taxonomy",function(taxonomy,taxid,parent) {
+  taxid = as.integer(taxid)
+  parent= as.integer(parent)
+  return(.Call('R_is_under_taxon',
+               taxonomy,
+               taxid,
+               parent,
+               PACKAGE="ROBITaxonomy"))
+})
+
+
+
+build.taxonomy = function(pointer,dbname,workingdir,saved) {
+  rd <- new('obitools.taxonomy',
+            pointer=pointer,
+            dbname=dbname,
+            workingdir=workingdir,
+            saved=saved
+  )
+  return(rd)
+}
+
+
+#' Reads a taxonomy
+#' 
+#' \code{read.taxonomy} reads a taxonomy formated by OBITools.
+#' NCBI taxonomy can be download from the NCBI FTP site in taxdump format.
+#' The taxdump must be formated using the obitaxonomy command from OBITools
+#' before being used in R. A OBITools formated taxonomy is composed of 3 files 
+#' with the same prefix name and suffixes .tdx, .rdx, .ndx, two extra files 
+#' suffixed .adx and .ldx can also be present. 
+#' 
+#' @param  dbname A character string containing the file name of the database
+#' 
+#' @return an instance of the class \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @examples
+#' 
+#' \dontshow{# switch the working directory to the data package directory}
+#' \dontshow{setwd(system.file("extdata", package="ROBITaxonomy"))}
+#'
+#' # read the taxonomy ncbi
+#' ncbi = read.taxonomy("ncbitaxo")
+#' 
+#' # and use it for requesting a scientific name
+#' scientificname(ncbi,7742)
+#' 
+#' @seealso \code{\linkS4class{obitools.taxonomy}}
+#' 
+#' @author Eric Coissac
+#' @keywords taxonomy
+#' @export
+
+read.taxonomy = function(dbname) {
+  t <- .Call('R_read_taxonomy',dbname,TRUE,PACKAGE="ROBITaxonomy")	
+  
+  return(build.taxonomy(t,dbname,getwd(),TRUE))
+}
+
+
@@ -0,0 +1,20 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: knitr
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
@@ -0,0 +1,24 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * print the message given as argument and exit the program
+ * @param error		error number	
+ * @param message 	the text explaining what's going on
+ * @param filename	the file source where the program failed
+ * @param linenumber	the line where it has failed
+ * filename and linenumber are written at pre-processing 
+ * time by a macro
+ */
+void ecoError(int32_t errorcode,
+              const char* message,
+              const char * filename,
+              int linenumber)
+{
+  error("Error %d in file %s line %d : %s",
+                 errorcode,
+	               filename,
+	               linenumber,
+	               message);
+}
@@ -0,0 +1,122 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define SWAPINT32(x)     ((((x) << 24) & 0xFF000000) | (((x) <<  8) & 0xFF0000) | \
+                         (((x) >>  8) & 0xFF00)     | (((x) >> 24) & 0xFF))
+
+
+int32_t is_big_endian()
+{
+	int32_t i=1;
+	
+	return (int32_t)((char*)&i)[0];
+}
+
+
+
+
+int32_t swap_int32_t(int32_t i)
+{
+	return SWAPINT32(i);
+}
+
+
+/**
+ * Read part of the file
+ * @param	*f	the database
+ * @param	recordSize the size to be read
+ * 
+ * @return	buffer
+ */
+void *read_ecorecord(FILE *f,int32_t *recordSize)
+{
+	static void *buffer    =NULL;
+	int32_t      buffersize=0;
+	int32_t      read;
+	
+	if (!recordSize)
+		ECOERROR(ECO_ASSERT_ERROR,
+		         "recordSize cannot be NULL");
+		
+	read = fread(recordSize,
+	      		 1,
+	      		 sizeof(int32_t),
+	             f);
+	             
+	if (feof(f))
+		return NULL;
+	             
+	if (read != sizeof(int32_t))
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+		
+	if (is_big_endian())
+		*recordSize=swap_int32_t(*recordSize);
+		
+	if (buffersize < *recordSize)
+	{
+		if (buffer)
+			buffer = ECOREALLOC(buffer,*recordSize,
+			                    "Increase size of record buffer");
+		else
+			buffer = ECOMALLOC(*recordSize,
+			                    "Allocate record buffer");
+	}
+	
+	read = fread(buffer,
+	             1,
+				 *recordSize,
+				 f);
+				 
+	if (read != *recordSize)
+		ECOERROR(ECO_IO_ERROR,"Reading record data error");
+		
+	return buffer;	 
+}
+
+
+
+
+
+/**
+ * Open the database and check it's readable
+ * @param 	filename 		name of the database (.sdx, .rdx, .tbx)
+ * @param 	sequencecount	buffer - pointer to variable storing the number of occurence    
+ * @param 	abort_on_open_error		 	boolean to define the behaviour in case of error 
+ * 										while opening the database
+ * @return 	FILE type
+ **/
+FILE *open_ecorecorddb(const char *filename,
+                       int32_t    *sequencecount,
+                       int32_t    abort_on_open_error)
+{
+    FILE        *f;
+	int32_t      read;
+	
+	f = fopen(filename,"rb");
+	
+	if (!f)
+		{
+			if (abort_on_open_error)
+		 		ECOERROR(ECO_IO_ERROR,"Cannot open file");
+		 	else
+		 	{
+		 		*sequencecount=0;
+		 		return NULL;
+		 	}
+		}
+		
+	read = fread(sequencecount,
+	      		 1,
+	      		 sizeof(int32_t),
+	      		 f);
+	             
+	if (read != sizeof(int32_t))
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+	if (is_big_endian())
+		*sequencecount=swap_int32_t(*sequencecount);
+		
+	return f;                  
+}
+
@@ -0,0 +1,76 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+
+static int eco_log_malloc = 0;
+
+void    eco_trace_memory_allocation()
+{
+	eco_log_malloc=1;
+}
+
+void    eco_untrace_memory_allocation()
+{
+	eco_log_malloc=0;
+}
+
+
+void   *eco_malloc(int32_t chunksize,
+                   const char *error_message,
+                   const char *filename,
+                   int32_t    line)
+{
+	void * chunk;
+	
+	chunk = calloc(1,chunksize);
+	
+	if (!chunk)
+		ecoError(ECO_MEM_ERROR,error_message,filename,line);
+		
+	if (eco_log_malloc)
+		REprintf("Memory segment located at %p of size %d is allocated (file : %s [%d])",
+			       chunk,
+			       chunksize,
+			       filename,
+			       line);
+		
+	return chunk;
+}
+
+void   *eco_realloc(void *chunk,
+                    int32_t newsize,
+                    const char *error_message,
+                    const char *filename,
+                    int32_t    line)
+{
+	void *newchunk;
+	
+	newchunk = realloc(chunk,newsize);
+	
+	if (!newchunk)
+		ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+	if (eco_log_malloc)
+		REprintf("Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
+			        chunk,
+			        newchunk,
+			        newsize,
+			        filename,
+			        line);
+		
+	return newchunk;	
+}
+
+void    eco_free(void *chunk,
+                 const char *error_message,
+                 const char *filename,
+                 int32_t    line)
+{
+	free(chunk);
+	
+	if (eco_log_malloc)
+		REprintf("Memory segment %p is released => %s (file : %s [%d])",
+			       chunk,
+			       error_message,
+			       filename,
+			       line);
+}
@@ -0,0 +1,285 @@
+#ifndef ECOPCR_H_
+#define ECOPCR_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include <R.h>
+#include <Rinternals.h>
+#include <Rdefines.h>
+
+
+//#ifndef H_apat
+//#include "../libapat/apat.h"
+//#endif
+
+/*****************************************************
+ * 
+ *  Data type declarations
+ * 
+ *****************************************************/
+
+/*
+ * 
+ *  Sequence types
+ * 
+ */
+
+typedef struct {
+	
+	int32_t  taxid;
+	char     AC[20];
+	int32_t  DE_length;
+	int32_t  SQ_length;
+	int32_t  CSQ_length;
+	
+	char     data[1];
+	
+} ecoseqformat_t;
+
+typedef struct {
+	int32_t taxid;
+	int32_t SQ_length;
+	char    *AC;
+	char    *DE;
+	char    *SQ;
+} ecoseq_t;
+
+/*
+ * 
+ * Taxonomy taxon types
+ * 
+ */
+
+
+typedef struct {
+	int32_t  taxid;
+	int32_t  rank;
+	int32_t	 parent;
+	int32_t  namelength;
+	char     name[1];
+	
+} ecotxformat_t;
+
+typedef struct ecotxnode {
+	int32_t           taxid;
+	int32_t           rank;
+	int32_t  		      farest;
+	struct ecotxnode  *parent;
+	char              *name;
+} ecotx_t;
+
+typedef struct {
+	int32_t count;
+	int32_t maxtaxid;
+  int32_t buffersize;
+	ecotx_t taxon[1];
+} ecotxidx_t;
+ 
+	
+/*
+ * 
+ * Taxonomy rank types
+ * 
+ */
+	
+typedef struct {
+	int32_t count;
+	char*   label[1];
+} ecorankidx_t;
+
+/*
+ * 
+ * Taxonomy name types
+ * 
+ */
+
+typedef struct {
+ 	int32_t is_scientificname;
+	int32_t  namelength;
+	int32_t  classlength;
+	int32_t  taxid;
+	char     names[1];	
+} econameformat_t;
+ 
+ 
+ typedef struct {
+ 	char 	*name;
+ 	char 	*classname;
+ 	int32_t is_scientificname;
+ 	struct ecotxnode  *taxon;
+} econame_t;
+
+ 
+typedef struct {
+	int32_t count;
+	econame_t   names[1];
+} econameidx_t;
+
+
+ typedef struct {
+	ecorankidx_t *ranks;
+	econameidx_t *names;
+	ecotxidx_t   *taxons;
+} ecotaxonomy_t;
+
+ 
+/*****************************************************
+ * 
+ *  Function declarations
+ * 
+ *****************************************************/
+
+/*
+ * 
+ * Low level system functions
+ * 
+ */
+
+int32_t is_big_endian();
+int32_t swap_int32_t(int32_t);
+
+void   *eco_malloc(int32_t chunksize,
+                   const char *error_message,
+                   const char *filename,
+                   int32_t    line);
+                   
+                   
+void   *eco_realloc(void *chunk,
+                    int32_t chunksize,
+                    const char *error_message,
+                    const char *filename,
+                    int32_t    line);
+                    
+void    eco_free(void *chunk,
+                 const char *error_message,
+                 const char *filename,
+                 int32_t    line);
+                 
+void    eco_trace_memory_allocation();
+void    eco_untrace_memory_allocation();
+
+#define ECOMALLOC(size,error_message) \
+	    eco_malloc((size),(error_message),__FILE__,__LINE__)
+	   
+#define ECOREALLOC(chunk,size,error_message) \
+        eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
+        
+#define ECOFREE(chunk,error_message) \
+        eco_free((chunk),(error_message),__FILE__,__LINE__)
+        
+        
+
+
+/*
+ * 
+ * Error managment
+ * 
+ */
+ 
+  
+void ecoError(int32_t,const char*,const char *,int);
+
+#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
+
+#define ECO_IO_ERROR       (1)
+#define ECO_MEM_ERROR      (2)
+#define ECO_ASSERT_ERROR   (3)
+#define ECO_NOTFOUND_ERROR (4)
+
+
+/*
+ * 
+ * Low level Disk access functions
+ * 
+ */
+
+FILE *open_ecorecorddb(const char *filename,
+                       int32_t    *sequencecount,
+                       int32_t    abort_on_open_error);
+                       
+void *read_ecorecord(FILE *,int32_t *recordSize);
+
+
+
+/* 
+ *   Read function in internal binary format
+ */
+
+FILE             *open_ecoseqdb(const char *filename,
+                                int32_t    *sequencecount);
+                                                                
+ecoseq_t         *readnext_ecoseq(FILE *);
+
+ecorankidx_t     *read_rankidx(const char *filename);
+
+econameidx_t     *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
+
+
+
+	/**
+	 * Read taxonomy data as formated by the ecoPCRFormat.py script.
+	 * 
+	 * This function is normaly uses internaly by the read_taxonomy
+	 * function and should not be called directly.
+	 * 
+	 * @arg filename  path to the *.tdx file of the reformated db
+	 * 
+	 * @return pointer to a taxonomy index structure
+	 */
+ 
+ecotxidx_t       *read_taxonomyidx(const char *filename,const char *filename2);
+
+ecotaxonomy_t    *read_taxonomy(const char *prefix,int32_t readAlternativeName);
+
+ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx);
+
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
+
+int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
+
+ecoseq_t *ecoseq_iterator(const char *prefix);
+
+
+
+ecoseq_t *new_ecoseq();
+int32_t   delete_ecoseq(ecoseq_t *);
+ecoseq_t *new_ecoseq_with_data( char *AC,
+								char *DE,
+								char *SQ,
+								int32_t   taxid
+								);
+
+
+int32_t delete_taxon(ecotx_t *taxon);
+int32_t delete_rankidx(ecorankidx_t *rankidx);
+int32_t delete_nameidx(econameidx_t *nameidx);
+int32_t delete_taxonomy(ecotxidx_t *index);
+int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy);
+
+
+int32_t rank_index(const char* label,ecorankidx_t* ranks);
+
+//int32_t  delete_apatseq(SeqPtr pseq);
+//PatternPtr buildPattern(const char *pat, int32_t error_max);
+//PatternPtr complementPattern(PatternPtr pat);
+//
+//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
+
+//char *ecoComplementPattern(char *nucAcSeq);
+//char *ecoComplementSequence(char *nucAcSeq);
+//char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
+
+ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+
+//int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
+//int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
+
+
+ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
+
+#endif /*ECOPCR_H_*/
@@ -0,0 +1,156 @@
+#include <string.h>
+#include "ecoPCR.h"
+
+/*
+ * @doc: DNA alphabet (IUPAC)
+ */
+#define LX_BIO_DNA_ALPHA   "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
+
+/*
+ * @doc: complementary DNA alphabet (IUPAC)
+ */
+#define LX_BIO_CDNA_ALPHA  "TVGHEFCDIJMLKNOPQYSAABWXRZ#!]["
+
+
+static char sNuc[]     = LX_BIO_DNA_ALPHA;
+static char sAnuc[]    = LX_BIO_CDNA_ALPHA;
+
+static char LXBioBaseComplement(char nucAc);
+static char *LXBioSeqComplement(char *nucAcSeq);
+static char *reverseSequence(char *str,char isPattern);
+
+ 
+/* ---------------------------- */
+
+char LXBioBaseComplement(char nucAc)
+{
+    char *c;
+
+    if ((c = strchr(sNuc, nucAc)))
+        return sAnuc[(c - sNuc)];
+    else
+        return nucAc;
+}
+
+/* ---------------------------- */
+
+char *LXBioSeqComplement(char *nucAcSeq)
+{
+    char *s;
+
+    for (s = nucAcSeq ; *s ; s++)
+        *s = LXBioBaseComplement(*s);
+
+    return nucAcSeq;
+}
+
+
+char *reverseSequence(char *str,char isPattern)
+{
+        char *sb, *se, c;
+
+        if (! str)
+            return str;
+            
+        sb = str;
+        se = str + strlen(str) - 1;
+
+        while(sb <= se) {
+           c    = *sb;
+          *sb++ = *se;
+          *se-- = c;
+        }
+
+		sb = str;
+		se = str + strlen(str) - 1;
+		
+		if (isPattern)
+			for (;sb < se; sb++)
+			{
+				if (*sb=='#')
+				{
+					if (((se - sb) > 2) && (*(sb+2)=='!'))
+					{
+						*sb='!';
+						sb+=2;
+						*sb='#';
+					}
+					else
+					{
+						*sb=*(sb+1);
+						sb++;
+						*sb='#';
+					}
+				}
+				else if (*sb=='!')
+					{
+						*sb=*(sb-1);
+						*(sb-1)='!';
+					}
+			}
+
+        return str;
+}
+
+char *ecoComplementPattern(char *nucAcSeq)
+{
+    return reverseSequence(LXBioSeqComplement(nucAcSeq),1);
+}
+
+char *ecoComplementSequence(char *nucAcSeq)
+{
+    return reverseSequence(LXBioSeqComplement(nucAcSeq),0);
+}
+
+
+char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
+/*
+   extract subsequence from nucAcSeq [begin,end[
+*/
+{
+	static char *buffer  = NULL;
+	static int32_t buffSize= 0;
+	int32_t length;
+	
+	if (begin < end)
+	{
+		length = end - begin;
+		
+		if (length >= buffSize)
+		{
+			buffSize = length+1;
+			if (buffer)
+				buffer=ECOREALLOC(buffer,buffSize,
+						   	      "Error in reallocating sub sequence buffer");
+			else
+				buffer=ECOMALLOC(buffSize,
+				          		 "Error in allocating sub sequence buffer");
+				
+		}
+		
+		strncpy(buffer,nucAcSeq + begin,length);
+		buffer[length]=0;
+	}
+	else
+	{
+		length = end + strlen(nucAcSeq) - begin;
+		
+		if (length >= buffSize)
+		{
+			buffSize = length+1;
+			if (buffer)
+				buffer=ECOREALLOC(buffer,buffSize,
+						   	      "Error in reallocating sub sequence buffer");
+			else
+				buffer=ECOMALLOC(buffSize,
+				          		 "Error in allocating sub sequence buffer");
+				
+		}
+		strncpy(buffer,nucAcSeq+begin,length - end);
+		strncpy(buffer+(length-end),nucAcSeq ,end);
+		buffer[length]=0;
+	}
+	
+	return buffer;
+}
+
@@ -0,0 +1,20 @@
+#include "ecoPCR.h"
+
+int eco_is_taxid_included(	ecotaxonomy_t *taxonomy, 
+							int32_t *restricted_taxid, 
+							int32_t tab_len, 
+							int32_t taxid)
+{
+	int i;
+	ecotx_t *taxon;
+	
+	taxon = eco_findtaxonbytaxid(taxonomy, taxid);
+	
+	if (taxon)
+		for (i=0; i < tab_len; i++)
+			if ( (taxon->taxid == restricted_taxid[i]) ||
+				 (eco_isundertaxon(taxon, restricted_taxid[i])) )
+				return 1;
+	
+	return 0;
+}
@@ -0,0 +1,86 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+
+static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
+
+econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
+{
+
+	int32_t      		count;
+	FILE         		*f;
+	econameidx_t		*indexname;
+	int32_t      		i;
+	
+	f = open_ecorecorddb(filename,&count,0);
+	
+	if (f==NULL)
+		return NULL;
+
+	indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
+	
+	indexname->count=count;
+	                                    
+	for (i=0; i < count; i++){
+		readnext_econame(f,(indexname->names)+i,taxonomy);
+	}
+
+	return indexname;
+}
+
+int32_t delete_nameidx(econameidx_t *nameidx)
+{
+	size_t i;
+
+	if (nameidx) {
+		for (i=0; i < nameidx->count; i++) {
+			if (nameidx->names[i].name)
+				ECOFREE(nameidx->names[i].name,
+						"Desallocate name");
+			if (nameidx->names[i].classname)
+				ECOFREE(nameidx->names[i].classname,
+						"Desallocate classname");
+		}
+
+		ECOFREE(nameidx,"Desallocate name index");
+
+		return 0;
+	}
+
+	return 1;
+}
+
+econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
+{
+	
+	econameformat_t *raw;
+	int32_t  rs;
+	
+	raw = read_ecorecord(f,&rs);
+	
+	if (!raw)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		raw->is_scientificname 	= swap_int32_t(raw->is_scientificname);
+		raw->namelength 	    = swap_int32_t(raw->namelength);
+		raw->classlength        = swap_int32_t(raw->classlength);
+		raw->taxid  	        = swap_int32_t(raw->taxid); 
+	}
+	
+	name->is_scientificname=raw->is_scientificname;
+	
+	name->name   	= ECOMALLOC((raw->namelength+1) * sizeof(char),"Allocate name");
+	strncpy(name->name,raw->names,raw->namelength);
+	name->name[raw->namelength]=0;
+	
+	name->classname = ECOMALLOC((raw->classlength+1) * sizeof(char),"Allocate classname");
+	strncpy(name->classname,(raw->names+raw->namelength),raw->classlength);
+	name->classname[raw->classlength]=0;
+	
+	name->taxon = taxonomy->taxons->taxon + raw->taxid;
+
+	return name;
+}
+
@@ -0,0 +1,75 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+
+static int compareRankLabel(const void *label1, const void *label2);
+
+ecorankidx_t     *read_rankidx(const char *filename)
+{
+	int32_t      count;
+	FILE         *f;
+	ecorankidx_t *index;
+	int32_t      i;
+	int32_t      rs;
+	char         *buffer;
+	
+	f = open_ecorecorddb(filename,&count,0);
+	
+	if (f==NULL)
+		return NULL;
+
+	index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
+	                                  "Allocate rank index");
+	 
+	index->count=count;                                 
+	          
+	for (i=0; i < count; i++)
+		{
+			buffer = read_ecorecord(f,&rs);
+			index->label[i]=(char*) ECOMALLOC(rs+1,
+			                                  "Allocate rank label");
+			strncpy(index->label[i],buffer,rs);
+		}
+		
+	return index;
+}
+
+int32_t delete_rankidx(ecorankidx_t *rankidx)
+{
+	size_t i;
+
+	if (rankidx) {
+		for (i=0; i < rankidx->count; i++)
+			if (rankidx->label[i])
+				ECOFREE(rankidx->label[i],
+						"Desallocate rank label");
+
+		ECOFREE(rankidx,"Desallocate rank index");
+
+		return 0;
+	}
+
+	return 1;
+}
+
+int32_t rank_index(const char* label,ecorankidx_t* ranks)
+{
+	char **rep;
+	
+	rep = bsearch(label,ranks->label,ranks->count,sizeof(char*),compareRankLabel);
+	
+	if (rep)
+		return rep-ranks->label;
+//	else
+//		ECOERROR(ECO_NOTFOUND_ERROR,"Rank label not found");
+		
+	return -1;
+}
+
+
+int compareRankLabel(const void *label1, const void *label2)
+{
+	return strcmp((const char*)label1,*(const char**)label2);
+}
+
+
@@ -0,0 +1,231 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+static FILE *open_seqfile(const char *prefix,int32_t index);
+
+
+ecoseq_t *new_ecoseq()
+{
+	void *tmp;
+
+	tmp = ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
+
+	return tmp;
+}
+
+int32_t delete_ecoseq(ecoseq_t * seq)
+{
+
+	if (seq)
+	{
+		if (seq->AC)
+			ECOFREE(seq->AC,"Free sequence AC");
+
+		if (seq->DE)
+			ECOFREE(seq->DE,"Free sequence DE");
+
+		if (seq->SQ)
+			ECOFREE(seq->SQ,"Free sequence SQ");
+
+		ECOFREE(seq,"Free sequence structure");
+
+		return 0;
+
+	}
+
+	return 1;
+}
+
+ecoseq_t *new_ecoseq_with_data( char *AC,
+								char *DE,
+								char *SQ,
+								int32_t   taxid_idx
+								)
+{
+	ecoseq_t *tmp;
+	int32_t lstr;
+	tmp = new_ecoseq();
+
+	tmp->taxid=taxid_idx;
+
+	if (AC)
+		{
+			lstr =strlen(AC);
+			tmp->AC=ECOMALLOC((lstr+1) * sizeof(char),
+			                  "Allocate sequence accession");
+			strcpy(tmp->AC,AC);
+		}
+
+	if (DE)
+		{
+			lstr =strlen(DE);
+			tmp->DE=ECOMALLOC((lstr+1) * sizeof(char),
+			                  "Allocate sequence definition");
+			strcpy(tmp->DE,DE);
+		}
+
+	if (SQ)
+		{
+			lstr =strlen(SQ);
+			tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char),
+			                  "Allocate sequence data");
+			strcpy(tmp->SQ,SQ);
+		}
+	return tmp;
+
+}
+
+/**
+ * ?? used ??
+ **/
+FILE *open_ecoseqdb(const char *filename,
+                    int32_t    *sequencecount)
+{
+	return open_ecorecorddb(filename,sequencecount,1);
+}
+
+ecoseq_t *readnext_ecoseq(FILE *f)
+{
+	char     *compressed=NULL;
+
+	ecoseqformat_t *raw;
+	ecoseq_t *seq;
+	int32_t  comp_status;
+	unsigned long int seqlength;
+	int32_t  rs;
+	char *c;
+	int32_t i;
+
+	raw = read_ecorecord(f,&rs);
+
+	if (!raw)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		raw->CSQ_length = swap_int32_t(raw->CSQ_length);
+		raw->DE_length  = swap_int32_t(raw->DE_length);
+		raw->SQ_length  = swap_int32_t(raw->SQ_length);
+		raw->taxid      = swap_int32_t(raw->taxid);
+	}
+
+	seq = new_ecoseq();
+
+	seq->taxid = raw->taxid;
+
+    seq->AC    = ECOMALLOC(strlen(raw->AC) +1,
+                           "Allocate Sequence Accesion number");
+    strncpy(seq->AC,raw->AC,strlen(raw->AC));
+
+
+    seq->DE    = ECOMALLOC(raw->DE_length+1,
+                           "Allocate Sequence definition");
+    strncpy(seq->DE,raw->data,raw->DE_length);
+
+	seqlength = seq->SQ_length = raw->SQ_length;
+
+    compressed = raw->data + raw->DE_length;
+
+    seq->SQ = ECOMALLOC(seqlength+1,
+                        "Allocate sequence buffer");
+/*
+    comp_status = uncompress((unsigned char*)seq->SQ,
+                             &seqlength,
+                             (unsigned char*)compressed,
+                             raw->CSQ_length);
+*/
+
+    if (comp_status != Z_OK)
+    	ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
+
+    for (c=seq->SQ,i=0;i<seqlength;c++,i++)
+    	*c=toupper(*c);
+
+
+	return seq;
+}
+
+/**
+ * Open the sequences database (.sdx file)
+ * @param	prefix	name of the database (radical without extension)
+ * @param	index 	integer
+ *
+ * @return	file object
+ */
+FILE *open_seqfile(const char *prefix,int32_t index)
+{
+	char           filename_buffer[1024];
+	int32_t        filename_length;
+	FILE           *input;
+	int32_t        seqcount;
+
+	filename_length = snprintf(filename_buffer,
+								1023,
+	                           "%s_%03d.sdx",
+	                           prefix,
+	                           index);
+
+		//	fprintf(stderr,"# Coucou %s\n",filename_buffer);
+
+
+	if (filename_length >= 1024)
+		ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
+
+	filename_buffer[filename_length]=0;
+
+	input=open_ecorecorddb(filename_buffer,&seqcount,0);
+
+	if (input)
+		REprintf("# Reading file %s containing %d sequences...\n",
+				     filename_buffer,
+				     seqcount);
+
+	return input;
+}
+
+ecoseq_t *ecoseq_iterator(const char *prefix)
+{
+	static FILE    *current_seq_file= NULL;
+	static int32_t current_file_idx = 1;
+	static char    current_prefix[1024];
+	ecoseq_t       *seq;
+
+	if (prefix)
+	{
+		current_file_idx = 1;
+
+		if (current_seq_file)
+			fclose(current_seq_file);
+
+		strncpy(current_prefix,prefix,1023);
+		current_prefix[1023]=0;
+
+		current_seq_file = open_seqfile(current_prefix,
+		 							    current_file_idx);
+
+		if (!current_seq_file)
+			return NULL;
+
+	}
+
+	seq = readnext_ecoseq(current_seq_file);
+
+	if (!seq && feof(current_seq_file))
+	{
+		current_file_idx++;
+		fclose(current_seq_file);
+		current_seq_file = open_seqfile(current_prefix,
+		 							    current_file_idx);
+
+
+		if (current_seq_file)
+			seq = readnext_ecoseq(current_seq_file);
+	}
+
+	return seq;
+}
@@ -0,0 +1,420 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <R.h>
+
+#ifndef MAX
+#define MAX(x,y) (((x)>(y)) ? (x):(y))
+#endif
+
+static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
+
+ /** 
+ * Open the taxonomy database 
+ * @param	pointer to the database (.tdx file) 
+ * @return	a ecotxidx_t structure 
+ */
+ecotxidx_t     *read_taxonomyidx(const char *filename,const char *filename2)
+{
+	int32_t      count;
+	int32_t      count2;
+	FILE         *f;
+	FILE         *f2;
+	ecotxidx_t *index;
+	struct ecotxnode  *t;
+	int32_t      i;
+	int32_t      j;
+	
+	f  = open_ecorecorddb(filename,&count,0);
+
+	if (f==NULL) return NULL;
+
+	f2 = open_ecorecorddb(filename2,&count2,0);
+
+	index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
+	                                  "Allocate taxonomy");
+	 
+	index->count=count+count2;
+    index->buffersize = index->count;
+
+	index->maxtaxid=0;
+	REprintf("Readind %d taxa...\n",count);
+	for (i=0; i < count; i++){
+		readnext_ecotaxon(f,&(index->taxon[i]));
+		index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
+		index->taxon[i].parent->farest=0;
+		if (index->taxon[i].taxid > index->maxtaxid)
+			index->maxtaxid=index->taxon[i].taxid;
+	}					
+
+
+	if (count2>0)
+		REprintf("Readind %d local taxa...\n",count2);
+	else
+		REprintf("No local taxon\n");
+
+	count = index->count;
+
+	for (; i < count; i++){
+		readnext_ecotaxon(f2,&(index->taxon[i]));
+		index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
+		index->taxon[i].parent->farest=0;
+		if (index->taxon[i].taxid > index->maxtaxid)
+			index->maxtaxid=index->taxon[i].taxid;
+	}
+
+	REprintf("Computing longest branches...\n",count);
+
+	for (i=0; i < count; i++){
+		t=index->taxon+i;
+		if (t->farest==-1)
+		{
+			t->farest=0;
+            while(t->parent != t)
+            {
+            	j = t->farest + 1;
+            	if (j > t->parent->farest)
+            	{
+            		t->parent->farest = j;
+            		t=t->parent;
+            	}
+            	else
+            		t=index->taxon;
+            }
+		}
+	}
+
+	return index;
+}
+
+
+int32_t delete_taxonomy(ecotxidx_t *index)
+{
+	int32_t i;
+	
+	if (index)
+	{
+		for (i=0; i< index->count; i++)
+			if (index->taxon[i].name)
+				ECOFREE(index->taxon[i].name,"Free scientific name");
+
+				
+		ECOFREE(index,"Free Taxonomy");
+		
+		return 0;
+	}
+	
+	return 1;
+}
+
+
+
+int32_t delete_taxon(ecotx_t *taxon)
+{
+	if (taxon)
+	{
+		if (taxon->name)
+			ECOFREE(taxon->name,"Free scientific name");
+			
+		ECOFREE(taxon,"Free Taxon");
+		
+		return 0;
+	}
+		
+	return 1;
+}
+
+
+/**
+ * Read the database for a given taxon a save the data 
+ * into the taxon structure(if any found)
+ * @param	*f	pointer to FILE type returned by fopen
+ * @param	*taxon	pointer to the structure
+ * 
+ * @return	a ecotx_t structure if any taxon found else NULL 
+ */
+ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
+{
+	
+	ecotxformat_t *raw;
+	int32_t  rs;
+	
+	raw = read_ecorecord(f,&rs);
+	
+	if (!raw)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		raw->namelength = swap_int32_t(raw->namelength);
+		raw->parent     = swap_int32_t(raw->parent);
+		raw->rank       = swap_int32_t(raw->rank);
+		raw->taxid      = swap_int32_t(raw->taxid); 
+	}
+	
+	taxon->parent = (ecotx_t*)((size_t)raw->parent);
+	taxon->taxid  = raw->taxid;
+	taxon->rank   = raw->rank;
+	taxon->farest = -1;
+	
+	taxon->name   = ECOMALLOC((raw->namelength+1) * sizeof(char),
+	                          "Allocate taxon scientific name");
+	                          
+	strncpy(taxon->name,raw->name,raw->namelength);
+		
+	return taxon;
+}
+
+
+ecotaxonomy_t    *read_taxonomy(const char *prefix,int32_t readAlternativeName)
+{
+	ecotaxonomy_t *tax;
+	char          *filename;
+	char          *filename2;
+	int           buffsize;
+	
+	tax = ECOMALLOC(sizeof(ecotaxonomy_t),
+	                "Allocate taxonomy structure");
+	
+	tax->ranks =NULL;
+	tax->taxons=NULL;
+	tax->names =NULL;
+
+	buffsize = strlen(prefix)+10;
+	
+	filename = ECOMALLOC(buffsize,
+	                     "Allocate filename");
+	filename2= ECOMALLOC(buffsize,
+	                     "Allocate filename");
+	
+	snprintf(filename,buffsize,"%s.rdx",prefix);
+	
+	tax->ranks = read_rankidx(filename);
+
+	if (tax->ranks == NULL)
+	{
+		ECOFREE(filename,"Desallocate filename 1");
+		ECOFREE(filename2,"Desallocate filename 2");
+
+		delete_ecotaxonomy(tax);
+		return NULL;
+	}
+	
+	snprintf(filename,buffsize,"%s.tdx",prefix);
+	snprintf(filename2,buffsize,"%s.ldx",prefix);
+	
+	tax->taxons = read_taxonomyidx(filename,filename2);
+	
+	if (tax->taxons == NULL)
+	{
+		ECOFREE(filename,"Desallocate filename 1");
+		ECOFREE(filename,"Desallocate filename 2");
+
+		delete_ecotaxonomy(tax);
+		return NULL;
+	}
+
+	if (readAlternativeName)
+	{
+  	   snprintf(filename,buffsize,"%s.ndx",prefix);
+	   tax->names=read_nameidx(filename,tax);
+	}
+	else
+	   tax->names=NULL;
+
+	ECOFREE(filename,"Desallocate filename 1");
+	ECOFREE(filename2,"Desallocate filename 2");
+
+	return tax;
+	
+}
+
+
+
+int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
+{
+	if (taxonomy)
+	{
+		delete_rankidx(taxonomy->ranks);
+		delete_nameidx(taxonomy->names);
+		delete_taxonomy(taxonomy->taxons);
+			
+		ECOFREE(taxonomy,"Free taxonomy structure");
+		
+		return 0;
+	}
+	
+	return 1;
+}
+
+ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
+                                int32_t rankidx)
+{
+	ecotx_t *current_taxon;
+	ecotx_t *next_taxon;
+	
+	current_taxon = taxon;
+	next_taxon    = current_taxon->parent;
+	
+	while ((current_taxon!=next_taxon) &&  // I' am the root node
+		   (current_taxon->rank!=rankidx))
+		   {
+		   	current_taxon = next_taxon;
+		   	next_taxon    = current_taxon->parent;
+		   }
+		   
+	if (current_taxon->rank==rankidx)
+		return current_taxon;
+	else
+		return NULL;
+}
+
+static int bcomptaxon (const void * ptaxid, const void * ptaxon) {
+  
+  ecotx_t    *current_taxon = (ecotx_t*)ptaxon;
+  int32_t    taxid=(int32_t)((size_t)ptaxid);
+  return taxid - current_taxon->taxid;
+}
+
+/**
+ * Get back information concerning a taxon from a taxonomic id
+ * @param 	*taxonomy 	the taxonomy database
+ * @param	taxid		the taxonomic id 
+ * 
+ * @result	a ecotx_t structure containing the taxonimic information  
+ **/
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, 
+							  int32_t taxid)
+{
+	ecotx_t    *current_taxon;
+	int32_t     taxoncount;
+
+  taxoncount=taxonomy->taxons->count;
+	
+  current_taxon = (ecotx_t*)  bsearch((const void *)((size_t)taxid), 
+                                      (const void *)taxonomy->taxons->taxon, 
+                                      taxoncount, 
+                                      sizeof(ecotx_t), 
+                                      bcomptaxon);
+	
+	return current_taxon;	
+}
+
+/**
+ * Find out if taxon is son of other taxon (identified by its taxid)
+ * @param	*taxon son 		taxon
+ * @param	parent_taxid 	taxonomic id of the other taxon
+ * 
+ * @return 	1 is the other taxid math a parent taxid, else 0
+ **/
+int eco_isundertaxon(ecotx_t *taxon, 
+						int other_taxid)
+{
+	ecotx_t *next_parent;
+	
+	next_parent = taxon->parent;	
+	
+	while ( (other_taxid != next_parent->taxid) && 
+			(strcmp(next_parent->name, "root")) )
+	{
+		next_parent = next_parent->parent;
+	}
+		
+	if (other_taxid == next_parent->taxid)
+		return 1;
+	else
+		return 0;
+}
+
+ecotx_t *eco_getspecies(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+	
+	if (taxonomy && tax!=taxonomy)
+	{
+		rankindex = rank_index("species",taxonomy->ranks);
+		tax=taxonomy;
+	}
+		
+	if (!tax || rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+		
+	return eco_findtaxonatrank(taxon,rankindex);
+}
+
+ecotx_t *eco_getgenus(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+	
+	if (taxonomy && tax!=taxonomy)
+	{
+		rankindex = rank_index("genus",taxonomy->ranks);
+		tax=taxonomy;
+	}
+		
+	if (!tax || rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+		
+	return eco_findtaxonatrank(taxon,rankindex);
+}
+
+
+ecotx_t *eco_getfamily(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+	
+	if (taxonomy && tax!=taxonomy)
+	{
+		rankindex = rank_index("family",taxonomy->ranks);
+		tax=taxonomy;
+	}
+		
+	if (!tax || rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+		
+	return eco_findtaxonatrank(taxon,rankindex);
+}
+
+ecotx_t *eco_getkingdom(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+	
+	if (taxonomy && tax!=taxonomy)
+	{
+		rankindex = rank_index("kingdom",taxonomy->ranks);
+		tax=taxonomy;
+	}
+		
+	if (!tax || rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+		
+	return eco_findtaxonatrank(taxon,rankindex);
+}
+
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+	
+	if (taxonomy && tax!=taxonomy)
+	{
+		rankindex = rank_index("superkingdom",taxonomy->ranks);
+		tax=taxonomy;
+	}
+		
+	if (!tax || rankindex < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+		
+	return eco_findtaxonatrank(taxon,rankindex);
+}
@@ -0,0 +1,845 @@
+/*
+ * robitax.c
+ *
+ *  Created on: 17 janv. 2013
+ *      Author: coissac
+ */
+
+#include "robitax.h"
+#include <unistd.h>
+//#include <regex.h>
+#include "slre.h"
+
+/**
+ * Return a pointeur to an obitools taxonomy C structure
+ * from an R instance of taxonomy.obitools
+ *
+ * The function checks if the pointer stored in the R object is set
+ * to NULL. In this case this means that we have to load the taxonomy
+ * from the disk.
+ *
+ * @param taxonomy an R object
+ * @type  taxonomy SEXP
+ *
+ * @return a pointer to the C structure
+ * @rtype  ecotaxonomy_t *
+ */
+
+ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy)
+{
+
+
+	char *pwd;
+	SEXP pointer;
+	SEXP rclass;
+	SEXP rdir;
+	SEXP rfile;
+	ecotaxonomy_t *ptax;
+	const char *class;
+	const char *file;
+	const char *dir;
+
+  int saved;
+
+    if (!IS_S4_OBJECT(Rtaxonomy) )
+        error("argument not obitools.taxonomy instance");
+
+	// We get the class name and compare it to "taxonomy.obitools"
+    rclass = getAttrib(Rtaxonomy, R_ClassSymbol);
+    class = CHAR(asChar(rclass));
+
+    if (strcmp(class,"obitools.taxonomy"))
+        error("argument not obitools.taxonomy instance");
+
+    pointer = R_do_slot(Rtaxonomy,mkString("pointer"));
+    saved = LOGICAL(R_do_slot(Rtaxonomy,mkString("saved")))[0];
+    ptax = (ecotaxonomy_t *) R_ExternalPtrAddr(pointer);
+
+    // If the external pointer is set to NULL we have to load
+    // the taxonomy from file
+    if (ptax==NULL && saved)
+    {
+    	pwd = getcwd(NULL,0);
+
+    	rfile = R_do_slot(Rtaxonomy,mkString("dbname"));
+    	file  = CHAR(asChar(rfile));
+
+    	rdir  = R_do_slot(Rtaxonomy,mkString("workingdir"));
+    	dir   = CHAR(asChar(rdir));
+
+    	chdir(dir);
+
+    	ptax = read_taxonomy(file,1);
+
+    	R_SetExternalPtrAddr(pointer,(void*)ptax);
+
+    	chdir(pwd);
+    	free(pwd);
+    }
+    
+    if (ptax==NULL && ! saved)
+      error("The taxonomy instance is no more valid and must be rebuilt");
+
+    return ptax;
+}
+
+SEXP R_delete_taxonomy(SEXP Rtaxonomy)
+{
+	ecotaxonomy_t *ptax;
+//	SEXP pointer;
+
+    ptax = (ecotaxonomy_t *) R_ExternalPtrAddr(Rtaxonomy);
+
+    (void) delete_ecotaxonomy(ptax);
+
+    // Clear the external pointer
+    R_ClearExternalPtr(Rtaxonomy);
+
+    return R_NilValue;
+
+}
+
+
+
+SEXP R_read_taxonomy(SEXP filename, SEXP altenative)
+{
+	int   alt;
+	const char* file;
+	SEXP  Rtax;
+
+    if (! isString(filename))
+        error("filename not character");
+    file = CHAR(STRING_ELT(filename, 0));
+
+    if (! isLogical(altenative))
+        error("altenative not logical");
+    alt = LOGICAL(altenative)[0];
+
+    ecotaxonomy_t *taxonomy = read_taxonomy(file,alt);
+
+    if (! taxonomy)
+    	error("Cannot open the taxonomy database");
+
+	Rtax = PROTECT(R_MakeExternalPtr(taxonomy, mkString("ROBITools NCBI Taxonomy pointer"), R_NilValue));
+	R_RegisterCFinalizerEx(Rtax, (R_CFinalizer_t)R_delete_taxonomy,TRUE);
+
+    UNPROTECT(1);
+
+
+	return Rtax;
+}
+
+
+SEXP R_get_scientific_name(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+	ecotx_t *taxon;
+	ecotaxonomy_t *ptax;
+	int taxid;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+		return ScalarString(R_NaString);
+		// error("unkown taxid");
+
+	return mkString(taxon->name);
+
+}
+
+SEXP R_get_rank(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+	ecotx_t *taxon;
+	ecotaxonomy_t *ptax;
+	int *taxid;
+  int ntaxid;
+  int i;
+  SEXP results;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+        
+    ntaxid = length(Rtaxid);
+    
+    results = PROTECT(allocVector(STRSXP, ntaxid));
+
+    taxid = INTEGER(Rtaxid);
+
+    for (i=0; i < ntaxid; i++)
+    {
+      if (taxid[i]== NA_INTEGER || taxid[i] <= 0)
+        SET_STRING_ELT(results, i, R_NaString);
+      else {
+        taxon = eco_findtaxonbytaxid(ptax, taxid[i]);
+        if (!taxon)
+          SET_STRING_ELT(results, i, R_NaString);
+        else
+          SET_STRING_ELT(results, i, mkChar(ptax->ranks->label[taxon->rank]));
+      }
+    }
+
+  UNPROTECT(1);
+  
+	return results;
+
+}
+
+SEXP R_findtaxonatrank(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rrank, SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+	const char *rank;
+	int   rankidx;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isString(Rrank))
+        error("rank not a string");
+
+    rank=CHAR(STRING_ELT(Rrank,0));
+
+    rankidx=rank_index(rank,ptax->ranks);
+
+    if (rankidx < 0)
+        error("unkown rank name");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+    name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+	rep = eco_findtaxonatrank(taxon,rankidx);
+
+	if (!rep)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	if (name)
+		return mkString(rep->name);
+	else
+		return ScalarInteger(rep->taxid);
+}
+
+
+SEXP R_get_species(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+    name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	rep = eco_getspecies(taxon,ptax);
+
+	if (!rep)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	if (name)
+		return mkString(rep->name);
+	else
+		return ScalarInteger(rep->taxid);
+}
+
+SEXP R_get_genus(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+    name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	rep = eco_getgenus(taxon,ptax);
+
+	if (!rep)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	if (name)
+		return mkString(rep->name);
+	else
+		return ScalarInteger(rep->taxid);
+}
+
+SEXP R_get_family(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+    name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	rep = eco_getfamily(taxon,ptax);
+
+	if (!rep)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	if (name)
+		return mkString(rep->name);
+	else
+		return ScalarInteger(rep->taxid);
+}
+
+SEXP R_get_kingdom(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+    name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	rep = eco_getkingdom(taxon,ptax);
+
+	if (!rep)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	if (name)
+		return mkString(rep->name);
+	else
+		return ScalarInteger(rep->taxid);
+}
+
+SEXP R_get_superkingdom(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+    name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	rep = eco_getsuperkingdom(taxon,ptax);
+
+	if (!rep)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	if (name)
+		return mkString(rep->name);
+	else
+		return ScalarInteger(rep->taxid);
+}
+
+SEXP R_get_parent(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+	ecotx_t *taxon;
+	ecotx_t *rep;
+	ecotaxonomy_t *ptax;
+	int taxid;
+	int name;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+    name = LOGICAL(Rname)[0];
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	rep = taxon->parent;
+
+	if (rep->taxid==taxid)
+  {
+		if (name)
+			return ScalarString(R_NaString);
+		else
+			return ScalarInteger(R_NaInt);
+  }
+  
+	if (name)
+		return mkString(rep->name);
+	else
+		return ScalarInteger(rep->taxid);
+}
+
+
+SEXP R_validate_taxid(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+	ecotx_t *taxon;
+	ecotaxonomy_t *ptax;
+	int taxid;
+//	int name;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (! (taxid > 0))
+    	return ScalarInteger(R_NaInt);
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+		return ScalarInteger(R_NaInt);
+	else
+		return ScalarInteger(taxon->taxid);
+}
+
+
+SEXP R_is_under_taxon(SEXP Rtaxonomy, SEXP Rtaxid, SEXP Rparent)
+{
+	ecotx_t *taxon;
+	ecotaxonomy_t *ptax;
+	int taxid;
+  int *ptaxid;
+  int ntaxid;
+	int parent;
+	SEXP rep;
+  int* prep;
+  size_t i;
+//	SEXP isunder;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rparent))
+        error("parent not integer");
+        
+    if (GET_LENGTH(Rparent) > 1)
+        error("parent must have a length equal to one.");
+
+    parent = *INTEGER(Rparent);
+    
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    ntaxid = GET_LENGTH(Rtaxid);
+    rep = PROTECT(NEW_LOGICAL(ntaxid));
+    prep=LOGICAL(rep);
+
+    ptaxid = INTEGER_POINTER(Rtaxid); 
+
+
+    if (parent > 0) {
+        taxon = eco_findtaxonbytaxid(ptax, parent);
+        if (taxon) {
+          for (i=0; i < ntaxid; i++) {
+            if (ptaxid[i] <= 0)
+              prep[i]=NA_LOGICAL;
+            else {
+              taxon = eco_findtaxonbytaxid(ptax, ptaxid[i]);
+              prep[i] = eco_isundertaxon(taxon, parent);            
+            }  
+          }          
+        }
+    }    
+    else
+    {
+      for (i=0; i < ntaxid; i++)
+        prep[i]=NA_LOGICAL;
+    }
+
+  unprotect(1);
+	return rep;
+}
+
+
+SEXP R_longest_path(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+	ecotx_t *taxon;
+	ecotaxonomy_t *ptax;
+	int taxid;
+//	int name;
+//	SEXP scname;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    taxid = *INTEGER(Rtaxid);
+
+    if (taxid <= 0)
+    	return ScalarInteger(R_NaInt);
+
+	taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+	if (!taxon)
+		return ScalarInteger(R_NaInt);
+	else
+		return ScalarInteger(taxon->farest);
+}
+
+SEXP R_rank_list(SEXP Rtaxonomy)
+{
+	int nrank;
+	int i;
+	ecotaxonomy_t *ptax;
+	SEXP rNames;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    nrank = ptax->ranks->count;
+
+    rNames = PROTECT(allocVector(STRSXP, nrank));
+
+	for (i=0; i < nrank;i++)
+		SET_STRING_ELT(rNames, i, mkChar(ptax->ranks->label[i]));
+
+	UNPROTECT(1);
+
+	return rNames;
+}
+
+SEXP R_taxid_list(SEXP Rtaxonomy)
+{
+	int ntaxid;
+	int i;
+	ecotaxonomy_t *ptax;
+	SEXP rTaxids;
+
+    ptax = getTaxPointer(Rtaxonomy);
+    ntaxid  = ptax->taxons->count;
+    rTaxids = PROTECT(allocVector(INTSXP, ntaxid));
+
+	for (i=0; i < ntaxid;i++)
+		INTEGER(rTaxids)[i]=ptax->taxons->taxon[i].taxid;
+
+	UNPROTECT(1);
+
+	return rTaxids;
+
+}
+
+SEXP R_max_taxid(SEXP Rtaxonomy)
+{
+//	int nrank;
+//	int i;
+	ecotaxonomy_t *ptax;
+//	SEXP rNames;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    return ScalarInteger(ptax->taxons->maxtaxid);
+}
+
+SEXP R_length_taxonomy(SEXP Rtaxonomy)
+{
+	ecotaxonomy_t *ptax;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    return ScalarInteger(ptax->taxons->count);
+}
+
+SEXP R_ecofind(SEXP Rtaxonomy, SEXP Rpattern, SEXP Rrank, SEXP Ralternative)
+{
+  ecotaxonomy_t *ptax;
+  econame_t		*name;
+  char*     pattern=NULL;
+	int				re_match;
+  SEXP      taxids;
+  int32_t*  buffer;
+  int32_t		tax_count	= 0;
+  size_t 		j 		= 0;
+  int32_t		rankfilter 	= 1;
+  int*      ptaxid;
+  char			*rankname=NULL;
+  int32_t			nummatch 	= 0;
+  int32_t         alternative = 0;
+
+  size_t    bsize;
+	
+  ptax = getTaxPointer(Rtaxonomy);
+  tax_count = ptax->taxons->count;
+	
+  if (! isString(Rpattern))
+      error("pattern not a string");
+
+  pattern= (char*) CHAR(STRING_ELT(Rpattern,0));
+
+  if (! isNull(Rrank))
+  {
+    if (! isString(Rrank))
+      error("rank not a string");
+
+    rankname= (char*) CHAR(STRING_ELT(Rrank,0));
+  }
+  
+  if (! isLogical(Ralternative))
+      error("rank not a logical");
+      
+  alternative = LOGICAL(Ralternative)[0];
+
+		
+	nummatch=0;
+  buffer = (int32_t*) malloc(100 * sizeof(int32_t));
+  bsize=100;
+
+  if (alternative && ptax->names!=NULL)
+	  for (j=0,name=ptax->names->names;
+			  j < ptax->names->count;
+			  name++,j++)
+	  {
+		  if(rankname)
+			  rankfilter = !(strcmp(rankname,ptax->ranks->label[name->taxon->rank]));
+
+  	  re_match = slre_match(pattern, name->name, 
+                            strlen(name->name), 
+                            NULL, 0, 
+                            SLRE_IGNORE_CASE);
+
+  	  if (re_match > 0 && rankfilter)
+		  {
+			  buffer[nummatch]=name->taxon->taxid;
+			  nummatch++;
+			  if (nummatch==bsize) {
+				  bsize*=2;
+				  buffer = (int32_t*) realloc(buffer, bsize * sizeof(int32_t));
+				  if (buffer==0)
+				  {
+					  // regfree(&re_preg);
+					  error("Cannot allocate memory for the taxid list");
+				  }
+			  }
+		  }
+
+	  }
+  else
+	  for (j=0; j < ptax->taxons->count;j++)
+	  {
+		  if(rankname)
+			  rankfilter = !(strcmp(rankname,ptax->ranks->label[ptax->taxons->taxon[j].rank]));
+
+//		  re_match = regexec (&re_preg, ptax->taxons->taxon[j].name, 0, NULL, 0);
+      re_match = slre_match(pattern, ptax->taxons->taxon[j].name, 
+                            strlen(ptax->taxons->taxon[j].name), 
+                            NULL, 0, 
+                            SLRE_IGNORE_CASE);
+
+
+//  	  if (!re_match && rankfilter)
+  	  if (re_match > 0 && rankfilter)
+		  {
+			  buffer[nummatch]=ptax->taxons->taxon[j].taxid;
+			  nummatch++;
+			  if (nummatch==bsize) {
+				  bsize*=2;
+				  buffer = (int32_t*) realloc(buffer, bsize * sizeof(int32_t));
+				  if (buffer==0)
+				  {
+					  // regfree(&re_preg);
+					  error("Cannot allocate memory for the taxid list");
+				  }
+			  }
+		  }
+
+	  }
+
+   	//regfree(&re_preg);
+
+    taxids = PROTECT(NEW_INTEGER(nummatch));
+    ptaxid = INTEGER(taxids);
+    
+    for (j=0; j < nummatch; j++)
+      ptaxid[j]=buffer[j];
+      
+    free(buffer);
+
+    UNPROTECT(1);
+    return taxids;
+}
@@ -0,0 +1,6 @@
+#include "ecoPCR.h"
+
+
+ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
+SEXP R_delete_taxonomy(SEXP Rtaxonomy);
+
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2004-2013 Sergey Lyubka <valenok@gmail.com>
+ * Copyright (c) 2013 Cesanta Software Limited
+ * All rights reserved
+ *
+ * This library is dual-licensed: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. For the terms of this
+ * license, see <http://www.gnu.org/licenses/>.
+ *
+ * You are free to use this library under the terms of the GNU General
+ * Public License, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * Alternatively, you can license this library under a commercial
+ * license, as set out in <http://cesanta.com/products.html>.
+ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "slre.h"
+
+#define MAX_BRANCHES 100
+#define MAX_BRACKETS 100
+#define FAIL_IF(condition, error_code) if (condition) return (error_code)
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(ar) (sizeof(ar) / sizeof((ar)[0]))
+#endif
+
+#ifdef SLRE_DEBUG
+#define DBG(x) printf x
+#else
+#define DBG(x)
+#endif
+
+struct bracket_pair {
+  const char *ptr;  /* Points to the first char after '(' in regex  */
+  int len;          /* Length of the text between '(' and ')'       */
+  int branches;     /* Index in the branches array for this pair    */
+  int num_branches; /* Number of '|' in this bracket pair           */
+};
+
+struct branch {
+  int bracket_index;    /* index for 'struct bracket_pair brackets' */
+                        /* array defined below                      */
+  const char *schlong;  /* points to the '|' character in the regex */
+};
+
+struct regex_info {
+  /*
+   * Describes all bracket pairs in the regular expression.
+   * First entry is always present, and grabs the whole regex.
+   */
+  struct bracket_pair brackets[MAX_BRACKETS];
+  int num_brackets;
+
+  /*
+   * Describes alternations ('|' operators) in the regular expression.
+   * Each branch falls into a specific branch pair.
+   */
+  struct branch branches[MAX_BRANCHES];
+  int num_branches;
+
+  /* Array of captures provided by the user */
+  struct slre_cap *caps;
+  int num_caps;
+
+  /* E.g. SLRE_IGNORE_CASE. See enum below */
+  int flags;
+};
+
+static int is_metacharacter(const unsigned char *s) {
+  static const char *metacharacters = "^$().[]*+?|\\Ssdbfnrtv";
+  return strchr(metacharacters, *s) != NULL;
+}
+
+static int op_len(const char *re) {
+  return re[0] == '\\' && re[1] == 'x' ? 4 : re[0] == '\\' ? 2 : 1;
+}
+
+static int set_len(const char *re, int re_len) {
+  int len = 0;
+
+  while (len < re_len && re[len] != ']') {
+    len += op_len(re + len);
+  }
+
+  return len <= re_len ? len + 1 : -1;
+}
+
+static int get_op_len(const char *re, int re_len) {
+  return re[0] == '[' ? set_len(re + 1, re_len - 1) + 1 : op_len(re);
+}
+
+static int is_quantifier(const char *re) {
+  return re[0] == '*' || re[0] == '+' || re[0] == '?';
+}
+
+static int toi(int x) {
+  return isdigit(x) ? x - '0' : x - 'W';
+}
+
+static int hextoi(const unsigned char *s) {
+  return (toi(tolower(s[0])) << 4) | toi(tolower(s[1]));
+}
+
+static int match_op(const unsigned char *re, const unsigned char *s,
+                    struct regex_info *info) {
+  int result = 0;
+  switch (*re) {
+    case '\\':
+      /* Metacharacters */
+      switch (re[1]) {
+        case 'S': FAIL_IF(isspace(*s), SLRE_NO_MATCH); result++; break;
+        case 's': FAIL_IF(!isspace(*s), SLRE_NO_MATCH); result++; break;
+        case 'd': FAIL_IF(!isdigit(*s), SLRE_NO_MATCH); result++; break;
+        case 'b': FAIL_IF(*s != '\b', SLRE_NO_MATCH); result++; break;
+        case 'f': FAIL_IF(*s != '\f', SLRE_NO_MATCH); result++; break;
+        case 'n': FAIL_IF(*s != '\n', SLRE_NO_MATCH); result++; break;
+        case 'r': FAIL_IF(*s != '\r', SLRE_NO_MATCH); result++; break;
+        case 't': FAIL_IF(*s != '\t', SLRE_NO_MATCH); result++; break;
+        case 'v': FAIL_IF(*s != '\v', SLRE_NO_MATCH); result++; break;
+
+        case 'x':
+          /* Match byte, \xHH where HH is hexadecimal byte representaion */
+          FAIL_IF(hextoi(re + 2) != *s, SLRE_NO_MATCH);
+          result++;
+          break;
+
+        default:
+          /* Valid metacharacter check is done in bar() */
+          FAIL_IF(re[1] != s[0], SLRE_NO_MATCH);
+          result++;
+          break;
+      }
+      break;
+
+    case '|': FAIL_IF(1, SLRE_INTERNAL_ERROR); break;
+    case '$': FAIL_IF(1, SLRE_NO_MATCH); break;
+    case '.': result++; break;
+
+    default:
+      if (info->flags & SLRE_IGNORE_CASE) {
+        FAIL_IF(tolower(*re) != tolower(*s), SLRE_NO_MATCH);
+      } else {
+        FAIL_IF(*re != *s, SLRE_NO_MATCH);
+      }
+      result++;
+      break;
+  }
+
+  return result;
+}
+
+static int match_set(const char *re, int re_len, const char *s,
+                     struct regex_info *info) {
+  int len = 0, result = -1, invert = re[0] == '^';
+
+  if (invert) re++, re_len--;
+
+  while (len <= re_len && re[len] != ']' && result <= 0) {
+    /* Support character range */
+    if (re[len] != '-' && re[len + 1] == '-' && re[len + 2] != ']' &&
+        re[len + 2] != '\0') {
+      result = info->flags &&  SLRE_IGNORE_CASE ?
+        *s >= re[len] && *s <= re[len + 2] :
+        tolower(*s) >= tolower(re[len]) && tolower(*s) <= tolower(re[len + 2]);
+      len += 3;
+    } else {
+      result = match_op((unsigned char *) re + len, (unsigned char *) s, info);
+      len += op_len(re + len);
+    }
+  }
+  return (!invert && result > 0) || (invert && result <= 0) ? 1 : -1;
+}
+
+static int doh(const char *s, int s_len, struct regex_info *info, int bi);
+
+static int bar(const char *re, int re_len, const char *s, int s_len,
+               struct regex_info *info, int bi) {
+  /* i is offset in re, j is offset in s, bi is brackets index */
+  int i, j, n, step;
+
+  for (i = j = 0; i < re_len && j <= s_len; i += step) {
+
+    /* Handle quantifiers. Get the length of the chunk. */
+    step = re[i] == '(' ? info->brackets[bi + 1].len + 2 :
+      get_op_len(re + i, re_len - i);
+
+    DBG(("%s [%.*s] [%.*s] re_len=%d step=%d i=%d j=%d\n", __func__,
+         re_len - i, re + i, s_len - j, s + j, re_len, step, i, j));
+
+    FAIL_IF(is_quantifier(&re[i]), SLRE_UNEXPECTED_QUANTIFIER);
+    FAIL_IF(step <= 0, SLRE_INVALID_CHARACTER_SET);
+
+    if (i + step < re_len && is_quantifier(re + i + step)) {
+      DBG(("QUANTIFIER: [%.*s]%c [%.*s]\n", step, re + i,
+           re[i + step], s_len - j, s + j));
+      if (re[i + step] == '?') {
+        int result = bar(re + i, step, s + j, s_len - j, info, bi);
+        j += result > 0 ? result : 0;
+        i++;
+      } else if (re[i + step] == '+' || re[i + step] == '*') {
+        int j2 = j, nj = j, n1, n2 = -1, ni, non_greedy = 0;
+
+        /* Points to the regexp code after the quantifier */
+        ni = i + step + 1;
+        if (ni < re_len && re[ni] == '?') {
+          non_greedy = 1;
+          ni++;
+        }
+
+        do {
+          if ((n1 = bar(re + i, step, s + j2, s_len - j2, info, bi)) > 0) {
+            j2 += n1;
+          }
+          if (re[i + step] == '+' && n1 < 0) break;
+
+          if (ni >= re_len) {
+            /* After quantifier, there is nothing */
+            nj = j2;
+          } else if ((n2 = bar(re + ni, re_len - ni, s + j2,
+                               s_len - j2, info, bi)) >= 0) {
+            /* Regex after quantifier matched */
+            nj = j2 + n2;
+          }
+          if (nj > j && non_greedy) break;
+        } while (n1 > 0);
+
+        if (n1 < 0 && re[i + step] == '*' &&
+            (n2 = bar(re + ni, re_len - ni, s + j, s_len - j, info, bi)) > 0) {
+          nj = j + n2;
+        }
+
+        DBG(("STAR/PLUS END: %d %d %d %d %d\n", j, nj, re_len - ni, n1, n2));
+        FAIL_IF(re[i + step] == '+' && nj == j, SLRE_NO_MATCH);
+
+        /* If while loop body above was not executed for the * quantifier,  */
+        /* make sure the rest of the regex matches                          */
+        FAIL_IF(nj == j && ni < re_len && n2 < 0, SLRE_NO_MATCH);
+
+        /* Returning here cause we've matched the rest of RE already */
+        return nj;
+      }
+      continue;
+    }
+
+    if (re[i] == '[') {
+      n = match_set(re + i + 1, re_len - (i + 2), s + j, info);
+      DBG(("SET %.*s [%.*s] -> %d\n", step, re + i, s_len - j, s + j, n));
+      FAIL_IF(n <= 0, SLRE_NO_MATCH);
+      j += n;
+    } else if (re[i] == '(') {
+      n = SLRE_NO_MATCH;
+      bi++;
+      FAIL_IF(bi >= info->num_brackets, SLRE_INTERNAL_ERROR);
+      DBG(("CAPTURING [%.*s] [%.*s] [%s]\n",
+           step, re + i, s_len - j, s + j, re + i + step));
+
+      if (re_len - (i + step) <= 0) {
+        /* Nothing follows brackets */
+        n = doh(s + j, s_len - j, info, bi);
+      } else {
+        int j2;
+        for (j2 = 0; j2 <= s_len - j; j2++) {
+          if ((n = doh(s + j, s_len - (j + j2), info, bi)) >= 0 &&
+              bar(re + i + step, re_len - (i + step),
+                  s + j + n, s_len - (j + n), info, bi) >= 0) break;
+        }
+      }
+
+      DBG(("CAPTURED [%.*s] [%.*s]:%d\n", step, re + i, s_len - j, s + j, n));
+      FAIL_IF(n < 0, n);
+      if (info->caps != NULL) {
+        info->caps[bi - 1].ptr = s + j;
+        info->caps[bi - 1].len = n;
+      }
+      j += n;
+    } else if (re[i] == '^') {
+      FAIL_IF(j != 0, SLRE_NO_MATCH);
+    } else if (re[i] == '$') {
+      FAIL_IF(j != s_len, SLRE_NO_MATCH);
+    } else {
+      FAIL_IF(j >= s_len, SLRE_NO_MATCH);
+      n = match_op((unsigned char *) (re + i), (unsigned char *) (s + j), info);
+      FAIL_IF(n <= 0, n);
+      j += n;
+    }
+  }
+
+  return j;
+}
+
+/* Process branch points */
+static int doh(const char *s, int s_len, struct regex_info *info, int bi) {
+  const struct bracket_pair *b = &info->brackets[bi];
+  int i = 0, len, result;
+  const char *p;
+
+  do {
+    p = i == 0 ? b->ptr : info->branches[b->branches + i - 1].schlong + 1;
+    len = b->num_branches == 0 ? b->len :
+      i == b->num_branches ? (int) (b->ptr + b->len - p) :
+      (int) (info->branches[b->branches + i].schlong - p);
+    DBG(("%s %d %d [%.*s] [%.*s]\n", __func__, bi, i, len, p, s_len, s));
+    result = bar(p, len, s, s_len, info, bi);
+    DBG(("%s <- %d\n", __func__, result));
+  } while (result <= 0 && i++ < b->num_branches);  /* At least 1 iteration */
+
+  return result;
+}
+
+static int baz(const char *s, int s_len, struct regex_info *info) {
+  int i, result = -1, is_anchored = info->brackets[0].ptr[0] == '^';
+
+  for (i = 0; i <= s_len; i++) {
+    result = doh(s + i, s_len - i, info, 0);
+    if (result >= 0) {
+      result += i;
+      break;
+    }
+    if (is_anchored) break;
+  }
+
+  return result;
+}
+
+static void setup_branch_points(struct regex_info *info) {
+  int i, j;
+  struct branch tmp;
+
+  /* First, sort branches. Must be stable, no qsort. Use bubble algo. */
+  for (i = 0; i < info->num_branches; i++) {
+    for (j = i + 1; j < info->num_branches; j++) {
+      if (info->branches[i].bracket_index > info->branches[j].bracket_index) {
+        tmp = info->branches[i];
+        info->branches[i] = info->branches[j];
+        info->branches[j] = tmp;
+      }
+    }
+  }
+
+  /*
+   * For each bracket, set their branch points. This way, for every bracket
+   * (i.e. every chunk of regex) we know all branch points before matching.
+   */
+  for (i = j = 0; i < info->num_brackets; i++) {
+    info->brackets[i].num_branches = 0;
+    info->brackets[i].branches = j;
+    while (j < info->num_branches && info->branches[j].bracket_index == i) {
+      info->brackets[i].num_branches++;
+      j++;
+    }
+  }
+}
+
+static int foo(const char *re, int re_len, const char *s, int s_len,
+               struct regex_info *info) {
+  int i, step, depth = 0;
+
+  /* First bracket captures everything */
+  info->brackets[0].ptr = re;
+  info->brackets[0].len = re_len;
+  info->num_brackets = 1;
+
+  /* Make a single pass over regex string, memorize brackets and branches */
+  for (i = 0; i < re_len; i += step) {
+    step = get_op_len(re + i, re_len - i);
+
+    if (re[i] == '|') {
+      FAIL_IF(info->num_branches >= (int) ARRAY_SIZE(info->branches),
+              SLRE_TOO_MANY_BRANCHES);
+      info->branches[info->num_branches].bracket_index =
+        info->brackets[info->num_brackets - 1].len == -1 ?
+        info->num_brackets - 1 : depth;
+      info->branches[info->num_branches].schlong = &re[i];
+      info->num_branches++;
+    } else if (re[i] == '\\') {
+      FAIL_IF(i >= re_len - 1, SLRE_INVALID_METACHARACTER);
+      if (re[i + 1] == 'x') {
+        /* Hex digit specification must follow */
+        FAIL_IF(re[i + 1] == 'x' && i >= re_len - 3,
+                SLRE_INVALID_METACHARACTER);
+        FAIL_IF(re[i + 1] ==  'x' && !(isxdigit(re[i + 2]) &&
+                isxdigit(re[i + 3])), SLRE_INVALID_METACHARACTER);
+      } else {
+        FAIL_IF(!is_metacharacter((unsigned char *) re + i + 1),
+                SLRE_INVALID_METACHARACTER);
+      }
+    } else if (re[i] == '(') {
+      FAIL_IF(info->num_brackets >= (int) ARRAY_SIZE(info->brackets),
+              SLRE_TOO_MANY_BRACKETS);
+      depth++;  /* Order is important here. Depth increments first. */
+      info->brackets[info->num_brackets].ptr = re + i + 1;
+      info->brackets[info->num_brackets].len = -1;
+      info->num_brackets++;
+      FAIL_IF(info->num_caps > 0 && info->num_brackets - 1 > info->num_caps,
+              SLRE_CAPS_ARRAY_TOO_SMALL);
+    } else if (re[i] == ')') {
+      int ind = info->brackets[info->num_brackets - 1].len == -1 ?
+        info->num_brackets - 1 : depth;
+      info->brackets[ind].len = (int) (&re[i] - info->brackets[ind].ptr);
+      DBG(("SETTING BRACKET %d [%.*s]\n",
+           ind, info->brackets[ind].len, info->brackets[ind].ptr));
+      depth--;
+      FAIL_IF(depth < 0, SLRE_UNBALANCED_BRACKETS);
+      FAIL_IF(i > 0 && re[i - 1] == '(', SLRE_NO_MATCH);
+    }
+  }
+
+  FAIL_IF(depth != 0, SLRE_UNBALANCED_BRACKETS);
+  setup_branch_points(info);
+
+  return baz(s, s_len, info);
+}
+
+int slre_match(const char *regexp, const char *s, int s_len,
+               struct slre_cap *caps, int num_caps, int flags) {
+  struct regex_info info;
+
+  /* Initialize info structure */
+  info.flags = flags;
+  info.num_brackets = info.num_branches = 0;
+  info.num_caps = num_caps;
+  info.caps = caps;
+
+  DBG(("========================> [%s] [%.*s]\n", regexp, s_len, s));
+  return foo(regexp, (int) strlen(regexp), s, s_len, &info);
+}
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2004-2013 Sergey Lyubka <valenok@gmail.com>
+ * Copyright (c) 2013 Cesanta Software Limited
+ * All rights reserved
+ *
+ * This library is dual-licensed: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. For the terms of this
+ * license, see <http://www.gnu.org/licenses/>.
+ *
+ * You are free to use this library under the terms of the GNU General
+ * Public License, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * Alternatively, you can license this library under a commercial
+ * license, as set out in <http://cesanta.com/products.html>.
+ */
+
+/*
+ * This is a regular expression library that implements a subset of Perl RE.
+ * Please refer to README.md for a detailed reference.
+ */
+
+#ifndef SLRE_HEADER_DEFINED
+#define SLRE_HEADER_DEFINED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct slre_cap {
+  const char *ptr;
+  int len;
+};
+
+
+int slre_match(const char *regexp, const char *buf, int buf_len,
+               struct slre_cap *caps, int num_caps, int flags);
+
+/* Possible flags for slre_match() */
+enum { SLRE_IGNORE_CASE = 1 };
+
+
+/* slre_match() failure codes */
+#define SLRE_NO_MATCH               -1
+#define SLRE_UNEXPECTED_QUANTIFIER  -2
+#define SLRE_UNBALANCED_BRACKETS    -3
+#define SLRE_INTERNAL_ERROR         -4
+#define SLRE_INVALID_CHARACTER_SET  -5
+#define SLRE_INVALID_METACHARACTER  -6
+#define SLRE_CAPS_ARRAY_TOO_SMALL   -7
+#define SLRE_TOO_MANY_BRANCHES      -8
+#define SLRE_TOO_MANY_BRACKETS      -9
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* SLRE_HEADER_DEFINED */