Initial commit

2016-01-13 10:20:39 +01:00
commit ffa9638359
62 changed files with 53833 additions and 0 deletions
--- a/ROBITools.Rproj
+++ b/ROBITools.Rproj
@@ -0,0 +1,19 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: ISO-8859-1
+
+RnwWeave: knitr
+LaTeX: pdfLaTeX
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackagePath: ROBITools
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
--- a/ROBITools/.gitignore
+++ b/ROBITools/.gitignore
@@ -0,0 +1,3 @@
+/man/
+/loopbenchmark.R
+/Read-and-delete-me
--- a/ROBITools/DESCRIPTION
+++ b/ROBITools/DESCRIPTION
@@ -0,0 +1,38 @@
+Package: ROBITools
+Type: Package
+Title: Metabarcoding data biodiversity analysis
+Version: 0.1
+Date: 2012-08-23
+Author: LECA - Laboratoire d'ecologie alpine
+Maintainer: LECA OBITools team <obitools@metabarcoding.org>
+Description: Tools to import, manipulate and analyse DNA metabarcoding data in R. The package was written as a follow-up to the OBITools.
+License: CeCILL v2.0
+LazyLoad: yes
+Roxygen: list(wrap = FALSE)
+Collate:
+    's3objects.R'
+    'ROBITools.R'
+    '02_class_metabarcoding.data.R'
+    'aggregate.R'
+    'choose.taxonomy.R'
+    'contaslayer.R'
+    'distrib.extrapol.R'
+    'experimental.section.R'
+    'export-metabarcoding.R'
+    'read.obitab.R'
+    'import.metabarcoding.R'
+    'import.ngsfilter.R'
+    'layers.metabarcoding.R'
+    'metabarcoding_threshold.R'
+    'mstat.R'
+    'obiclean.R'
+    'pcrslayer.R'
+    'plot.PCRplate.R'
+    'plot.seqinsample.R'
+    'rarefy.R'
+    'read.ngsfilter.R'
+    'read.sumatra.R'
+    'taxoDBtree.R'
+    'taxonomic.resolution.R'
+    'taxonomy_classic_table.R'
+RoxygenNote: 5.0.1
--- a/ROBITools/LICENSE-SLRE
+++ b/ROBITools/LICENSE-SLRE
@@ -0,0 +1,16 @@
+Copyright (c) 2004-2013 Sergey Lyubka <valenok@gmail.com>
+Copyright (c) 2013 Cesanta Software Limited
+All rights reserved
+
+This code is dual-licensed: you can redistribute it and/or modify
+it under the terms of the GNU General Public License version 2 as
+published by the Free Software Foundation. For the terms of this
+license, see <http://www.gnu.org/licenses/>.
+
+You are free to use this code under the terms of the GNU General
+Public License, but WITHOUT ANY WARRANTY; without even the implied
+warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+
+Alternatively, you can license this code under a commercial
+license, as set out in <http://cesanta.com/>.
--- a/ROBITools/NAMESPACE
+++ b/ROBITools/NAMESPACE
@@ -0,0 +1,52 @@
+# Generated by roxygen2: do not edit by hand
+
+S3method(aggregate,metabarcoding.data)
+S3method(plot,PCRplate)
+S3method(plot,seqinsample)
+S3method(summary,taxores)
+export(addS3Class)
+export(colnames)
+export(const.threshold.mask)
+export(contaslayer)
+export(createS3Class)
+export(dbtree)
+export(dist.center.group)
+export(dist.clique.group)
+export(dist.grid)
+export(dm.univariate)
+export(extracts.obiclean)
+export(extracts.obiclean_cluster)
+export(extrapol.freq)
+export(get.classic.taxonomy)
+export(import.metabarcoding.data)
+export(import.ngsfilter.data)
+export(layer.names)
+export(m.bivariate)
+export(m.univariate)
+export(m.univariate.test)
+export(m.weight)
+export(map.extrapol.freq)
+export(marginalsum)
+export(metabarcoding.data)
+export(motus)
+export(normalize)
+export(rarefy)
+export(read.ngsfilter)
+export(read.obitab)
+export(reads)
+export(rmS3Class)
+export(rownames)
+export(samples)
+export(taxo.decider)
+export(threshold)
+export(threshold.mask)
+export(threshold.set)
+exportClasses(metabarcoding.data)
+exportMethods("$")
+exportMethods("$<-")
+exportMethods("[[")
+exportMethods("[[<-")
+exportMethods(colnames)
+exportMethods(rownames)
+import(ROBITaxonomy)
+import(igraph)
--- a/ROBITools/R/02_class_metabarcoding.data.R
+++ b/ROBITools/R/02_class_metabarcoding.data.R
@@ -0,0 +1,539 @@
+#' @include ROBITools.R
+#' @include s3objects.R
+#' @import ROBITaxonomy 
+NULL
+
+require(ROBITaxonomy)
+
+#
+# FOR THE DEVELOPER: we have to check that the code does not rely on the
+#                    xx@samples$sample column being identical to
+#                    rownames(xx@samples), because this is not always the case
+
+# Virtual class accepting either a character vector or NULL
+# (slot type of `taxid` in the metabarcoding.data class).
+setClassUnion("characterOrNULL",c("character","NULL"))
+# Virtual class accepting either a matrix or a factor.
+setClassUnion("matrixOrfactorL",c("matrix","factor"))
+
+#
+# We specialize data.frame into two subclasses, motus.frame and samples.frame.
+# For this we add two functions per subclass: one checking the type and one
+# casting from data.frame.
+#
+
+# Type predicates: TRUE when the class vector of `x` contains the
+# "motus.frame" (respectively "samples.frame") tag.
+is.motus.frame = function(x) {
+  "motus.frame" %in% class(x)
+}
+is.samples.frame = function(x) {
+  "samples.frame" %in% class(x)
+}
+
+# Cast a data.frame to the "motus.frame" S3 subclass.
+#
+# Anything that is not a data.frame is rejected with an error. An object
+# already tagged "motus.frame" is returned unchanged; otherwise the tag is
+# added through addS3Class().
+as.motus.frame = function(x) {
+  if (!is.data.frame(x)) {
+    stop("only cast from data.frame is allowed")
+  }
+
+  if (is.motus.frame(x)) {
+    return(x)
+  }
+
+  tagged = addS3Class(x, "motus.frame")
+  return(tagged)
+}
+
+
+# Cast a data.frame to the "samples.frame" S3 subclass.
+#
+# Anything that is not a data.frame is rejected with an error. An object
+# already tagged "samples.frame" is returned unchanged; otherwise the tag is
+# added through addS3Class().
+as.samples.frame = function(x) {
+  if (!is.data.frame(x)) {
+    stop("only cast from data.frame is allowed")
+  }
+
+  if (is.samples.frame(x)) {
+    return(x)
+  }
+
+  tagged = addS3Class(x, "samples.frame")
+  return(tagged)
+}
+
+# Constructor-style aliases: samples.frame() and motus.frame() are the very
+# same functions as the as.samples.frame() / as.motus.frame() casts.
+samples.frame=as.samples.frame
+motus.frame=as.motus.frame
+
+# Cast `x` to a matrix or to a two-dimensional factor.
+#
+#   x   a matrix, a factor carrying a 2D `dim` attribute, or a data.frame
+#       whose columns all share the same type (columns containing only NA
+#       are ignored for this comparison)
+#
+# Matrices and 2D factors are returned unchanged. A data.frame is converted
+# with as.matrix(); when its columns are factors the result is turned back
+# into a factor with the same dimensions.
+#
+# Errors: factor without two dimensions, unsupported input type, or
+# data.frame mixing column types.
+as.factor.or.matrix = function(x) {
+  if (is.matrix(x))
+    return(x)
+  
+  if (is.factor(x)){
+    if (length(dim(x))!=2)
+      stop('Just factor with two dimensions are allowed')
+    return(x)
+  }
+  
+  if (!is.data.frame(x))
+    stop('Just matrix, 2D factor and data.frame can be casted')
+  
+  # A data.frame without any column has no type to check.
+  if (ncol(x) == 0)
+    return(as.matrix(x))
+  
+  # One type label per column; every kind of factor (ordered or not) is
+  # labelled 'factor' so that multi-class columns yield a single label.
+  tps   = vapply(x, 
+                 function(y) if (is.factor(y)) 'factor' else class(y)[1],
+                 character(1))
+  allna = vapply(x, function(y) all(is.na(y)), logical(1))
+  
+  # The reference type comes from the first column holding real data, so
+  # that a leading all-NA column does not impose its (logical) type.
+  informative = which(!allna)
+  if (length(informative) > 0)
+    ref = tps[[informative[1]]]
+  else
+    ref = tps[[1]]
+  
+  if (!all(tps == ref | allna))
+    stop('all the column of the data.frame must have the same type')
+  
+  x  = as.matrix(x)
+  dx = dim(x)
+  
+  if (ref == 'factor')
+    x = factor(x)
+  
+  # Restores the dimensions lost by factor(). As in the previous
+  # implementation this runs for every type, and assigning `dim` also
+  # removes the dimnames of the result.
+  dim(x) = dx
+  
+  return(x)
+}
+
+#' DNA metabarcoding experiment description class
+#' 
+#' A S4 class describing a DNA metabarcoding experiment. It groups
+#' three data frames describing samples, motus and occurrences of 
+#' MOTUs per sample
+#'
+#'@section Slots: 
+#'  \describe{
+#'    \item{\code{reads}:}{Matrix of class \code{"numeric"},  
+#'                         containing the counts of reads per samples
+#'                         \itemize{
+#'                           \item{1 samples per line}
+#'                           \item{1 sequence per column}
+#'                         }
+#'                        }
+#'                        
+#'    \item{\code{samples}:}{Object of class \code{"data.frame"}, describing samples
+#'                         \itemize{
+#'                           \item{1 samples per line}
+#'                           \item{1 property per column}
+#'                         }
+#'                       }
+#'                       
+#'    \item{\code{motus}:}{Object of class \code{"data.frame"}, describing MOTUs (sequences)
+#'                         \itemize{
+#'                           \item{1 MOTU per line}
+#'                           \item{1 property per column}
+#'                         }
+#'                       }
+#'                       
+#'    \item{\code{layers}:}{Object of class \code{"list"}, containing a set of data layers 
+#'                          linking motus and samples. Each element of the list is a matrix
+#'                          of the same size than the \code{read} slot with
+#'                         \itemize{
+#'                           \item{1 samples per line}
+#'                           \item{1 sequence per column}
+#'                         }
+#'                       }
+#'        
+#'    \item{\code{scount}:}{Object of class \code{"integer"}, containing the count of sample}             
+#'        
+#'    \item{\code{mcount}:}{Object of class \code{"integer"}, containing the count of MOTUs}             
+#'        
+#'    \item{\code{sample.margin}:}{Vector of class \code{"numeric"},  describing the total count of 
+#'                          sequence per sample. By default this slot is set by applying sum
+#'                         to the reads data.frame lines}             
+#'
+#'    \item{\code{taxonomy}:}{Object of class \code{"taxonomy.obitools"}, linking the DNA metabarcoding
+#'                            experiment to a taxonomy}             
+#'        
+#'    \item{\code{taxid}:}{Vector of class \code{"character"}, list of MOTUs' attributes to manage as taxid}             
+#'  }
+#'
+#' @seealso \code{\link{taxonomy.obitools}},
+#' @name metabarcoding.data
+#' @rdname metabarcoding-data-class
+#' @keywords DNA metabarcoding
+#' @author Eric Coissac
+#' @exportClass metabarcoding.data
+
+setClass("metabarcoding.data",
+
+    #
+    # Slot declarations
+    #
+
+    representation(reads         = "matrix",
+                   samples       = "data.frame",
+                   motus         = "data.frame",
+                   layers        = "list",
+                   scount        = "integer",
+                   mcount        = "integer",
+                   sample.margin = "numeric",
+                   taxonomy      = "obitools.taxonomyOrNULL",
+                   taxid         = "characterOrNULL"
+    ),
+
+    #
+    # Structural check. Returns TRUE for a valid object and a character
+    # string describing the problem otherwise.
+    #
+
+    validity = function(object) {
+
+      #
+      # reads, samples and motus must have compatible sizes:
+      #
+      #   reads line count   = samples line count
+      #   reads column count = motus   line count
+      #
+      # Each condition is enforced on its own: the object is invalid as
+      # soon as ONE of them fails.
+      #
+      # The length of sample.margin is deliberately not enforced here:
+      # the `[` method builds its result with copy.metabarcoding.data()
+      # without a sample.margin argument, so the margin vector of the
+      # parent object is carried over whatever the number of selected
+      # samples.
+
+      rsize = dim(object@reads)
+      ssize = dim(object@samples)
+      msize = dim(object@motus)
+
+      if (rsize[1] != ssize[1])
+        return("reads and samples must have the same number of rows")
+
+      if (rsize[2] != msize[1])
+        return("reads must have one column per row of motus")
+
+      # without layers there is nothing left to check
+
+      if (length(object@layers)==0)
+        return(TRUE)
+
+      # otherwise every layer must have the same size as reads
+
+      badlayer = vapply(object@layers,
+                        function(l) any(dim(l)!=c(ssize[1],msize[1])),
+                        logical(1))
+
+      if (any(badlayer))
+        return("every layer must have the same dimensions as reads")
+
+      return(TRUE)
+    }
+)
+
+
+
+#
+#' metabarcoding.data constructor
+#' 
+#' @docType methods
+#' @rdname initialize-methods
+#' @aliases initialize-methods,metabarcoding.data
+setMethod("initialize",
+          "metabarcoding.data",
+          function(.Object, reads,samples,motus,
+                   taxonomy=NULL,taxid=NULL,
+                   sample.margin=NA,
+                   layers=list()) {
+
+            # Fills the slots of a new metabarcoding.data instance.
+            #
+            #   reads         matrix of read counts (samples x MOTUs); its
+            #                 dimnames are propagated to the other slots
+            #   samples       data.frame describing the samples
+            #   motus         data.frame describing the MOTUs
+            #   taxonomy      value stored in the taxonomy slot, or NULL
+            #   taxid         character vector stored in the taxid slot, or NULL
+            #   sample.margin numeric vector of per-sample totals; when left to
+            #                 its default (NA) or set to NULL it is computed
+            #                 as rowSums(reads)
+            #   layers        list of matrices laid out like reads
+            #
+            # The object is checked with validObject() before being returned.
+
+            rn = rownames(reads)
+            cn = colnames(reads)
+
+            .Object@reads <- reads
+
+            # samples and motus take their row names from the reads matrix
+            .Object@samples <- samples
+            row.names(.Object@samples) = rn
+
+            .Object@motus <- motus
+            row.names(.Object@motus) = cn
+
+            # every layer gets the dimnames of the reads matrix
+            layers = lapply(layers, function(x) {colnames(x)=cn
+                                                 rownames(x)=rn
+                                                 return(x)})
+            .Object@layers <- layers
+
+            # precompute sample count and motu count
+            .Object@scount = dim(.Object@samples)[1]
+            .Object@mcount = dim(.Object@motus)[1]
+
+            .Object@taxonomy = taxonomy
+            .Object@taxid = taxid
+
+            # The default value of the argument is a logical NA, which can
+            # neither be stored in a numeric slot nor be detected by
+            # is.null(): it is treated as "not provided", exactly like NULL.
+            if (is.null(sample.margin) || identical(sample.margin, NA))
+              .Object@sample.margin = rowSums(reads)
+            else
+              .Object@sample.margin = sample.margin
+
+            names(.Object@sample.margin) = rn
+
+            validObject(.Object)
+
+            return(.Object)
+          })
+
+
+#
+# metabarcoding.data getters
+#
+
+#' @export
+setGeneric("reads",
+           function(obj) {
+             # dispatch on the class of `obj`
+             return(standardGeneric("reads"))
+           })
+
+#' Extracts the matrix describing MOTUs abondances
+#' 
+#' Extract the the matrix describing MOTUs abondances (read counts) 
+#' from a \code{\link{metabarcoding.data}} instance.
+#' 
+#' @param   obj a \code{\link{metabarcoding.data}} instance
+#' @return  a matrix containing data about reads 
+#' 
+#' @examples
+#' # load termite data set from the ROBITools sample data
+#' data(termes)
+#' 
+#' # Extract the matrix describing MOTUs abondances
+#' d = reads(termes)
+#' 
+#' head(d)
+#' 
+#' @seealso \code{\link{metabarcoding.data}},
+#'          \code{\link{motus}}, \code{\link{samples}}
+#'          
+#' @docType methods
+#' @rdname read-methods
+#' @aliases read-methods,metabarcoding.data
+#' @author Eric Coissac
+#' 
+setMethod("reads", "metabarcoding.data",
+          function(obj) {
+            # plain accessor to the reads slot
+            obj@reads
+          })
+
+
+# get samples data.frames
+
+#' @export
+setGeneric("samples",
+           function(obj) {
+             # dispatch on the class of `obj`
+             return(standardGeneric("samples"))
+           })
+
+#' Extracts the samples description data.frame
+#' 
+#' Extract the sample description data.frame from a 
+#' \code{\link{metabarcoding.data}} instance.
+#' 
+#' @param   obj a \code{\link{metabarcoding.data}} instance
+#' @return  a data.frame containing data about sample 
+#' 
+#' @examples
+#' # load termite data set from the ROBITools sample data
+#' data(termes)
+#' 
+#' # Extract the data frame describing samples
+#' d = samples(termes)
+#' 
+#' head(d)
+#' 
+#' @seealso \code{\link{metabarcoding.data}},
+#'          \code{\link{motus}}, \code{\link{reads}}
+#'          
+#' @docType methods
+#' @rdname samples-methods
+#' @aliases samples-methods,metabarcoding.data
+#' @author Eric Coissac
+#' 
+setMethod("samples", "metabarcoding.data",
+          function(obj) {
+            # plain accessor to the samples slot
+            obj@samples
+          })
+
+
+#' @export
+setGeneric("motus",
+           function(obj) {
+             # dispatch on the class of `obj`
+             return(standardGeneric("motus"))
+           })
+
+#' Extracts the MOTU descriptions \code{data.frame}
+#' 
+#' Extract the MOTUs description \code{data.frame} from a 
+#' \code{\link{metabarcoding.data}} instance.
+#' 
+#' @param   obj a \code{\link{metabarcoding.data}} instance
+#' @return  a data.frame containing data about MOTU 
+#' 
+#' @examples
+#' # load termite data set from the ROBITools sample data
+#' data(termes)
+#' 
+#' # Extract the data.frame describing MOTUs
+#' d = motus(termes)
+#' 
+#' head(d)
+#' 
+#' @seealso \code{\link{metabarcoding.data}},
+#'          \code{\link{reads}}, \code{\link{samples}}
+#'          
+#' @docType methods
+#' @rdname motu-methods
+#' @aliases motu-methods,metabarcoding.data
+#' 
+setMethod("motus", "metabarcoding.data",
+          function(obj) {
+            # plain accessor to the motus slot
+            obj@motus
+          })
+
+
+# get sample count 
+
+setGeneric("sample.count",
+           function(obj) {
+             return(standardGeneric("sample.count"))
+           })
+
+# Returns the precomputed number of samples (slot scount).
+setMethod("sample.count", "metabarcoding.data",
+          function(obj) {
+            obj@scount
+          })
+
+# get motu count 
+
+setGeneric("motu.count",
+           function(obj) {
+             return(standardGeneric("motu.count"))
+           })
+
+# Returns the precomputed number of MOTUs (slot mcount).
+setMethod("motu.count", "metabarcoding.data",
+          function(obj) {
+            obj@mcount
+          })
+
+# dim method
+
+# Dimensions of the experiment: c(sample count, MOTU count), both taken
+# from the precomputed slots.
+setMethod("dim", "metabarcoding.data",
+          function(x) {
+            c(x@scount, x@mcount)
+          })
+
+
+setMethod('[', "metabarcoding.data", function(x,i=NULL,j=NULL,...,drop=TRUE) {
+
+  # Extracts a part of a metabarcoding.data object.
+  #
+  #   i      selection of samples (all of them when omitted)
+  #   j      selection of MOTUs (all of them when omitted)
+  #   layer  (passed through ...) name of a layer; when given, the
+  #          selection is done on that layer and its values are returned
+  #   drop   when TRUE a selection reduced to a single row or a single
+  #          column is returned as a plain numeric vector
+  #
+  # Returns a new metabarcoding.data object restricted to the selection,
+  # or a numeric vector (see `drop`), or a piece of a layer.
+
+  # seq_len() keeps the selection empty for an object without samples or
+  # without MOTUs, where 1:0 would yield c(1, 0)
+  if (!hasArg(i))
+    i = seq_len(x@scount)
+
+  if (!hasArg(j))
+    j = seq_len(x@mcount)
+
+  # special case: selection inside a layer
+  args = list(...)
+
+  if (!is.null(args$layer))
+    return(x[[args$layer]][i,j])
+
+  # normal case
+  r = x@reads[i,j,drop=FALSE]
+
+  if (sum(dim(r) > 1)==2 || ! drop)
+  {
+    # same selection on the descriptions of MOTUs and samples
+    m = x@motus[j,,drop=FALSE]
+    s = x@samples[i,,drop=FALSE]
+
+    # same selection on every layer
+    l = lapply(x@layers,function(l) l[i,j,drop=FALSE])
+
+    # The margin of each selected sample is kept. Without this, the
+    # complete margin vector of `x` would be handed to the new object and
+    # relabelled there with the names of the selected samples only.
+    sm = x@sample.margin[i]
+
+    newdata = copy.metabarcoding.data(x, reads=r, samples=s, motus=m,
+                                      sample.margin=sm, layers=l)
+  }
+  else
+  {
+    newdata = as.numeric(x@reads[i,j])
+  }
+
+  return(newdata)
+
+})
+
+setMethod('[<-', "metabarcoding.data",
+          function (x, i, j, ..., value) {
+
+            # Replaces a part of the reads matrix or, when a `layer`
+            # name is passed through ..., a part of that layer.
+            # Omitted i / j select every sample / every MOTU.
+            # Only the targeted matrix is modified by this method.
+
+            # seq_len() stays empty for an empty dimension, 1:0 does not
+            if (!hasArg(i))
+              i = seq_len(x@scount)
+
+            if (!hasArg(j))
+              j = seq_len(x@mcount)
+
+            args = list(...)
+
+            if (is.null(args$layer))
+              x@reads[i, j]=value
+            else
+              x[[args$layer]][i,j]=value
+
+            return(x)
+          })
+
+	
+
+#################################################
+#
+# User interface function to create 
+# metabarcoding.data objects
+#
+#################################################
+
+#'@export
+metabarcoding.data = function(reads,samples,motus,
+                              taxonomy=NULL,taxid=NULL,
+                              sample.margin=NULL,
+                              layers=list()) {
+
+  # User-level constructor: every argument is handed over unchanged to
+  # the initializer of the 'metabarcoding.data' S4 class.
+  return(new('metabarcoding.data',
+             reads         = reads,
+             samples       = samples,
+             motus         = motus,
+             taxonomy      = taxonomy,
+             taxid         = taxid,
+             sample.margin = sample.margin,
+             layers        = layers))
+}
+
+# Builds a new metabarcoding.data object from an existing one.
+#
+# Every argument other than `data` that is left to NULL is taken from the
+# slot of the same name in `data`; the others replace the corresponding
+# slot. Consequently a slot cannot be reset to NULL through this function.
+copy.metabarcoding.data = function(data, 
+                                   reads=NULL,
+                                   samples=NULL,motus=NULL,
+                                   taxonomy=NULL,taxid=NULL,
+                                   sample.margin=NULL,
+                                   layers=NULL) {
+
+  reads         = if (is.null(reads))         data@reads         else reads
+  samples       = if (is.null(samples))       data@samples       else samples
+  motus         = if (is.null(motus))         data@motus         else motus
+  taxonomy      = if (is.null(taxonomy))      data@taxonomy      else taxonomy
+  taxid         = if (is.null(taxid))         data@taxid         else taxid
+  sample.margin = if (is.null(sample.margin)) data@sample.margin else sample.margin
+  layers        = if (is.null(layers))        data@layers        else layers
+
+  return(new('metabarcoding.data',
+             reads         = reads,
+             samples       = samples,
+             motus         = motus,
+             taxonomy      = taxonomy,
+             taxid         = taxid,
+             sample.margin = sample.margin,
+             layers        = layers))
+}
+
+#' @export
+setGeneric('rownames')
+
+#' @export
+setMethod("rownames", "metabarcoding.data", function(x, do.NULL = TRUE, prefix = "row") {
+  # Row names of the experiment are those of the reads matrix (samples).
+  # The default prefix is "row", as in base::rownames; it is only used to
+  # build names when do.NULL is FALSE and the matrix has no row names.
+  return(rownames(x@reads, do.NULL = do.NULL, prefix = prefix))
+})
+
+#' @export
+setGeneric('colnames')
+
+#' @export
+setMethod("colnames", "metabarcoding.data",
+          function(x, do.NULL = TRUE, prefix = "col") {
+            # Column names of the experiment are those of the reads
+            # matrix (MOTUs).
+            reads.matrix = x@reads
+            return(colnames(reads.matrix, do.NULL, prefix))
+          })
--- a/ROBITools/R/ROBITools.R
+++ b/ROBITools/R/ROBITools.R
@@ -0,0 +1,33 @@
+#' A package to manipulate DNA metabarcoding data.
+#' 
+#' This package was written as a follow-up to the OBITools.
+#' 
+#' \tabular{ll}{
+#'  Package: \tab ROBITools\cr
+#'  Type: \tab Package\cr
+#'  Version: \tab 0.1\cr
+#'  Date: \tab 2013-06-27\cr
+#'  License: \tab CeCILL 2.0\cr
+#'  LazyLoad: \tab yes\cr
+#'}
+#' 
+#' @name ROBITools-package
+#' @aliases ROBITools
+#' @docType package
+#' @title  A package to manipulate DNA metabarcoding data.
+#' @author Frederic Boyer
+#' @author Aurelie Bonin
+#' @author Lucie Zinger
+#' @author Eric Coissac
+#' 
+#' @references http://metabarcoding.org/obitools
+#' 
+NA
+
+# Namespace load hook: emits the package banner as a startup message.
+.onLoad <- function(libname, pkgname) {
+  packageStartupMessage("ROBITools package")
+}
+
--- a/ROBITools/R/aggregate.R
+++ b/ROBITools/R/aggregate.R
@@ -0,0 +1,229 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+# TODO: Add comment
+# 
+# Author: coissac
+###############################################################################
+
+#' @export
+aggregate.metabarcoding.data=function(x, by, FUN,..., 
+                                      MARGIN='sample',
+                                      default.layer=NULL,
+                                      layers=NULL) {
+
+  # Aggregates a metabarcoding.data object along one of its margins.
+  #
+  #   x             the metabarcoding.data instance to aggregate
+  #   by            list of grouping elements, as for stats::aggregate
+  #   FUN           function applied to the read counts of each group
+  #   ...           extra arguments forwarded to FUN
+  #   MARGIN        'sample' (or 1) merges samples; any other value,
+  #                 typically 'motu' (or 2), merges MOTUs
+  #   default.layer aggregation function of the layers not named in
+  #                 `layers`; defaults to the internal uniq.value()
+  #   layers        list indexed by layer name giving the aggregation
+  #                 function of each layer
+  #
+  # Returns a new metabarcoding.data object built from `x` with
+  # copy.metabarcoding.data(): the reads, the layers and the description
+  # table of the aggregated margin are replaced.
+  #
+  # NOTE(review): the sample.margin slot is inherited from `x` as is, even
+  # when samples are merged; confirm what the margin of a merged sample
+  # should be.
+
+  # Value shared by all the elements of z, NA when the elements differ,
+  # when one of them is NA or when z is empty. Factor levels are kept.
+  uniq.value = function(z) {
+
+    if (is.null(z) || length(z)==0 || any(is.na(z)))
+      ans = NA
+    else if (all(z==z[1]))
+      ans = z[1]
+    else
+      ans = NA
+
+    if (is.factor(z))
+      ans = factor(ans,levels=levels(z))
+
+    return(ans)
+  }
+
+  # Aggregates one layer with function f. Factor layers are aggregated as
+  # character matrices and converted back to a factor with the original
+  # levels afterwards. `transpose` is TRUE when MOTUs (columns) are
+  # aggregated, because stats::aggregate works on rows.
+  agg.one.layer = function(layer, f, transpose) {
+
+    # computed for every layer, so that a non-factor layer never reuses
+    # the state left by a previous one
+    isfact = is.factor(layer)
+
+    if (isfact) {
+      lf = levels(layer)
+      df = dim(layer)
+      m = matrix(as.character(layer))
+      dim(m)=df
+    }
+    else
+      m = layer
+
+    if (transpose)
+      m = t(m)
+
+    aggr.args = list(m,by=by,FUN=f,simplify=FALSE)
+    lagr = do.call(aggregate,aggr.args)
+    lagr = as.factor.or.matrix(lagr[,-(1:ncat),drop=FALSE])
+
+    if (transpose)
+      lagr = t(lagr)
+
+    if (isfact) {
+      df = dim(lagr)
+      lagr = factor(lagr,levels=lf)
+      dim(lagr)=df
+    }
+
+    return(lagr)
+  }
+
+  #
+  # Deals with the supplementary aggregate arguments
+  #
+
+  if (is.null(default.layer))
+    default.layer=uniq.value
+
+  if (is.null(layers)) {
+    layers = as.list(rep(c(default.layer),length(x@layers)))
+    names(layers)=layer.names(x)
+  }
+  else {
+    # layers without an explicit function use the default one
+    for (n in layer.names(x))
+      if (is.null(layers[[n]]))
+        layers[[n]]=default.layer
+  }
+
+  if (MARGIN == 'sample')
+    MARGIN=1
+
+  if (MARGIN == 'motu')
+    MARGIN=2
+
+  reads  = x@reads
+  ncat   = length(by)
+
+  # extra arguments are spliced into the argument list so that
+  # aggregate() forwards each of them to FUN
+  dotted = list(...)
+
+  ln = layer.names(x)
+  la = vector(mode="list",length(ln))
+  names(la)=ln
+
+  if (MARGIN==1) {
+
+    # Aggregate the read table
+    aggr.args = c(list(reads,by=by,FUN=FUN),dotted,list(simplify=FALSE))
+    ragr = do.call(aggregate,aggr.args)
+
+    # extract the new ids from the aggregated table
+    ids  = as.character(interaction(ragr[,1:ncat,drop=FALSE]))
+
+    # remove the aggregation modalities to rebuild a correct
+    # numeric reads table
+    ragr = as.matrix(ragr[,-(1:ncat),drop=FALSE])
+    dragr= dim(ragr)
+    cragr= colnames(ragr)
+    ragr = as.numeric(ragr)
+    dim(ragr)=dragr
+    colnames(ragr)=cragr
+    rownames(ragr)=ids
+
+    # Apply the same aggregation to each layer
+    for (n in ln) {
+      lagr = agg.one.layer(x[[n]],layers[[n]],transpose=FALSE)
+      rownames(lagr)=ids
+      la[[n]]=lagr
+    }
+
+    # aggregate the sample table according to the same criteria
+    #
+    # TODO: We have to take special care of factors in the samples
+    #       data.frame
+
+    sagr = aggregate(samples(x),by,uniq.value,simplify=FALSE)
+
+    # move the first columns of the resulting data frame (the aggregation
+    # modalities) to the last columns of the data.frame
+    sagr = sagr[,c((ncat+1):(dim(sagr)[2]),1:ncat),drop=FALSE]
+    larg = c(lapply(sagr,unlist),list(stringsAsFactors=FALSE))
+    sagr = do.call(data.frame,larg)
+
+    # set samples ids to the ids computed from modalities
+    sagr$id=ids
+    rownames(sagr)=ids
+
+    # build the new metabarcoding data instance; the aggregated layers
+    # are passed along because the layers of `x` no longer have the
+    # right number of rows
+    newdata = copy.metabarcoding.data(x,reads=ragr,samples=sagr,layers=la)
+
+  }
+  else {
+
+    # BE CAREFUL: the reads table is transposed, standard aggregate runs
+    #             by row and we want an aggregation by column
+
+    aggr.args = c(list(t(reads),by=by,FUN=FUN),dotted,list(simplify=FALSE))
+
+    # Aggregate the read table
+    ragr = do.call(aggregate.data.frame,aggr.args)
+
+    # extract the new ids from the aggregated table
+    ids  = as.character(interaction(ragr[,1:ncat,drop=FALSE]))
+
+    # remove the aggregation modalities to rebuild a correct
+    # numeric reads table
+    ragr = t(ragr[,-(1:ncat),drop=FALSE])
+    dragr= dim(ragr)
+    rragr= rownames(ragr)
+    ragr = as.numeric(ragr)
+    dim(ragr)=dragr
+    colnames(ragr)=ids
+    rownames(ragr)=rragr
+
+    # Apply the same aggregation to each layer
+    for (n in ln) {
+      lagr = agg.one.layer(x[[n]],layers[[n]],transpose=TRUE)
+      colnames(lagr)=ids
+      la[[n]]=lagr
+    }
+
+    # aggregate the motus table according to the same criteria
+    magr = aggregate(motus(x),by,uniq.value,simplify=FALSE)
+
+    # move the first columns of the resulting data frame (the aggregation
+    # modalities) to the last columns of the data.frame
+    magr = magr[,c((ncat+1):(dim(magr)[2]),1:ncat),drop=FALSE]
+    larg = c(lapply(magr,unlist),list(stringsAsFactors=FALSE))
+    magr = do.call(data.frame,larg)
+
+    # set motus ids to the ids computed from modalities
+    magr$id=ids
+    rownames(magr)=ids
+
+    # build the new metabarcoding data instance
+    newdata = copy.metabarcoding.data(x,reads=ragr,motus=magr,layers=la)
+  }
+
+  return(newdata)
+}
+
--- a/ROBITools/R/choose.taxonomy.R
+++ b/ROBITools/R/choose.taxonomy.R
@@ -0,0 +1,107 @@
+#' @import ROBITaxonomy 
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Choose between databases for taxonomic classifications
+#' 
+#' Chooses a sequence taxonomic assignment in order of preference for the different 
+#' reference databases that have been used when the assignment is above a certain threshold
+#' 
+#' 
+#' @param x a \code{\link{metabarcoding.data}} object
+#' @param taxonomy a \code{\linkS4class{taxonomy.obitools}} instance
+#' @param dbrank string or vector indicating reference database names ranked by order of preference
+#' @param thresh a best_identity threshold for applying priority. Default is \code{0.95}
+#' 
+#' @return returns a data.frame with the refined taxonomic assignement and classic taxonomy description.
+#' 
+#' @examples
+#' 
+#' data(termes)
+#' 
+#' taxo=default.taxonomy()
+#' 
+#' #create artificial taxonomic assignments
+#' attr(termes, "motus")["best_identity:DB1"] = sample(seq(0.5,1,0.001),size=nrow(termes$motus), replace=T)
+#' attr(termes, "motus")["best_identity:DB2"] = sample(seq(0.5,1,0.001),size=nrow(termes$motus), replace=T)
+#' attr(termes, "motus")["best_identity:DB3"] = sample(seq(0.5,1,0.001),size=nrow(termes$motus), replace=T)
+#' attr(termes, "motus")["taxid_by_db:DB1"] = termes$motus$taxid
+#' attr(termes, "motus")["taxid_by_db:DB2"] = sample(termes$motus$taxid,size=nrow(termes$motus), replace=F)
+#' attr(termes, "motus")["taxid_by_db:DB3"] = sample(termes$motus$taxid,size=nrow(termes$motus), replace=F)
+#' 
+#' #Run taxo.decider
+#' termes.ok = taxo.decider(termes, taxo, "DB2", 0.95)
+#' head(termes.ok$motus[union(grep("DB",  colnames(termes.ok$motus)), grep("_ok", colnames(termes.ok$motus)))])
+#' 
+#' termes.ok = taxo.decider(termes, taxo, c("DB3", "DB1"), 0.95)
+#' head(termes.ok$motus[union(grep("DB",  colnames(termes.ok$motus)), grep("_ok", colnames(termes.ok$motus)))])
+#' 
+#' #Quick look at the enhancement in taxonomic assignements
+#' par(mfrow=c(1,4))
+#' for(i in grep("best_identity.", colnames(termes.ok$motus))){
+#' hist(termes.ok$motus[,i], breaks=20, ylim=c(1,21), main=colnames(termes.ok$motus)[i], xlab="assignment score")
+#' }
+#'     
+#' @seealso \code{\linkS4class{taxonomy.obitools}}, and methods \code{\link{species}},\code{\link{genus}}, \code{\link{family}},\code{\link{kingdom}},
+#'          \code{\link{superkingdom}},\code{\link{taxonatrank}}, \code{\link{taxonmicank}}
+#'
+#' @author Lucie Zinger
+#' @keywords taxonomy
+#' 
+#' @export
+#' 
+
+taxo.decider = function(x, taxonomy, dbrank, thresh=0.95) {
+  
+  # Adds to the motus table of x the columns db_ok, best_identity_ok and
+  # taxids_by_db_ok, followed by the columns returned by
+  # get.classic.taxonomy() for the selected taxids, and returns x.
+  
+  # columns whose name matches the regular expression "best_identity."
+  noms = colnames(x$motus)
+  best_ids_names = noms[grep("best_identity.", noms)]
+  best_ids = x$motus[,best_ids_names]
+  # the taxid columns are found by substituting the prefix in the names
+  taxids = x$motus[, gsub("best_identity", "taxid_by_db", best_ids_names)]
+  # database name = part of the column name following the ":"
+  dbs = unlist(lapply(strsplit(best_ids_names, "\\:"), "[[", 2))
+  
+  
+  #Set max indices
+  # NOTE(review): order() receives here a key of length(dbs) and a key of
+  # length(dbrank) -- confirm the behaviour when these lengths differ
+  ind = as.vector(t(apply(best_ids,1,function(y) order(rank(-y, ties.method="max"), match(dbrank, dbs))))[,1])
+  
+  #Set default vector: db, bestids, taxids with max score
+  db_ok = dbs[ind]
+  best_identity_ok = best_ids[cbind(1:length(ind), ind)]
+  taxids_by_db_ok = taxids[cbind(1:length(ind), ind)]
+  
+  #Get vector of db index that should be used according to condition > thresh
+  db_choice = taxo.decider.routine(dbrank, best_ids, dbs, thresh)
+  
+  #Replacing by right values according to db_ok
+  # rows for which db_choice equals i take the values of database dbrank[i];
+  # the columns of that database are located with grep(), i.e. dbrank[i] is
+  # used as a regular expression on the column names
+  for(i in 1:length(dbrank)){
+    db_ok[which(db_choice==i)] = dbrank[i]
+    best_identity_ok[which(db_choice==i)] = best_ids[which(db_choice==i),grep(dbrank[i], colnames(best_ids))]
+    taxids_by_db_ok[which(db_choice==i)] = taxids[which(db_choice==i),grep(dbrank[i], colnames(taxids))]
+  }
+  
+  decision = data.frame(db_ok, best_identity_ok, taxids_by_db_ok)
+  
+  # name(s) of the decision column(s) containing "taxid"
+  coltaxid = colnames(decision)[grep("taxid", colnames(decision))]
+  
+  # the motus table of x is replaced through its attribute
+  attr(x, "motus") = data.frame(x$motus, decision)
+  new.tax = get.classic.taxonomy(x, taxonomy, coltaxid)
+  
+  attr(x, "motus") = data.frame(x$motus, new.tax)
+  
+  return(x)  
+}
+
+
+# For each row of best_ids, index in dbrank of the first database whose
+# identity is above the threshold.
+#
+#   dbrank    database names, by decreasing order of preference
+#   best_ids  data.frame (or matrix) of identities, one column per database
+#   dbs       database name of each column of best_ids
+#   thresh    identity threshold (strict comparison)
+#
+# Returns one integer per row of best_ids: the position in dbrank of the
+# first database with an identity > thresh, or NA when there is none.
+taxo.decider.routine = function(dbrank, best_ids, dbs, thresh) {
+  #Setting mask 
+  mask = matrix(NA,nrow(best_ids),length(dbrank))
+  colnames(mask)=dbrank
+  #For each DB, see if condition TRUE/FALSE
+  for(i in dbrank){
+    mask[,i] = best_ids[,which(dbs==i)]>thresh
+  }
+  #Get the first occurrence of TRUE in each row; the literal TRUE is used
+  #because T is an ordinary variable that can be reassigned
+  out = apply(mask, 1, function(x) which(x==TRUE)[1])
+  return(out)
+}
+
+
--- a/ROBITools/R/contaslayer.R
+++ b/ROBITools/R/contaslayer.R
@@ -0,0 +1,49 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Detects contaminants in metabarcoding data
+#' 
+#' Detects sequences/motus in a \code{\link{metabarcoding.data}} object
+#' for which frequencies over the entire dataset are maximum in negative controls and 
+#' hence, most likely to be contaminants. 
+#' 
+#' 
+#' @param x a \code{\link{metabarcoding.data}} object
+#' @param controls a vector of samples names where conta are suspected to be detected 
+#'                 (typically negative control names).
+#' @param clust a vector for grouping sequences. Default set to \code{NULL}.
+#'
+#' @return a vector containing the names of sequences identified as contaminants
+#'
+#' @examples
+#' 
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' neg = rownames(termes.ok)[grep("r",rownames(termes.ok))]
+#' 
+#' #finds contaminants based on neg samples
+#' contaslayer(termes.ok, neg)
+#' 
+#' # extanding contamininant detection with grouping factor, 
+#' # typically obiclean/sumatra cluster or taxonomy membership
+#' contaslayer(termes.ok, neg, termes.ok$motus$scientific_name)
+#'   
+#' @seealso \code{\link{threshold}} for further trimming
+#' @author Lucie Zinger
+#' @export
+
+contaslayer = function(x,controls,clust=NULL){
+  
+  # A sequence is flagged when the sample holding its highest value in
+  # the reads table returned by normalize(x, MARGIN=2) is one of the
+  # `controls`. When `clust` is given, every sequence sharing the `clust`
+  # value of a flagged sequence is flagged too.
+  
+  x.fcol = normalize(x, MARGIN=2)$reads
+  
+  # Row index of the maximum of each column. A column without any usable
+  # value (which.max() then returns nothing) gives NA instead of breaking
+  # the result.
+  # NOTE(review): presumably the case of sequences with a null total
+  # count -- confirm against normalize().
+  imax = vapply(seq_len(ncol(x.fcol)), 
+                function(k) {
+                  w = which.max(x.fcol[,k])
+                  if (length(w) == 0) NA_integer_ else as.integer(w)
+                },
+                integer(1))
+  
+  # The names are read from the row names directly: subsetting the matrix
+  # first would drop it to a vector, and lose the names, when the table
+  # has a single column.
+  x.max = rownames(x.fcol)[imax]
+  conta = colnames(x)[!is.na(x.max) & x.max %in% controls]
+  
+  if (length(clust)!=0) {
+    agg = data.frame(conta.id=colnames(x.fcol), clust)
+    conta.clust = agg$clust[match(conta,agg$conta.id)]
+    conta.ext = agg$conta.id[which(agg$clust %in% conta.clust)]
+    return(as.vector(conta.ext))
+  } 
+  else {
+    return(conta)
+  }
+}  
--- a/ROBITools/R/distrib.extrapol.R
+++ b/ROBITools/R/distrib.extrapol.R
@@ -0,0 +1,178 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Read frequencies kriging
+#' 
+#' Extrapolates read frequencies from a \code{\link{metabarcoding.data}} object in space for a finer resolution 
+#' 
+#' @param x a vector or matrix from a row-normalized read table 
+#'          \code{\link{metabarcoding.data}} object
+#' @param min.coord a vector of length = 2 indicating the minimum values of x and y
+#'                  coordinates to be used for the predicted grid
+#' @param max.coord a vector of length = 2 indicating the maximum values of x and y
+#'                  coordinates to be used for the predicted grid
+#' @param grid.grain an integer indicating the resolution (i.e. nb of subpoints) in x and y
+#'                  coordinates required for the predicted grid
+#' @param coords a dataframe containing the x and y coordinates of the abundances 
+#'               from x to be extrapolated.
+#' @param otus.table a motus data.frame containing motus informations of x
+#' @param cutoff a cutoff below which abundances are set to the cutoff value, 
+#'               so that they can be log10 transformed. 
+#'               
+#' @param return.metabarcoding.data if \code{TRUE}, returns a \code{\link{metabarcoding.data}} object. Default is \code{FALSE}
+#'
+#' @return either a dataframe or a S3 object with a structure similar to \code{\link{metabarcoding.data}} object. 
+#'         The number of samples corresponds to the predicted points.
+#'         The two last columns (if \code{return.metabarcoding.data==F}) or sample data.frame contains x y coordinates of the predicted grid
+#'         The all but last two columns (if \code{return.metabarcoding.data==F}) or read matrix contains the predicted log10 transformed relative abundances 
+#'         instead of reads counts
+#'         If \code{return.metabarcoding.data==T} the motus data.frame contains the motus informations from x
+#'
+#' @examples
+#' 
+#' data(termes)
+#' #Create dummy spatial coordinates
+#' attr(termes, "samples")[c("x", "y")] = expand.grid(1:7,1:3)
+#' 
+#' #compute frequencies
+#' attr(termes, "layers")[["reads.freq"]] = normalize(termes, MARGIN=1)$reads
+#' 
+#' # Getting extrapolations
+#' termes.pred = extrapol.freq(attr(termes, "layers")[["reads.freq"]], min.coord=c(1,1), max.coord=c(7,3), 
+#'                             grid.grain=100,termes$samples[,c("x", "y")], termes$motus, cutoff=1e-3, return.metabarcoding.data=TRUE)
+#'
+#' head(termes.pred$reads)
+#' @seealso \code{\link{map.extrapol.freq}} as well as \code{sp} and \code{gstat} packages
+#' @author Lucie Zinger
+#' @export
+
+extrapol.freq = function(x, min.coord, max.coord, grid.grain=100, coords, otus.table, cutoff=1e-3, return.metabarcoding.data = FALSE) {
+  # gstat and sp are called through '::' below: they must be installed, attaching them is not needed
+  for (pkg in c("gstat", "sp")) if (!requireNamespace(pkg, quietly=TRUE)) stop("package '", pkg, "' is required by extrapol.freq")
+  if (is.null(dim(x))) x = matrix(x, ncol=1)  # a plain vector (single motu) is documented as valid input
+  #predicted grid setting
+  new.x = seq(min.coord[1], max.coord[1], length.out = grid.grain)
+  new.y = seq(min.coord[2], max.coord[2], length.out = grid.grain)
+  grid.p=expand.grid(new.x, new.y)
+  colnames(grid.p)=c("x", "y")
+  S=sp::SpatialPoints(grid.p); sp::gridded(S)<-TRUE  # NOTE(review): S is not used afterwards, predictions are made on grid.p
+  m=gstat::vgm(50, "Exp", 100)  # fixed exponential variogram model shared by all motus
+  
+  #kriging, one motu (column of x) at a time
+  preds = apply(x, 2, function(otu) {
+    otu[otu<cutoff] = cutoff  # avoids log10(0)
+    spj=cbind(coords,otu)
+    colnames(spj)=c("x", "y", "otu")
+    spj.g=gstat::gstat(id="Log10.freq", formula=log10(otu)~1,locations=~x+y,data=spj,model=m)
+    predict(spj.g, grid.p, quiet=TRUE)$Log10.freq.pred  # generic call: dispatches to the gstat method
+  })
+  
+  #formatting the output
+  colnames(preds) = rownames(otus.table)
+  rownames(preds) = paste("s", 1:nrow(grid.p), sep=".")
+  row.names(grid.p) = rownames(preds)
+  
+  if(!return.metabarcoding.data) {
+    out = data.frame(preds, grid.p)
+  } else{ 
+    out = metabarcoding.data(preds, grid.p, otus.table)
+  }
+  return(out)
+}
+
+
+#' Maps of kriged log10-transformed frequencies
+#' 
+#' Maps the output of extrapol.freq
+#' 
+#' 
+#' @param x an extrapol.freq output obtained with \code{return.metabarcoding.data=TRUE}
+#' @param path the path of the folder to export the map. Default is \code{NULL} and map is printed in Rplot/quartz
+#' @param col.name a vector containing the names of the columns to be used for defining the file name. Typically
+#'                  the column names containing the taxonomic information and/or sequence/motus id.
+#' @param index  an integer indicating column number of the motu/sequence to be plotted.
+#' @param cutoff  lower motu frequency accepted to consider motu abundance as different
+#'                from 0. Should be the same as the one used in extrapol.freq
+#' @param add.points a 3-column data.frame containing factor levels and associated x and y coordinates
+#'                   to be added to the map. Typically taxa observed in the field.
+#' @param adj a value used for adjusting text position in the map. Default is \code{4}
+#'
+#' @return a map/png file displaying motus distribution; the lattice plot object is returned invisibly.
+#'
+#' @examples
+#' 
+#' data(termes)
+#' attr(termes, "samples")[c("x", "y")] = expand.grid(1:7,1:3)
+#' 
+#' #compute frequencies
+#' attr(termes, "layers")[["reads.freq"]] = normalize(termes, MARGIN=1)$reads
+#' 
+#' # Getting extrapolations
+#' termes.pred = extrapol.freq(attr(termes, "layers")[["reads.freq"]], min.coord=c(1,1), max.coord=c(7,3),
+#' grid.grain=100,termes$samples[,c("x", "y")], termes$motus, cutoff=1e-3, return.metabarcoding.data=TRUE)
+#' 
+#' #mapping the distribution of the 3 most abundant sequences (caution, mfrow does not work for lattice's levelplot)
+#' map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 1, cutoff=1e-3)
+#' map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 2, cutoff=1e-3)
+#' map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 3, cutoff=1e-3)
+#' 
+#' #dummy observationnal data
+#' termes.obs = data.frame(x=c(2,3,5), y=c(2.7,2,2.6), taxa = rep("Isoptera Apicotermitinae", 3))
+#' map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 3, cutoff=1e-3, add.points=termes.obs)
+#' 
+#' @seealso \code{\link{extrapol.freq}}, and \code{levelplot} from \code{lattice} package
+#' @author Lucie Zinger
+#' @export
+
+map.extrapol.freq = function(x, path=NULL, col.name=NULL, index, cutoff=1e-3, add.points=NULL, adj=4) {
+  
+  if (!requireNamespace("lattice", quietly=TRUE)) stop("package 'lattice' is required by map.extrapol.freq")
+  
+  #output file: <path>/<col.name values>_<motu id>.png, dots being replaced by underscores
+  if(!is.null(path)) {
+    x.motus = apply(x$motus,2,as.character)
+    name = gsub("\\.", "_", paste(gsub(", ", "_", toString(x.motus[index,col.name])), x.motus[index,"id"], sep="_"))
+    file.out = paste(path, "/", name, ".png", sep="")
+  }
+  
+  #predicted log10 frequencies of the selected motu, clipped to the range of the colour scale
+  z=x$reads[,index]
+  z[abs(z)>abs(log10(cutoff))]=log10(cutoff)
+  z[z>0] = 0
+  spj=as.data.frame(cbind(x$samples,z))
+  colnames(spj)=c("x", "y", "z")
+  
+  #colour breaks, shared by the map and its key (the key is labelled in frequencies, not in log10)
+  breaks=seq(log10(cutoff),log10(1), by=0.2)
+  map.out=lattice::levelplot(z~x+y, spj, col.regions=topo.colors(100), at=breaks,
+                    colorkey=list(at=breaks, labels=list(at=breaks, labels=round(10^breaks,3))),
+                    aspect = "iso", contour=FALSE, main=list(label=x$motus[index, "id"], cex=0.7))
+  
+  #draw in a png file when path is given, on the current device otherwise
+  if(!is.null(path)) {
+    png(filename=file.out, width=800, height=800)
+    #close the device even if drawing fails, so that no png device is left open
+    on.exit(dev.off(), add=TRUE)
+  }
+  print(map.out)
+  
+  #overlay the observed points, labelled with the remaining column(s) of add.points
+  if(!is.null(add.points)) {
+    #vertical offset of the labels: adj times the y range divided by the number of distinct y values
+    n = (max(spj[,"y"])-min(spj[,"y"]))/length(unique(spj[,"y"]))*adj
+    labels = add.points[,-match(c("x", "y"), colnames(add.points))]
+    lattice::trellis.focus("panel", 1, 1, highlight=FALSE)
+    lattice::lpoints(add.points[,"x"], add.points[,"y"], cex=0.7, lwd=3, col="red")
+    #labels are drawn larger in the png file than on screen
+    lattice::ltext(add.points[,"x"], add.points[,"y"]+n, labels, col="red", cex=if(is.null(path)) 1 else 1.5)
+    lattice::trellis.unfocus()
+  }
+  
+  #the plot object is returned invisibly so that callers may reuse it
+  invisible(map.out)
+}
+
+                                 
+                                 
+                                 
+                                 
--- a/ROBITools/R/experimental.section.R
+++ b/ROBITools/R/experimental.section.R
@@ -0,0 +1,206 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#11.03.2011
+#L.Zinger
+
+#######################
+#function anosim.pw
+#######################
+#computes pairwise anosim computation
+#input:
+#dat: dissimilarity matrix
+#g: factor defining the grouping to test
+#permutations: number of permutations used to assess the significance of the anosim statistic
+#p.adjust.method: method of correction for multiple-testing
+#
+#output: a distance-like table containing:
+#in the upper triangle: the anosims R values
+#in the lower triangle: the adjusted p-values
+
+
+### start
+
+anosim.pw<-function(dat, g, permutations, p.adjust.method, ...) {
+	#pairwise anosim between all the levels of g ('...' is accepted but not used)
+	#data.transformation
+	dat<-as.matrix(dat)
+	g<-factor(g)
+	lev<-levels(g)
+	
+	#empty object for result storage
+	ano<-matrix(NA, nrow=length(lev), ncol=length(lev), dimnames=list(lev,lev))
+	#fewer than two groups: nothing to compare (1:(nlevels-1) would count down and fail)
+	if(length(lev)<2) return(ano)
+	if(!requireNamespace("vegan", quietly=TRUE)) stop("package 'vegan' is required by anosim.pw")
+	
+	#running anosims on every pair of levels (i<j), in the order (1,2), (1,3), ..., (2,3), ...
+	pairs<-utils::combn(length(lev), 2)
+	p.val.tmp<-numeric(ncol(pairs))
+	for(k in seq_len(ncol(pairs))) {
+		i<-pairs[1,k]
+		j<-pairs[2,k]
+		#samples of the two groups, group i first
+		idx<-c(which(g==lev[i]), which(g==lev[j]))
+		tmp<-vegan::anosim(as.dist(dat[idx,idx]), as.character(g[idx]), permutations)
+		ano[i,j]<-tmp$statistic
+		p.val.tmp[k]<-tmp$signif
+	}
+	
+	#p value correction for multiple comparison
+	p.val.tmp<-p.adjust(p.val.tmp, p.adjust.method )
+	
+	#put the corrected p values in the lower triangle of the anosim table
+	ano[cbind(pairs[2,], pairs[1,])]<-p.val.tmp
+	
+	return(ano)
+}
+
+### end
+
+
+
+
+#23 Nov 2012
+#L.Zinger
+###################
+#function MOTUtable
+###################
+# Generates ready-to-use MOTU tables and basic statistics on samples (i.e. sequencing depth, raw richness, and invsimpson index)
+#input:
+#x: an obitable output (samples should be indicated as e.g. "sample.A01r" in column names)
+#y: the column name by which that data are to be aggregated. Should be e.g. "cluster" or "species_name"
+#outputs:
+#x.otu: the ready-to-use MOTU table
+#x.rawstats: basic statistics on samples
+
+### start
+
+MOTUtable<-function(x, y) {
+	# Aggregates the "sample" columns of x by column y and stores, in the global environment,
+	# <x>.<y> (samples x MOTUs table) and <x>.<y>.rawstats (per-sample statistics)
+	if (!requireNamespace("vegan", quietly=TRUE)) stop("package 'vegan' is required by MOTUtable")
+	nom<-as.character(substitute(x))
+	
+	tmp<-x[,c(grep(y, colnames(x)), grep("sample", colnames(x)))]
+	agg<-aggregate(tmp[,-1], by=list(tmp[,1]), sum)
+	#transpose the counts only: t() on the whole table turns them into strings when groups are not numeric
+	x.otu<-t(as.matrix(agg[,-1]))
+	colnames(x.otu)<-paste(y,agg[,1], sep=".")
+	
+	x.rawstats<-data.frame(Nb_ind=rowSums(x.otu), Raw_richness=vegan::specnumber(x.otu, MARGIN=1), Raw_eveness=vegan::diversity(x.otu, "invsimpson", MARGIN=1) )
+	assign(paste(nom, y, sep="."),x.otu,envir = .GlobalEnv)
+	assign(paste(nom, y, "rawstats", sep="."),x.rawstats,envir = .GlobalEnv)
+}
+
+### end
+
+
+
+
+#26 Nov 2012
+#F.Boyer
+###################
+#function reads.frequency & filter.threshold
+###################
+#can be used to filter the table of reads so as to keep, for each sample, the most abundant sequences that together represent at least 95% of its reads
+#
+#e.g. reads.threshold(reads.frequency(metabarcodingS4Obj@reads), 0.95)
+
+
+filter.threshold <- function(v, threshold) {
+	# values are ranked in decreasing order; 1 + n - (number of running totals above threshold) of them are kept, the others are set to 0
+	ranked <- order(v, decreasing=TRUE)
+	n.keep <- min(length(ranked), 1+length(ranked)-length(which(cumsum(as.matrix(v[ranked]))>threshold)))
+	replace(v, -ranked[seq(n.keep)], 0)
+}
+
+reads.threshold  <- function (reads, threshold, by.sample=TRUE) {
+	res <- as.matrix(reads)  # 'res[] <-' below keeps this shape even when apply() collapses its result to a vector (1-row or 1-column table)
+	res[] <- if (by.sample) t(apply(res, 1, filter.threshold, threshold=threshold)) else apply(res, 2, filter.threshold, threshold=threshold)
+	data.frame(res)
+}
+
+reads.frequency <- function (reads, by.sample=TRUE) {
+	m <- as.matrix(reads)  # sweep() preserves the dimensions, unlike apply() on a 1-row or 1-column table
+	margin <- if (by.sample) 1 else 2  # 1: frequencies within each row, 2: within each column
+	data.frame(sweep(m, margin, apply(m, margin, sum), "/"))
+}
+
+
+#06 Jan 2013
+#F.Boyer
+###################
+#function removeOutliers
+###################
+#given a contingency table and a distance function
+#returns the list of samples that should be removed in order to have only 
+#distances below or equal to the threshold
+#never leaves a single sample: when at most one would remain, all samples are returned
+#
+#e.g. intraBad <- lapply(levels(sample.desc$sampleName), function(group) {samples<-rownames(sample.desc)[sample.desc$sampleName==group]; removeOutliers(contingencyTable[samples,], thr=0.3, distFun = function(x) vegdist(x, method='bray'))})
+
+
+
+#require(vegan)
+removeOutliers <- function(m, thr=0.3, distFun = function(x) vegan::vegdist(x, method='bray') ) {
+	# Iteratively discards one sample involved in the largest pairwise distance (the one with the
+	# highest mean distance to the remaining samples) until no distance exceeds thr.
+	# Returns the names of the discarded samples, or all row names of m when at most one sample would remain.
+	distMat <- as.matrix(distFun(m))
+	theBadGuys <- c()
+	while (max(distMat)>thr) {
+		#samples involved in the current largest distance
+		worst <- rownames(distMat)[rowSums(distMat==max(distMat))>0]
+		#among them, the first one with the largest mean distance is discarded
+		meanDist <- rowMeans(distMat[worst,,drop=FALSE])
+		badGuy <- names(meanDist)[which.max(meanDist)]
+		theBadGuys <- c(theBadGuys, badGuy)
+		
+		stillok <- rownames(distMat) != badGuy
+		distMat <- distMat[stillok, stillok, drop=FALSE]
+	}
+	
+	if (length(theBadGuys) >= (nrow(m)-1)) {
+		theBadGuys <- rownames(m)    
+	}
+	theBadGuys
+}
+
+
+#31.05.2013
+#L.Zinger
+#getAttrPerS, a function allowing to get the values of a sequence attribute per sample
+#(e.g. best_identities, etc...) the output is a list with one dataframe per sample.
+#This dataframe contains:
+#	 first column (named as attr): the attribute value for each sequence present in the sample
+#	 second column (named weight): the corresponding number of reads in the sample
+
+getAttrPerS=function(x,attr){
+	#x: a metabarcoding.data object
+	#attr: the name of the motus column whose values are needed per sample
+	#returns a list with one two-column table (attr, weight) per sample, built with cbind
+	if(!methods::is(x, "metabarcoding.data")) {
+		stop("x is not a metabarcoding S4 object")
+	}
+	if(is.character(attr)==FALSE) {
+		stop("attr is not a character object")
+	}
+	
+	x.motus = motus(x)
+	x.reads = reads(x)
+	#exact column name: a pattern search could select several columns at once
+	if(length(attr)!=1 || !attr %in% colnames(x.motus)) stop("attr must be the name of one column of the motus table")
+	
+	#one iteration per sample; lapply (unlike apply/mapply) never simplifies the
+	#result to a matrix when all samples hold the same number of sequences
+	output = lapply(seq_len(nrow(x.reads)), function(s) {
+				present = which(x.reads[s,]!=0)
+				ids = colnames(x.reads)[present]
+				y = cbind(x.motus[match(ids,x.motus$id), attr], setNames(x.reads[s,present], ids))
+				colnames(y)=c(attr,"weight")
+				return(y)
+			})
+	names(output) = rownames(x.reads)
+	return(output)
+}
+### end getAttrPerS
+
--- a/ROBITools/R/export-metabarcoding.R
+++ b/ROBITools/R/export-metabarcoding.R
@@ -0,0 +1,62 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+
+# TODO: Add comment
+# 
+# Author: coissac
+###############################################################################
+
+require(utils)
+
+expand.metabarcoding.data=function(data,minread=1) {
+	# Long-format table: one row per (sample, motu) pair with at least minread reads, made of
+	# the sample description, the read count (column "frequency") and the motu description
+	resultonesample=function(sample) {
+		mo= data@reads[sample,] >= minread
+		# drop=FALSE keeps single-column sample/motu tables as data.frames (and their column names)
+		s = data@samples[rep(sample,sum(mo)),,drop=FALSE]
+		r = as.numeric(data@reads[sample,mo])
+		m = data@motus[mo,,drop=FALSE]
+		
+		data.frame(s,frequency=r,m, 
+				stringsAsFactors =FALSE,
+				row.names = NULL)
+	}
+	# seq_len: an object without samples gives an empty result instead of iterating over c(1, 0)
+	res = lapply(seq_len(data@scount), resultonesample)
+	do.call(rbind,res)	
+}
+
+#setGeneric("utils::write.csv")
+write.csv.metabarcoding.data = function(...) {
+	# Writes the expanded table of a metabarcoding.data object (first argument) with utils::write.csv.
+	# minread=<n> (default 1) is passed to expand.metabarcoding.data; all other arguments go to write.csv.
+	Call <- match.call(expand.dots = TRUE); caller <- parent.frame()
+	.write.expanded.metabarcoding(Call, quote(utils::write.csv), caller)
+}
+
+#setGeneric("utils::write.csv2")
+write.csv2.metabarcoding.data = function(...) {
+	# Same as write.csv.metabarcoding.data, but based on utils::write.csv2.
+	Call <- match.call(expand.dots = TRUE); caller <- parent.frame()
+	.write.expanded.metabarcoding(Call, quote(utils::write.csv2), caller)
+}
+
+# Shared implementation of the two writers above (private helper).
+# Call: matched call of the public function; writer: expression naming the function to call instead;
+# caller: frame the public function was called from. Every argument is evaluated in caller,
+# so that variables local to the calling function are visible.
+.write.expanded.metabarcoding = function(Call, writer, caller) {
+	minread = 1
+	if (!is.null(Call[["minread"]])) {
+		# the matched call only holds the unevaluated expression given for minread
+		minread = eval(Call[["minread"]], caller)
+		Call = Call[names(Call)!="minread"]
+	}
+	# the expanded table is stored in the call itself, so it cannot mask a variable of the caller
+	Call[[2L]] <- expand.metabarcoding.data(eval(Call[[2L]], caller), minread)
+	# same call, redirected to the requested writer
+	Call[[1L]] <- writer
+	eval(Call, caller)
+}
--- a/ROBITools/R/import.metabarcoding.R
+++ b/ROBITools/R/import.metabarcoding.R
@@ -0,0 +1,106 @@
+#' @include read.obitab.R
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Read a data file produced by the \code{obitab} command 
+#' 
+#' Read a data file issued from the conversion of a \strong{fasta} 
+#' file to a tabular file by the \code{obitab} command of the 
+#' \strong{OBITools} package 
+#' 
+#' @param file a string containing the file name of the obitab file.
+#' @param sep  Column separator in the obitab file. 
+#'             The default separator is the tabulation.
+#' @param sample A regular expression allowing to identify columns 
+#'               from the file describing abundances of sequences per sample
+#' @param sample.sep Separator between combined sample name.
+#' @param attribute Separator used to split between sample 'tag' and sample name.
+#' 
+#' @return a \code{\link{metabarcoding.data}} instance
+#' 
+#' @examples
+#' require(ROBITools)
+#' 
+#' \dontshow{# switch the working directory to the data package directory}
+#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
+#' 
+#' # read the termes.tab file
+#' termes=import.metabarcoding.data('termes.tab')
+#' 
+#' # print the number of samples and motus described in the file
+#' dim(termes)
+#'   
+#' @seealso \code{\link{metabarcoding.data}}
+#'
+#' @author Eric Coissac
+#' @keywords DNA metabarcoding
+#' @export
+#' 
+import.metabarcoding.data = function(file,sep='\t',sample="sample",sample.sep="\\.",attribute=":") {
+	# Builds a metabarcoding.data object (reads, sample table, motus table) from an obitab file
+	data=read.obitab(file,sep=sep)
+	
+	# get the colnames matching the sample pattern: <sample> preceded by the start of the
+	# name or by sample.sep, and followed by sample.sep or attribute
+	column=colnames(data)
+	pat = paste('(^|',sample.sep,')',sample,'[',sample.sep,attribute,']',sep='')
+	scol= grep(pat,column)
+	
+	# read counts: the columns selected above, whose names are split on attribute
+	
+	reads  = data[,scol]
+	names  = colnames(reads)
+	names  = strsplit(names,split=attribute)
+	
+			# for sample name just remove the first part of the col names
+			# usually "sample:"
+	
+	sample.names = sapply(names,function(a) paste(a[-1],collapse=attribute))	
+	
+	reads=t(reads)
+	rownames(reads)=sample.names
+	
+	# sample's data: one row per sample, one column per attribute-separated field of its name
+	
+	sample.data = data.frame(t(data.frame(strsplit(sample.names,split=attribute))))
+	rownames(sample.data)=sample.names
+	colnames(sample.data)=strsplit(names[[1]][1],split=attribute)
+	
+	
+	# motus information: every column that is not a read count, indexed by the id column
+
+	motus = data[,-scol]
+	
+	motus.id = motus$id
+	
+	rownames(motus)=motus.id
+	colnames(reads)=motus.id
+	
+	
+	return(metabarcoding.data(reads,sample.data,motus))
+	
+}
+
+
+#pcr = gh[,grep('^sample',colnames(gh))]
+#pcr.names = colnames(pcr)
+#pcr.names = sub('sample\\.','',pcr.names)
+#sequencer = rep('Solexa',length(pcr.names))
+#sequencer[grep('454',pcr.names)]='454'
+#sequencer=factor(sequencer)
+#
+#tmp = strsplit(pcr.names,'\\.[A-Z](sol|454)\\.')
+#
+#sample = sapply(tmp,function(x) x[1])
+#locality = factor(sapply(strsplit(sample,'_'),function(x) x[1]))
+#sample = factor(sample)
+#repeats= factor(sapply(tmp,function(x) x[2]))
+#
+#tmp = regexpr('[A-Z](454|sol)',pcr.names)
+#run=factor(substr(pcr.names,tmp,tmp+attr(tmp,"match.length")-1))
+#
+#pcr.metadata = data.frame(run,sequencer,locality,sample,repeats)
+#
+#rownames(pcr.metadata)=pcr.names
+
+
--- a/ROBITools/R/import.ngsfilter.R
+++ b/ROBITools/R/import.ngsfilter.R
@@ -0,0 +1,79 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Read ngsfilter text file
+#' 
+#' Reads  the text file used for assigning reads to samples with the
+#'  \code{ngsfilter} command of the \strong{OBITools} package. 
+#' 
+#' @param file a string containing the file name for the \code{ngsfilter} command.
+#' @param platewell a string corresponding to the tag used for storing the sample location
+#'                  in the PCR plate. Should be of the form "nbPlate_Well" (e.g. "01_A02").
+#'                  Default is \code{NULL}
+#' @return \code{\link{import.ngsfilter.data}} returns a \code{\link{data.frame}} instance
+#' 
+#' @examples
+#' \dontshow{# switch the working directory to the data package directory}
+#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
+#' 
+#' data(termes)
+#' 
+#' # reading the termes_ngsfilt.txt file
+#' termes.ngs=import.ngsfilter.data('termes_ngsfilt.txt', platewell="position")
+#' 
+#' # including ngsfilter data into termes data
+#' attr(termes, "samples") = termes.ngs[rownames(termes),]
+#' 
+#' colnames(termes$samples)
+#'   
+#' @seealso \code{\link{import.metabarcoding.data}} and \code{\link{read.obitab}} for other methods of data importation 
+#'
+#' @author Lucie Zinger
+#' @keywords DNA metabarcoding
+#' @export
+#' 
+import.ngsfilter.data = function(file, platewell=NULL) {
+  raw = read.table(file, sep="\t")
+  
+  #get samples names
+  names = raw[,2]
+  
+  #form first part of the output table (default ngsfilter text input)
+  out = raw[,-c(2,3,ncol(raw))]
+  colnames(out) = c("Experiment", "primerF", "primerR")
+
+  #add tags
+  out[,c("tagF", "tagR")] = do.call("rbind", strsplit(as.vector(raw[,3]), "\\:"))
+  
+  #additional information: last column, "F @ key1=value1; key2=value2;", split into key/value pairs
+  form = lapply(strsplit(gsub("^F @ ","", raw[, ncol(raw)]), "; "), strsplit, "=")
+  names.add = unique(unlist(lapply(form, function(y) lapply(y, "[[", 1))))
+  
+  #form table of additional info: one column per key found in the file, NA when a sample lacks
+  #the key (sizing the rows by the largest per-sample count fails when samples use different keys)
+  additionnals = as.data.frame(do.call("rbind", lapply(form, function(y) {
+    val = rep(NA, length(names.add))
+    names(val) = names.add
+    val[match(unlist(lapply(y, "[[", 1)), names(val))] = gsub(";", "",unlist(lapply(y, "[[", 2)))
+    val
+  })))
+  
+  #create PCR plate coordinates
+  if(!is.null(platewell)) {
+    form = strsplit(as.vector(additionnals[, platewell]), "_")
+    nbPlate = as.numeric(gsub("^0", "", unlist(lapply(form, "[[", 1))))
+    wellPlate = unlist(lapply(form, "[[", 2))
+    xPlate = as.numeric(gsub("[A-Z]", "", wellPlate))
+    #row letter -> row number (A=1, B=2, ...), whichever letters are present in the file
+    yPlate = match(gsub("[0-9]*", "", wellPlate), LETTERS) + 8*nbPlate
+    additionnals = additionnals[, setdiff(colnames(additionnals), platewell), drop=FALSE]
+    out = data.frame(out, additionnals, nbPlate, wellPlate, xPlate, yPlate)
+  }
+  else {
+    #values were already stripped of ";" when the table was formed
+    out = data.frame(out, additionnals)
+  }
+
+  rownames(out) = names
+  return(out)
+}
--- a/ROBITools/R/layers.metabarcoding.R
+++ b/ROBITools/R/layers.metabarcoding.R
@@ -0,0 +1,119 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#
+#
+# Management of layers
+#
+# A layer is a matrix or a factor with the same dimensions
+# as the read matrix
+#
+
+# get the names of the layers
+
+#' @export
+setGeneric("layer.names", function(obj) {  # S4 generic; methods are selected on the class of obj
+  return(standardGeneric("layer.names"))
+})
+
+#' Returns the names of all the layers 
+#' 
+#' \code{layer.names} extracts the names of all the layers
+#' attached to a \code{\link{metabarcoding.data}} instance.
+#' 
+#' @param   obj a \code{\link{metabarcoding.data}} instance
+#' @return  a vector of type \code{character} containing the
+#'          list of all the layer names.
+#' 
+#' @docType methods
+#' @rdname layer.names-methods
+#' @aliases layer.names-methods,metabarcoding.data
+#' 
+setMethod("layer.names", "metabarcoding.data", function(obj) {
+  return(names(obj@layers))  # names of the entries stored in the layers slot
+})
+
+
+#' Returns a layer associated to a \code{\link{metabarcoding.data}}
+#' 
+#' [[ operator Extracts a layer
+#' attached to a \code{\link{metabarcoding.data}} instance.
+#' 
+#' @usage \method{[[}{metabarcoding.data}(x,i)
+#' @param   x a \code{\link{metabarcoding.data}} instance
+#' @param   i the name of the element to extract: "reads", "samples", "motus" or the name of a layer
+#' @return  the reads, samples or motus slot, or the requested layer (a matrix or a factor).
+#' 
+#' @docType methods
+#' @rdname double-open-brace-methods
+#' @aliases double-open-brace-methods,metabarcoding.data
+#' @method [[
+#' @export
+#' 
+setMethod("[[", "metabarcoding.data", 
+          function(x, i, j, ...) {
+            
+            if (! is.character(i))
+              stop('Just named index must be used')  
+            
+            # the three core slots are reachable under reserved names
+            if (i=="reads")
+              return(x@reads)
+            
+            if (i=="samples")
+              return(x@samples)
+            
+            if (i=="motus")
+              return(x@motus)
+            
+            # any other name designates a layer;
+            # exact=TRUE disables partial matching of layer names
+            return(x@layers[[i,exact=TRUE]])
+          })
+
+#' @method $
+#' @export
+setMethod("$", "metabarcoding.data", 
+          function(x, name) {
+            x[[name]]  # x$name is the same lookup as x[["name"]]
+          })
+
+
+# set one data layer
+
+#' @method [[<-
+#' @export
+setMethod("[[<-","metabarcoding.data", 
+          function(x, i, j, ...,value) {
+            
+            # a layer must have the dimensions recorded in the scount and mcount slots
+            expected.dim <- c(x@scount,x@mcount)
+            if (any(dim(value)!=expected.dim))
+              stop("data dimmension are not coherent with this metabarcoding.data")
+            
+            if (hasArg('j')) stop('Just one dimension must be specified')
+            
+            if (! is.character(i)) stop('Just named index must be used')
+            
+            # reads, motus and samples are reserved names
+            if (i=='reads') stop('you cannot change the reads layer by this way')
+            
+            if (i=='motus' | i=='samples') stop('layers cannot be names motus or samples')
+            
+            # the stored layer carries the row and column names of the read matrix
+            layer <- as.factor.or.matrix(value)
+            rownames(layer) <- rownames(x@reads)
+            colnames(layer) <- colnames(x@reads)
+            x@layers[[i]] <- layer
+            
+            x
+          })
+
+#' @method $<-
+#' @export
+setMethod("$<-","metabarcoding.data", 
+          function(x, name, value) {
+            # x$name <- value is handled exactly like x[["name"]] <- value
+            x[[name]] <- value
+            x
+          })
--- a/ROBITools/R/metabarcoding_threshold.R
+++ b/ROBITools/R/metabarcoding_threshold.R
@@ -0,0 +1,378 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+
+#' @export
+setGeneric("marginalsum", function(data,MARGIN="sample", na.rm = FALSE) { # S4 generic: the body only hands the call over to method dispatch
+			return(standardGeneric("marginalsum"))
+		})
+
+
+#' Computes marginal sums over read counts.
+#' 
+#' Method \code{marginalsum} computes marginal sums over read counts of
+#' a \code{\link{metabarcoding.data}} instance.
+#' 
+#' @param data The \code{\linkS4class{metabarcoding.data}} instance
+#'             on which marginal sums have to be computed.
+#' @param MARGIN Indicates if the sums have to be computed across 
+#'               samples or motus. 
+#'               Allowed values are :
+#'               \itemize{
+#'                 \item{'sample' or 1} for computing sum across samples
+#'                 \item{'motu' or 2} for computing sum across motus
+#'                 }
+#' @param na.rm  Logical. Should missing values be omitted from the 
+#'               calculations?
+#'               
+#' @return Returns the vector of marginal sums as a \code{numeric} vector
+#' 
+#' @examples
+#' # load termite data set from the ROBITools sample data
+#' data(termes)
+#' 
+#' # Computes marginal sums per sample
+#' ssum = marginalsum(termes,MARGIN="sample")
+#' 
+#' # Computes marginal sums per MOTU
+#' msum = marginalsum(termes,MARGIN="motu")
+#' 
+#' @seealso \code{\linkS4class{metabarcoding.data}}
+#'
+#' @docType methods
+#' @rdname marginalsum-methods
+#' @aliases marginalsum-methods,metabarcoding.data
+#' @author Aurelie Bonin
+#' 
+setMethod("marginalsum", "metabarcoding.data", function(data,MARGIN='sample', na.rm = FALSE) {
+  # Sums the read counts per sample (row sums) or per MOTU (column sums).
+
+  # symbolic margin names are translated into numeric codes
+  if (MARGIN == 'sample') {
+    MARGIN <- 1
+  }
+  if (MARGIN == 'motu') {
+    MARGIN <- 2
+  }
+
+  counts <- reads(data)
+
+  # code 1 sums along rows, anything else sums along columns
+  if (MARGIN == 1) {
+    return(rowSums(counts, na.rm=na.rm))
+  }
+  colSums(counts, na.rm=na.rm)
+})
+
+# Row sums of the read matrix stored in the `reads` slot of a
+# metabarcoding.data object; `na.rm` and `dims` are forwarded unchanged
+# to the default rowSums().
+rowSums.metabarcoding.data = function (x, na.rm = FALSE, dims = 1L) {
+  rowSums(x@reads, na.rm = na.rm, dims = dims)
+}
+
+#' @export
+setGeneric("normalize", function(data,MARGIN='sample',as.matrix=FALSE) { # S4 generic: the body only hands the call over to method dispatch
+			return(standardGeneric("normalize"))
+		})
+
+
+#' Normalizes read counts by sample or by MOTU.
+#' 
+#' Method \code{normalize} computes a normalized read aboundancy matrix
+#' (relative frequency matrix) of a \code{\link{metabarcoding.data}} instance.
+#' Normalization can be done according aboundancies per sample or per MOTU.
+#' 
+#' @param data The \code{\linkS4class{metabarcoding.data}} instance
+#'             on normalisation have to be computed.
+#' @param MARGIN Indicates if the sums have to be computed across 
+#'               samples or motus. 
+#'               Allowed values are :
+#'               \itemize{
+#'                 \item{'sample' or 1} for computing sum across samples
+#'                 \item{'motu' or 2} for computing sum across motus
+#'                 }
+#' @param as.matrix Logical indicating if the normalized aboundancies
+#'               must be returned as a simple \code{matrix} (TRUE) or as a new
+#'               instance of the \code{\linkS4class{metabarcoding.data}} class
+#'               (FALSE, the default case).
+#'               
+#' @return Returns a new instance of \code{\linkS4class{metabarcoding.data}}
+#'         or a \code{numeric} matrix according to the \code{as.matrix}
+#'         parameter.
+#' 
+#' @examples
+#' # load termite data set from the ROBITools sample data
+#' data(termes)
+#' 
+#' # Computes normalized aboundancies per sample
+#' termes.norm = normalize(termes,MARGIN="sample")
+#' 
+#' # Computes normalized aboundancies per sample and
+#' # stores the result as a new layer into the thermes
+#' # structure
+#' termes$normalized = normalize(termes,MARGIN="sample",as.matrix=TRUE)
+#' 
+#' @seealso \code{\linkS4class{metabarcoding.data}}
+#'
+#' @docType methods
+#' @rdname normalize-methods
+#' @aliases normalize-methods,metabarcoding.data
+#' @author Aurelie Bonin
+#' 
+setMethod("normalize", "metabarcoding.data", function(data,MARGIN="sample",as.matrix=FALSE) {
+  # Divides every read count by the total of its sample (MARGIN 1) or
+  # of its MOTU (MARGIN 2).
+
+  # symbolic margin names are translated into numeric codes
+  if (MARGIN == 'sample') {
+    MARGIN <- 1
+  }
+  if (MARGIN == 'motu') {
+    MARGIN <- 2
+  }
+
+  counts <- reads(data)
+  # totals are computed ignoring missing values
+  totals <- marginalsum(data,MARGIN,na.rm=TRUE)
+
+  frequencies <- sweep(counts,MARGIN,totals, FUN="/")
+
+  # `as.matrix` is the logical argument here, not the base function
+  if (as.matrix) {
+    return(frequencies)
+  }
+  copy.metabarcoding.data(data,reads=frequencies)
+})
+
+#' @export
+setGeneric("threshold", function(data,MARGIN="sample",threshold=0.97) { # S4 generic: the body only hands the call over to method dispatch
+			return(standardGeneric("threshold"))
+		})
+
+#' Compute the cumulative threshold of read aboundances.
+#' 
+#' The method \code{threshold} of the class \code{\linkS4class{metabarcoding.data}}
+#' computes the threshold to be used for conserving just a part of the global
+#' signal. This threshold is computed by ranking abundances in decreasing order.
+#' The cumulative sums of these ranked abundances are computed and the abundance
+#' corresponding to the first sum greater than the threshold is returned as result.
+#' 
+#' @param data The \code{\linkS4class{metabarcoding.data}} instance
+#'             on normalisation have to be computed.
+#' @param MARGIN Indicates if the sums have to be computed across 
+#'               samples or motus. 
+#'               Allowed values are :
+#'               \itemize{
+#'                 \item{'sample' or 1} for computing sum across samples
+#'                 \item{'motu' or 2} for computing sum across motus
+#'                 }
+#' @param threshold a numeric value between 0 and 1 indicating which part of 
+#'                  the signal must be conserved. Default value is setup to
+#'                  0.97 (97% of the total signal).
+#'                  
+#' @return a numeric vector containing the limit aboundancy to consider for
+#'         each sample or each MOTU according to the value of the \code{MARGIN} 
+#'         parameter.
+#'                  
+#' @examples
+#' # load termite data set from the ROBITools sample data
+#' data(termes)
+#' 
+#' # computes threshold value to used for keep 95% of 
+#' # the reads per MOTU
+#' 
+#' t = threshold(termes,MARGIN='motu',threshold=0.95)
+#'  
+#' @seealso \code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}
+#'
+#' @docType methods
+#' @rdname threshold-methods
+#' @aliases threshold-methods,metabarcoding.data
+#' @author Aurelie Bonin
+#'
+setMethod("threshold", "metabarcoding.data", function(data,MARGIN="sample",threshold=0.97) {
+  # For each sample (MARGIN 1) or MOTU (MARGIN 2), returns the abundance
+  # at which the cumulated share of the ranked abundances first exceeds
+  # `threshold`.
+
+  # Limit abundance for a single row or column of the read matrix.
+  limit.of <- function(abundances,threshold) {
+    ranked <- abundances[order(-abundances)]
+    cumulated <- cumsum(ranked)
+    total <- cumulated[length(cumulated)]
+
+    # nothing observed at all: the limit is zero
+    if (!(total > 0)) {
+      return(0)
+    }
+
+    # first ranked abundance whose cumulated share is above the threshold
+    # (NA when no share exceeds it)
+    above <- (cumulated / total) > threshold
+    ranked[above][1]
+  }
+
+  # symbolic margin names are translated into numeric codes
+  if (MARGIN == 'sample') {
+    MARGIN <- 1
+  }
+  if (MARGIN == 'motu') {
+    MARGIN <- 2
+  }
+
+  counts <- reads(data)
+
+  apply(counts,MARGIN,limit.of,threshold)
+})
+
+#' @export
+setGeneric("threshold.mask", function(data,MARGIN,threshold=0.97,operator='<') { # S4 generic: the body only hands the call over to method dispatch
+			return(standardGeneric("threshold.mask"))
+		})
+
+#' Computes a cumulatif thresold mask for filtering read aboundancies.
+#' 
+#' The method \code{threshold.mask} of the class \code{\linkS4class{metabarcoding.data}}
+#' computes a logical matrix of the same size than the read matrix of the data parameter.
+#' Each cell of this matrix contains a \code{TRUE} or a \code{FALSE} value according to the
+#' relationship existing between the read abundance and the corresponding threshold as computed
+#' by the \code{\link{threshold}} method.
+#' 
+#' (computed value) = (read aboundancy) operator (threshold value)
+#' 
+#' for a cell in the result matrix, \code{(read aboundancy)} is extracted from the read layer.
+#' \code{operator} is a comparison operator and \code{(threshold value)} is estimated with the
+#' \code{\link{threshold}} method.
+#' 
+#' @param data The \code{\linkS4class{metabarcoding.data}} instance
+#'             on normalisation have to be computed.
+#' @param MARGIN Indicates if the sums have to be computed across 
+#'               samples or motus. 
+#'               Allowed values are :
+#'               \itemize{
+#'                 \item{'sample' or 1} for computing sum across samples
+#'                 \item{'motu' or 2} for computing sum across motus
+#'                 }
+#' @param threshold a numeric value between 0 and 1 indicating which part of 
+#'                  the signal must be conserved. Default value is setup to
+#'                  0.97 (97% of the total signal).
+#' @param operator is a logical comparison operator.
+#' 
+#' @return A logical matrix usable for selecting cell in the read aboundancy matrix.
+#'                   
+#' @seealso \code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}, \code{\link{threshold}}
+#'
+#' @docType methods
+#' @rdname threshold-mask-methods
+#' @aliases threshold.mask-methods,metabarcoding.data
+#' @author Aurelie Bonin
+#' 
+setMethod("threshold.mask", "metabarcoding.data", function(data,MARGIN,threshold=0.97,operator='<') {
+  # Compares every read count with the limit computed by threshold()
+  # for its sample (MARGIN 1) or its MOTU (MARGIN 2).
+
+  # symbolic margin names are translated into numeric codes
+  if (MARGIN == 'sample') {
+    MARGIN <- 1
+  }
+  if (MARGIN == 'motu') {
+    MARGIN <- 2
+  }
+
+  counts <- reads(data)
+
+  # `threshold` called as a function is the generic, the argument of the
+  # same name is the numeric level passed on to it
+  limits <- threshold(data,MARGIN,threshold)
+
+  # there is one limit per element of MARGIN, so the matrix is walked
+  # along the other dimension: each slice then lines up with `limits`
+  mask <- apply(counts,c(2,1)[MARGIN],operator,limits)
+
+  # walking along rows stacks the results as columns: flip them back
+  if (MARGIN==2) {
+    return(t(mask))
+  }
+  mask
+})
+
+
+#' @export
+setGeneric("const.threshold.mask", function(data,MARGIN,threshold=0.01,operator='<') { # S4 generic: the body only hands the call over to method dispatch
+			return(standardGeneric("const.threshold.mask"))
+		})
+
+#' Computes a constant thresold mask for filtering read aboundancies.
+#' 
+#' The method \code{const.threshold.mask} of the class \code{\linkS4class{metabarcoding.data}}
+#' computes a logical matrix of the same size than the read matrix of the data parameter.
+#' Each cell of this matrix contains a \code{TRUE} or a \code{FALSE} value according to the
+#' relationship existing between the read abondancy and the global theshold.
+#' 
+#' (computed value) = (normalized read aboundancy) operator (threshold value)
+#' 
+#' for a cell in the result matrix, \code{(normalized read aboundancy)} is extracted from the read layer
+#' after normalization.
+#' \code{operator} is a comparison operator and \code{(threshold value)} is the constant
+#' given by the \code{threshold} argument.
+#' 
+#' @param data The \code{\linkS4class{metabarcoding.data}} instance
+#'             on normalisation have to be computed.
+#' @param MARGIN Indicates if the sums have to be computed across 
+#'               samples or motus. 
+#'               Allowed values are :
+#'               \itemize{
+#'                 \item{'sample' or 1} for computing sum across samples
+#'                 \item{'motu' or 2} for computing sum across motus
+#'                 }
+#' @param threshold a numeric value between 0 and 1 indicating which part of 
+#'                  the signal must be conserved. Default value is setup to
+#'                  0.01 (1% of the normalized signal).
+#' @param operator is a logical comparison operator.
+#' 
+#' @return A logical matrix usable for selecting cell in the read aboundancy matrix.
+#'                   
+#' @seealso \code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}, \code{\link{normalize}}
+#'
+#' @docType methods
+#' @rdname const-threshold-mask-methods
+#' @aliases const.threshold.mask-methods,metabarcoding.data
+#' @author Aurelie Bonin
+#' 
+setMethod("const.threshold.mask", "metabarcoding.data", function(data,MARGIN,threshold=0.01,operator='<') {
+  # Compares the normalized read abundances with one constant threshold.
+
+  # symbolic margin names are translated into numeric codes
+  if (MARGIN == 'sample') {
+    MARGIN <- 1
+  }
+  if (MARGIN == 'motu') {
+    MARGIN <- 2
+  }
+
+  # relative abundances as a plain matrix
+  frequencies <- normalize(data,MARGIN,as.matrix=TRUE)
+
+  # apply the comparison operator to the whole matrix at once
+  do.call(operator,list(frequencies,threshold))
+})
+
+#' @export
+setGeneric("threshold.set", function(data, # S4 generic: the body only hands the call over to method dispatch
+				MARGIN,
+				threshold=0.97,
+				operator='<',
+				value=0,
+				normalize=TRUE,
+				mask.fun=threshold.mask) {
+			return(standardGeneric("threshold.set"))
+		})
+
+
+setMethod("threshold.set", "metabarcoding.data", function(data,
+		MARGIN,
+		threshold=0.97,
+		operator='<',
+		value=0,
+		normalize=TRUE,
+		mask.fun=threshold.mask) {
+  # Replaces by `value` every read count selected by the mask that
+  # `mask.fun` computes, and returns the result as a new object.
+
+  # symbolic margin names are translated into numeric codes
+  if (MARGIN == 'sample') {
+    MARGIN <- 1
+  }
+  if (MARGIN == 'motu') {
+    MARGIN <- 2
+  }
+
+  # raw counts are captured before any normalization: they are the
+  # values that end up in the returned object
+  counts <- reads(data)
+
+  # `normalize` in the condition is the logical argument; called as a
+  # function it is the generic.
+  # NOTE(review): normalization runs on the margin opposite to MARGIN;
+  # confirm this is intended.
+  if (normalize) {
+    data <- normalize(data,c(2,1)[MARGIN])
+  }
+
+  selected <- mask.fun(data,MARGIN,threshold,operator)
+  counts[selected] <- value
+
+  copy.metabarcoding.data(data,reads=counts)
+})
--- a/ROBITools/R/mstat.R
+++ b/ROBITools/R/mstat.R
@@ -0,0 +1,407 @@
+#' @include 02_class_metabarcoding.data.R
+#' @import igraph
+NULL
+
+require(igraph)
+
+# pos = expand.grid(x,y)
+
+#' Computes the pairwise distance matrix as a data.frame where
+#' 
+#' @param x a vector for the X coordinates
+#' @param y a vector for the Y coordinates
+#' @param labels a vector with the sample names
+#' 
+#' @return a data.frame instance of three columns
+#'    - a : The label of the first sample
+#'    - b : The label of the second sample
+#'    - dist : The euclidian distance beween sample a and b
+#' 
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' 
+#' @export
+dist.grid = function(x,y,labels=NULL){
+  # Builds the long-format table of pairwise euclidean distances between
+  # the points (x, y): one row per unordered pair, in the order used by
+  # dist() (1-2, 1-3, ..., 2-3, ...).
+  pos = data.frame(x,y)
+  
+  # default labels are derived from the coordinates themselves
+  if (is.null(labels))
+    labels = as.character(interaction(pos))
+  else
+    labels = as.character(labels)
+
+  llabels=length(labels)
+  dpos=dist(pos)
+
+  if (llabels < 2) {
+    # fewer than two points: there is no pair to describe, and the
+    # descending sequences below would run backwards
+    a = character(0)
+    b = character(0)
+  }
+  else {
+    # first member of each pair: label k is repeated once for every
+    # label that follows it
+    a = rep(labels[seq_len(llabels-1)],(llabels-1):1)
+    # second member: for each k, all the labels that follow it
+    b = unlist(lapply(2:llabels, function(i) labels[i:llabels]),
+               use.names=FALSE)
+  }
+
+  return(data.frame(a,b,dist=as.vector(dpos)))
+}
+
+#' Builds the list of sample groups included in a circle around a central sample
+#' 
+#' @param dtable a distance table between samples as 
+#'               computed by \code{\link{dist.grid}}
+#' @param radius the radius of the circle
+#' @param center a \code{logical} value indicating if the center of
+#'               the group must be included in the group
+#'               
+#' @return a list of vectors containing the labels of the group members 
+#'          
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' 
+#' @export
+dist.center.group=function(dtable,radius,center=TRUE) {
+  # For every sample label found in the distance table, collects the
+  # labels of all samples paired with it at a distance <= radius.
+  # The result is always a list named by the central sample.
+  
+  # members of the group centred on one label: both ends of every
+  # retained pair the label takes part in
+  fgroup = function(label) {
+    ig = dtable[(dtable[,1]==label | dtable[,2]==label) & dtable[,3] <= radius,]
+    return(union(ig[,1],ig[,2]))
+  }
+  
+  pos = as.character(union(dtable[,1],dtable[,2]))
+  
+  g = lapply(pos,fgroup)
+  names(g) = pos
+  
+  # Map() never simplifies its result, so a list is returned even when
+  # all the groups happen to have the same size
+  if (!center)
+    g = Map(setdiff,g,pos)
+  
+  return(g)
+  
+}
+
+#' Builds the list of sample groups including samples closest than a define distance
+#' 
+#' A graph is build by applying the threshold \code{dmax} to the distance matrix
+#' A group is a clique max in this graph. Consequently all member pairs of a group
+#' are distant by less or equal to \code{dmax}.
+#' 
+#' @param dtable a distance table between samples as 
+#'               computed by \code{\link{dist.grid}}
+#' @param dmax the maximum distance between two samples
+#'               
+#' @return a list of vectors containing the labels of the group members 
+#'          
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.clique.group(d,20)
+#' 
+#' @export
+dist.clique.group=function(dtable,dmax,center=TRUE) {
+  # Keeps the pairs of the distance table that are at most `dmax` apart,
+  # builds the undirected graph having these pairs as edges and returns
+  # its maximal cliques as a list of label vectors.
+  # `center` is never read by this function.
+  gp = igraph::graph.edgelist(as.matrix(dtable[dtable$dist <= dmax,c('a','b')]),directed=FALSE)
+  g  = igraph::maximal.cliques(gp)
+  # cliques are vertex indices: translate them back to sample labels
+  return(lapply(g, function(i) igraph::V(gp)$name[i]))
+}
+
+#' Computes the univariate M statistics 
+#' 
+#' @param w the weigth matrix indicating the presence probability of each motu
+#'          in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names.  It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
+#'          
+#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
+#'               function
+#'
+#' @seealso \code{\link{dist.center.group}}
+#' @seealso \code{\link{m.weight}}
+#' 
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' w = m.weight(termes.ok)
+#' m = m.univariate(w,groups)
+#' 
+#' @references Marcon, E., Puech, F., and Traissac, S. (2012). 
+#'             Characterizing the relative spatial structure of point patterns. 
+#'             International Journal of Ecology, 2012.
+#'             
+#' @export
+m.univariate = function(w,groups) {
+  # Computes one M value per column (MOTU) of the weight matrix `w`.
+  # `groups` must be a list named by the central sample of each group;
+  # each element holds the row names of `w` belonging to that group.
+  
+  # Numerator contribution of one group; `center` is the weight row of
+  # the central sample.
+  nunivar = function(members,center) {
+    # drop=FALSE: a group reduced to a single sample must stay a matrix,
+    # otherwise colSums() fails on the resulting plain vector
+    g = w[members,,drop=FALSE]
+    
+    wn = colSums(g)
+    wa = sum(wn)
+ 
+    # the central sample itself is left out of its neighbourhood
+    wn = wn - center
+    wa = wa - center
+    
+    p = wn / wa * center
+        
+    return(p)
+  }
+
+  centers = lapply(names(groups),function(x) w[x,])
+  
+  Wf = colSums(w)
+  Wa = sum(Wf)
+
+  # same ratio computed over the whole data set: (Wf - w) / (Wa - w),
+  # weighted by w and summed per MOTU
+  Denom.univar = colSums(w * (sweep(-w,2,Wf,'+') / (Wa - w)))
+  # one column of contributions per group, summed per MOTU
+  Num.univar   = rowSums(mapply(nunivar,groups,centers))
+  
+  Munivar=Num.univar/Denom.univar
+  # a null denominator is reported as M = 0
+  Munivar[Denom.univar==0]=0
+  
+  return(Munivar)
+}
+
+
+#' Computes the bivariate M statistics 
+#' 
+#' The function computes the bivariate M statiscics for a set of target species around a set of
+#' focus species.
+#' 
+#' @param w1 the weigth matrix indicating the presence probability of each motu
+#'          used as focus species in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
+#'          
+#' @param w2 the weigth matrix indicating the presence probability of each motu
+#'          used as target species in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.
+#'          if \code{w2} is not set, w1 is also used as target species. in this case the diagonal
+#'          of the matrix return contains the univariate M statistic for the diferent species.
+#'          
+#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
+#'               function
+#'               
+#' @return a matrix of M bivariate statistics with one focus species by row and one target species 
+#'         by columns If \code{w2} is not specified the diagonal of the matrix is equal to the univariate
+#'         M statistic of the corresponding species.
+#'
+#' @seealso \code{\link{dist.center.group}}
+#' @seealso \code{\link{m.weight}}
+#' 
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' w = m.weight(termes.ok)
+#' m = m.bivariate(w,groups=groups)
+#' 
+#' @references Marcon, E., Puech, F., and Traissac, S. (2012). 
+#'             Characterizing the relative spatial structure of point patterns. 
+#'             International Journal of Ecology, 2012.
+#'             
+#' @export
+m.bivariate = function(w1,w2=NULL,groups) {
+  # Computes the matrix of bivariate M values between the columns of
+  # `w1` and the columns of `w2`. `groups` must be a list named by the
+  # central sample of each group; as it is the third argument it has to
+  # be passed by name when `w2` is omitted.
+  
+  # without w2, w1 is used on both sides ("self" mode)
+  self = is.null(w2)
+  if (self)
+    w2 = w1
+  
+  # Numerator contribution of one group; `center` is the w1 row of the
+  # central sample.
+  nunbivar = function(members,center) {
+    # drop=FALSE: a group reduced to a single sample must stay a matrix,
+    # otherwise colSums() fails on the resulting plain vector
+    g = w2[members,,drop=FALSE]
+    
+    wn = colSums(g)
+    wa = sum(wn)
+    
+    if (self){
+      # the central sample is removed from the diagonal terms and from
+      # the neighbourhood totals
+      mwn  = wn %*% t(rep(1,length(wn)))
+      diag(mwn)= wn - center
+      wa = wa - center
+      wna = mwn/wa
+      p = sweep(wna,2,center,'*')
+    }
+    else {
+      wna= matrix(wn/wa,nrow=1)
+      p = center %*% wna
+    }    
+    
+    return(p)
+  }
+  
+  # weight rows of the group centres, read from w1
+  centers = lapply(names(groups),function(x) w1[x,])
+
+  Wf = colSums(w1)
+  Wn = colSums(w2)
+  Wa = sum(Wn)
+  
+  if (self){      
+    Wn = sweep(-w1,2,Wn,'+')
+    Wna = Wn/(Wa - w1)
+    Denom.bivar = t(w1) %*% Wna
+  }
+  else {
+    Wna= t(Wn/Wa)
+    Denom.bivar = Wf %*% Wna    
+  }
+  
+  Num.bivar = matrix(0,nrow=ncol(w1),ncol=ncol(w2))
+
+  # seq_along() leaves the loop untouched when there is no group
+  for (i in seq_along(groups)) {
+    Num.bivar = Num.bivar + nunbivar(groups[[i]],centers[[i]])
+  }
+  
+  Mbivar=Num.bivar/Denom.bivar
+
+  # a null denominator is reported as M = 0
+  Mbivar[Denom.bivar==0]=0
+  
+  return(Mbivar)
+}
+
+#' Computes a weigth matrix from a \code{\linkS4class{metabarcoding.data}}
+#' 
+#' The weight can be considered as a propability of presence of a MOTU in a 
+#' given sample. This function defines this probability as the fraction of
+#' the maximal occurrence frequency over all samples. 
+#' 
+#' @param data a \code{\linkS4class{metabarcoding.data}} instance
+#' 
+#' @return a weight matrix usable for M statistics
+#' 
+#' @examples
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' w = m.weight(termes.ok)
+#' 
+#' @export
+m.weight = function(data) {
+  # relative frequency of every MOTU within each sample
+  normalized <- normalize(data,MARGIN='sample')
+  frequencies <- normalized$reads
+
+  # each MOTU column is rescaled by its largest frequency over all samples
+  largest <- apply(frequencies,2,max)
+  weights <- sweep(frequencies,2,largest,'/')
+
+  # the weight matrix carries the names of the normalized data set
+  rownames(weights) <- rownames(normalized)
+  colnames(weights) <- colnames(normalized)
+  weights
+}
+
+#' Simulate null distribion of the M statistics by Monte-Carlo
+#' 
+#' Computes the null empirical distribution of the M statistics
+#' by shuffling MOTUs among location.
+#' 
+#' @param w the weigth matrix indicating the presence probability of each motu
+#'          in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names.
+#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
+#'               function
+#' @param resampling the number of simulation to establish the null distribution
+#' 
+#' @return a matrix of M score under the null hypothesis of random distribution of MOTUs
+#'         with a MOTUs per line and a culumn per simulation
+#'
+#' @examples       
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' w = m.weight(termes.ok)
+#' dnull = dm.univariate(w,groups)
+#' 
+#' @export              
+dm.univariate = function(w,groups,resampling=100) {
+  # Builds the empirical null distribution of the univariate M values:
+  # one row per MOTU, one column per simulation.
+  
+  # One simulation: the weights of every MOTU are permuted independently
+  # among the samples, then M is computed on the permuted matrix.
+  simulate.once <- function(iteration) {
+    permuted <- apply(w,2,function(column) sample(column,length(column),replace=FALSE))
+    # apply() loses the sample names, which m.univariate needs
+    rownames(permuted) <- rownames(w)
+    m.univariate(permuted,groups)
+  }
+  
+  null.distribution <- sapply(1:resampling,simulate.once)
+  rownames(null.distribution) <- colnames(w)
+  
+  null.distribution
+}
+
+#' Test the significance of the M statistics by Monte-Carlo
+#' 
+#' Computes computes the p.value the M statistics asociated to a MOTU
+#' by shuffling MOTUs among location.
+#' 
+#' @param w the weigth matrix indicating the presence probability of each motu
+#'          in each samples. Each line corresponds to a sample and each column
+#'          to a MOTU. \code{rownames} of the \code{w} matrix must be the sample
+#'          names.
+#' @param groups the list of considered groups as computed by the \code{\link{dist.center.group}}
+#'               function
+#' @param resampling the number of simulation to establish the null distribution
+#' 
+#' @param alternative a character value in \code{c('two.sided','less','greater')}
+#'               - two.sided : the m stat is check against the two side of the empirical
+#'                             M distribution
+#'               - less : test if the M stat is lesser than the M observed in the the empirical
+#'                             M distribution (exlusion hypothesis)
+#'               - greater : test if the M stat is greater than the M observed in the the empirical
+#'                             M distribution (aggregation hypothesis)
+#' 
+#' @return a vector of p.value with an attribute \code{m.stat} containing the actual M stat
+#'         for each MOTUs
+#'
+#' @examples       
+#' data(termes)
+#' termes.ok = termes[,colSums(termes$reads)>0]
+#' pos = expand.grid(1:3 * 10,1:7 * 10)
+#' labels = rownames(termes.ok)
+#' d = dist.grid(pos[,1],pos[2],labels)
+#' groups = dist.center.group(d,20)
+#' w = m.weight(termes.ok)
+#' pval = m.univariate.test(w,groups)
+#' 
+#' @export              
+m.univariate.test = function(w,groups,resampling=100,alternative='two.sided') {
+  # Monte-Carlo test of the univariate M values: returns one p.value per
+  # MOTU, named after the columns of `w`, with the observed M values
+  # attached as attribute 'm.stat'.
+  
+  # an unknown alternative is rejected up front instead of silently
+  # leaving the p.values undefined
+  alternative = match.arg(alternative,c('two.sided','less','greater'))
+  
+  dnull = dm.univariate(w,groups,resampling)
+  m = m.univariate(w,groups)
+  
+  # per MOTU, share of the simulated values strictly above the observed M
+  pnull = vapply(seq_len(nrow(dnull)),
+                 function(y) 1 - ecdf(dnull[y,])(m[y]),
+                 numeric(1))
+  
+  # NOTE(review): 'less' returns the share of simulations above the
+  # observed value and 'greater' its complement; confirm that this is
+  # the intended direction of each hypothesis.
+  p.value = switch(alternative,
+                   two.sided = pmin(pnull,1 - pnull),
+                   less      = pnull,
+                   greater   = 1 - pnull)
+  
+  # Set p.value to 1 if the MOTU occurs in only one place
+  n = colSums(w > 0)
+  p.value[n==1]=1
+  
+  names(p.value) = colnames(w)
+  attr(p.value,'m.stat')=m
+  
+  return(p.value)
+}
--- a/ROBITools/R/obiclean.R
+++ b/ROBITools/R/obiclean.R
@@ -0,0 +1,118 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+# TODO: Add comment
+# 
+# Author: coissac
+###############################################################################
+
+#' @export
+setGeneric("extracts.obiclean", function(obj) { # S4 generic: the body only hands the call over to method dispatch
+			return(standardGeneric("extracts.obiclean"))
+		})
+
+#' Extracts the obiclean results
+#' 
+#' The method \code{extracts.obiclean} of the class \code{\linkS4class{metabarcoding.data}}
+#' extracts \code{obiclean} results from the MOTUs descriptions include in the 
+#' \code{\linkS4class{metabarcoding.data}} instance. 
+#' When an \code{obitab} file is imported using the \code{\link{import.metabarcoding.data}}
+#' if \code{obiclean} results are present in the file they are stored in the 
+#' \code{motu} data.frame. By calling this methods, MOTU descriptors describing 
+#' the \code{obiclean} status are moved to a set of layers.
+#' 
+#' @param obj the \code{\linkS4class{metabarcoding.data}} to analyze
+#' 
+#' @return the modified \code{\linkS4class{metabarcoding.data}} instance
+#' 
+#' @examples
+#' 
+#' # load termite data set from the ROBITools sample data
+#' data(termes)
+#' 
+#' # shows the initial list of layer names
+#' layer.names(termes)
+#' 
+#' # extracts the obiclean status
+#' termes = extracts.obiclean(termes)
+#' 
+#' # shows the name of the newly created layers
+#' layer.names(termes)
+#' 
+#' 
+#' 
+#' @seealso \code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}, \code{\link{normalize}}
+#'
+#' @docType methods
+#' @rdname extracts-obiclean-methods
+#' @aliases extracts.obiclean-methods,metabarcoding.data
+#' @author Eric Coissac
+#' 
+
+
+setMethod("extracts.obiclean", "metabarcoding.data", function(obj) {
+  # Moves the obiclean descriptors stored as columns of the motus table
+  # into three layers, and returns a copy of the object whose motus
+  # table no longer contains these columns.
+
+  # layer name -> pattern of the motus columns feeding that layer,
+  # processed in this order
+  patterns <- c(obiclean_status  = "^obiclean_status:.*$",
+                obiclean_count   = "^obiclean_count:.*$",
+                obiclean_cluster = "^obiclean_cluster:.*$")
+
+  remaining <- obj@motus
+
+  for (layer in names(patterns)) {
+    columns <- colnames(remaining)
+    hits <- grep(patterns[[layer]],columns)
+
+    # the matching columns become a table with one row per column
+    values <- t(as.factor.or.matrix(remaining[,hits]))
+
+    # rows are named by what follows the first ':' of the column name
+    rownames(values) <- sapply(strsplit(columns[hits],':'),function(y) y[[2]])
+
+    # rows are put in the order of the rows of the read matrix
+    values <- values[rownames(obj@reads),]
+    obj[[layer]] <- values
+
+    # the columns just extracted are dropped before the next pattern
+    remaining <- remaining[-hits]
+  }
+
+  copy.metabarcoding.data(obj,motus=remaining)
+})
+
+
+#' @export
+setGeneric("extracts.obiclean_cluster", function(obj) { # S4 generic: the body only hands the call over to method dispatch
+  return(standardGeneric("extracts.obiclean_cluster"))
+})
+
+setMethod("extracts.obiclean_cluster", "metabarcoding.data", function(obj) {
+  # Builds a data set restricted to the MOTUs having an obiclean_head
+  # value, whose read matrix is the obiclean_count layer.
+
+  # obiclean descriptors are first moved into layers
+  cleaned <- extracts.obiclean(obj)
+
+  # only the MOTUs whose obiclean_head descriptor is not NA are kept
+  is.head <- ! is.na(cleaned$motus$obiclean_head)
+  heads <- cleaned[,is.head]
+
+  # missing counts are replaced by zero; the layer is read back after
+  # the assignment
+  heads$obiclean_count[is.na(heads$obiclean_count)] <- 0
+  counts <- heads$obiclean_count
+
+  # every layer except the one that becomes the read matrix
+  other.layers <- heads@layers[layer.names(heads) != "obiclean_count"]
+
+  copy.metabarcoding.data(heads,reads=counts,layers=other.layers)
+})
--- a/ROBITools/R/pcrslayer.R
+++ b/ROBITools/R/pcrslayer.R
--- a/ROBITools/R/plot.PCRplate.R
+++ b/ROBITools/R/plot.PCRplate.R
@@ -0,0 +1,84 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Plot PCR plates
+#' 
+#' Plots samples localization in PCR plates, and points out problematic samples if provided.
+#' 
+#' @param x a \code{\link{metabarcoding.data}} object
+#' @param samples a character vector containing names of problematic samples. Default is \code{NULL}
+#' @param different a boolean indicating whether different tags where used in forward and reverse to identify samples. Default is \code{TRUE}
+#' @param ... arguments ot be passed to methods, such as graphical parameters
+#' 
+#' @return \code{\link{plot.PCRplate}} returns a plot displaying no more than 4 PCR plates, with problematic sample localization
+#' 
+#' @examples
+#' \dontshow{# switch the working directory to the data package directory}
+#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
+#' 
+#' data(termes)
+#' 
+#' # reading the termes_ngsfilt.txt file
+#' termes.ngs=import.ngsfilter.data('termes_ngsfilt.txt', platewell="position")
+#' 
+#' # including ngsfilter data into termes data
+#' attr(termes, "samples") = termes.ngs[rownames(termes),]
+#' 
+#' #plot PCR plate plan
+#' col = rep("green", nrow(termes))
+#' col[grep("r", rownames(termes))] = "red"
+#' plot.PCRplate(termes, col=col)
+#' 
+#' #highlighting location of samples with low identification score
+#' 
+#' #low quality taxonomic assignements identification
+#' library(plotrix)
+#' weighted.hist(termes$motus$best_identity, colSums(termes$reads), breaks = 20, ylab = "Nb reads", xlab = "Ecotag scores", xaxis=F)
+#' axis(1, labels = T)
+#' lowqual.seq = rownames(termes$motus)[termes$motus$best_identity < 0.7]
+#' 
+#' #identification and localization (in PCR plate) of samples with high proportions of low quality taxonomic assignements
+#' termes.freq= normalize(termes, MARGIN=1)$reads
+#' hist(log10(rowSums(termes.freq[,lowqual.seq]) + 1e-05), breaks = 20, xlab = "Prop low quality reads")
+#' lowqual.sample = rownames(termes)[log10(rowSums(termes.freq[, lowqual.seq]) + 1e-05) > -0.5]
+#' 
+#' plot.PCRplate(termes, lowqual.sample, col=col)
+#'   
+#' @seealso \code{\link{import.metabarcoding.data}}
+#'
+#' @author Lucie Zinger
+#' @keywords DNA metabarcoding
+#' @export
+#' 
+plot.PCRplate = function(x, samples=NULL, col="cyan2", different=TRUE, ...) {
+  # Draws the position of every sample on its PCR plate from the
+  # xPlate / yPlate columns of x$samples, optionally writes the tags
+  # along the axes, and marks the samples listed in `samples` with "x".
+  
+  sample.columns = colnames(x$samples)
+  
+  # scalar conditions: the short-circuit operator is the right one
+  if(length(grep("xPlate", sample.columns))==0 || 
+       length(grep("yPlate", sample.columns))==0) {
+    stop("samples/controls position in PCR plates (xPlate and yPlate) are not defined")
+  }
+  
+  if(length(grep("tagF", sample.columns))==0 || 
+       length(grep("tagR", sample.columns))==0) {
+    stop("tags (tagF and tagR) are not defined")
+  }
+  
+  nplate = max(x$samples$nbPlate)
+  
+  if(nplate>4) {
+    stop("Cannot plot more than 4 plates")
+  }
+  
+  # y coordinates are negated so that the plot reads from top to bottom
+  plot(x$samples$xPlate, -x$samples$yPlate, pch=19, xaxt="n", yaxt="n", col=col,
+       xlim=c(-5,17), ylab="y plate", xlab= "x plate", ylim=c(-4.5*8-5,0), ...)
+  
+  # forward tags are written along the rows, reverse tags along the columns
+  if(different==TRUE) {
+    text(-3, -unique(x$samples$yPlate[order(x$samples$yPlate)]), unique(x$samples$tagF[order(x$samples$yPlate)]), cex=0.5)
+    text(unique(x$samples$xPlate[order(x$samples$xPlate)]), -5, unique(x$samples$tagR[order(x$samples$xPlate)]), cex=0.5, srt=90)
+  }
+  
+  # dashed separators: one horizontal line every 8 rows, two vertical
+  # lines at x = 0 and x = 13
+  abline(h=-seq(8.5,8*nplate+0.5,8), lty=2, col="grey")
+  segments(c(0,13), rep(min(-x$samples$yPlate),2), c(0,13), c(0,0), lty=2, col="grey")
+  
+  #plot problematic samples
+  if(!is.null(samples)) {
+    points(x$samples[samples,"xPlate"], -x$samples[samples,"yPlate"], pch="x")
+  }  
+}
--- a/ROBITools/R/plot.seqinsample.R
+++ b/ROBITools/R/plot.seqinsample.R
@@ -0,0 +1,105 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Plot sequence abundance in samples
+#' 
+#' Plots relative abundances of a set of sequences in all samples (log10 transformed)
+#' 
+#' 
+#' @param x          a \code{\link{metabarcoding.data}} object
+#' @param seqset     a vector of sequence names
+#' @param seqtype    a string indicating what type of sequences are displayed
+#' @param controls   a vector indicating the negative controls names in the x object.
+#'                   Default is \code{NULL}
+#' 
+#' @return returns a plot with the log10 transformed relative proportion of 
+#'         selected MOTUs in each samples. If the number of samples is > 96,
+#'         then the plot is displayed in 4 panels
+#' 
+#' @examples
+#' 
+#' data(termes)
+#' 
+#' seqset = rownames(termes$motus)[which(termes$motus$genus_name=="Anoplotermes")]
+#' plot.seqinsample(termes, seqset, "Anoplotermes")
+#' 
+#' controls = rownames(termes)[grep("r", rownames(termes))]
+#' seqset = rownames(termes$motus)[which(termes$motus$best_identity<0.7)]
+#' plot.seqinsample(termes, seqset, "Not assigned", controls)
+#' 
+#' @seealso \code{\linkS4class{taxonomy.obitools}}, and method \code{\link{taxonomicrank}}
+#'
+#' @author Lucie Zinger
+#' @keywords metabarcoding
+#' 
+#' @export
+#' 
+
+plot.seqinsample = function(x, seqset, seqtype, controls=NULL){
+
+  # Heat map of the log10 relative abundance of the sequences `seqset` in every
+  # sample of `x`, preceded by a colour legend.
+  #   x        : object providing the `reads` matrix (samples in rows) and the
+  #              `samples` table whose row names label the x axis
+  #   seqset   : names of the sequences (columns of x$reads) to display
+  #   seqtype  : label describing the displayed sequences (left margin text)
+  #   controls : names of the control samples, marked by red dotted lines,
+  #              or NULL
+  # With more than 96 samples the heat map is split over 4 panels.
+  # The function changes layout() and par() and does not restore them.
+
+  if (!requireNamespace("vegan", quietly=TRUE)) {
+    stop("package 'vegan' is required by plot.seqinsample()", call.=FALSE)
+  }
+
+  # per-sample relative frequencies (each row divided by its total)
+  x.freq <- vegan::decostand(x$reads,"total",1)
+  nsample <- nrow(x.freq)
+
+  controls.ind <- integer(0)
+  if (!is.null(controls)) {
+    controls.ind <- match(controls, rownames(x.freq))
+  }
+
+  breaks <- seq(log10(1e-4),log10(1), length.out=100)
+  shades <- topo.colors(100)
+
+  # colour scale drawn in the first (top) layout cell
+  draw.legend <- function() {
+    ticks <- seq(1,100,10)
+    plot(breaks, rep(1,100), col=shades, pch=15, cex=2, ylim=c(0,1.5),
+         xaxt="n", yaxt="n", bty='n')
+    text(breaks[ticks], rep(0.7,length(ticks)), round(10^breaks[ticks],4))
+    mtext("Seqence frequencies:", side=3, line=0, cex=0.8)
+  }
+
+  # heat map of the samples `rows`, with control markers and sample labels
+  draw.panel <- function(rows) {
+    # drop=FALSE keeps a matrix even when a single sequence is requested
+    mat <- x.freq[rows, seqset, drop=FALSE]
+    image(log10(mat), col=shades, xaxt="n", yaxt="n", breaks=c(breaks,0))
+
+    # image() spreads the rows evenly over [0, 1]
+    at <- seq(0, 1, length.out=length(rows))
+
+    hits <- match(controls.ind, rows)
+    hits <- hits[!is.na(hits)]
+    if (length(hits) > 0) {
+      abline(v=at[hits], col="red", lty=3)
+    }
+
+    axis(side=1, at=at, labels=rownames(x$samples)[rows], las=2, cex.axis=0.3)
+  }
+
+  if (nsample > 96) {
+    layout(matrix(c(1,2,3,1,4,5),3,2), heights=c(0.3,1,1))
+    par(oma=c(1,1,1,0), mar=c(3,3,1,1))
+    draw.legend()
+
+    # ceiling() guarantees that the 4 panels cover every sample; the last
+    # panel is simply shorter when nsample is not a multiple of 4
+    chunk <- ceiling(nsample/4)
+    for (first in seq(1, nsample, by=chunk)) {
+      draw.panel(first:min(first+chunk-1, nsample))
+    }
+  } else {
+    layout(matrix(c(1,2,1,2),2,2), heights=c(0.3,1))
+    par(oma=c(1,1,1,0), mar=c(3,3,1,1))
+    draw.legend()
+    draw.panel(seq_len(nsample))
+  }
+
+  mtext(side=2, paste(seqtype, "n = ", length(seqset)), outer=TRUE, cex=0.7, font=3)
+  mtext(side=1, "Samples", cex=0.7, outer=TRUE)
+}
+
--- a/ROBITools/R/rarefy.R
+++ b/ROBITools/R/rarefy.R
@@ -0,0 +1,99 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+# TODO: Add comment
+# 
+# Author: coissac
+###############################################################################
+
+#' @export
+setGeneric("rarefy", function(x,n,first.pass=0.95,pseudo.count=0,...) {
+			# S4 generic: the work is done by the method selected through
+			# standardGeneric() dispatch. Methods for "ANY" and
+			# "metabarcoding.data" are registered below in this file.
+			return(standardGeneric("rarefy"))
+		})
+
+setMethod("rarefy", "ANY", function(x,n,first.pass=0.95,pseudo.count=0,sum=NA) {
+
+  # Resample the count vector `x` to a total of `n` draws, each category being
+  # drawn with replacement with a weight of `x + pseudo.count`.
+  #   first.pass : the bulk of the draws is made in one multinomial draw whose
+  #                size is Poisson distributed with mean n * first.pass
+  #                (redrawn until it does not exceed n); the remaining draws
+  #                are made with sample()
+  #   sum        : total count `x` is a part of; the excess sum - sum(x) is
+  #                added as an extra category whose draws are discarded.
+  #                Defaults to sum(x), i.e. no extra category.
+  # Returns the vector of resampled counts, one value per element of `x`.
+
+  observed <- sum(x)
+  total <- if (is.na(sum)) observed else sum
+
+  if (total < observed)
+    stop("sum parameter must be greater or equal to sum(x)")
+
+  hidden <- total - observed
+
+  weights <- x + pseudo.count
+  if (hidden > 0)
+    weights <- c(weights, hidden)
+
+  # starting above n guarantees at least one Poisson draw (when n > 0)
+  first.size <- n * 2
+  while (first.size > n)
+    first.size <- rpois(1, n * first.pass)
+
+  counts <- as.vector(rmultinom(1, first.size, weights))
+
+  # top up to exactly n draws
+  categories <- seq_along(weights)
+  topup <- sample(categories, n - sum(counts), replace=TRUE, prob=weights)
+  counts <- counts + tabulate(topup, nbins=length(weights))
+
+  # the draws that fell in the extra category are not reported
+  if (hidden > 0)
+    counts <- counts[-length(counts)]
+
+  counts
+})
+
+
+setMethod("rarefy", "metabarcoding.data", function(x,n,first.pass=0.95,pseudo.count=0,MARGIN='sample') {
+
+  # Rarefy the read table of a metabarcoding.data object.
+  #   MARGIN : 'sample' (or 1) rarefies every row of x@reads,
+  #            'motu' (or 2) rarefies every column; any other value is
+  #            handled like 'motu'
+  #   n, first.pass, pseudo.count : forwarded to rarefy() for each row/column
+  # Returns a copy of `x` (copy.metabarcoding.data) holding the rarefied reads.
+
+  if (MARGIN == 'sample')
+    MARGIN <- 1
+
+  if (MARGIN == 'motu')
+    MARGIN <- 2
+
+  dreads <- dim(x@reads)
+  rreads <- matrix(0, nrow=dreads[1], ncol=dreads[2])
+
+  # seq_len() keeps the loops empty when the table has no row/column
+  if (MARGIN == 1) {
+    for (i in seq_len(dreads[1])) {
+      rreads[i,] <- rarefy(x@reads[i,],
+                           n=n,
+                           first.pass=first.pass,
+                           pseudo.count=pseudo.count)
+    }
+  } else {
+    for (i in seq_len(dreads[2])) {
+      rreads[,i] <- rarefy(x@reads[,i],
+                           n=n,
+                           first.pass=first.pass,
+                           pseudo.count=pseudo.count)
+    }
+  }
+
+  rownames(rreads) <- rownames(x@reads)
+  colnames(rreads) <- colnames(x@reads)
+
+  copy.metabarcoding.data(x, reads=rreads)
+})
+	
--- a/ROBITools/R/read.ngsfilter.R
+++ b/ROBITools/R/read.ngsfilter.R
@@ -0,0 +1,56 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Read an OBITools ngsfilter file
+#' 
+#' Reads a ngsfilter file as formatted for the OBITools. For now, the file needs to be tab delimited up to the "F" column. 
+#' Any additional information needs to be space delimited.
+#' 
+#' @seealso \code{\link{import.metabarcoding.data}}
+#' @author Lucie Zinger
+#' @keywords data import
+#' @export
+#' 
+
+read.ngsfilter <- function(filename, decimal='.', as.is=!stringsAsFactors, stringsAsFactors = default.stringsAsFactors()) {
+
+  # Read an OBITools ngsfilter file into a data.frame of class 'ngsfilter.data'.
+  #   filename : path of the tab delimited ngsfilter file
+  #   decimal  : decimal mark used when converting the extra values
+  #   as.is    : passed to type.convert(); TRUE keeps strings as character
+  # The first five columns are named experiment, sample, tags, forward_primer
+  # and reverse_primer. In the sixth column, the text following "@" is read as
+  # a list of "key=value" items separated by ";": each key becomes a column.
+  # Row names are the sample names when these are unique.
+  # NOTE(review): default.stringsAsFactors() is deprecated in recent R
+  # versions; pass `as.is` explicitly there - confirm the supported R range.
+
+  raw <- read.table(file=filename, header=FALSE, sep="\t", as.is=TRUE)
+
+  beg <- raw[,1:5]
+  colnames(beg) <- c('experiment','sample','tags','forward_primer','reverse_primer')
+
+  trim <- function(s) gsub("^ +| +$", "", s)
+
+  # split the text following "@" into its keys and values
+  parse.extras <- function(field) {
+    extras <- strsplit(field, "@")[[1]][2]
+    if (is.na(extras)) {
+      # no "@" section on this line: nothing to add
+      return(list(keys=character(0), values=character(0)))
+    }
+
+    # splitting on ";" alone also removes the ";" closing the last item,
+    # which would otherwise stay glued to its value ("5;" instead of 5)
+    items <- trim(strsplit(extras, ";", fixed=TRUE)[[1]])
+
+    # keep the items holding a "=" and cut them at the first one
+    eq <- regexpr("=", items, fixed=TRUE)
+    items <- items[eq > 0]
+    eq <- eq[eq > 0]
+    keys <- substr(items, 1, eq - 1)
+    values <- substr(items, eq + 1, nchar(items))
+
+    # a key repeated on one line keeps its last value
+    last <- !duplicated(keys, fromLast=TRUE)
+    list(keys=keys[last], values=values[last])
+  }
+
+  parsed <- lapply(as.character(raw[,6]), parse.extras)
+
+  # one column per key met in the file, filled line by line
+  columnnames <- unique(unlist(lapply(parsed, function(p) p$keys)))
+
+  m <- matrix(NA_character_, nrow=nrow(raw), ncol=length(columnnames))
+  colnames(m) <- columnnames
+  m <- as.data.frame(m, stringsAsFactors=FALSE)
+
+  for (i in seq_along(parsed)) {
+    keys <- parsed[[i]]$keys
+    if (length(keys) > 0) {
+      m[i, keys] <- parsed[[i]]$values
+    }
+  }
+
+  for (name in colnames(m)) {
+    m[[name]] <- type.convert(m[[name]], dec=decimal, as.is=as.is)
+  }
+
+  ngs <- cbind(beg, m)
+
+  # duplicated sample names cannot be used as row names
+  if (!anyDuplicated(ngs$sample)) {
+    rownames(ngs) <- ngs$sample
+  }
+
+  class(ngs) <- c('ngsfilter.data', class(ngs))
+
+  ngs
+}
--- a/ROBITools/R/read.obitab.R
+++ b/ROBITools/R/read.obitab.R
@@ -0,0 +1,39 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+
+#' Reads a data file produced by the obitab command
+#'
+#' Read a data file issued from the convertion of a fasta 
+#' file to a tabular file by the obitab command
+#' 
+#' @param filename a string containing the file name of the obitab file.
+#' @param sep  Column separator in the obitab file. 
+#'             The default separator is the tabulation.
+#'
+#' @return a \code{data.frame} instance containing the obitab file
+#'
+#' @examples
+#' require(ROBITools)
+#' 
+#' \dontshow{# switch the working directory to the data package directory}
+#' \dontshow{setwd(system.file("extdata", package="ROBITools"))}
+#' 
+#' # read the termes.tab file
+#' termes=read.obitab('termes.tab')
+#' 
+#' # print the dimensions of the data.frame
+#' dim(termes)
+#'   
+#' @seealso \code{\link{import.metabarcoding.data}}
+#' @author Eric Coissac
+#' @export
+#'
+read.obitab <-
+function(filename,sep='\t') {
+
+   # Thin wrapper around read.delim(): surrounding white space is stripped
+   # from the fields and the column names are kept exactly as written in
+   # the file (no syntactic clean-up).
+   read.delim(filename, sep=sep, strip.white=TRUE, check.names=FALSE)
+
+}
+
--- a/ROBITools/R/read.sumatra.R
+++ b/ROBITools/R/read.sumatra.R
@@ -0,0 +1,17 @@
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+
+# TODO: Add comment
+# 
+# Author: coissac
+###############################################################################
+
+
+read.sumatra = function(filename) {
+	# Read a tab delimited file without header whose first two columns are
+	# sequence names and whose third column is a score.
+	# Returns (invisibly) one label per line: the two names joined by ".".
+	# NOTE(review): score, name.first and name.second are computed but not
+	# used yet - the function looks unfinished; confirm the intended result.
+	data <- read.table(filename, sep="\t", header=FALSE)
+	score <- data[,3]
+
+	# the original code read these names from an undefined object `s`, which
+	# made every call fail; they come from the table that was just read
+	first <- as.character(data[,1])
+	second <- as.character(data[,2])
+	name.first <- mapply(min, first, second)
+	name.second <- mapply(max, first, second)
+
+	sname <- as.character(interaction(data[,1], data[,2]))
+	invisible(sname)
+}
--- a/ROBITools/R/s3objects.R
+++ b/ROBITools/R/s3objects.R
@@ -0,0 +1,123 @@
+# TODO: Add comment
+# 
+# Author: coissac
+###############################################################################
+
+#' Adds a class into the class hierarchie attribute.
+#' 
+#' \code{addS3Class} adds a new class name to the vector
+#' of class associated to the object. This the way to
+#' assign an object to an S3 class. \code{addS3Class} add
+#' the new class name in front of the class vector
+#' 
+#' @param object the object to modify
+#' @param classname the name of the new class
+#' 
+#' @return the object given as parametter casted to the new
+#'         class
+#' 
+#' @examples
+#' x = c(1,3,2,5)
+#' x = addS3Class(x,"my.vector")
+#' class(x)
+#' 
+#' @seealso \code{\link{rmS3Class}}
+#' 
+#' @note for efficiency purpose no check is done on the input
+#'       parametters
+#'       
+#' @keywords system function
+#' 
+#' @author Eric Coissac
+#' @export
+#' 
+addS3Class = function(object,classname) {
+	# the new class goes in front of the classes the object already has
+	extended <- c(classname, class(object))
+	class(object) <- extended
+	object
+}
+
+#' Removes a class from the class hierarchie attribute.
+#' 
+#' \code{rmS3Class} removes a class name from the vector
+#' of class associated to the object. This the way to
+#' remove the association between an object and a S3 class. 
+#' 
+#' @param object the object to modify
+#' @param classname the name of the class to remove
+#' 
+#' @return the object given as parametter.
+#' 
+#' @examples
+#' x = c(1,3,2,5)
+#' x = addS3Class(x,"my.vector")
+#' class(x)
+#' x = rmS3Class(x,"my.vector")
+#' class(x)
+#' 
+#' @seealso \code{\link{addS3Class}}
+#' 
+#' @note for efficiency purpose no check is done on the input
+#'       parametters
+#'       
+#' @keywords system function
+#' 
+#' @author Eric Coissac
+#' @export
+#' 
+rmS3Class = function(object,classname) {
+	# Remove `classname` from the class vector of `object` (first occurrence
+	# of each requested name). The object is returned unchanged when it does
+	# not have that class: the original code then indexed the class vector
+	# with NA, which replaced the whole class attribute by NA.
+	current <- class(object)
+	index <- match(classname, current)
+	index <- index[!is.na(index)]
+
+	if (length(index) > 0)
+		class(object) <- current[-index]
+
+	object
+}
+
+#' create basic functions to manipulate a new S3 class
+#' 
+#' The createS3Class function creates, in the global environment
+#' (\code{globalenv()}), an \code{is.xxx} function and an \code{as.xxx} function
+#' allowing to test if an object belongs to the class \code{xxx} and to add
+#' the class \code{xxx} to the class list of an object. \code{xxx} is a 
+#' generic class name that is specified through the \code{classname}
+#' argument of the function.
+#' 
+#' @param classname a \code{character string} indicating the name
+#'         of the new class.
+#'         
+#' @examples
+#' 
+#' # Create a new S3 class named mynewclass
+#' createS3Class('mynewclass')
+#' 
+#' #create a new vector object
+#' x=c(1,4,6)
+#' 
+#' # test if it belongs the new class, that is false
+#' is.mynewclass(x)
+#' 
+#' # Associate x to the new class
+#' as.mynewclass(x)
+#' 
+#' # test again if x belongs the new class, that is now true
+#' is.mynewclass(x)
+#' 
+#' @seealso \code{\link{rmS3Class}}
+#' 
+#' @note Take care that the new functions are created in the 
+#' global environment (\code{globalenv()}), not in \code{package:ROBITools}.
+#' 
+#' @keywords system function
+#' 
+#' @author Eric Coissac
+#' @export
+#' 
+createS3Class = function(classname) {
+  # both helpers are closures over `classname` and are installed in the
+  # global environment under the names is.<classname> and as.<classname>
+  target <- globalenv()
+
+  tester <- function(object) any(class(object)==classname)
+  caster <- function(object) return(addS3Class(object,classname))
+
+  assign(paste('is',classname,sep="."), tester, envir=target)
+  assign(paste('as',classname,sep="."), caster, envir=target)
+
+}
+
+
--- a/ROBITools/R/taxoDBtree.R
+++ b/ROBITools/R/taxoDBtree.R
@@ -0,0 +1,89 @@
+#'@include 02_class_metabarcoding.data.R
+#'@import ROBITaxonomy 
+
+NULL
+
+#' Construct a taxonomic tree from a list of taxa
+#' 
+#' Construct a graph from a table containing the taxonomic path of sequences
+#' 
+#' 
+#' @param x a table containing the taxonomic path of the references. Typically an output from get.classic.taxonomy
+#' 
+#' @return g a directed graph displaying the taxonomy hierarchy of the input data. Stored in a \code{\link{igraph}} object 
+#'         where the taxonomic ranks of the vertices are added to the vertices attributes
+#'
+#' @examples
+#' 
+#' data(termes)
+#' 
+#' taxo=default.taxonomy()
+#' 
+#' termes.taxo.table = get.classic.taxonomy(termes, taxo, "taxid")
+#' head(termes.taxo.table)
+#' 
+#' graph.tax.termes = dbtree(termes.taxo.table[,1:7])
+#' library(igraph)
+#' 
+#' #plot the tree
+#' coord = layout.reingold.tilford(graph.tax.termes, root=1, circular=F)
+#' v.cex = as.factor(V(graph.tax.termes)$rank)
+#' levels(v.cex) = match(levels(v.cex), colnames(termes.taxo.table))
+#' plot(graph.tax.termes, vertex.size=1, vertex.label.cex=2*(as.numeric(as.vector(v.cex))^-1), edge.arrow.size=0, layout=coord)
+#' 
+#' 
+#' #Vizualization with sequence counts
+#' tax.count = log10(colSums(termes$reads)[match(as.vector(V(graph.tax.termes)$name), termes$motus$scientific_name)])
+#' tax.count[is.na(tax.count)|tax.count<0] = 0.01
+#' V(graph.tax.termes)$count = unname(tax.count)
+#' 
+#' plot(graph.tax.termes, vertex.size=V(graph.tax.termes)$count, vertex.label.cex=2*(as.numeric(as.vector(v.cex))^-1), edge.arrow.size=0, layout=coord)
+#' 
+#'   
+#' @seealso \code{\link{get.classic.taxonomy}}
+#' @author Lucie Zinger
+#' @export
+
+dbtree = function(x) {
+
+  # Build a directed igraph graph (parent -> kid) from a table whose columns
+  # are taxonomic ranks, ordered from the highest rank to the lowest, and
+  # whose rows are taxonomic paths. Each vertex gets a `rank` attribute: the
+  # name of a column of `x` in which the taxon name occurs.
+
+  # dealing with noranks: a missing name is replaced by the name found at the
+  # previous rank ("NR" in the first column), so that paths stay connected
+  x2 <- x
+  for (i in seq_len(ncol(x2))) {
+    x2[,i] <- as.character(x[,i])
+    missing <- which(is.na(x[,i]))
+    if (length(missing) != 0) {
+      if (i == 1) {
+        x2[missing,i] <- "NR"
+      } else {
+        x2[missing,i] <- as.character(x2[,i-1])[missing]
+      }
+    }
+  }
+
+  # prepare an edgelist: the distinct (parent, kid) pairs of consecutive ranks
+  edgelist <- lapply(seq_len(ncol(x2) - 1), function(i) {
+    pairs <- x2[, c(i, i+1)]
+    # logical indexing keeps every row when nothing is duplicated, whereas
+    # pairs[-which(duplicated(pairs)), ] would then return no row at all
+    pairs <- pairs[!duplicated(pairs), , drop=FALSE]
+    colnames(pairs) <- c("parent", "kid")
+    # a name carried over from the previous rank would give a self loop
+    pairs[which(pairs[,1] != pairs[,2]), , drop=FALSE]
+  })
+
+  edgelist <- do.call("rbind", edgelist)
+
+  # construct the graph
+  g <- igraph::graph.edgelist(as.matrix(edgelist), directed=TRUE)
+
+  # get taxorank for each taxa
+  ranks <- do.call("rbind", lapply(seq_len(ncol(x)), function(y) {
+    cbind(unique(as.character(x[,y])), colnames(x)[y])
+  }))
+
+  # Assign nodes to taxorank
+  igraph::V(g)$rank <- ranks[match(igraph::V(g)$name, ranks[,1]),2]
+
+  return(g)
+}
--- a/ROBITools/R/taxonomic.resolution.R
+++ b/ROBITools/R/taxonomic.resolution.R
@@ -0,0 +1,74 @@
+#' @import ROBITaxonomy
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Dataset taxonomic resolution summary.
+#' 
+#' Summarizes the taxonomic resolution of reads and MOTUs over the entire dataset
+#' 
+#' 
+#' @param x          a \code{\link{metabarcoding.data}} object
+#' @param colranks   a string indicating column name where ranks are stored in \code{x}
+#' @param colscores  a string indicating column name where taxonomic identification scores are stored in \code{x}
+#' @param thresh     a threshold for defining at which taxonomic identification scores a sequence can be considered as "not assigned". 
+#'                   Default is \code{0.7}
+#' 
+#' @return returns a data.frame and piecharts of the number/proportion of MOTUs/reads assigned to each taxonomic levels
+#' 
+#' @examples
+#' 
+#' data(termes)
+#' taxo=default.taxonomy()
+#' 
+#' termes.taxo.table = get.classic.taxonomy(termes, taxo, "taxid")
+#' attr(termes, "motus") = data.frame(termes$motus, termes.taxo.table)
+#' attr(termes, "motus")["count"] = colSums(termes$reads)
+#' 
+#' summary.taxores(termes, "taxonomic_rank_ok","best_identity")
+#' 
+#' @seealso \code{\linkS4class{taxonomy.obitools}}, and method \code{\link{taxonomicrank}}
+#'
+#' @author Lucie Zinger
+#' @keywords taxonomy
+#' 
+#' @export
+#' 
+summary.taxores = function(x,colranks,colscores, thresh=0.7){
+
+  # Count the MOTUs and sum the reads (x$motus$count) assigned to each
+  # taxonomic rank, draw the two pie charts under a shared legend, and return
+  # the counts as a data.frame. MOTUs whose score (column `colscores`) is
+  # below `thresh` are counted as "not assigned".
+
+  # every possible taxonomic level, from the highest to the lowest
+  taxorank <- c("superkingdom", "kingdom", "subkingdom", "superphylum", "phylum", "subphylum", "superclass", "class", "subclass", "infraclass",
+                "superorder", "order", "suborder", "infraorder", "parvorder", "superfamily", "family", "subfamily", "supertribe", "tribe", 
+                "subtribe", "supergenus", "genus", "subgenus", "species group", "species subgroup", "superspecies", "species", "subspecies",
+                "varietas", "forma", "no rank", "not assigned")
+
+  # rank of each MOTU, overridden when the identification score is too low
+  ranks <- as.vector(x$motus[,colranks])
+  ranks[x$motus[,colscores] < thresh] <- "not assigned"
+
+  # number of MOTUs per rank, in taxorank order (0 for absent ranks)
+  per.rank <- table(ranks)
+  taxores.otu <- per.rank[match(taxorank, names(per.rank))]
+  names(taxores.otu) <- taxorank
+  taxores.otu[is.na(taxores.otu)] <- 0
+
+  # number of reads per rank, in taxorank order (0 for absent ranks)
+  read.sums <- aggregate(x$motus$count, by=list(ranks), sum)
+  taxores.reads <- read.sums[match(taxorank, read.sums[,1]), 2]
+  names(taxores.reads) <- taxorank
+  taxores.reads[is.na(taxores.reads)] <- 0
+
+  # legend on top, one pie chart per measure below; the last two levels
+  # ("no rank", "not assigned") are drawn in grey
+  layout(matrix(c(1,2,1,3),2,2), heights=c(0.3,1))
+  col.tmp <- c(rainbow(length(taxorank)-2, start=0, end=0.5, alpha=0.6), "lightgrey", "darkgrey")
+  par(mar=c(1,0,0,0), oma=c(0,0,2,0))
+  frame()
+  legend("top", taxorank, ncol=6, cex=0.8, fill=col.tmp)
+  pie(taxores.otu, col=col.tmp, border="lightgrey", labels="", clockwise=TRUE)
+  mtext("OTUs", side=1, cex=1)
+  pie(taxores.reads, col=col.tmp, border="lightgrey", labels="", clockwise=TRUE)
+  mtext("Reads", side=1, cex=1)
+
+  # result
+  data.frame(otu=taxores.otu, reads=taxores.reads)
+}
--- a/ROBITools/R/taxonomy_classic_table.R
+++ b/ROBITools/R/taxonomy_classic_table.R
@@ -0,0 +1,53 @@
+#' @import ROBITaxonomy
+#' @include 02_class_metabarcoding.data.R
+NULL
+
+#' Get classical taxonomy format
+#' 
+#' Creates a table with the classical taxonomic description (from phylum to species)
+#' 
+#' @param x a \code{\link{metabarcoding.data}} object
+#' @param taxonomy a instance of \code{\linkS4class{taxonomy.obitools}}
+#' @param coltaxid a the name of the column containing taxids to be used for creating classical taxonomic description
+#' 
+#' @return returns a data.frame with the classical taxonomic description ("kingdom", "phylum", "class", "order", "family", "genus", "species"), as well as
+#'         sequence taxonomic assignment rank and scientific name for each sequences stored in the \code{\link{metabarcoding.data}} object
+#' 
+#' @examples
+#' 
+#' data(termes)
+#' 
+#' taxo=default.taxonomy()
+#' 
+#' termes.taxo.table = get.classic.taxonomy(termes, taxo, "taxid")
+#' head(termes.taxo.table)
+#' 
+#' attr(termes, "motus") = data.frame(termes$motus, termes.taxo.table)
+#' 
+#'   
+#' @seealso \code{\linkS4class{taxonomy.obitools}}, and methods \code{\link{species}},\code{\link{genus}}, \code{\link{family}},\code{\link{kingdom}},
+#'          \code{\link{superkingdom}},\code{\link{taxonatrank}}, \code{\link{taxonomicrank}}
+#'
+#' @author Lucie Zinger
+#' @keywords taxonomy
+#' @export
+#' 
+
+get.classic.taxonomy = function(x, taxonomy, coltaxid) {
+
+  # One column per classical rank (named <rank>_name_ok) holding the
+  # scientific name of the taxon found at that rank for each taxid of
+  # x$motus[,coltaxid], plus the scientific name and the taxonomic rank of
+  # the taxid itself. Rows are named after colnames(x).
+
+  classic.taxo <- c("kingdom", "phylum", "class", "order", "family", "genus", "species")
+
+  taxids <- x$motus[,coltaxid]
+
+  name.at.rank <- function(rank) {
+    scientificname(taxonomy, taxonatrank(taxonomy, taxids, rank))
+  }
+
+  out <- as.data.frame(do.call("cbind", lapply(classic.taxo, name.at.rank)))
+
+  colnames(out) <- paste0(classic.taxo, "_name_ok")
+  rownames(out) <- colnames(x)
+
+  out$scientific_name_ok <- scientificname(taxonomy, taxids)
+  out$taxonomic_rank_ok <- taxonomicrank(taxonomy, taxids)
+
+  out
+}
--- a/ROBITools/README-SLRE.md
+++ b/ROBITools/README-SLRE.md
@@ -0,0 +1,128 @@
+SLRE: Super Light Regular Expression library
+============================================
+
+SLRE is an ISO C library that implements a subset of Perl regular
+expression syntax. Main features of SLRE are:
+
+   * Written in strict ANSI C'89
+   * Small size (compiled x86 code is about 5kB)
+   * Uses little stack and does no dynamic memory allocation
+   * Provides simple intuitive API
+   * Implements most useful subset of Perl regex syntax (see below)
+   * Easily extensible. E.g. if one wants to introduce a new
+metacharacter `\i`, meaning "IPv4 address", it is easy to do so with SLRE.
+
+SLRE is perfect for tasks like parsing network requests, configuration
+files, user input, etc, when libraries like [PCRE](http://pcre.org) are too
+heavyweight for the given task. Developers of embedded systems would benefit
+most.
+
+## Supported Syntax
+
+    (?i)    Must be at the beginning of the regex. Makes match case-insensitive
+    ^       Match beginning of a buffer
+    $       Match end of a buffer
+    ()      Grouping and substring capturing
+    \s      Match whitespace
+    \S      Match non-whitespace
+    \d      Match decimal digit
+    \n      Match new line character
+    \r      Match carriage return character
+    \f      Match form feed character
+    \v      Match vertical tab character
+    \t      Match horizontal tab character
+    \b      Match backspace character
+    +       Match one or more times (greedy)
+    +?      Match one or more times (non-greedy)
+    *       Match zero or more times (greedy)
+    *?      Match zero or more times (non-greedy)
+    ?       Match zero or once (non-greedy)
+    x|y     Match x or y (alternation operator)
+    \meta   Match one of the meta character: ^$().[]*+?|\
+    \xHH    Match byte with hex value 0xHH, e.g. \x4a
+    [...]   Match any character from set. Ranges like [a-z] are supported
+    [^...]  Match any character but ones from set
+
+Under development: Unicode support.
+
+## API
+
+    int slre_match(const char *regexp, const char *buf, int buf_len,
+                   struct slre_cap *caps, int num_caps, int flags);
+
+`slre_match()` matches string buffer `buf` of length `buf_len` against
+regular expression `regexp`, which should conform to the syntax outlined
+above. If regular expression `regexp` contains brackets, `slre_match()`
+can capture the respective substrings into the array of `struct slre_cap`
+structures:
+
+    /* Stores matched fragment for the expression inside brackets */
+    struct slre_cap {
+      const char *ptr;  /* Points to the matched fragment */
+      int len;          /* Length of the matched fragment */
+    };
+
+N-th member of the `caps` array will contain fragment that corresponds to the
+N-th opening bracket in the `regex`, N is zero-based. `slre_match()` returns
+number of bytes scanned from the beginning of the string. If return value is
+greater or equal to 0, there is a match. If return value is less than 0, there
+is no match. Negative return codes are as follows:
+
+    #define SLRE_NO_MATCH               -1
+    #define SLRE_UNEXPECTED_QUANTIFIER  -2
+    #define SLRE_UNBALANCED_BRACKETS    -3
+    #define SLRE_INTERNAL_ERROR         -4
+    #define SLRE_INVALID_CHARACTER_SET  -5
+    #define SLRE_INVALID_METACHARACTER  -6
+    #define SLRE_CAPS_ARRAY_TOO_SMALL   -7
+    #define SLRE_TOO_MANY_BRANCHES      -8
+    #define SLRE_TOO_MANY_BRACKETS      -9
+
+
+## Example: parsing HTTP request line
+
+    const char *request = " GET /index.html HTTP/1.0\r\n\r\n";
+    struct slre_cap caps[4];
+
+    if (slre_match("^\\s*(\\S+)\\s+(\\S+)\\s+HTTP/(\\d)\\.(\\d)",
+                   request, strlen(request), caps, 4, 0) > 0) {
+      printf("Method: [%.*s], URI: [%.*s]\n",
+             caps[0].len, caps[0].ptr,
+             caps[1].len, caps[1].ptr);
+    } else {
+      printf("Error parsing [%s]\n", request);
+    }
+
+## Example: find all URLs in a string
+
+    static const char *str =
+      "<img src=\"HTTPS://FOO.COM/x?b#c=tab1\"/> "
+      "  <a href=\"http://cesanta.com\">some link</a>";
+
+    static const char *regex = "(?i)((https?://)[^\\s/'\"<>]+/?[^\\s'\"<>]*)";
+    struct slre_cap caps[2];
+    int i, j = 0, str_len = strlen(str);
+
+    while (j < str_len &&
+           (i = slre_match(regex, str + j, str_len - j, caps, 2, 0)) > 0) {
+      printf("Found URL: [%.*s]\n", caps[0].len, caps[0].ptr);
+      j += i;
+    }
+
+Output:
+
+    Found URL: [HTTPS://FOO.COM/x?b#c=tab1]
+    Found URL: [http://cesanta.com]
+
+# License
+
+SLRE is released under
+[GNU GPL v.2](http://www.gnu.org/licenses/old-licenses/gpl-2.0.html).
+Businesses have an option to get non-restrictive, royalty-free commercial
+license and professional support from
+[Cesanta Software](http://cesanta.com).
+
+[Super Light DNS Resolver](https://github.com/cesanta/sldr),
+[Mongoose web server](https://github.com/cesanta/mongoose)
+are other projects by Cesanta Software, developed with the same philosophy
+of functionality and simplicity.
--- a/ROBITools/data/termes.rda
+++ b/ROBITools/data/termes.rda
--- a/ROBITools/inst/extdata/termes.fasta
+++ b/ROBITools/inst/extdata/termes.fasta
--- a/ROBITools/inst/extdata/termes.tab
+++ b/ROBITools/inst/extdata/termes.tab
--- a/ROBITools/inst/extdata/termes_ngsfilt.txt
+++ b/ROBITools/inst/extdata/termes_ngsfilt.txt
@@ -0,0 +1,21 @@
+termes_data	A01	acacacac:acacacac	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_01A; serie=01; coordX=5; coordY=5;
+termes_data	A02	acagcaca:acacacac	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_01B; serie=01; coordX=10; coordY=5;
+termes_data	A03	gtgtacat:acacacac	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_01C; serie=01; coordX=15; coordY=5;
+termes_data	A04	tatgtcag:acacacac	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_01D; serie=01; coordX=20; coordY=5;
+termes_data	A05	tagtcgca:acacacac	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_01E; serie=01; coordX=25; coordY=5;
+termes_data	A06	tactatac:acacacac	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_01F; serie=01; coordX=30; coordY=5;
+termes_data	A07	actagatc:acacacac	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_01G; serie=01; coordX=35; coordY=5;
+termes_data	A08	gatcgcga:acacacac	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_01H; serie=01; coordX=40; coordY=5;
+termes_data	A09	acacacac:acagcaca	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_02A; serie=01; coordX=45; coordY=5;
+termes_data	A10	acagcaca:acagcaca	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_02B; serie=01; coordX=50; coordY=5;
+termes_data	A11	gtgtacat:acagcaca	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_02C; serie=01; coordX=55; coordY=5;
+termes_data	A12	tatgtcag:acagcaca	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_02D; serie=01; coordX=60; coordY=5;
+termes_data	A13	tagtcgca:acagcaca	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_02E; serie=02; coordX=65; coordY=5;
+termes_data	A14	tactatac:acagcaca	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_02F; serie=02; coordX=70; coordY=5;
+termes_data	A15	actagatc:acagcaca	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_02G; serie=02; coordX=75; coordY=5;
+termes_data	A16	gatcgcga:acagcaca	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_02H; serie=02; coordX=80; coordY=5;
+termes_data	A17	acacacac:gtgtacat	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_03A; serie=02; coordX=85; coordY=5;
+termes_data	A18	acagcaca:gtgtacat	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_03B; serie=02; coordX=90; coordY=5;
+termes_data	A19	gtgtacat:gtgtacat	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=01_03C; serie=02; coordX=95; coordY=5;
+termes_data	A11r	tcagtgtc:gtcgtaga	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=04_10B; serie=01; coordX=55; coordY=5;
+termes_data	A16r	actctgct:gtcgtaga	ATTTCAGGTCAAGGTGCAGC	TACAACCAAATCCAATTTCA	F @ position=04_10C; serie=02; coordX=80; coordY=5;
--- a/ROBITools/src/ROBITools.so
+++ b/ROBITools/src/ROBITools.so
--- a/ROBITools/src/ecoError.c
+++ b/ROBITools/src/ecoError.c
@@ -0,0 +1,26 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Report a fatal error and give control back to R; this function never
+ * returns to its caller.
+ * @param errcode	error number
+ * @param message 	the text explaining what's going on
+ * @param filename	the file source where the program failed
+ * @param linenumber	the line where it has failed
+ * filename and linenumber are written at pre-processing
+ * time by a macro
+ *
+ * The error is raised with Rf_error() (declared by R.h, which ecoPCR.h
+ * includes) instead of abort(): abort() would terminate the whole R session
+ * hosting this library. The first parameter is not called `error` because
+ * R.h may define `error` as a macro expanding to Rf_error.
+ */
+void ecoError(int32_t errcode,
+              const char* message,
+              const char * filename,
+              int linenumber)
+{
+	Rf_error("Error %d in file %s line %d : %s",
+	         (int)errcode,
+	         filename,
+	         linenumber,
+	         message);
+}
--- a/ROBITools/src/ecoError.o
+++ b/ROBITools/src/ecoError.o
--- a/ROBITools/src/ecoIOUtils.c
+++ b/ROBITools/src/ecoIOUtils.c
@@ -0,0 +1,122 @@
+#include "ecoPCR.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define SWAPINT32(x)     ((((x) << 24) & 0xFF000000) | (((x) <<  8) & 0xFF0000) | \
+                         (((x) >>  8) & 0xFF00)     | (((x) >> 24) & 0xFF))
+
+
+/*
+ * Probe the byte order of the host.
+ * Returns the lowest-addressed byte of the int32_t value 1: it is 1 when the
+ * least significant byte is stored first and 0 otherwise.
+ * NOTE(review): despite its name, the result is therefore non-zero on
+ * little-endian hosts; the readers in this file swap bytes when it is
+ * non-zero, so the files are presumably stored big-endian - confirm.
+ */
+int32_t is_big_endian()
+{
+	const int32_t  probe      = 1;
+	const char    *first_byte = (const char *)&probe;
+
+	return (int32_t)first_byte[0];
+}
+
+
+
+
+/*
+ * Reverse the order of the four bytes of a 32-bit integer.
+ * The swap is done on the unsigned representation: left-shifting a signed
+ * value whose high bits are set is not portable C.
+ */
+int32_t swap_int32_t(int32_t i)
+{
+	uint32_t u = (uint32_t)i;
+
+	u = ((u << 24) & 0xFF000000u) |
+	    ((u <<  8) & 0x00FF0000u) |
+	    ((u >>  8) & 0x0000FF00u) |
+	    ((u >> 24) & 0x000000FFu);
+
+	return (int32_t)u;
+}
+
+
+/**
+ * Read the next record of the file: a 32-bit size followed by that many
+ * bytes of data.
+ * @param	*f	the database
+ * @param	recordSize	receives the size of the record that was read
+ *
+ * @return	the record data, or NULL when the end of the file is reached.
+ * 			The data live in a buffer kept by this function between
+ * 			calls: it is reused, and possibly reallocated, by the
+ * 			next call.
+ */
+void *read_ecorecord(FILE *f,int32_t *recordSize)
+{
+	static void    *buffer     = NULL;
+	/* capacity of buffer; static so that the buffer only grows when a
+	 * record is larger than every record read before */
+	static int32_t  buffersize = 0;
+	int32_t         read;
+
+	if (!recordSize)
+		ECOERROR(ECO_ASSERT_ERROR,
+		         "recordSize cannot be NULL");
+
+	read = fread(recordSize,
+	      		 1,
+	      		 sizeof(int32_t),
+	             f);
+
+	if (feof(f))
+		return NULL;
+
+	if (read != sizeof(int32_t))
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+	if (is_big_endian())
+		*recordSize=swap_int32_t(*recordSize);
+
+	/* a negative size can only come from a corrupted file; it must not
+	 * reach fread(), where it would be converted to a huge unsigned count */
+	if (*recordSize < 0)
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+	if (buffersize < *recordSize)
+	{
+		if (buffer)
+			buffer = ECOREALLOC(buffer,*recordSize,
+			                    "Increase size of record buffer");
+		else
+			buffer = ECOMALLOC(*recordSize,
+			                    "Allocate record buffer");
+
+		buffersize = *recordSize;
+	}
+
+	read = fread(buffer,
+	             1,
+				 *recordSize,
+				 f);
+
+	if (read != *recordSize)
+		ECOERROR(ECO_IO_ERROR,"Reading record data error");
+
+	return buffer;
+}
+
+
+
+
+
+/**
+ * Open a database file for binary reading and read its leading 32-bit
+ * record count.
+ * @param 	filename 		name of the database (.sdx, .rdx, .tbx)
+ * @param 	sequencecount	receives the count stored at the start of the
+ * 							file (0 when the file cannot be opened)
+ * @param 	abort_on_open_error	when non-zero, failing to open the file is
+ * 							reported through ECOERROR; otherwise NULL is
+ * 							returned
+ * @return 	the opened stream, positioned just after the count
+ **/
+FILE *open_ecorecorddb(const char *filename,
+                       int32_t    *sequencecount,
+                       int32_t    abort_on_open_error)
+{
+	FILE    *db = fopen(filename,"rb");
+	int32_t  nread;
+
+	if (db == NULL)
+	{
+		if (!abort_on_open_error)
+		{
+			*sequencecount=0;
+			return NULL;
+		}
+
+		ECOERROR(ECO_IO_ERROR,"Cannot open file");
+	}
+
+	nread = fread(sequencecount, 1, sizeof(int32_t), db);
+
+	if (nread != sizeof(int32_t))
+		ECOERROR(ECO_IO_ERROR,"Reading record size error");
+
+	/* same byte-order handling as for the record sizes */
+	if (is_big_endian())
+		*sequencecount=swap_int32_t(*sequencecount);
+
+	return db;
+}
+
--- a/ROBITools/src/ecoIOUtils.o
+++ b/ROBITools/src/ecoIOUtils.o
--- a/ROBITools/src/ecoMalloc.c
+++ b/ROBITools/src/ecoMalloc.c
@@ -0,0 +1,79 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+
+static int eco_log_malloc = 0;
+
+/* Switch on the stderr trace emitted by eco_malloc/eco_realloc/eco_free. */
+void    eco_trace_memory_allocation()
+{
+	eco_log_malloc = 1;
+}
+
+/* Switch off the stderr trace emitted by eco_malloc/eco_realloc/eco_free. */
+void    eco_untrace_memory_allocation()
+{
+	eco_log_malloc = 0;
+}
+
+
+/*
+ * Allocate a zero-filled memory segment of chunksize bytes.
+ *
+ * When the allocation fails, ecoError() is called with ECO_MEM_ERROR,
+ * error_message and the caller location (filename, line). When tracing is
+ * enabled the allocation is logged on stderr.
+ *
+ * Returns the pointer obtained from calloc().
+ */
+void   *eco_malloc(int32_t chunksize,
+                   const char *error_message,
+                   const char *filename,
+                   int32_t    line)
+{
+	void *segment = calloc(1,chunksize);
+
+	if (segment == NULL)
+		ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+	if (eco_log_malloc != 0)
+	{
+		fprintf(stderr,
+		        "Memory segment located at %p of size %d is allocated (file : %s [%d])",
+		        segment,
+		        chunksize,
+		        filename,
+		        line);
+	}
+
+	return segment;
+}
+
+/*
+ * Resize a memory segment to newsize bytes through realloc().
+ *
+ * When realloc() returns NULL, ecoError() is called with ECO_MEM_ERROR,
+ * error_message and the caller location (filename, line). When tracing is
+ * enabled both the old and the new addresses are logged on stderr.
+ *
+ * Returns the pointer obtained from realloc().
+ */
+void   *eco_realloc(void *chunk,
+                    int32_t newsize,
+                    const char *error_message,
+                    const char *filename,
+                    int32_t    line)
+{
+	void *resized = realloc(chunk,newsize);
+
+	if (resized == NULL)
+		ecoError(ECO_MEM_ERROR,error_message,filename,line);
+
+	if (eco_log_malloc != 0)
+	{
+		fprintf(stderr,
+		        "Old memory segment %p is reallocated at %p with a size of %d (file : %s [%d])",
+		        chunk,
+		        resized,
+		        newsize,
+		        filename,
+		        line);
+	}
+
+	return resized;
+}
+
+/*
+ * Release a memory segment with free().
+ *
+ * error_message is not an error here: it is only the label printed in the
+ * trace line, together with the caller location, when tracing is enabled.
+ */
+void    eco_free(void *chunk,
+                 const char *error_message,
+                 const char *filename,
+                 int32_t    line)
+{
+	free(chunk);
+
+	if (eco_log_malloc != 0)
+	{
+		fprintf(stderr,
+		        "Memory segment %p is released => %s (file : %s [%d])",
+		        chunk,
+		        error_message,
+		        filename,
+		        line);
+	}
+}
--- a/ROBITools/src/ecoMalloc.o
+++ b/ROBITools/src/ecoMalloc.o
--- a/ROBITools/src/ecoPCR.h
+++ b/ROBITools/src/ecoPCR.h
@@ -0,0 +1,283 @@
+#ifndef ECOPCR_H_
+#define ECOPCR_H_
+
+#include <stdio.h>
+#include <inttypes.h>
+
+#include <R.h>
+#include <Rinternals.h>
+#include <Rdefines.h>
+
+
+//#ifndef H_apat
+//#include "../libapat/apat.h"
+//#endif
+
+/*****************************************************
+ * 
+ *  Data type declarations
+ * 
+ *****************************************************/
+
+/*
+ * 
+ *  Sequence types
+ * 
+ */
+
+/*
+ * Raw layout of one sequence record as returned by read_ecorecord().
+ * readnext_ecoseq() byte-swaps the integer fields on big-endian hosts
+ * and converts the record into an ecoseq_t.
+ */
+typedef struct {
+	
+	int32_t  taxid;
+	char     AC[20];       /* accession, read as a C string */
+	int32_t  DE_length;    /* number of definition bytes at the start of data */
+	int32_t  SQ_length;    /* size of the sequence once uncompressed */
+	int32_t  CSQ_length;   /* number of compressed bytes following the definition */
+	
+	char     data[1];      /* variable-length payload: definition, then compressed sequence */
+	
+} ecoseqformat_t;
+
+/*
+ * In-memory sequence. AC, DE and SQ are separately allocated strings
+ * released by delete_ecoseq().
+ */
+typedef struct {
+	int32_t taxid;
+	int32_t SQ_length;     /* sequence length copied from the raw record */
+	char    *AC;           /* accession */
+	char    *DE;           /* definition */
+	char    *SQ;           /* sequence, upper-cased by readnext_ecoseq() */
+} ecoseq_t;
+
+/*
+ * 
+ * Taxonomy taxon types
+ * 
+ */
+
+
+/*
+ * Raw layout of one taxon record as returned by read_ecorecord().
+ * readnext_ecotaxon() byte-swaps the integer fields on big-endian hosts.
+ */
+typedef struct {
+	int32_t  taxid;
+	int32_t  rank;         /* NOTE(review): presumably an index into the rank index - confirm */
+	int32_t	 parent;       /* position of the parent in the taxon array (see read_taxonomyidx) */
+	int32_t  namelength;   /* number of bytes of name */
+	char     name[1];      /* variable-length name, namelength bytes */
+	
+} ecotxformat_t;
+
+/*
+ * In-memory taxon node. The root node is its own parent (the tree walks
+ * in ecotax.c stop when parent == node).
+ */
+typedef struct ecotxnode {
+	int32_t           taxid;
+	int32_t           rank;
+	int32_t  		      farest;   /* longest branch length below this node, computed by read_taxonomyidx() */
+	struct ecotxnode  *parent;
+	char              *name;     /* separately allocated scientific name */
+} ecotx_t;
+
+/*
+ * Taxon index: a header followed by a variable-length array of taxa
+ * allocated in the same memory segment.
+ */
+typedef struct {
+	int32_t count;         /* number of taxa stored in taxon[] */
+	int32_t maxtaxid;      /* largest taxid seen while loading */
+  int32_t buffersize;    /* set equal to count by read_taxonomyidx() */
+	ecotx_t taxon[1];      /* variable-length array, count entries */
+} ecotxidx_t;
+ 
+	
+/*
+ * 
+ * Taxonomy rank types
+ * 
+ */
+	
+/*
+ * Rank index: count separately allocated rank labels. rank_index()
+ * searches label[] with bsearch(), so the labels are expected to be sorted.
+ */
+typedef struct {
+	int32_t count;
+	char*   label[1];      /* variable-length array, count entries */
+} ecorankidx_t;
+
+/*
+ * 
+ * Taxonomy name types
+ * 
+ */
+
+/*
+ * Raw layout of one name record as returned by read_ecorecord().
+ * readnext_econame() byte-swaps the integer fields on big-endian hosts.
+ */
+typedef struct {
+ 	int32_t is_scientificname;
+	int32_t  namelength;   /* number of bytes of name at the start of names */
+	int32_t  classlength;  /* number of bytes of class name following the name */
+	int32_t  taxid;        /* used as a position in the taxon array by readnext_econame() */
+	char     names[1];	    /* variable-length payload: name, then class name */
+} econameformat_t;
+ 
+ 
+ /*
+  * In-memory taxon name. name and classname are separately allocated
+  * strings; taxon points into the taxon array of the owning taxonomy.
+  */
+ typedef struct {
+ 	char 	*name;
+ 	char 	*classname;
+ 	int32_t is_scientificname;
+ 	struct ecotxnode  *taxon;
+} econame_t;
+
+ 
+/*
+ * Name index: a header followed by a variable-length array of names
+ * allocated in the same memory segment.
+ */
+typedef struct {
+	int32_t count;
+	econame_t   names[1];  /* variable-length array, count entries */
+} econameidx_t;
+
+
+ /*
+  * Complete taxonomy as built by read_taxonomy(). names is NULL when the
+  * alternative names were not requested.
+  */
+ typedef struct {
+	ecorankidx_t *ranks;
+	econameidx_t *names;
+	ecotxidx_t   *taxons;
+} ecotaxonomy_t;
+
+ 
+/*****************************************************
+ * 
+ *  Function declarations
+ * 
+ *****************************************************/
+
+/*
+ * 
+ * Low level system functions
+ * 
+ */
+
+int32_t is_big_endian();
+int32_t swap_int32_t(int32_t);
+
+void   *eco_malloc(int32_t chunksize,
+                   const char *error_message,
+                   const char *filename,
+                   int32_t    line);
+                   
+                   
+void   *eco_realloc(void *chunk,
+                    int32_t chunksize,
+                    const char *error_message,
+                    const char *filename,
+                    int32_t    line);
+                    
+void    eco_free(void *chunk,
+                 const char *error_message,
+                 const char *filename,
+                 int32_t    line);
+                 
+void    eco_trace_memory_allocation();
+void    eco_untrace_memory_allocation();
+
+/*
+ * Convenience wrappers around eco_malloc / eco_realloc / eco_free that
+ * automatically pass the location of the call site (__FILE__, __LINE__).
+ */
+#define ECOMALLOC(size,error_message) \
+	    eco_malloc((size),(error_message),__FILE__,__LINE__)
+	   
+#define ECOREALLOC(chunk,size,error_message) \
+        eco_realloc((chunk),(size),(error_message),__FILE__,__LINE__)
+        
+#define ECOFREE(chunk,error_message) \
+        eco_free((chunk),(error_message),__FILE__,__LINE__)
+        
+        
+
+
+/*
+ * 
+ * Error management
+ * 
+ */
+ 
+  
+/*
+ * Report an error: arguments are the error code (one of the ECO_*_ERROR
+ * values below), the message, and the source file and line of the caller.
+ * NOTE(review): whether ecoError() returns to its caller is not visible
+ * from this header - confirm in its implementation.
+ */
+void ecoError(int32_t,const char*,const char *,int);
+
+/* Call ecoError() with the location of the call site filled in. */
+#define ECOERROR(code,message) ecoError((code),(message),__FILE__,__LINE__)
+
+/* Error codes accepted by ecoError(). */
+#define ECO_IO_ERROR       (1)
+#define ECO_MEM_ERROR      (2)
+#define ECO_ASSERT_ERROR   (3)
+#define ECO_NOTFOUND_ERROR (4)
+
+
+/*
+ * 
+ * Low level Disk access functions
+ * 
+ */
+
+FILE *open_ecorecorddb(const char *filename,
+                       int32_t    *sequencecount,
+                       int32_t    abort_on_open_error);
+                       
+void *read_ecorecord(FILE *,int32_t *recordSize);
+
+
+
+/* 
+ *   Read function in internal binary format
+ */
+
+FILE             *open_ecoseqdb(const char *filename,
+                                int32_t    *sequencecount);
+                                                                
+ecoseq_t         *readnext_ecoseq(FILE *);
+
+ecorankidx_t     *read_rankidx(const char *filename);
+
+econameidx_t     *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy);
+
+
+
+	/**
+	 * Read taxonomy data as formatted by the ecoPCRFormat.py script.
+	 * 
+	 * This function is normally used internally by the read_taxonomy
+	 * function and should not be called directly.
+	 * 
+	 * @arg filename  path to the *.tdx file of the reformatted db
+	 * 
+	 * @return pointer to a taxonomy index structure
+	 */
+ 
+ecotxidx_t       *read_taxonomyidx(const char *filename,const char *filename2);
+
+ecotaxonomy_t    *read_taxonomy(const char *prefix,int32_t readAlternativeName);
+
+ecotx_t *eco_findtaxonatrank(ecotx_t *taxon, int32_t rankidx);
+
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, int32_t taxid);
+
+int eco_isundertaxon(ecotx_t *taxon, int other_taxid);
+
+ecoseq_t *ecoseq_iterator(const char *prefix);
+
+
+
+ecoseq_t *new_ecoseq();
+int32_t   delete_ecoseq(ecoseq_t *);
+ecoseq_t *new_ecoseq_with_data( char *AC,
+								char *DE,
+								char *SQ,
+								int32_t   taxid
+								);
+
+
+int32_t delete_taxon(ecotx_t *taxon);
+int32_t delete_taxonomy(ecotxidx_t *index);
+int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy);
+
+
+int32_t rank_index(const char* label,ecorankidx_t* ranks);
+
+//int32_t  delete_apatseq(SeqPtr pseq);
+//PatternPtr buildPattern(const char *pat, int32_t error_max);
+//PatternPtr complementPattern(PatternPtr pat);
+//
+//SeqPtr ecoseq2apatseq(ecoseq_t *in,SeqPtr out,int32_t circular);
+
+//char *ecoComplementPattern(char *nucAcSeq);
+//char *ecoComplementSequence(char *nucAcSeq);
+//char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end);
+
+ecotx_t *eco_getspecies(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getgenus(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getfamily(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,ecotaxonomy_t *taxonomy);
+
+//int eco_is_taxid_ignored(int32_t *ignored_taxid, int32_t tab_len, int32_t taxid);
+//int eco_is_taxid_included(ecotaxonomy_t *taxonomy, int32_t *included_taxid, int32_t tab_len, int32_t taxid);
+
+
+ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
+
+#endif /*ECOPCR_H_*/
--- a/ROBITools/src/ecodna.c
+++ b/ROBITools/src/ecodna.c
@@ -0,0 +1,156 @@
+#include <string.h>
+#include "ecoPCR.h"
+
+/*
+ * @doc: DNA alphabet (IUPAC)
+ */
+#define LX_BIO_DNA_ALPHA   "ABCDEFGHIJKLMNOPQRSTUVWXYZ#![]"
+
+/*
+ * @doc: complementary DNA alphabet (IUPAC)
+ */
+#define LX_BIO_CDNA_ALPHA  "TVGHEFCDIJMLKNOPQYSAABWXRZ#!]["
+
+
+static char sNuc[]     = LX_BIO_DNA_ALPHA;
+static char sAnuc[]    = LX_BIO_CDNA_ALPHA;
+
+static char LXBioBaseComplement(char nucAc);
+static char *LXBioSeqComplement(char *nucAcSeq);
+static char *reverseSequence(char *str,char isPattern);
+
+ 
+/* ---------------------------- */
+
+/*
+ * Complement a single symbol: the symbol is looked up in the DNA alphabet
+ * and the character at the same position of the complementary alphabet is
+ * returned. Symbols absent from the alphabet are returned unchanged.
+ */
+char LXBioBaseComplement(char nucAc)
+{
+    char *hit = strchr(sNuc, nucAc);
+
+    if (hit == NULL)
+        return nucAc;
+
+    return sAnuc[hit - sNuc];
+}
+
+/* ---------------------------- */
+
+/*
+ * Complement every symbol of a NUL-terminated sequence in place (the
+ * order of the symbols is not changed). Returns nucAcSeq.
+ */
+char *LXBioSeqComplement(char *nucAcSeq)
+{
+    char *cursor = nucAcSeq;
+
+    while (*cursor != 0)
+    {
+        *cursor = LXBioBaseComplement(*cursor);
+        cursor++;
+    }
+
+    return nucAcSeq;
+}
+
+
+/*
+ * Reverse a NUL-terminated string in place.
+ *
+ * When isPattern is non-zero a second pass moves the '#' and '!' markers,
+ * which the reversal placed in front of their neighbour, back after it.
+ *
+ * @param str        string to reverse (may be NULL)
+ * @param isPattern  non-zero to run the marker fix-up pass
+ * @return str
+ */
+char *reverseSequence(char *str,char isPattern)
+{
+        char   *sb, *se, c;
+        size_t  len;
+
+        if (! str)
+            return str;
+
+        /* Nothing to do for 0 or 1 character; this also avoids forming
+           the out-of-range pointer str - 1 for an empty string */
+        len = strlen(str);
+        if (len < 2)
+            return str;
+
+        sb = str;
+        se = str + len - 1;
+
+        while(sb <= se) {
+           c    = *sb;
+          *sb++ = *se;
+          *se-- = c;
+        }
+
+		sb = str;
+		se = str + len - 1;
+
+		if (isPattern)
+			for (;sb < se; sb++)
+			{
+				if (*sb=='#')
+				{
+					if (((se - sb) > 2) && (*(sb+2)=='!'))
+					{
+						*sb='!';
+						sb+=2;
+						*sb='#';
+					}
+					else
+					{
+						*sb=*(sb+1);
+						sb++;
+						*sb='#';
+					}
+				}
+				/* sb > str: a leading '!' has no previous character to swap
+				   with; the original code accessed str[-1] in that case.
+				   NOTE(review): the marker is then left in front - confirm
+				   the intended placement of '!' with the pattern syntax. */
+				else if ((*sb=='!') && (sb > str))
+					{
+						*sb=*(sb-1);
+						*(sb-1)='!';
+					}
+			}
+
+        return str;
+}
+
+/*
+ * Reverse-complement a pattern in place: symbols are complemented, the
+ * string is reversed and the pattern markers are fixed up. Returns nucAcSeq.
+ */
+char *ecoComplementPattern(char *nucAcSeq)
+{
+    char *complemented = LXBioSeqComplement(nucAcSeq);
+
+    return reverseSequence(complemented,1);
+}
+
+/*
+ * Reverse-complement a plain sequence in place: symbols are complemented
+ * and the string is reversed. Returns nucAcSeq.
+ */
+char *ecoComplementSequence(char *nucAcSeq)
+{
+    char *complemented = LXBioSeqComplement(nucAcSeq);
+
+    return reverseSequence(complemented,0);
+}
+
+
+char *getSubSequence(char* nucAcSeq,int32_t begin,int32_t end)
+/*
+   Extract the subsequence [begin,end[ of nucAcSeq.
+
+   When begin is not lower than end the sequence is treated as circular:
+   the result is the tail starting at begin followed by the first end
+   symbols.
+
+   The result lives in a static buffer owned by this function: it is
+   overwritten by the next call and must not be freed by the caller.
+*/
+{
+	static char *buffer  = NULL;
+	static int32_t buffSize= 0;
+	int32_t length;
+	int32_t tail;
+	int     wraps = !(begin < end);
+
+	if (wraps)
+		length = end + strlen(nucAcSeq) - begin;
+	else
+		length = end - begin;
+
+	/* Grow the shared buffer so that it holds length symbols and the NUL */
+	if (length >= buffSize)
+	{
+		buffSize = length+1;
+
+		if (buffer)
+			buffer=ECOREALLOC(buffer,buffSize,
+			                  "Error in reallocating sub sequence buffer");
+		else
+			buffer=ECOMALLOC(buffSize,
+			                 "Error in allocating sub sequence buffer");
+	}
+
+	if (wraps)
+	{
+		tail = length - end;
+		strncpy(buffer,nucAcSeq+begin,tail);
+		strncpy(buffer+tail,nucAcSeq,end);
+	}
+	else
+		strncpy(buffer,nucAcSeq + begin,length);
+
+	buffer[length]=0;
+
+	return buffer;
+}
+
--- a/ROBITools/src/ecodna.o
+++ b/ROBITools/src/ecodna.o
--- a/ROBITools/src/ecofilter.c
+++ b/ROBITools/src/ecofilter.c
@@ -0,0 +1,20 @@
+#include "ecoPCR.h"
+
+/*
+ * Tell whether taxid is one of the tab_len taxids listed in
+ * restricted_taxid, or belongs to a taxon located under one of them.
+ *
+ * Returns 1 when it does, 0 otherwise (including when taxid is not found
+ * in the taxonomy).
+ */
+int eco_is_taxid_included(	ecotaxonomy_t *taxonomy, 
+							int32_t *restricted_taxid, 
+							int32_t tab_len, 
+							int32_t taxid)
+{
+	ecotx_t *taxon = eco_findtaxonbytaxid(taxonomy, taxid);
+	int      idx;
+
+	if (taxon == NULL)
+		return 0;
+
+	for (idx = 0; idx < tab_len; idx++)
+	{
+		if (taxon->taxid == restricted_taxid[idx])
+			return 1;
+
+		if (eco_isundertaxon(taxon, restricted_taxid[idx]))
+			return 1;
+	}
+
+	return 0;
+}
--- a/ROBITools/src/ecofilter.o
+++ b/ROBITools/src/ecofilter.o
--- a/ROBITools/src/econame.c
+++ b/ROBITools/src/econame.c
@@ -0,0 +1,64 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+
+static econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy);
+
+/*
+ * Load the name index stored in filename.
+ *
+ * @param filename  path of the name database
+ * @param taxonomy  taxonomy whose taxon array the names point into; its
+ *                  taxons member must already be loaded
+ * @return the newly allocated name index, or NULL when the file cannot
+ *         be opened
+ */
+econameidx_t *read_nameidx(const char *filename,ecotaxonomy_t *taxonomy)
+{
+
+	int32_t      		count;
+	FILE         		*f;
+	econameidx_t		*indexname;
+	int32_t      		i;
+	
+	f = open_ecorecorddb(filename,&count,0);
+	
+	if (f==NULL)
+		return NULL;
+
+	/* econameidx_t already embeds one econame_t, hence count-1 */
+	indexname = (econameidx_t*) ECOMALLOC(sizeof(econameidx_t) + sizeof(econame_t) * (count-1),"Allocate names");
+	
+	indexname->count=count;
+	                                    
+	for (i=0; i < count; i++){
+		readnext_econame(f,(indexname->names)+i,taxonomy);
+	}
+
+	/* The stream is only needed while loading: do not leak it */
+	fclose(f);
+
+	return indexname;
+}
+
+/*
+ * Read the next name record from f and store it into *name.
+ *
+ * The name and class name are copied into newly allocated strings and the
+ * taxon pointer is set to the entry of the taxon array designated by the
+ * record.
+ *
+ * Returns name, or NULL when no record could be read.
+ */
+econame_t *readnext_econame(FILE *f,econame_t *name,ecotaxonomy_t *taxonomy)
+{
+	int32_t          recordsize;
+	int32_t          namelen;
+	int32_t          classlen;
+	econameformat_t *record = read_ecorecord(f,&recordsize);
+
+	if (record == NULL)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		record->is_scientificname = swap_int32_t(record->is_scientificname);
+		record->namelength        = swap_int32_t(record->namelength);
+		record->classlength       = swap_int32_t(record->classlength);
+		record->taxid             = swap_int32_t(record->taxid);
+	}
+
+	namelen  = record->namelength;
+	classlen = record->classlength;
+
+	name->is_scientificname = record->is_scientificname;
+
+	name->name = ECOMALLOC((namelen+1) * sizeof(char),"Allocate name");
+	strncpy(name->name,record->names,namelen);
+	name->name[namelen] = 0;
+
+	/* The class name starts right after the name in the payload */
+	name->classname = ECOMALLOC((classlen+1) * sizeof(char),"Allocate classname");
+	strncpy(name->classname,record->names + namelen,classlen);
+	name->classname[classlen] = 0;
+
+	name->taxon = &(taxonomy->taxons->taxon[record->taxid]);
+
+	return name;
+}
+
--- a/ROBITools/src/econame.o
+++ b/ROBITools/src/econame.o
--- a/ROBITools/src/ecorank.c
+++ b/ROBITools/src/ecorank.c
@@ -0,0 +1,55 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+
+static int compareRankLabel(const void *label1, const void *label2);
+
+/*
+ * Load the rank index stored in filename.
+ *
+ * Each record of the file is one rank label, copied into a newly
+ * allocated NUL-terminated string.
+ *
+ * @param filename  path of the rank database
+ * @return the newly allocated rank index, or NULL when the file cannot
+ *         be opened
+ */
+ecorankidx_t     *read_rankidx(const char *filename)
+{
+	int32_t      count;
+	FILE         *f;
+	ecorankidx_t *index;
+	int32_t      i;
+	int32_t      rs;
+	char         *buffer;
+	
+	f = open_ecorecorddb(filename,&count,0);
+	
+	if (f==NULL)
+		return NULL;
+
+	/* ecorankidx_t already embeds one label slot, hence count-1 */
+	index = (ecorankidx_t*) ECOMALLOC(sizeof(ecorankidx_t) + sizeof(char*) * (count-1),
+	                                  "Allocate rank index");
+	 
+	index->count=count;                                 
+	          
+	for (i=0; i < count; i++)
+		{
+			buffer = read_ecorecord(f,&rs);
+
+			if (buffer==NULL)
+			{
+				/* The file holds fewer records than announced: keep only
+				   the labels actually read so that no NULL label is ever
+				   searched, then report the problem. */
+				index->count=i;
+				fclose(f);
+				ECOERROR(ECO_IO_ERROR,"Unexpected end of rank index file");
+				return index;
+			}
+
+			/* The label is zero-filled on allocation, so copying rs bytes
+			   into rs+1 leaves it NUL-terminated */
+			index->label[i]=(char*) ECOMALLOC(rs+1,
+			                                  "Allocate rank label");
+			strncpy(index->label[i],buffer,rs);
+		}
+
+	/* The stream is only needed while loading: do not leak it */
+	fclose(f);
+		
+	return index;
+}
+
+/*
+ * Return the position of label in the rank index, or -1 when the label
+ * is not present. The lookup is a binary search over ranks->label.
+ */
+int32_t rank_index(const char* label,ecorankidx_t* ranks)
+{
+	char **found;
+
+	found = bsearch(label,
+	                ranks->label,
+	                ranks->count,
+	                sizeof(char*),
+	                compareRankLabel);
+
+	if (found == NULL)
+		return -1;
+
+	return found - ranks->label;
+}
+
+
+/*
+ * bsearch() comparator: label1 is the searched key (a C string) and
+ * label2 points to an element of the label array (a pointer to a C string).
+ */
+int compareRankLabel(const void *label1, const void *label2)
+{
+	const char *key       = (const char*)label1;
+	const char *candidate = *(const char**)label2;
+
+	return strcmp(key,candidate);
+}
--- a/ROBITools/src/ecorank.o
+++ b/ROBITools/src/ecorank.o
--- a/ROBITools/src/ecoseq.c
+++ b/ROBITools/src/ecoseq.c
@@ -0,0 +1,230 @@
+#include "ecoPCR.h"
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+
+static FILE *open_seqfile(const char *prefix,int32_t index);
+
+
+/*
+ * Allocate an empty sequence structure. ECOMALLOC zero-fills the memory,
+ * so every pointer member starts as NULL.
+ */
+ecoseq_t *new_ecoseq()
+{
+	return ECOMALLOC(sizeof(ecoseq_t),"Allocate new ecoseq structure");
+}
+
+/*
+ * Release a sequence together with its accession, definition and
+ * sequence strings.
+ *
+ * Returns 0 when something was released, 1 when seq is NULL.
+ */
+int32_t delete_ecoseq(ecoseq_t * seq)
+{
+	if (seq == NULL)
+		return 1;
+
+	if (seq->AC != NULL)
+		ECOFREE(seq->AC,"Free sequence AC");
+
+	if (seq->DE != NULL)
+		ECOFREE(seq->DE,"Free sequence DE");
+
+	if (seq->SQ != NULL)
+		ECOFREE(seq->SQ,"Free sequence SQ");
+
+	ECOFREE(seq,"Free sequence structure");
+
+	return 0;
+}
+
+/*
+ * Build a sequence from copies of the given strings.
+ *
+ * @param AC         accession, or NULL
+ * @param DE         definition, or NULL
+ * @param SQ         sequence, or NULL
+ * @param taxid_idx  value stored in the taxid member
+ * @return the newly allocated sequence; members whose argument is NULL
+ *         stay NULL and SQ_length stays 0 when SQ is NULL
+ */
+ecoseq_t *new_ecoseq_with_data( char *AC,
+								char *DE,
+								char *SQ,
+								int32_t   taxid_idx
+								)
+{
+	ecoseq_t *tmp;
+	int32_t lstr;
+	tmp = new_ecoseq();
+
+	tmp->taxid=taxid_idx;
+
+	if (AC)
+		{
+			lstr =strlen(AC);
+			tmp->AC=ECOMALLOC((lstr+1) * sizeof(char),
+			                  "Allocate sequence accession");
+			strcpy(tmp->AC,AC);
+		}
+
+	if (DE)
+		{
+			lstr =strlen(DE);
+			tmp->DE=ECOMALLOC((lstr+1) * sizeof(char),
+			                  "Allocate sequence definition");
+			strcpy(tmp->DE,DE);
+		}
+
+	if (SQ)
+		{
+			lstr =strlen(SQ);
+			tmp->SQ=ECOMALLOC((lstr+1) * sizeof(char),
+			                  "Allocate sequence data");
+			strcpy(tmp->SQ,SQ);
+			/* Keep SQ_length in sync with SQ, as readnext_ecoseq() does */
+			tmp->SQ_length=lstr;
+		}
+	return tmp;
+
+}
+
+/**
+ * Open a sequence database file and read its sequence count.
+ * Unlike the other readers, failure to open the file is reported as an
+ * error (abort_on_open_error is set).
+ **/
+FILE *open_ecoseqdb(const char *filename,
+                    int32_t    *sequencecount)
+{
+	FILE *db = open_ecorecorddb(filename,sequencecount,1);
+
+	return db;
+}
+
+/*
+ * Read the next sequence record from f and convert it into a newly
+ * allocated ecoseq_t.
+ *
+ * The accession and definition are copied, the sequence is uncompressed
+ * with zlib and converted to upper case.
+ *
+ * @param f  stream returned by open_ecorecorddb()/open_ecoseqdb()
+ * @return the sequence, or NULL when no record could be read
+ */
+ecoseq_t *readnext_ecoseq(FILE *f)
+{
+	char     *compressed=NULL;
+
+	ecoseqformat_t *raw;
+	ecoseq_t *seq;
+	int32_t  comp_status;
+	unsigned long int seqlength;
+	unsigned long int i;
+	int32_t  rs;
+	const char *ac_end;
+	size_t   ac_length;
+
+	raw = read_ecorecord(f,&rs);
+
+	if (!raw)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		raw->CSQ_length = swap_int32_t(raw->CSQ_length);
+		raw->DE_length  = swap_int32_t(raw->DE_length);
+		raw->SQ_length  = swap_int32_t(raw->SQ_length);
+		raw->taxid      = swap_int32_t(raw->taxid);
+	}
+
+	seq = new_ecoseq();
+
+	seq->taxid = raw->taxid;
+
+	/* AC is a fixed-size field: it may fill all its bytes without a
+	   terminating NUL, so its length is bounded by the field size */
+	ac_end    = memchr(raw->AC,0,sizeof(raw->AC));
+	ac_length = ac_end ? (size_t)(ac_end - raw->AC) : sizeof(raw->AC);
+
+	/* Buffers are zero-filled on allocation, which provides the NUL */
+	seq->AC    = ECOMALLOC(ac_length +1,
+	                       "Allocate Sequence Accesion number");
+	memcpy(seq->AC,raw->AC,ac_length);
+
+	seq->DE    = ECOMALLOC(raw->DE_length+1,
+	                       "Allocate Sequence definition");
+	strncpy(seq->DE,raw->data,raw->DE_length);
+
+	seqlength = seq->SQ_length = raw->SQ_length;
+
+	/* The compressed sequence follows the definition in the payload */
+	compressed = raw->data + raw->DE_length;
+
+	seq->SQ = ECOMALLOC(seqlength+1,
+	                    "Allocate sequence buffer");
+
+	comp_status = uncompress((unsigned char*)seq->SQ,
+	                         &seqlength,
+	                         (unsigned char*)compressed,
+	                         raw->CSQ_length);
+
+	if (comp_status != Z_OK)
+		ECOERROR(ECO_IO_ERROR,"I cannot uncompress sequence data");
+
+	/* toupper() requires a value representable as unsigned char */
+	for (i=0;i<seqlength;i++)
+		seq->SQ[i]=toupper((unsigned char)seq->SQ[i]);
+
+	return seq;
+}
+
+/**
+ * Open one file of the sequence database (<prefix>_<index>.sdx, the index
+ * being written on at least three digits).
+ * @param	prefix	name of the database (radical without extension)
+ * @param	index 	number of the file to open
+ *
+ * @return	the opened stream, or NULL when the file does not exist or
+ * 			its name does not fit in the internal buffer
+ */
+FILE *open_seqfile(const char *prefix,int32_t index)
+{
+	char           filename_buffer[1024];
+	int32_t        filename_length;
+	FILE           *input;
+	int32_t        seqcount;
+
+	/* snprintf always NUL-terminates and returns the length the complete
+	   name would need, which is what the overflow test relies on */
+	filename_length = snprintf(filename_buffer,
+	                           sizeof(filename_buffer),
+	                           "%s_%03d.sdx",
+	                           prefix,
+	                           index);
+
+	if (filename_length < 0 ||
+	    filename_length >= (int32_t)sizeof(filename_buffer))
+	{
+		ECOERROR(ECO_ASSERT_ERROR,"file name is too long");
+		/* Never open a truncated name if the error handler returns */
+		return NULL;
+	}
+
+	input=open_ecorecorddb(filename_buffer,&seqcount,0);
+
+	if (input)
+		fprintf(stderr,"# Reading file %s containing %d sequences...\n",
+				filename_buffer,
+				seqcount);
+
+	return input;
+}
+
+/*
+ * Iterate over all the sequences of a database split in numbered files.
+ *
+ * Call once with the database prefix to (re)start the iteration and get
+ * the first sequence, then with NULL to get the following ones. The
+ * iteration state is kept in static variables, so only one iteration can
+ * be active at a time.
+ *
+ * @param prefix  database prefix to start a new iteration, NULL to continue
+ * @return the next sequence, or NULL when there is no more sequence or
+ *         when no iteration has been started
+ */
+ecoseq_t *ecoseq_iterator(const char *prefix)
+{
+	static FILE    *current_seq_file= NULL;
+	static int32_t current_file_idx = 1;
+	static char    current_prefix[1024];
+	ecoseq_t       *seq;
+
+	if (prefix)
+	{
+		current_file_idx = 1;
+
+		if (current_seq_file)
+			fclose(current_seq_file);
+
+		strncpy(current_prefix,prefix,1023);
+		current_prefix[1023]=0;
+
+		current_seq_file = open_seqfile(current_prefix,
+		 							    current_file_idx);
+	}
+
+	/* No open file: the iteration was never started or is already
+	   exhausted. Reading from a NULL stream must be avoided. */
+	if (!current_seq_file)
+		return NULL;
+
+	seq = readnext_ecoseq(current_seq_file);
+
+	if (!seq && feof(current_seq_file))
+	{
+		/* End of the current file: move on to the next numbered file */
+		current_file_idx++;
+		fclose(current_seq_file);
+		current_seq_file = open_seqfile(current_prefix,
+		 							    current_file_idx);
+
+
+		if (current_seq_file)
+			seq = readnext_ecoseq(current_seq_file);
+	}
+
+	return seq;
+}
--- a/ROBITools/src/ecoseq.o
+++ b/ROBITools/src/ecoseq.o
--- a/ROBITools/src/ecotax.c
+++ b/ROBITools/src/ecotax.c
@@ -0,0 +1,437 @@
+#include "ecoPCR.h"
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <R.h>
+
+#ifndef MAX
+#define MAX(x,y) (((x)>(y)) ? (x):(y))
+#endif
+
+static ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon);
+
+ /** 
+ * Load the taxon index from the main taxonomy file and, when present,
+ * from the local taxa file, then compute for every node the length of
+ * the longest branch below it (member farest).
+ * @param	filename	path to the main taxon database (.tdx file)
+ * @param	filename2	path to the local taxon database (.ldx file);
+ * 						a missing file is treated as holding no taxon
+ * @return	a newly allocated ecotxidx_t structure, or NULL when the
+ * 			main file cannot be opened
+ */
+ecotxidx_t     *read_taxonomyidx(const char *filename,const char *filename2)
+{
+	int32_t      count;
+	int32_t      count2;
+	FILE         *f;
+	FILE         *f2;
+	ecotxidx_t *index;
+	struct ecotxnode  *t;
+	int32_t      i;
+	int32_t      j;
+	
+	f  = open_ecorecorddb(filename,&count,0);
+
+	if (f==NULL) return NULL;
+
+	/* count2 is set to 0 when the local file cannot be opened */
+	f2 = open_ecorecorddb(filename2,&count2,0);
+
+	/* ecotxidx_t already embeds one ecotx_t, hence the -1 */
+	index = (ecotxidx_t*) ECOMALLOC(sizeof(ecotxidx_t) + sizeof(ecotx_t) * (count+count2-1),
+	                                  "Allocate taxonomy");
+	 
+	index->count=count+count2;
+  index->buffersize = index->count;
+
+	index->maxtaxid=0;
+	REprintf("Readind %d taxa...\n",count);
+	for (i=0; i < count; i++){
+		readnext_ecotaxon(f,&(index->taxon[i]));
+		/* readnext_ecotaxon stores the parent position in the pointer
+		   member: turn it into a real pointer into the taxon array */
+		index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
+		index->taxon[i].parent->farest=0;
+		if (index->taxon[i].taxid > index->maxtaxid)
+			index->maxtaxid=index->taxon[i].taxid;
+	}					
+
+	/* The main stream is only needed while loading: do not leak it */
+	fclose(f);
+
+	if (count2>0)
+		REprintf("Readind %d local taxa...\n",count2);
+	else
+		REprintf("No local taxon\n");
+
+	count = index->count;
+
+	for (; i < count; i++){
+		readnext_ecotaxon(f2,&(index->taxon[i]));
+		index->taxon[i].parent=index->taxon + (size_t)index->taxon[i].parent;
+		index->taxon[i].parent->farest=0;
+		if (index->taxon[i].taxid > index->maxtaxid)
+			index->maxtaxid=index->taxon[i].taxid;
+	}
+
+	if (f2)
+		fclose(f2);
+
+	REprintf("Computing longest branches...\n");
+
+	/* Nodes still marked -1 are used as starting points: each one is set
+	   to 0 and its branch length is propagated up to the ancestors for
+	   as long as it improves their current value. */
+	for (i=0; i < count; i++){
+		t=index->taxon+i;
+		if (t->farest==-1)
+		{
+			t->farest=0;
+            while(t->parent != t)
+            {
+            	j = t->farest + 1;
+            	if (j > t->parent->farest)
+            	{
+            		t->parent->farest = j;
+            		t=t->parent;
+            	}
+            	else
+            		/* No improvement: jump to the first taxon to end the
+            		   walk (presumably the root, which is its own parent) */
+            		t=index->taxon;
+            }
+		}
+	}
+
+	return index;
+}
+
+
+/*
+ * Release a taxon index together with the name of each of its taxa.
+ *
+ * Returns 0 when something was released, 1 when index is NULL.
+ */
+int32_t delete_taxonomy(ecotxidx_t *index)
+{
+	int32_t  position;
+	char    *name;
+
+	if (index == NULL)
+		return 1;
+
+	for (position = 0; position < index->count; position++)
+	{
+		name = index->taxon[position].name;
+
+		if (name != NULL)
+			ECOFREE(name,"Free scientific name");
+	}
+
+	ECOFREE(index,"Free Taxonomy");
+
+	return 0;
+}
+
+
+
+/*
+ * Release a single, individually allocated taxon and its name.
+ *
+ * Returns 0 when something was released, 1 when taxon is NULL.
+ */
+int32_t delete_taxon(ecotx_t *taxon)
+{
+	if (taxon == NULL)
+		return 1;
+
+	if (taxon->name != NULL)
+		ECOFREE(taxon->name,"Free scientific name");
+
+	ECOFREE(taxon,"Free Taxon");
+
+	return 0;
+}
+
+
+/**
+ * Read the next taxon record of the database and store it into the
+ * given taxon structure.
+ *
+ * The parent member receives the parent position stored in the record,
+ * cast to a pointer; the caller is expected to convert it into a real
+ * pointer. The farest member is initialised to -1.
+ *
+ * @param	*f	stream returned by open_ecorecorddb
+ * @param	*taxon	structure to fill
+ * 
+ * @return	taxon, or NULL when no record could be read
+ */
+ecotx_t *readnext_ecotaxon(FILE *f,ecotx_t *taxon)
+{
+	int32_t        recordsize;
+	ecotxformat_t *record = read_ecorecord(f,&recordsize);
+
+	if (record == NULL)
+		return NULL;
+
+	if (is_big_endian())
+	{
+		record->namelength = swap_int32_t(record->namelength);
+		record->parent     = swap_int32_t(record->parent);
+		record->rank       = swap_int32_t(record->rank);
+		record->taxid      = swap_int32_t(record->taxid);
+	}
+
+	taxon->taxid  = record->taxid;
+	taxon->rank   = record->rank;
+	taxon->farest = -1;
+	taxon->parent = (ecotx_t*)((size_t)record->parent);
+
+	/* The buffer is zero-filled on allocation, which provides the NUL */
+	taxon->name = ECOMALLOC((record->namelength+1) * sizeof(char),
+	                        "Allocate taxon scientific name");
+	strncpy(taxon->name,record->name,record->namelength);
+
+	return taxon;
+}
+
+
+/*
+ * Load a complete taxonomy from the files sharing the given prefix:
+ * <prefix>.rdx (ranks), <prefix>.tdx and <prefix>.ldx (taxa) and, on
+ * request, <prefix>.ndx (names).
+ *
+ * @param prefix               path of the database without extension
+ * @param readAlternativeName  non-zero to also load the name index
+ * @return the newly allocated taxonomy, or NULL when the rank index or
+ *         the taxon index cannot be loaded
+ */
+ecotaxonomy_t    *read_taxonomy(const char *prefix,int32_t readAlternativeName)
+{
+	ecotaxonomy_t *tax;
+	char          *filename;
+	char          *filename2;
+	int           buffsize;
+	
+	tax = ECOMALLOC(sizeof(ecotaxonomy_t),
+	                "Allocate taxonomy structure");
+	
+	tax->ranks =NULL;
+	tax->taxons=NULL;
+	tax->names =NULL;
+
+	/* Room for the prefix, a four character extension and the NUL */
+	buffsize = strlen(prefix)+10;
+	
+	filename = ECOMALLOC(buffsize,
+	                     "Allocate filename");
+	filename2= ECOMALLOC(buffsize,
+	                     "Allocate filename");
+	
+	snprintf(filename,buffsize,"%s.rdx",prefix);
+	
+	tax->ranks = read_rankidx(filename);
+
+	if (tax->ranks == NULL)
+	{
+		ECOFREE(filename,"Desallocate filename 1");
+		ECOFREE(filename2,"Desallocate filename 2");
+
+		delete_ecotaxonomy(tax);
+		return NULL;
+	}
+	
+	snprintf(filename,buffsize,"%s.tdx",prefix);
+	snprintf(filename2,buffsize,"%s.ldx",prefix);
+	
+	tax->taxons = read_taxonomyidx(filename,filename2);
+	
+	if (tax->taxons == NULL)
+	{
+		/* Each buffer is released exactly once (filename used to be
+		   freed twice here while filename2 leaked) */
+		ECOFREE(filename,"Desallocate filename 1");
+		ECOFREE(filename2,"Desallocate filename 2");
+
+		delete_ecotaxonomy(tax);
+		return NULL;
+	}
+
+	if (readAlternativeName)
+	{
+  	   snprintf(filename,buffsize,"%s.ndx",prefix);
+	   tax->names=read_nameidx(filename,tax);
+	}
+	else
+	   tax->names=NULL;
+
+	ECOFREE(filename,"Desallocate filename 1");
+	ECOFREE(filename2,"Desallocate filename 2");
+
+	return tax;
+	
+}
+
+
+
+/*
+ * Release a taxonomy and everything it owns: the rank labels and the rank
+ * index, the name strings and the name index, the taxon names and the
+ * taxon index, and finally the taxonomy structure itself.
+ *
+ * NOTE(review): this assumes every label, name and class name was
+ * allocated individually with ECOMALLOC, as the readers of this library
+ * do - confirm for taxonomies built or modified elsewhere.
+ *
+ * Returns 0 when something was released, 1 when taxonomy is NULL.
+ */
+int32_t delete_ecotaxonomy(ecotaxonomy_t *taxonomy)
+{
+	int32_t i;
+
+	if (taxonomy)
+	{
+		if (taxonomy->ranks)
+		{
+			for (i=0; i < taxonomy->ranks->count; i++)
+				if (taxonomy->ranks->label[i])
+					ECOFREE(taxonomy->ranks->label[i],"Free rank label");
+
+			ECOFREE(taxonomy->ranks,"Free rank index");
+		}
+			
+		if (taxonomy->names)
+		{
+			for (i=0; i < taxonomy->names->count; i++)
+			{
+				if (taxonomy->names->names[i].name)
+					ECOFREE(taxonomy->names->names[i].name,"Free name");
+
+				if (taxonomy->names->names[i].classname)
+					ECOFREE(taxonomy->names->names[i].classname,"Free class name");
+			}
+
+			ECOFREE(taxonomy->names,"Free names index");
+		}
+
+		/* delete_taxonomy releases the taxon names and the index itself */
+		if (taxonomy->taxons)
+			delete_taxonomy(taxonomy->taxons);
+			
+		ECOFREE(taxonomy,"Free taxonomy structure");
+		
+		return 0;
+	}
+	
+	return 1;
+}
+
+/*
+ * Starting from taxon and walking up through the parents, return the
+ * first node whose rank is rankidx. The walk ends at the root, which is
+ * the node being its own parent.
+ *
+ * Returns the node found, or NULL when no node of the lineage (root
+ * included) has the requested rank.
+ */
+ecotx_t *eco_findtaxonatrank(ecotx_t *taxon,
+                                int32_t rankidx)
+{
+	ecotx_t *node = taxon;
+
+	while (node->rank != rankidx)
+	{
+		if (node->parent == node)  /* root reached without a match */
+			return NULL;
+
+		node = node->parent;
+	}
+
+	return node;
+}
+
+/*
+ * bsearch() comparator: ptaxid is the searched taxid itself, carried in
+ * the pointer value, and ptaxon points to an element of the taxon array.
+ */
+static int bcomptaxon (const void * ptaxid, const void * ptaxon) {
+
+  int32_t     wanted    = (int32_t)((size_t)ptaxid);
+  ecotx_t    *candidate = (ecotx_t*)ptaxon;
+
+  return wanted - candidate->taxid;
+}
+
+/**
+ * Find a taxon from its taxonomic id.
+ *
+ * The lookup is a binary search over the taxon array, which therefore
+ * has to be ordered by increasing taxid.
+ *
+ * @param 	*taxonomy 	the taxonomy database
+ * @param	taxid		the taxonomic id 
+ * 
+ * @result	the matching ecotx_t structure, or NULL when the taxid is
+ * 			not found
+ **/
+ecotx_t *eco_findtaxonbytaxid(ecotaxonomy_t *taxonomy, 
+							  int32_t taxid)
+{
+	ecotxidx_t *taxons = taxonomy->taxons;
+
+	/* The key is the taxid itself, carried in the pointer value; see
+	   bcomptaxon for the matching decoding. */
+	return (ecotx_t*) bsearch((const void *)((size_t)taxid),
+	                          (const void *)taxons->taxon,
+	                          taxons->count,
+	                          sizeof(ecotx_t),
+	                          bcomptaxon);
+}
+
+/**
+ * Find out if a taxon is a descendant of another taxon (identified by
+ * its taxid). The taxon itself is not tested, only its ancestors; the
+ * walk stops at the node named "root".
+ * @param	*taxon 			descendant candidate
+ * @param	other_taxid 	taxonomic id of the other taxon
+ * 
+ * @return 	1 if the other taxid matches an ancestor taxid, else 0
+ **/
+int eco_isundertaxon(ecotx_t *taxon, 
+						int other_taxid)
+{
+	ecotx_t *ancestor;
+
+	for (ancestor = taxon->parent; ; ancestor = ancestor->parent)
+	{
+		if (ancestor->taxid == other_taxid)
+			return 1;
+
+		if (strcmp(ancestor->name, "root") == 0)
+			return 0;
+	}
+}
+
+/*
+ * Shared implementation of the eco_get<rank> functions.
+ *
+ * The rank index of label is looked up only when a taxonomy different
+ * from the cached one is given; the cache (*cached_tax, *cached_rank)
+ * belongs to the calling function so that each rank keeps its own.
+ * An ECO_ASSERT_ERROR is raised when no taxonomy was ever given or when
+ * the taxonomy does not define the rank.
+ */
+static ecotx_t *eco_findtaxonatnamedrank(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy,
+						const char *label,
+						ecotaxonomy_t **cached_tax,
+						int32_t *cached_rank)
+{
+	if (taxonomy && *cached_tax!=taxonomy)
+	{
+		*cached_rank = rank_index(label,taxonomy->ranks);
+		*cached_tax  = taxonomy;
+	}
+
+	if (!*cached_tax || *cached_rank < 0)
+		ECOERROR(ECO_ASSERT_ERROR,"No taxonomy defined");
+
+	return eco_findtaxonatrank(taxon,*cached_rank);
+}
+
+/*
+ * Return the species level ancestor of taxon (or taxon itself), NULL when
+ * its lineage has none. taxonomy may be NULL to reuse the taxonomy given
+ * on a previous call.
+ */
+ecotx_t *eco_getspecies(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxonatnamedrank(taxon,taxonomy,"species",&tax,&rankindex);
+}
+
+/*
+ * Return the genus level ancestor of taxon (or taxon itself), NULL when
+ * its lineage has none. taxonomy may be NULL to reuse the taxonomy given
+ * on a previous call.
+ */
+ecotx_t *eco_getgenus(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxonatnamedrank(taxon,taxonomy,"genus",&tax,&rankindex);
+}
+
+
+/*
+ * Return the family level ancestor of taxon (or taxon itself), NULL when
+ * its lineage has none. taxonomy may be NULL to reuse the taxonomy given
+ * on a previous call.
+ */
+ecotx_t *eco_getfamily(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxonatnamedrank(taxon,taxonomy,"family",&tax,&rankindex);
+}
+
+/*
+ * Return the kingdom level ancestor of taxon (or taxon itself), NULL when
+ * its lineage has none. taxonomy may be NULL to reuse the taxonomy given
+ * on a previous call.
+ */
+ecotx_t *eco_getkingdom(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxonatnamedrank(taxon,taxonomy,"kingdom",&tax,&rankindex);
+}
+
+/*
+ * Return the superkingdom level ancestor of taxon (or taxon itself), NULL
+ * when its lineage has none. taxonomy may be NULL to reuse the taxonomy
+ * given on a previous call.
+ */
+ecotx_t *eco_getsuperkingdom(ecotx_t *taxon,
+						ecotaxonomy_t *taxonomy)
+{
+	static ecotaxonomy_t *tax=NULL;
+	static int32_t		 rankindex=-1;
+
+	return eco_findtaxonatnamedrank(taxon,taxonomy,"superkingdom",&tax,&rankindex);
+}
--- a/ROBITools/src/ecotax.o
+++ b/ROBITools/src/ecotax.o
--- a/ROBITools/src/robitax.c
+++ b/ROBITools/src/robitax.c
@@ -0,0 +1,835 @@
+/*
+ * robitax.c
+ *
+ *  Created on: 17 janv. 2013
+ *      Author: coissac
+ */
+
+#include "robitax.h"
+#include <unistd.h>
+//#include <regex.h>
+#include "slre.h"
+
+/**
+ * Return a pointer to an obitools taxonomy C structure
+ * from an R instance of taxonomy.obitools
+ *
+ * The function checks whether the pointer stored in the R object is
+ * NULL. If it is, and the `saved` slot of the object is true, the
+ * taxonomy is reloaded from disk using the `dbname` and `workingdir`
+ * slots, and the external pointer held by the object is updated.
+ *
+ * While reloading, the process working directory is switched to
+ * `workingdir` (when that succeeds) and switched back afterwards.
+ *
+ * An R error is raised when the argument is not a taxonomy.obitools
+ * instance, when the current directory cannot be determined, when the
+ * reload yields no taxonomy, or when the pointer is NULL and the
+ * object is not marked as saved.
+ *
+ * @param Rtaxonomy an R object
+ * @type  Rtaxonomy SEXP
+ *
+ * @return a non-NULL pointer to the C structure
+ * @rtype  ecotaxonomy_t *
+ */
+
+ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy)
+{
+    SEXP pointer;
+    SEXP rclass;
+    ecotaxonomy_t *ptax;
+    const char *file;
+    const char *dir;
+    char *pwd;
+    int saved;
+    int moved;
+    int restored;
+
+    if (!IS_S4_OBJECT(Rtaxonomy))
+        error("argument not taxonomy.obitools instance");
+
+    // We get the class name and compare it to "taxonomy.obitools"
+    rclass = getAttrib(Rtaxonomy, R_ClassSymbol);
+
+    if (strcmp(CHAR(asChar(rclass)), "taxonomy.obitools"))
+        error("argument not taxonomy.obitools instance");
+
+    // Slot names are given as symbols so that no unprotected
+    // temporary string vector has to be allocated
+    pointer = R_do_slot(Rtaxonomy, install("pointer"));
+    saved   = LOGICAL(R_do_slot(Rtaxonomy, install("saved")))[0];
+    ptax    = (ecotaxonomy_t *) R_ExternalPtrAddr(pointer);
+
+    // If the external pointer is set to NULL we have to load
+    // the taxonomy from file
+    if (ptax == NULL && saved)
+    {
+        // Read the slots first: if one of these calls raises an R
+        // error nothing has been allocated on the C heap yet
+        file = CHAR(asChar(R_do_slot(Rtaxonomy, install("dbname"))));
+        dir  = CHAR(asChar(R_do_slot(Rtaxonomy, install("workingdir"))));
+
+        pwd = getcwd(NULL, 0);
+        if (pwd == NULL)
+            error("Cannot determine the current working directory");
+
+        // A failing chdir is tolerated: the load is still attempted
+        // from the current directory, as before
+        moved = (chdir(dir) == 0);
+
+        ptax = read_taxonomy(file, 1);
+
+        // Only go back if we actually left; release pwd before any
+        // R error/warning can unwind the C stack
+        restored = (!moved) || (chdir(pwd) == 0);
+        free(pwd);
+
+        if (ptax == NULL)
+            error("Cannot reload the taxonomy database '%s'", file);
+
+        R_SetExternalPtrAddr(pointer, (void *) ptax);
+
+        if (!restored)
+            warning("Cannot restore the previous working directory");
+    }
+
+    if (ptax == NULL && ! saved)
+        error("The taxonomy instance is no more valid and must be rebuilt");
+
+    return ptax;
+}
+
+/**
+ * Release the C taxonomy held by an R external pointer.
+ *
+ * The argument is the external pointer itself: its address is read
+ * with R_ExternalPtrAddr(), passed to delete_ecotaxonomy() and then
+ * cleared. R_read_taxonomy() registers this function as the finalizer
+ * of the pointers it creates.
+ *
+ * A pointer whose address is already NULL (for instance one that has
+ * been cleared by a previous call) is left alone, so calling the
+ * function twice on the same object is harmless.
+ *
+ * @param Rtaxonomy an R external pointer
+ * @type  Rtaxonomy SEXP
+ *
+ * @return R_NilValue
+ */
+SEXP R_delete_taxonomy(SEXP Rtaxonomy)
+{
+    ecotaxonomy_t *ptax = (ecotaxonomy_t *) R_ExternalPtrAddr(Rtaxonomy);
+
+    if (ptax != NULL)
+        (void) delete_ecotaxonomy(ptax);
+
+    // Clear the external pointer
+    R_ClearExternalPtr(Rtaxonomy);
+
+    return R_NilValue;
+}
+
+
+
+/**
+ * Load a taxonomy with read_taxonomy() and wrap it in an R external
+ * pointer.
+ *
+ * R_delete_taxonomy() is registered as the finalizer of the returned
+ * pointer (also run on exit of the R session).
+ *
+ * @param filename   character vector; its first element is passed to
+ *                   read_taxonomy()
+ * @param altenative logical vector; its first element is passed to
+ *                   read_taxonomy() as second argument
+ *
+ * @return an external pointer tagged "ROBITools NCBI Taxonomy pointer"
+ *
+ * Raises an R error when an argument has the wrong type or is empty,
+ * or when read_taxonomy() returns NULL.
+ */
+SEXP R_read_taxonomy(SEXP filename, SEXP altenative)
+{
+    int   alt;
+    const char* file;
+    ecotaxonomy_t *taxonomy;
+    SEXP  tag;
+    SEXP  Rtax;
+
+    // The length tests prevent reading element 0 of an empty vector
+    if (! isString(filename) || length(filename) < 1)
+        error("filename not character");
+    file = CHAR(STRING_ELT(filename, 0));
+
+    if (! isLogical(altenative) || length(altenative) < 1)
+        error("altenative not logical");
+    alt = LOGICAL(altenative)[0];
+
+    // The tag is built and protected before the taxonomy is loaded:
+    // it must survive the allocation done by R_MakeExternalPtr(), and
+    // an allocation failure here cannot leak a loaded taxonomy
+    tag = PROTECT(mkString("ROBITools NCBI Taxonomy pointer"));
+
+    taxonomy = read_taxonomy(file,alt);
+
+    if (! taxonomy)
+        error("Cannot open the taxonomy database");
+
+    Rtax = PROTECT(R_MakeExternalPtr(taxonomy, tag, R_NilValue));
+    R_RegisterCFinalizerEx(Rtax, (R_CFinalizer_t)R_delete_taxonomy,TRUE);
+
+    UNPROTECT(2);
+
+    return Rtax;
+}
+
+
+/**
+ * Return the name stored for a taxid.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ * @param Rtaxid    integer vector; only its first element is used
+ *
+ * @return a character vector of length one holding the `name` field of
+ *         the taxon, or NA_character_ when
+ *         eco_findtaxonbytaxid() does not find the taxid
+ *
+ * Raises an R error when Rtaxid is not an integer vector, is empty, or
+ * its first element is not strictly positive (this includes NA).
+ */
+SEXP R_get_scientific_name(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+    ecotx_t *taxon;
+    ecotaxonomy_t *ptax;
+    int taxid;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    // Reading element 0 of an empty vector is out of bounds
+    if (length(Rtaxid) < 1)
+        error("taxid is empty");
+
+    taxid = INTEGER(Rtaxid)[0];
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+    if (!taxon)
+        return ScalarString(R_NaString);
+
+    return mkString(taxon->name);
+}
+
+/**
+ * Return the rank label of each taxid of a vector.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ * @param Rtaxid    integer vector of taxids
+ *
+ * @return a character vector of the same length as Rtaxid; an element
+ *         is NA when the taxid is NA, not strictly positive, or not
+ *         found by eco_findtaxonbytaxid()
+ *
+ * Raises an R error when Rtaxid is not an integer vector.
+ */
+SEXP R_get_rank(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+    ecotaxonomy_t *ptax;
+    ecotx_t *taxon;
+    SEXP labels;
+    SEXP label;
+    int *ids;
+    int count;
+    int k;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    count  = length(Rtaxid);
+    labels = PROTECT(allocVector(STRSXP, count));
+    ids    = INTEGER(Rtaxid);
+
+    for (k = 0; k < count; k++)
+    {
+        // NA unless a usable taxid resolves to a taxon
+        label = R_NaString;
+
+        if (ids[k] != NA_INTEGER && ids[k] > 0)
+        {
+            taxon = eco_findtaxonbytaxid(ptax, ids[k]);
+            if (taxon)
+                label = mkChar(ptax->ranks->label[taxon->rank]);
+        }
+
+        SET_STRING_ELT(labels, k, label);
+    }
+
+    UNPROTECT(1);
+
+    return labels;
+}
+
+/**
+ * Return the entry found by eco_findtaxonatrank() for a taxid and a
+ * rank given by name.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ * @param Rtaxid    integer vector; only its first element is used
+ * @param Rrank     character vector; its first element is the rank
+ *                  name, resolved with rank_index()
+ * @param Rname     logical vector; when its first element is true the
+ *                  name of the result is returned, otherwise its taxid
+ *
+ * @return a length-one character or integer vector; NA (of the
+ *         matching type) when the taxid is unknown or when
+ *         eco_findtaxonatrank() returns NULL
+ *
+ * Raises an R error on a wrongly typed or empty argument, a taxid that
+ * is not strictly positive, or an unknown rank name.
+ */
+SEXP R_findtaxonatrank(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rrank, SEXP Rname)
+{
+    ecotx_t *taxon;
+    ecotx_t *rep;
+    ecotaxonomy_t *ptax;
+    int taxid;
+    int name;
+    const char *rank;
+    int   rankidx;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    // The length tests prevent reading element 0 of an empty vector
+    if (length(Rtaxid) < 1)
+        error("taxid is empty");
+
+    taxid = INTEGER(Rtaxid)[0];
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isString(Rrank))
+        error("rank not a string");
+
+    if (length(Rrank) < 1)
+        error("rank is empty");
+
+    rank=CHAR(STRING_ELT(Rrank,0));
+
+    rankidx=rank_index(rank,ptax->ranks);
+
+    if (rankidx < 0)
+        error("unkown rank name");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+
+    if (length(Rname) < 1)
+        error("name is empty");
+
+    name = LOGICAL(Rname)[0];
+
+    taxon = eco_findtaxonbytaxid(ptax, taxid);
+    rep   = taxon ? eco_findtaxonatrank(taxon,rankidx) : NULL;
+
+    // Unknown taxid or nothing at that rank: typed NA
+    if (!rep)
+        return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+    return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+
+/**
+ * Shared implementation of R_get_species(), R_get_genus(),
+ * R_get_family(), R_get_kingdom() and R_get_superkingdom().
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ * @param Rtaxid    integer vector; only its first element is used
+ * @param Rname     logical vector; when its first element is true the
+ *                  name of the result is returned, otherwise its taxid
+ * @param getter    one of the eco_get*() functions, called with the
+ *                  taxon found for the taxid and the taxonomy
+ *
+ * @return a length-one character or integer vector; NA (of the
+ *         matching type) when the taxid is unknown or when the getter
+ *         returns NULL
+ *
+ * Raises an R error on a wrongly typed or empty argument, or a taxid
+ * that is not strictly positive (this includes NA).
+ */
+static SEXP robitax_get_at_rank(SEXP Rtaxonomy, SEXP Rtaxid, SEXP Rname,
+                                ecotx_t *(*getter)(ecotx_t *, ecotaxonomy_t *))
+{
+    ecotx_t *taxon;
+    ecotx_t *rep;
+    ecotaxonomy_t *ptax;
+    int taxid;
+    int name;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    // The length tests prevent reading element 0 of an empty vector
+    if (length(Rtaxid) < 1)
+        error("taxid is empty");
+
+    taxid = INTEGER(Rtaxid)[0];
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+
+    if (length(Rname) < 1)
+        error("name is empty");
+
+    name = LOGICAL(Rname)[0];
+
+    taxon = eco_findtaxonbytaxid(ptax, taxid);
+    rep   = taxon ? getter(taxon, ptax) : NULL;
+
+    // Unknown taxid or nothing at that rank: typed NA
+    if (!rep)
+        return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+    return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+/** Result of eco_getspecies() for a taxid, as name or taxid (NA if none). */
+SEXP R_get_species(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+    return robitax_get_at_rank(Rtaxonomy, Rtaxid, Rname, eco_getspecies);
+}
+
+/** Result of eco_getgenus() for a taxid, as name or taxid (NA if none). */
+SEXP R_get_genus(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+    return robitax_get_at_rank(Rtaxonomy, Rtaxid, Rname, eco_getgenus);
+}
+
+/** Result of eco_getfamily() for a taxid, as name or taxid (NA if none). */
+SEXP R_get_family(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+    return robitax_get_at_rank(Rtaxonomy, Rtaxid, Rname, eco_getfamily);
+}
+
+/** Result of eco_getkingdom() for a taxid, as name or taxid (NA if none). */
+SEXP R_get_kingdom(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+    return robitax_get_at_rank(Rtaxonomy, Rtaxid, Rname, eco_getkingdom);
+}
+
+/** Result of eco_getsuperkingdom() for a taxid, as name or taxid (NA if none). */
+SEXP R_get_superkingdom(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+    return robitax_get_at_rank(Rtaxonomy, Rtaxid, Rname, eco_getsuperkingdom);
+}
+
+/**
+ * Return the parent of a taxon.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ * @param Rtaxid    integer vector; only its first element is used
+ * @param Rname     logical vector; when its first element is true the
+ *                  name of the parent is returned, otherwise its taxid
+ *
+ * @return a length-one character or integer vector; NA (of the
+ *         matching type) when the taxid is unknown, when the taxon has
+ *         no parent, or when the parent carries the same taxid as the
+ *         query
+ *
+ * Raises an R error on a wrongly typed or empty argument, or a taxid
+ * that is not strictly positive (this includes NA).
+ */
+SEXP R_get_parent(SEXP Rtaxonomy,SEXP Rtaxid,SEXP Rname)
+{
+    ecotx_t *taxon;
+    ecotx_t *rep;
+    ecotaxonomy_t *ptax;
+    int taxid;
+    int name;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    // The length tests prevent reading element 0 of an empty vector
+    if (length(Rtaxid) < 1)
+        error("taxid is empty");
+
+    taxid = INTEGER(Rtaxid)[0];
+
+    if (! (taxid > 0))
+        error("taxid not positive");
+
+    if (! isLogical(Rname))
+        error("name not logical");
+
+    if (length(Rname) < 1)
+        error("name is empty");
+
+    name = LOGICAL(Rname)[0];
+
+    taxon = eco_findtaxonbytaxid(ptax, taxid);
+    rep   = taxon ? taxon->parent : NULL;
+
+    // The NULL test guards the dereference below; a parent with the
+    // queried taxid is reported as "no parent"
+    if (rep == NULL || rep->taxid==taxid)
+        return name ? ScalarString(R_NaString) : ScalarInteger(R_NaInt);
+
+    return name ? mkString(rep->name) : ScalarInteger(rep->taxid);
+}
+
+
+/**
+ * Check a taxid against the taxonomy.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ * @param Rtaxid    integer vector; only its first element is used
+ *
+ * @return a length-one integer vector holding the `taxid` field of the
+ *         taxon returned by eco_findtaxonbytaxid(), or NA when Rtaxid
+ *         is empty, its first element is not strictly positive, or no
+ *         taxon is found
+ *
+ * Raises an R error when Rtaxid is not an integer vector.
+ */
+SEXP R_validate_taxid(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+    ecotx_t *taxon;
+    ecotaxonomy_t *ptax;
+    int taxid;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    // An empty vector has no element 0 to read: treat it as invalid
+    if (length(Rtaxid) < 1)
+        return ScalarInteger(R_NaInt);
+
+    taxid = INTEGER(Rtaxid)[0];
+
+    if (! (taxid > 0))
+        return ScalarInteger(R_NaInt);
+
+    taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+    if (!taxon)
+        return ScalarInteger(R_NaInt);
+
+    return ScalarInteger(taxon->taxid);
+}
+
+
+/**
+ * Tell whether a taxon is located under another one, as computed by
+ * eco_isundertaxon().
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ * @param Rtaxid    integer vector; its first element is the taxid to
+ *                  test
+ * @param Rparent   integer vector; its first element is the taxid of
+ *                  the candidate ancestor
+ *
+ * @return a length-one logical vector built from the result of
+ *         eco_isundertaxon(); when either taxid is missing (empty
+ *         vector), not strictly positive or unknown, an integer NA is
+ *         returned instead (kept as integer for compatibility with
+ *         existing callers)
+ *
+ * Raises an R error when Rparent or Rtaxid is not an integer vector.
+ */
+SEXP R_is_under_taxon(SEXP Rtaxonomy, SEXP Rtaxid, SEXP Rparent)
+{
+    ecotx_t *taxon;
+    ecotaxonomy_t *ptax;
+    int taxid;
+    int parent;
+    int rep;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rparent))
+        error("parent not integer");
+
+    // An empty vector has no element 0 to read: treat it as invalid
+    if (length(Rparent) < 1)
+        return ScalarInteger(R_NaInt);
+
+    parent = INTEGER(Rparent)[0];
+
+    if (parent <= 0)
+        return ScalarInteger(R_NaInt);
+
+    // The parent must exist in the taxonomy
+    taxon = eco_findtaxonbytaxid(ptax, parent);
+
+    if (!taxon)
+        return ScalarInteger(R_NaInt);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    if (length(Rtaxid) < 1)
+        return ScalarInteger(R_NaInt);
+
+    taxid = INTEGER(Rtaxid)[0];
+
+    if (taxid <= 0)
+        return ScalarInteger(R_NaInt);
+
+    taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+    if (!taxon)
+        return ScalarInteger(R_NaInt);
+
+    rep = eco_isundertaxon(taxon, parent);
+
+    return ScalarLogical(rep);
+}
+
+
+/**
+ * Return the `farest` field of a taxon.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ * @param Rtaxid    integer vector; only its first element is used
+ *
+ * @return a length-one integer vector holding the `farest` field of
+ *         the taxon, or NA when Rtaxid is empty, its first element is
+ *         not strictly positive, or no taxon is found
+ *
+ * Raises an R error when Rtaxid is not an integer vector.
+ */
+SEXP R_longest_path(SEXP Rtaxonomy,SEXP Rtaxid)
+{
+    ecotx_t *taxon;
+    ecotaxonomy_t *ptax;
+    int taxid;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isInteger(Rtaxid))
+        error("taxid not integer");
+
+    // An empty vector has no element 0 to read: treat it as invalid
+    if (length(Rtaxid) < 1)
+        return ScalarInteger(R_NaInt);
+
+    taxid = INTEGER(Rtaxid)[0];
+
+    if (taxid <= 0)
+        return ScalarInteger(R_NaInt);
+
+    taxon = eco_findtaxonbytaxid(ptax, taxid);
+
+    if (!taxon)
+        return ScalarInteger(R_NaInt);
+
+    return ScalarInteger(taxon->farest);
+}
+
+/**
+ * Return all rank labels of a taxonomy.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ *
+ * @return a character vector with one element per entry of the rank
+ *         table, in table order
+ */
+SEXP R_rank_list(SEXP Rtaxonomy)
+{
+    ecotaxonomy_t *ptax = getTaxPointer(Rtaxonomy);
+    int total = ptax->ranks->count;
+    int k = 0;
+    SEXP labels = PROTECT(allocVector(STRSXP, total));
+
+    while (k < total)
+    {
+        SET_STRING_ELT(labels, k, mkChar(ptax->ranks->label[k]));
+        k++;
+    }
+
+    UNPROTECT(1);
+
+    return labels;
+}
+
+/**
+ * Return the taxid of every taxon of a taxonomy.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ *
+ * @return an integer vector with one element per entry of the taxon
+ *         table, in table order
+ */
+SEXP R_taxid_list(SEXP Rtaxonomy)
+{
+    ecotaxonomy_t *ptax = getTaxPointer(Rtaxonomy);
+    int total = ptax->taxons->count;
+    SEXP ids = PROTECT(allocVector(INTSXP, total));
+    int *out = INTEGER(ids);
+    int k;
+
+    for (k = 0; k < total; k++)
+        out[k] = ptax->taxons->taxon[k].taxid;
+
+    UNPROTECT(1);
+
+    return ids;
+}
+
+/**
+ * Return the `maxtaxid` field of the taxon table.
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ *
+ * @return a length-one integer vector
+ */
+SEXP R_max_taxid(SEXP Rtaxonomy)
+{
+    return ScalarInteger(getTaxPointer(Rtaxonomy)->taxons->maxtaxid);
+}
+
+/**
+ * Return the number of entries of the taxon table (`count` field).
+ *
+ * @param Rtaxonomy a taxonomy.obitools instance (see getTaxPointer())
+ *
+ * @return a length-one integer vector
+ */
+SEXP R_length_taxonomy(SEXP Rtaxonomy)
+{
+    return ScalarInteger(getTaxPointer(Rtaxonomy)->taxons->count);
+}
+
+/**
+ * Tell whether an entry passes the optional rank filter and matches
+ * the pattern (case-insensitively).
+ *
+ * slre_match() reports a match with any value >= 0, zero meaning that
+ * the pattern matched an empty prefix; negative values are failure
+ * codes and count as "no match".
+ */
+static int robitax_entry_matches(ecotaxonomy_t *ptax, const char *rankname,
+                                 int32_t rank, const char *pattern,
+                                 const char *text)
+{
+    if (rankname && strcmp(rankname, ptax->ranks->label[rank]) != 0)
+        return 0;
+
+    return slre_match(pattern, text, (int) strlen(text),
+                      NULL, 0, SLRE_IGNORE_CASE) >= 0;
+}
+
+/**
+ * Double the capacity of the taxid buffer.
+ *
+ * The buffer lives in R transient storage (R_alloc): R releases it when
+ * the .Call returns, including when an R error unwinds the C stack, so
+ * the previous, smaller buffer is simply abandoned.
+ */
+static int32_t *robitax_grow_taxids(int32_t *taxids, size_t used,
+                                    size_t *capacity)
+{
+    size_t   wider = *capacity * 2;
+    int32_t *grown = (int32_t *) R_alloc(wider, sizeof(int32_t));
+
+    memcpy(grown, taxids, used * sizeof(int32_t));
+    *capacity = wider;
+
+    return grown;
+}
+
+/**
+ * Return the taxids whose name matches a regular expression.
+ *
+ * @param Rtaxonomy    a taxonomy.obitools instance (see getTaxPointer())
+ * @param Rpattern     character vector; its first element is the SLRE
+ *                     pattern, matched case-insensitively
+ * @param Rrank        NULL, or a character vector whose first element
+ *                     restricts the result to taxa of that rank label
+ * @param Ralternative logical vector; when its first element is true
+ *                     and the taxonomy has a name table, that table is
+ *                     searched instead of the taxon table
+ *
+ * @return an integer vector of taxids, one per matching entry, in
+ *         table order
+ *
+ * Raises an R error on a wrongly typed or empty argument.
+ */
+SEXP R_ecofind(SEXP Rtaxonomy, SEXP Rpattern, SEXP Rrank, SEXP Ralternative)
+{
+    ecotaxonomy_t *ptax;
+    econame_t     *name;
+    const char    *pattern;
+    const char    *rankname = NULL;
+    int32_t       *buffer;
+    size_t         bsize    = 100;
+    size_t         nummatch = 0;
+    size_t         k;
+    int32_t        j;
+    int            alternative;
+    int           *ptaxid;
+    SEXP           taxids;
+
+    ptax = getTaxPointer(Rtaxonomy);
+
+    if (! isString(Rpattern))
+        error("pattern not a string");
+
+    // The length tests prevent reading element 0 of an empty vector
+    if (length(Rpattern) < 1)
+        error("pattern is empty");
+
+    pattern = CHAR(STRING_ELT(Rpattern,0));
+
+    if (! isNull(Rrank))
+    {
+        if (! isString(Rrank))
+            error("rank not a string");
+
+        if (length(Rrank) < 1)
+            error("rank is empty");
+
+        rankname = CHAR(STRING_ELT(Rrank,0));
+    }
+
+    if (! isLogical(Ralternative))
+        error("alternative not a logical");
+
+    if (length(Ralternative) < 1)
+        error("alternative is empty");
+
+    alternative = LOGICAL(Ralternative)[0];
+
+    buffer = (int32_t *) R_alloc(bsize, sizeof(int32_t));
+
+    if (alternative && ptax->names!=NULL)
+    {
+        // Search every recorded name
+        for (j=0, name=ptax->names->names;
+             j < ptax->names->count;
+             name++, j++)
+        {
+            if (! robitax_entry_matches(ptax, rankname, name->taxon->rank,
+                                        pattern, name->name))
+                continue;
+
+            if (nummatch == bsize)
+                buffer = robitax_grow_taxids(buffer, nummatch, &bsize);
+
+            buffer[nummatch++] = name->taxon->taxid;
+        }
+    }
+    else
+    {
+        // Search the name stored with each taxon
+        for (j=0; j < ptax->taxons->count; j++)
+        {
+            if (! robitax_entry_matches(ptax, rankname,
+                                        ptax->taxons->taxon[j].rank,
+                                        pattern,
+                                        ptax->taxons->taxon[j].name))
+                continue;
+
+            if (nummatch == bsize)
+                buffer = robitax_grow_taxids(buffer, nummatch, &bsize);
+
+            buffer[nummatch++] = ptax->taxons->taxon[j].taxid;
+        }
+    }
+
+    taxids = PROTECT(NEW_INTEGER(nummatch));
+    ptaxid = INTEGER(taxids);
+
+    for (k=0; k < nummatch; k++)
+        ptaxid[k] = buffer[k];
+
+    UNPROTECT(1);
+    return taxids;
+}
--- a/ROBITools/src/robitax.h
+++ b/ROBITools/src/robitax.h
@@ -0,0 +1,6 @@
+#include "ecoPCR.h"
+
+
+/* Return the C taxonomy behind an R taxonomy.obitools instance
+ * (defined in robitax.c). */
+ecotaxonomy_t *getTaxPointer(SEXP Rtaxonomy);
+/* Delete the C taxonomy held by an R external pointer and clear the
+ * pointer (defined in robitax.c). */
+SEXP R_delete_taxonomy(SEXP Rtaxonomy);
+
--- a/ROBITools/src/robitax.o
+++ b/ROBITools/src/robitax.o
--- a/ROBITools/src/slre.c
+++ b/ROBITools/src/slre.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2004-2013 Sergey Lyubka <valenok@gmail.com>
+ * Copyright (c) 2013 Cesanta Software Limited
+ * All rights reserved
+ *
+ * This library is dual-licensed: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. For the terms of this
+ * license, see <http://www.gnu.org/licenses/>.
+ *
+ * You are free to use this library under the terms of the GNU General
+ * Public License, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * Alternatively, you can license this library under a commercial
+ * license, as set out in <http://cesanta.com/products.html>.
+ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "slre.h"
+
+/* Capacity of the branches[] and brackets[] arrays of struct regex_info */
+#define MAX_BRANCHES 100
+#define MAX_BRACKETS 100
+/* Return error_code from the enclosing function when condition holds */
+#define FAIL_IF(condition, error_code) if (condition) return (error_code)
+
+/* Number of elements of a true array (not of a pointer) */
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(ar) (sizeof(ar) / sizeof((ar)[0]))
+#endif
+
+/*
+ * Debug trace: DBG((fmt, ...)) expands to a printf call when SLRE_DEBUG
+ * is defined and to nothing otherwise (note the double parentheses).
+ */
+#ifdef SLRE_DEBUG
+#define DBG(x) printf x
+#else
+#define DBG(x)
+#endif
+
+/*
+ * One parenthesised group of the regular expression. Entry 0 of the
+ * brackets array is not a real group: it spans the whole expression.
+ */
+struct bracket_pair {
+  const char *ptr;  /* Points to the first char after '(' in regex  */
+  int len;          /* Length of the text between '(' and ')'       */
+  int branches;     /* Index in the branches array for this pair    */
+  int num_branches; /* Number of '|' in this bracket pair           */
+};
+
+/* One alternation ('|') found while scanning the regular expression */
+struct branch {
+  int bracket_index;    /* index for 'struct bracket_pair brackets' */
+                        /* array defined below                      */
+  const char *schlong;  /* points to the '|' character in the regex */
+};
+
+/*
+ * Working state of one slre_match() call: the groups and alternations
+ * recorded from the regular expression, plus the caller's capture array
+ * and flags.
+ */
+struct regex_info {
+  /*
+   * Describes all bracket pairs in the regular expression.
+   * First entry is always present, and grabs the whole regex.
+   */
+  struct bracket_pair brackets[MAX_BRACKETS];
+  int num_brackets;
+
+  /*
+   * Describes alternations ('|' operators) in the regular expression.
+   * Each branch falls into a specific bracket pair.
+   */
+  struct branch branches[MAX_BRANCHES];
+  int num_branches;
+
+  /* Array of captures provided by the user (may be NULL) */
+  struct slre_cap *caps;
+  int num_caps;
+
+  /* E.g. SLRE_IGNORE_CASE. See the flags enum in slre.h */
+  int flags;
+};
+
+/*
+ * Return 1 when the character pointed to by s may follow a backslash in
+ * a regular expression (a character of the list below), 0 otherwise.
+ */
+static int is_metacharacter(const unsigned char *s) {
+  static const char *escapable = "^$().[]*+?|\\Ssdbfnrtv";
+  const char *hit = strchr(escapable, *s);
+
+  return hit == NULL ? 0 : 1;
+}
+
+/*
+ * Length in bytes of the regex element starting at re:
+ * 4 for "\xHH", 2 for any other backslash escape, 1 otherwise.
+ */
+static int op_len(const char *re) {
+  if (re[0] != '\\') {
+    return 1;
+  }
+  return re[1] == 'x' ? 4 : 2;
+}
+
+/*
+ * Length of a character set body, closing ']' included. re points just
+ * after the opening '[' and re_len is the number of bytes available.
+ * Returns -1 when stepping over the elements runs past re_len.
+ */
+static int set_len(const char *re, int re_len) {
+  int pos;
+
+  for (pos = 0; pos < re_len && re[pos] != ']'; pos += op_len(re + pos)) {
+    /* skip one element (plain char or escape) per iteration */
+  }
+
+  if (pos > re_len) {
+    return -1;
+  }
+  return pos + 1;
+}
+
+/*
+ * Length of the regex element starting at re: a whole "[...]" set when
+ * re starts with '[', otherwise the result of op_len().
+ */
+static int get_op_len(const char *re, int re_len) {
+  if (re[0] == '[') {
+    return set_len(re + 1, re_len - 1) + 1;
+  }
+  return op_len(re);
+}
+
+/* Return 1 when re starts with one of the quantifiers '*', '+', '?' */
+static int is_quantifier(const char *re) {
+  switch (re[0]) {
+    case '*':
+    case '+':
+    case '?':
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+/*
+ * Value of one hexadecimal digit. Decimal digits map to 0..9; any other
+ * input is offset by 'W', which maps lowercase 'a'..'f' to 10..15.
+ */
+static int toi(int x) {
+  if (isdigit(x)) {
+    return x - '0';
+  }
+  return x - 'W';
+}
+
+/* Value of the byte written as the two hexadecimal digits s[0] s[1] */
+static int hextoi(const unsigned char *s) {
+  const int high = toi(tolower(s[0]));
+  const int low  = toi(tolower(s[1]));
+
+  return (high << 4) | low;
+}
+
+/*
+ * Match the single regex element at re against the byte at s.
+ *
+ * Returns 1 (one byte consumed) on success, SLRE_NO_MATCH when the byte
+ * does not match, and SLRE_INTERNAL_ERROR when re points at a '|'.
+ * Only plain characters honour SLRE_IGNORE_CASE.
+ */
+static int match_op(const unsigned char *re, const unsigned char *s,
+                    struct regex_info *info) {
+  int matched;
+
+  /* Elements that are decided without looking at the subject byte */
+  if (*re == '|') return SLRE_INTERNAL_ERROR;
+  if (*re == '$') return SLRE_NO_MATCH;
+  if (*re == '.') return 1;
+
+  if (*re != '\\') {
+    /* Plain character */
+    if (info->flags & SLRE_IGNORE_CASE) {
+      matched = tolower(*re) == tolower(*s);
+    } else {
+      matched = *re == *s;
+    }
+    return matched ? 1 : SLRE_NO_MATCH;
+  }
+
+  /* Metacharacters */
+  switch (re[1]) {
+    case 'S': matched = !isspace(*s); break;
+    case 's': matched = isspace(*s); break;
+    case 'd': matched = isdigit(*s); break;
+    case 'b': matched = *s == '\b'; break;
+    case 'f': matched = *s == '\f'; break;
+    case 'n': matched = *s == '\n'; break;
+    case 'r': matched = *s == '\r'; break;
+    case 't': matched = *s == '\t'; break;
+    case 'v': matched = *s == '\v'; break;
+
+    case 'x':
+      /* Match byte, \xHH where HH is hexadecimal byte representation */
+      matched = hextoi(re + 2) == *s;
+      break;
+
+    default:
+      /* Escaped literal; the escape itself is validated in foo() */
+      matched = re[1] == s[0];
+      break;
+  }
+
+  return matched ? 1 : SLRE_NO_MATCH;
+}
+
+/*
+ * Match the byte at s against a character set.
+ *
+ * re points just after the opening '[' and re_len bounds the scan. A
+ * leading '^' inverts the set. Elements are tried in turn until one
+ * matches or the closing ']' is reached; "a-z" style ranges are
+ * supported, other elements are delegated to match_op().
+ *
+ * Ranges follow the SLRE_IGNORE_CASE flag: when it is set both the
+ * subject byte and the range bounds are lowercased before comparing,
+ * otherwise the bytes are compared as they are.
+ *
+ * Returns 1 when the byte is accepted by the set, -1 otherwise.
+ */
+static int match_set(const char *re, int re_len, const char *s,
+                     struct regex_info *info) {
+  int len = 0, result = -1, invert = re[0] == '^';
+
+  if (invert) re++, re_len--;
+
+  while (len <= re_len && re[len] != ']' && result <= 0) {
+    /* Support character range */
+    if (re[len] != '-' && re[len + 1] == '-' && re[len + 2] != ']' &&
+        re[len + 2] != '\0') {
+      if (info->flags & SLRE_IGNORE_CASE) {
+        /* tolower() requires a value representable as unsigned char */
+        const int c  = tolower((unsigned char) *s);
+        const int lo = tolower((unsigned char) re[len]);
+        const int hi = tolower((unsigned char) re[len + 2]);
+        result = c >= lo && c <= hi;
+      } else {
+        result = *s >= re[len] && *s <= re[len + 2];
+      }
+      len += 3;
+    } else {
+      result = match_op((unsigned char *) re + len, (unsigned char *) s, info);
+      len += op_len(re + len);
+    }
+  }
+  return (!invert && result > 0) || (invert && result <= 0) ? 1 : -1;
+}
+
+static int doh(const char *s, int s_len, struct regex_info *info, int bi);
+
+/*
+ * Match one alternative-free piece of the regular expression.
+ *
+ * re/re_len delimit the piece of regex to apply, s/s_len the subject
+ * text, and bi is the index of the bracket pair the piece belongs to.
+ *
+ * Returns the number of bytes of s consumed by the match (possibly 0),
+ * or a negative SLRE_* code when the piece does not match or is
+ * malformed. Captures are stored in info->caps when that array is
+ * provided.
+ */
+static int bar(const char *re, int re_len, const char *s, int s_len,
+               struct regex_info *info, int bi) {
+  /* i is offset in re, j is offset in s, bi is brackets index */
+  int i, j, n, step;
+
+  for (i = j = 0; i < re_len && j <= s_len; i += step) {
+
+    /* Handle quantifiers. Get the length of the chunk. */
+    /* A group is skipped as a whole: its recorded length plus "()" */
+    step = re[i] == '(' ? info->brackets[bi + 1].len + 2 :
+      get_op_len(re + i, re_len - i);
+
+    DBG(("%s [%.*s] [%.*s] re_len=%d step=%d i=%d j=%d\n", __func__,
+         re_len - i, re + i, s_len - j, s + j, re_len, step, i, j));
+
+    /* A quantifier with nothing before it, or a broken set, is an error */
+    FAIL_IF(is_quantifier(&re[i]), SLRE_UNEXPECTED_QUANTIFIER);
+    FAIL_IF(step <= 0, SLRE_INVALID_CHARACTER_SET);
+
+    /* The chunk at i is followed by a quantifier */
+    if (i + step < re_len && is_quantifier(re + i + step)) {
+      DBG(("QUANTIFIER: [%.*s]%c [%.*s]\n", step, re + i,
+           re[i + step], s_len - j, s + j));
+      if (re[i + step] == '?') {
+        /* Optional chunk: consume it when it matches, ignore it otherwise */
+        int result = bar(re + i, step, s + j, s_len - j, info, bi);
+        j += result > 0 ? result : 0;
+        /* Step over the '?' in addition to the chunk itself */
+        i++;
+      } else if (re[i + step] == '+' || re[i + step] == '*') {
+        /*
+         * j2 is the subject offset after the repetitions tried so far,
+         * nj the best end offset found, n1 the result of the last
+         * repetition and n2 the result of matching the rest of the regex.
+         */
+        int j2 = j, nj = j, n1, n2 = -1, ni, non_greedy = 0;
+
+        /* Points to the regexp code after the quantifier */
+        ni = i + step + 1;
+        if (ni < re_len && re[ni] == '?') {
+          non_greedy = 1;
+          ni++;
+        }
+
+        /* Repeat the chunk, trying the rest of the regex after each pass */
+        do {
+          if ((n1 = bar(re + i, step, s + j2, s_len - j2, info, bi)) > 0) {
+            j2 += n1;
+          }
+          if (re[i + step] == '+' && n1 < 0) break;
+
+          if (ni >= re_len) {
+            /* After quantifier, there is nothing */
+            nj = j2;
+          } else if ((n2 = bar(re + ni, re_len - ni, s + j2,
+                               s_len - j2, info, bi)) >= 0) {
+            /* Regex after quantifier matched */
+            nj = j2 + n2;
+          }
+          if (nj > j && non_greedy) break;
+        } while (n1 > 0);
+
+        /* '*' with zero repetitions: the rest must match from j */
+        if (n1 < 0 && re[i + step] == '*' &&
+            (n2 = bar(re + ni, re_len - ni, s + j, s_len - j, info, bi)) > 0) {
+          nj = j + n2;
+        }
+
+        DBG(("STAR/PLUS END: %d %d %d %d %d\n", j, nj, re_len - ni, n1, n2));
+        FAIL_IF(re[i + step] == '+' && nj == j, SLRE_NO_MATCH);
+
+        /* If while loop body above was not executed for the * quantifier,  */
+        /* make sure the rest of the regex matches                          */
+        FAIL_IF(nj == j && ni < re_len && n2 < 0, SLRE_NO_MATCH);
+
+        /* Returning here cause we've matched the rest of RE already */
+        return nj;
+      }
+      continue;
+    }
+
+    if (re[i] == '[') {
+      /* Character set: consumes exactly one subject byte */
+      n = match_set(re + i + 1, re_len - (i + 2), s + j, info);
+      DBG(("SET %.*s [%.*s] -> %d\n", step, re + i, s_len - j, s + j, n));
+      FAIL_IF(n <= 0, SLRE_NO_MATCH);
+      j += n;
+    } else if (re[i] == '(') {
+      /* Group: move on to the next bracket pair and match its branches */
+      n = SLRE_NO_MATCH;
+      bi++;
+      FAIL_IF(bi >= info->num_brackets, SLRE_INTERNAL_ERROR);
+      DBG(("CAPTURING [%.*s] [%.*s] [%s]\n",
+           step, re + i, s_len - j, s + j, re + i + step));
+
+      if (re_len - (i + step) <= 0) {
+        /* Nothing follows brackets */
+        n = doh(s + j, s_len - j, info, bi);
+      } else {
+        /*
+         * Something follows the group: shorten the text offered to the
+         * group one byte at a time until both the group and what
+         * follows it match.
+         */
+        int j2;
+        for (j2 = 0; j2 <= s_len - j; j2++) {
+          if ((n = doh(s + j, s_len - (j + j2), info, bi)) >= 0 &&
+              bar(re + i + step, re_len - (i + step),
+                  s + j + n, s_len - (j + n), info, bi) >= 0) break;
+        }
+      }
+
+      DBG(("CAPTURED [%.*s] [%.*s]:%d\n", step, re + i, s_len - j, s + j, n));
+      FAIL_IF(n < 0, n);
+      /* Record the capture; caps[] is indexed from 0, brackets from 1 */
+      if (info->caps != NULL) {
+        info->caps[bi - 1].ptr = s + j;
+        info->caps[bi - 1].len = n;
+      }
+      j += n;
+    } else if (re[i] == '^') {
+      /* Start anchor: only valid at offset 0 of the text given to bar() */
+      FAIL_IF(j != 0, SLRE_NO_MATCH);
+    } else if (re[i] == '$') {
+      /* End anchor: only valid when the whole text has been consumed */
+      FAIL_IF(j != s_len, SLRE_NO_MATCH);
+    } else {
+      /* Any other single element needs one more subject byte */
+      FAIL_IF(j >= s_len, SLRE_NO_MATCH);
+      n = match_op((unsigned char *) (re + i), (unsigned char *) (s + j), info);
+      FAIL_IF(n <= 0, n);
+      j += n;
+    }
+  }
+
+  return j;
+}
+
+/*
+ * Process branch points.
+ *
+ * Tries the alternatives ('|'-separated pieces) of bracket pair bi
+ * against s, in order, until bar() returns a positive value for one of
+ * them or the alternatives are exhausted. Returns the result of the
+ * last bar() call.
+ */
+static int doh(const char *s, int s_len, struct regex_info *info, int bi) {
+  const struct bracket_pair *b = &info->brackets[bi];
+  int i = 0, len, result;
+  const char *p;
+
+  do {
+    /* p: start of alternative i (group start, or just after a '|') */
+    p = i == 0 ? b->ptr : info->branches[b->branches + i - 1].schlong + 1;
+    /* len: up to the next '|', or to the end of the group for the last one */
+    len = b->num_branches == 0 ? b->len :
+      i == b->num_branches ? (int) (b->ptr + b->len - p) :
+      (int) (info->branches[b->branches + i].schlong - p);
+    DBG(("%s %d %d [%.*s] [%.*s]\n", __func__, bi, i, len, p, s_len, s));
+    result = bar(p, len, s, s_len, info, bi);
+    DBG(("%s <- %d\n", __func__, result));
+  } while (result <= 0 && i++ < b->num_branches);  /* At least 1 iteration */
+
+  return result;
+}
+
+/*
+ * Search s for the whole regular expression (bracket pair 0).
+ *
+ * The match is attempted at each offset of s in turn, or at offset 0
+ * only when the expression starts with '^'. Returns the offset in s of
+ * the end of the first match found, or the negative result of the last
+ * attempt.
+ */
+static int baz(const char *s, int s_len, struct regex_info *info) {
+  const int anchored = info->brackets[0].ptr[0] == '^';
+  int offset = 0;
+  int matched = -1;
+
+  while (offset <= s_len) {
+    matched = doh(s + offset, s_len - offset, info, 0);
+    if (matched >= 0) {
+      return matched + offset;
+    }
+    if (anchored) {
+      break;
+    }
+    offset++;
+  }
+
+  return matched;
+}
+
+/*
+ * Group the recorded branches by bracket pair and tell every bracket
+ * pair where its branches are.
+ *
+ * After the call info->branches is ordered by bracket_index, and for
+ * each bracket pair `branches` is the index of its first branch and
+ * `num_branches` the number of branches it owns.
+ */
+static void setup_branch_points(struct regex_info *info) {
+  int i, j;
+  struct branch tmp;
+
+  /*
+   * First, sort branches by bracket index. The sort must be stable:
+   * doh() relies on the branches of one bracket pair staying in the
+   * left-to-right order in which they were recorded. An insertion sort
+   * guarantees that; a sort exchanging distant elements does not (with
+   * "(a(b|c|d)|e)" it swaps the two '|' of the inner group).
+   */
+  for (i = 1; i < info->num_branches; i++) {
+    tmp = info->branches[i];
+    for (j = i;
+         j > 0 && info->branches[j - 1].bracket_index > tmp.bracket_index;
+         j--) {
+      info->branches[j] = info->branches[j - 1];
+    }
+    info->branches[j] = tmp;
+  }
+
+  /*
+   * For each bracket, set their branch points. This way, for every bracket
+   * (i.e. every chunk of regex) we know all branch points before matching.
+   */
+  for (i = j = 0; i < info->num_brackets; i++) {
+    info->brackets[i].num_branches = 0;
+    info->brackets[i].branches = j;
+    while (j < info->num_branches && info->branches[j].bracket_index == i) {
+      info->brackets[i].num_branches++;
+      j++;
+    }
+  }
+}
+
+/*
+ * Analyse the regular expression, then run the match.
+ *
+ * A single pass over re records every bracket pair and every '|' in
+ * info and validates backslash escapes and bracket balance. The
+ * branches are then attached to their bracket pairs with
+ * setup_branch_points() and the search is delegated to baz().
+ *
+ * Returns the result of baz(), or a negative SLRE_* code when the
+ * expression is rejected during the analysis.
+ */
+static int foo(const char *re, int re_len, const char *s, int s_len,
+               struct regex_info *info) {
+  int i, step, depth = 0;
+
+  /* First bracket captures everything */
+  info->brackets[0].ptr = re;
+  info->brackets[0].len = re_len;
+  info->num_brackets = 1;
+
+  /* Make a single pass over regex string, memorize brackets and branches */
+  for (i = 0; i < re_len; i += step) {
+    step = get_op_len(re + i, re_len - i);
+
+    if (re[i] == '|') {
+      FAIL_IF(info->num_branches >= (int) ARRAY_SIZE(info->branches),
+              SLRE_TOO_MANY_BRANCHES);
+      /*
+       * The branch belongs to the most recently opened bracket pair if
+       * that pair is still open (len == -1), else to the pair at the
+       * current nesting depth.
+       */
+      info->branches[info->num_branches].bracket_index =
+        info->brackets[info->num_brackets - 1].len == -1 ?
+        info->num_brackets - 1 : depth;
+      info->branches[info->num_branches].schlong = &re[i];
+      info->num_branches++;
+    } else if (re[i] == '\\') {
+      /* A backslash must be followed by something valid */
+      FAIL_IF(i >= re_len - 1, SLRE_INVALID_METACHARACTER);
+      if (re[i + 1] == 'x') {
+        /* Hex digit specification must follow */
+        FAIL_IF(re[i + 1] == 'x' && i >= re_len - 3,
+                SLRE_INVALID_METACHARACTER);
+        FAIL_IF(re[i + 1] ==  'x' && !(isxdigit(re[i + 2]) &&
+                isxdigit(re[i + 3])), SLRE_INVALID_METACHARACTER);
+      } else {
+        FAIL_IF(!is_metacharacter((unsigned char *) re + i + 1),
+                SLRE_INVALID_METACHARACTER);
+      }
+    } else if (re[i] == '(') {
+      FAIL_IF(info->num_brackets >= (int) ARRAY_SIZE(info->brackets),
+              SLRE_TOO_MANY_BRACKETS);
+      depth++;  /* Order is important here. Depth increments first. */
+      info->brackets[info->num_brackets].ptr = re + i + 1;
+      /* -1 marks the pair as still open until its ')' is seen */
+      info->brackets[info->num_brackets].len = -1;
+      info->num_brackets++;
+      /* Every group needs a slot when the caller asked for captures */
+      FAIL_IF(info->num_caps > 0 && info->num_brackets - 1 > info->num_caps,
+              SLRE_CAPS_ARRAY_TOO_SMALL);
+    } else if (re[i] == ')') {
+      /* Same rule as for '|' to find which bracket pair is being closed */
+      int ind = info->brackets[info->num_brackets - 1].len == -1 ?
+        info->num_brackets - 1 : depth;
+      info->brackets[ind].len = (int) (&re[i] - info->brackets[ind].ptr);
+      DBG(("SETTING BRACKET %d [%.*s]\n",
+           ind, info->brackets[ind].len, info->brackets[ind].ptr));
+      depth--;
+      FAIL_IF(depth < 0, SLRE_UNBALANCED_BRACKETS);
+      /* An empty group "()" is reported as SLRE_NO_MATCH */
+      FAIL_IF(i > 0 && re[i - 1] == '(', SLRE_NO_MATCH);
+    }
+  }
+
+  FAIL_IF(depth != 0, SLRE_UNBALANCED_BRACKETS);
+  setup_branch_points(info);
+
+  return baz(s, s_len, info);
+}
+
+/*
+ * Public entry point: match the NUL-terminated regular expression
+ * regexp against the first s_len bytes of s.
+ *
+ * caps/num_caps describe an optional array receiving the captured
+ * groups; flags is 0 or SLRE_IGNORE_CASE. The result is the value
+ * returned by foo(): non-negative on a match, a negative SLRE_* code
+ * otherwise.
+ */
+int slre_match(const char *regexp, const char *s, int s_len,
+               struct slre_cap *caps, int num_caps, int flags) {
+  struct regex_info info;
+  const int regexp_len = (int) strlen(regexp);
+
+  /* Only the counters need resetting: foo() fills the arrays it uses */
+  info.num_brackets = 0;
+  info.num_branches = 0;
+  info.caps = caps;
+  info.num_caps = num_caps;
+  info.flags = flags;
+
+  DBG(("========================> [%s] [%.*s]\n", regexp, s_len, s));
+  return foo(regexp, regexp_len, s, s_len, &info);
+}
--- a/ROBITools/src/slre.h
+++ b/ROBITools/src/slre.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2004-2013 Sergey Lyubka <valenok@gmail.com>
+ * Copyright (c) 2013 Cesanta Software Limited
+ * All rights reserved
+ *
+ * This library is dual-licensed: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. For the terms of this
+ * license, see <http://www.gnu.org/licenses/>.
+ *
+ * You are free to use this library under the terms of the GNU General
+ * Public License, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * Alternatively, you can license this library under a commercial
+ * license, as set out in <http://cesanta.com/products.html>.
+ */
+
+/*
+ * This is a regular expression library that implements a subset of Perl RE.
+ * Please refer to README.md for a detailed reference.
+ */
+
+#ifndef SLRE_HEADER_DEFINED
+#define SLRE_HEADER_DEFINED
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct slre_cap {
+  const char *ptr;  /* presumably start of the captured text -- confirm in slre.c */
+  int len;          /* presumably its length -- confirm in slre.c */
+};
+
+/* Match `regexp` against `buf` (`buf_len` bytes); SLRE_* codes below on failure. */
+int slre_match(const char *regexp, const char *buf, int buf_len,
+               struct slre_cap *caps, int num_caps, int flags);
+
+/* Possible flags for slre_match() */
+enum { SLRE_IGNORE_CASE = 1 };
+
+
+/* slre_match() failure codes (all negative) */
+#define SLRE_NO_MATCH               -1
+#define SLRE_UNEXPECTED_QUANTIFIER  -2
+#define SLRE_UNBALANCED_BRACKETS    -3  /* '(' and ')' do not pair up */
+#define SLRE_INTERNAL_ERROR         -4
+#define SLRE_INVALID_CHARACTER_SET  -5
+#define SLRE_INVALID_METACHARACTER  -6  /* e.g. x escape without 2 hex digits */
+#define SLRE_CAPS_ARRAY_TOO_SMALL   -7  /* more groups than num_caps slots */
+#define SLRE_TOO_MANY_BRANCHES      -8
+#define SLRE_TOO_MANY_BRACKETS      -9  /* pattern exceeds the bracket table */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* SLRE_HEADER_DEFINED */
--- a/ROBITools/src/slre.o
+++ b/ROBITools/src/slre.o
--- a/dev_notes.txt
+++ b/dev_notes.txt
@@ -0,0 +1,190 @@
+# Generate the initial package skeleton from the named objects.
+package.skeleton("ROBITools", c("robitools.motu.count", "robitools.motus",
+                                "robitools.reads", "robitools.samples",
+                                "robitools.sample.count"))
+
+#include <R.h>
+#include <Rinternals.h>
+
+static void cooked_goose(SEXP foo)
+{   /* Finalizer for the pointers built by blob(): frees the Calloc'ed buffer. */
+    if (TYPEOF(foo) != EXTPTRSXP) error("argument not external pointer");
+    double *x = (double *) R_ExternalPtrAddr(foo);
+    if (x == NULL) return;    /* already cleared: nothing left to free */
+    int blather = x[0];       /* read the flag before the buffer goes away */
+    Free(x);
+    R_ClearExternalPtr(foo);  /* do not leave a dangling address behind */
+    if (blather) Rprintf("finalizer ran\n");  /* R console, not raw stdout */
+}
+
+SEXP blob(SEXP nin, SEXP blatherin)
+{
+    if (! isInteger(nin))
+        error("n not integer");
+    const int n = INTEGER(nin)[0];
+    if (n <= 0)
+        error("n not positive");
+    if (! isLogical(blatherin))
+        error("blather not logical");
+    const int blather = LOGICAL(blatherin)[0];
+
+    /* Layout: x[0] = blather flag, x[1] = n, x[2..n+1] = normal draws. */
+    double *x = Calloc(n + 2, double);
+    x[0] = blather;
+    x[1] = n;
+
+    GetRNGstate();
+    for (int k = 0; k < n; k++)
+        x[2 + k] = norm_rand();
+    PutRNGstate();
+
+    SEXP handle = PROTECT(R_MakeExternalPtr(x, R_NilValue, R_NilValue));
+    R_RegisterCFinalizer(handle, cooked_goose);
+    UNPROTECT(1);
+    return handle;
+}
+
+SEXP blub(SEXP foo)
+{
+    if (TYPEOF(foo) != EXTPTRSXP)
+        error("argument not external pointer");
+
+    /* Copies the payload behind `foo` into a fresh numeric vector. */
+    double *x = (double *) R_ExternalPtrAddr(foo);
+    if (x == NULL) error("external pointer is NULL");  /* cleared pointer */
+    int n = x[1];  /* x[1] holds the payload length; the data starts at x[2] */
+
+    SEXP bar = PROTECT(allocVector(REALSXP, n));
+    for (int i = 0; i < n; ++i)
+        REAL(bar)[i] = x[i + 2];
+    UNPROTECT(1);
+    return bar;
+}
+
+
+
+blob <- function(n, blather = FALSE) {
+    # Validate eagerly: the C routine wants a positive whole number and
+    # a logical flag; checks run in order and stop at the first failure.
+    stopifnot(is.numeric(n), as.integer(n) == n, n > 0,
+              is.logical(blather))
+    .Call("blob", as.integer(n), blather)
+}
+
+blub <- function(x) {
+    stopifnot(typeof(x) == "externalptr")  # classed pointers pass too
+    .Call("blub", x)
+}
+
+
+Hi Robert,
+
+It looks like there is no way to explicitly make an S4 object call a
+function when it is garbage collected unless you resort to tricks with
+reg.finalizer.
+
+It turns out that Prof. Ripley's reply (thanks!!) had enough hints in it
+that I was able to get the effect I wanted by using R's external pointer
+facility. In fact it works quite nicely.
+
+In a nutshell, I create a C++ object (with new) and then wrap its pointer
+with an R external pointer using
+SEXP rExtPtr = R_MakeExternalPtr( cPtr, aTag, R_NilValue);
+
+Where cPtr is the C++/C pointer to the object and aTag is an R symbol
+describing the pointer type [e.g. SEXP aTag =
+install("this_is_a_tag_for_a_pointer_to_my_object")]. The final argument is
+"a value to protect". I don't know what this means, but all of the examples
+I saw use R_NilValue.
+
+If you want a C++ function to be called when R loses the reference to the
+external pointer (actually when R garbage collects it, or when R quits), do
+R_RegisterCFinalizerEx( rExtPtr, (R_CFinalizer_t)functionToBeCalled, TRUE );
+
+The TRUE means that R will call the "functionToBeCalled" if the pointer is
+still around when R quits. I guess if you set it to FALSE, then you are
+assuming that your shell can delete memory and/or release resources when R
+quits. 
+
+So return this external pointer to R (the function that new'ed it was called
+by .Call or something similar) and stick it in a slot of your object. Then
+when your object is garbage collected, "functionToBeCalled" will be called.
+The slot would have the type "externalptr".
+
+The functionToBeCalled contains the code to delete the C++ pointer or
+release resources, for example...
+
+SEXP functionToBeCalled( SEXP rExtPtr ) {
+  // Get the C++ pointer (the void* address needs an explicit cast in C++)
+  MyThing* ptr = static_cast<MyThing*>(R_ExternalPtrAddr(rExtPtr));
+
+  // Delete it (deleting a null pointer is a no-op)
+  delete ptr;
+
+  // Clear the external pointer so the stale address cannot be reused
+  R_ClearExternalPtr(rExtPtr);
+
+  return R_NilValue;
+}
+
+And there you have it.
+
+There doesn't seem to be any official documentation on this stuff (at least
+none that I could find). The best references I found are on the R developers
+web page. See the links within  "some notes on _references, external
+objects, or mutable state_ for R and a _simple implementation_ of external
+references and finalization". Note that the documents are slightly out of
+date (the function names have apparently been changed somewhat). The latter
+one has some examples that are very helpful. And as Prof. Ripley pointed
+out, RODBC uses this facility too, so look at that code.
+
+Hope this was useful. Good luck.
+
+
+/* Hand the C string stored behind `ext` back to R as a character value. */
+SEXP get(SEXP ext) {
+    const char *stored = (const char *) R_ExternalPtrAddr(ext);
+    return mkString(stored);
+}
+
+/* Copy the first element of `str` into the buffer behind `ext` (N_MAX bytes). */
+SEXP set(SEXP ext, SEXP str)
+{
+    char *x = (char *) R_ExternalPtrAddr(ext);
+    /* Fixed "%s" format: a '%' in the R string must not act as a conversion. */
+    snprintf(x, N_MAX, "%s", CHAR(STRING_ELT(str, 0)));
+    return ScalarLogical(TRUE);
+}
+
+
+> dyn.load("tmp.so")
+> x <- .Call("create", list("info could be any R object", 1:5))
+> .Call("get", x)
+[1] "my name is joe"
+> ## reference semantics!
+> .Call("set", x, "i am sam i am")
+[1] TRUE
+> .Call("get", x)
+[1] "i am sam i am"
+> x <- NULL
+> gc()
+finalizing
+         used (Mb) gc trigger (Mb) max used (Mb)
+Ncells 339306 18.2     467875   25   407500 21.8
+Vcells 202064  1.6     786432    6   380515  3.0
+
+
+/* Increment the `x` member of the struct Foo behind `ext`, in place. */
+SEXP incr(SEXP ext)
+{
+    struct Foo *target = (struct Foo *) R_ExternalPtrAddr(ext);
+    ++target->x;
+    return ScalarInteger(target->x);  /* report the updated value */
+}
+
+
+
+library(ROBITools)
+library.dynam("ROBITools.so")
+t <- .Call("R_read_taxonomy", "ecochange", TRUE)  # handle reused below
+.Call("R_get_scientific_name", t, 7742L)