diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cd92261 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +loopbenchmark.R diff --git a/R/aggregate.R b/R/aggregate.R index 586eaa6..07eaca8 100644 --- a/R/aggregate.R +++ b/R/aggregate.R @@ -101,9 +101,10 @@ aggregate.metabarcoding.data=function(x, by, FUN,..., m = matrix(as.character(x[[n]])) dim(m)=df } - else + else { m = x[[n]] - + isfact=FALSE + } aggr.args = list(m,by=by,FUN=f,simplify=FALSE) lagr = do.call(aggregate,aggr.args) lagr = as.factor.or.matrix(lagr[,-(1:ncat),drop=FALSE]) @@ -182,15 +183,15 @@ aggregate.metabarcoding.data=function(x, by, FUN,..., for (n in ln) { f = layers[[n]] - - if (is.factor(x[[n]])){ + isfact=is.factor(x[[n]]) + if (isfact){ isfact = TRUE lf = levels(x[[n]]) df = dim(x[[n]]) m = matrix(as.character(x[[n]])) dim(m)=df } - else + else m = x[[n]] aggr.args = list(t(m),by=by,FUN=f,simplify=FALSE) diff --git a/man/ROBITools-package.Rd b/man/ROBITools-package.Rd new file mode 100644 index 0000000..c22ac8a --- /dev/null +++ b/man/ROBITools-package.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ROBITools.R +\docType{package} +\name{ROBITools-package} +\alias{ROBITools-package} +\alias{ROBITools} +\title{A package to manipulate DNA metabarcoding data.} +\description{ +A package to manipulate DNA metabarcoding data. +} +\details{ +This package was written as a following of the OBITools. 
+ +\tabular{ll}{ + Package: \tab ROBITools\cr + Type: \tab Package\cr + Version: \tab 0.1\cr + Date: \tab 2013-06-27\cr + License: \tab CeCILL 2.0\cr + LazyLoad: \tab yes\cr +} +} +\references{ +http://metabarcoding.org/obitools +} +\author{ +Frederic Boyer + +Aurelie Bonin + +Lucie Zinger + +Eric Coissac +} diff --git a/man/addS3Class.Rd b/man/addS3Class.Rd new file mode 100644 index 0000000..b86726a --- /dev/null +++ b/man/addS3Class.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/s3objects.R +\name{addS3Class} +\alias{addS3Class} +\title{Adds a class into the class hierarchy attribute.} +\usage{ +addS3Class(object, classname) +} +\arguments{ +\item{object}{the object to modify} + +\item{classname}{the name of the new class} +} +\value{ +the object given as parameter cast to the new + class +} +\description{ +\code{addS3Class} adds a new class name to the vector +of classes associated to the object. This is the way to +assign an object to an S3 class. 
\code{addS3Class} adds +the new class name in front of the class vector +} +\note{ +for efficiency purposes no check is done on the input + parameters +} +\examples{ +x = c(1,3,2,5) +x = addS3Class(x,"my.vector") +class(x) + +} +\seealso{ +\code{\link{rmS3Class}} +} +\author{ +Eric Coissac +} +\keyword{function} +\keyword{system} diff --git a/man/const-threshold-mask-methods.Rd b/man/const-threshold-mask-methods.Rd new file mode 100644 index 0000000..a16570f --- /dev/null +++ b/man/const-threshold-mask-methods.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metabarcoding_threshold.R +\docType{methods} +\name{const.threshold.mask,metabarcoding.data-method} +\alias{const.threshold.mask,metabarcoding.data-method} +\alias{const.threshold.mask-methods,metabarcoding.data} +\title{Computes a constant threshold mask for filtering read abundances.} +\usage{ +\S4method{const.threshold.mask}{metabarcoding.data}(data, MARGIN, + threshold = 0.01, operator = "<") +} +\arguments{ +\item{data}{The \code{\linkS4class{metabarcoding.data}} instance +on which the normalisation has to be computed.} + +\item{MARGIN}{Indicates if the sums have to be computed across +samples or motus. +Allowed values are : +\itemize{ + \item{'sample' or 1} for computing sum across samples + \item{'motu' or 2} for computing sum across motus + }} + +\item{threshold}{a numeric value between 0 and 1 indicating which part of +the signal must be conserved. Default value is set to +0.01 (1\% of the normalized signal).} + +\item{operator}{is a logical comparison operator.} +} +\value{ +A logical matrix usable for selecting cells in the read abundance matrix. +} +\description{ +The method \code{const.threshold.mask} of the class \code{\linkS4class{metabarcoding.data}} +computes a logical matrix of the same size as the read matrix of the data parameter. 
+Each cell of this matrix contains a \code{TRUE} or a \code{FALSE} value according to the +relationship existing between the read abundance and the global threshold. +} +\details{ +(computed value) = (normalized read abundance) operator (threshold value) + +for a cell in the result matrix, \code{(normalized read abundance)} is extracted from the read layer +after normalization. +\code{operator} is a comparison operator and \code{(threshold value)} is estimated with the +\code{\link{threshold}} method. +} +\seealso{ +\code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}, \code{\link{normalize}} +} +\author{ +Aurelie Bonin +} diff --git a/man/contaslayer.Rd b/man/contaslayer.Rd new file mode 100644 index 0000000..cffde6f --- /dev/null +++ b/man/contaslayer.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/contaslayer.R +\name{contaslayer} +\alias{contaslayer} +\title{Detects contaminants in metabarcoding data} +\usage{ +contaslayer(x, controls, clust = NULL) +} +\arguments{ +\item{x}{a \code{\link{metabarcoding.data}} object} + +\item{controls}{a vector of sample names where contaminants are suspected to be detected +(typically negative control names).} + +\item{clust}{a vector for grouping sequences. Default set to \code{NULL}.} +} +\value{ +a vector containing the names of sequences identified as contaminants +} +\description{ +Detects sequences/motus in a \code{\link{metabarcoding.data}} object +for which frequencies over the entire dataset are maximum in negative controls and +hence, most likely to be contaminants. 
+} +\examples{ + +data(termes) +termes.ok = termes[,colSums(termes$reads)>0] +neg = rownames(termes.ok)[grep("r",rownames(termes.ok))] + +#finds contaminants based on neg samples +contaslayer(termes.ok, neg) + +# extending contaminant detection with grouping factor, +# typically obiclean/sumatra cluster or taxonomy membership +contaslayer(termes.ok, neg, termes.ok$motus$scientific_name) + +} +\seealso{ +\code{\link{threshold}} for further trimming +} +\author{ +Lucie Zinger +} diff --git a/man/createS3Class.Rd b/man/createS3Class.Rd new file mode 100644 index 0000000..54fdb03 --- /dev/null +++ b/man/createS3Class.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/s3objects.R +\name{createS3Class} +\alias{createS3Class} +\title{create basic functions to manipulate a new S3 class} +\usage{ +createS3Class(classname) +} +\arguments{ +\item{classname}{a \code{character string} indicating the name +of the new class.} +} +\description{ +The createS3Class function creates in the \code{package:ROBITools} +environment an \code{is.xxx} function and an \code{as.xxx} function +allowing to test if an object belongs to the class \code{xxx} and to add +the class \code{xxx} to the class list of an object. \code{xxx} is a +generic class name that is specified through the \code{classname} +argument of the function. +} +\note{ +Take care that the new functions are created in the +\code{package:ROBITools} environment. 
+} +\examples{ + +# Create a new S3 class named mynewclass +createS3Class('mynewclass') + +#create a new vector object +x=c(1,4,6) + +# test if it belongs the new class, that is false +is.mynewclass(x) + +# Associate x to the new class +as.mynewclass(x) + +# test again if x belongs the new class, that is now true +is.mynewclass(x) + +} +\seealso{ +\code{\link{rmS3Class}} +} +\author{ +Eric Coissac +} +\keyword{function} +\keyword{system} diff --git a/man/dbtree.Rd b/man/dbtree.Rd new file mode 100644 index 0000000..923acab --- /dev/null +++ b/man/dbtree.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/taxoDBtree.R +\name{dbtree} +\alias{dbtree} +\title{Construct a taxonomic tree from a list of taxa} +\usage{ +dbtree(x) +} +\arguments{ +\item{x}{a table containing the taxonomic path of the references. Typically an output from get.classic.taxonomy} +} +\value{ +g a directed graph displaying the taxonomy hierarchy of the input data. Stored in a \code{\link{igraph}} object + where the taxonomic ranks of the vertices are added to the vertices attributes +} +\description{ +Construct a graph from a table containing the taxonomic path of sequences +} +\examples{ + +data(termes) + +taxo=default.taxonomy() + +termes.taxo.table = get.classic.taxonomy(termes, taxo, "taxid") +head(termes.taxo.table) + +graph.tax.termes = dbtree(termes.taxo.table[,1:7]) +library(igraph) + +#plot the tree +coord = layout.reingold.tilford(graph.tax.termes, root=1, circular=F) +v.cex = as.factor(V(graph.tax.termes)$rank) +levels(v.cex) = match(levels(v.cex), colnames(termes.taxo.table)) +plot(graph.tax.termes, vertex.size=1, vertex.label.cex=2*(as.numeric(as.vector(v.cex))^-1), edge.arrow.size=0, layout=coord) + + +#Vizualization with sequence counts +tax.count = log10(colSums(termes$reads)[match(as.vector(V(graph.tax.termes)$name), termes$motus$scientific_name)]) +tax.count[is.na(tax.count)|tax.count<0] = 0.01 +V(graph.tax.termes)$count = 
unname(tax.count) + +plot(graph.tax.termes, vertex.size=V(graph.tax.termes)$count, vertex.label.cex=2*(as.numeric(as.vector(v.cex))^-1), edge.arrow.size=0, layout=coord) + + +} +\seealso{ +\code{\link{get.classic.taxonomy}} +} +\author{ +Lucie Zinger +} diff --git a/man/dist.center.group.Rd b/man/dist.center.group.Rd new file mode 100644 index 0000000..85a1177 --- /dev/null +++ b/man/dist.center.group.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mstat.R +\name{dist.center.group} +\alias{dist.center.group} +\title{Builds the list of sample groups included in a circle around a central sample} +\usage{ +dist.center.group(dtable, radius, center = TRUE) +} +\arguments{ +\item{dtable}{a distance table between samples as +computed by \code{\link{dist.grid}}} + +\item{radius}{the radius of the circle} + +\item{center}{a \code{logical} value indicating if the center of +the group must be included in the group} +} +\value{ +a list of vectors containing the labels of the group members +} +\description{ +Builds the list of sample groups included in a circle around a central sample +} +\examples{ +data(termes) +termes.ok = termes[,colSums(termes$reads)>0] +pos = expand.grid(1:3 * 10,1:7 * 10) +labels = rownames(termes.ok) +d = dist.grid(pos[,1],pos[2],labels) +groups = dist.center.group(d,20) + +} diff --git a/man/dist.clique.group.Rd b/man/dist.clique.group.Rd new file mode 100644 index 0000000..43363f0 --- /dev/null +++ b/man/dist.clique.group.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mstat.R +\name{dist.clique.group} +\alias{dist.clique.group} +\title{Builds the list of sample groups including samples closest than a define distance} +\usage{ +dist.clique.group(dtable, dmax, center = True) +} +\arguments{ +\item{dtable}{a distance table between samples as +computed by \code{\link{dist.grid}}} + +\item{dmax}{the maximum distance between two samples} +} +\value{ +a 
list of vectors containing the labels of the group members +} +\description{ +A graph is built by applying the threshold \code{dmax} to the distance matrix. +A group is a maximal clique in this graph. Consequently all member pairs of a group +are separated by a distance less than or equal to \code{dmax}. +} +\examples{ +data(termes) +termes.ok = termes[,colSums(termes$reads)>0] +pos = expand.grid(1:3 * 10,1:7 * 10) +labels = rownames(termes.ok) +d = dist.grid(pos[,1],pos[2],labels) +groups = dist.clique.group(d,20) + +} diff --git a/man/dist.grid.Rd b/man/dist.grid.Rd new file mode 100644 index 0000000..ffbc173 --- /dev/null +++ b/man/dist.grid.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mstat.R +\name{dist.grid} +\alias{dist.grid} +\title{Computes the pairwise distance matrix as a data.frame} +\usage{ +dist.grid(x, y, labels = NULL) +} +\arguments{ +\item{x}{a vector for the X coordinates} + +\item{y}{a vector for the Y coordinates} + +\item{labels}{a vector with the sample names} +} +\value{ +a data.frame instance of three columns + - a : The label of the first sample + - b : The label of the second sample + - dist : The Euclidean distance between samples a and b +} +\description{ +Computes the pairwise distance matrix as a data.frame +} +\examples{ +data(termes) +termes.ok = termes[,colSums(termes$reads)>0] +pos = expand.grid(1:3 * 10,1:7 * 10) +labels = rownames(termes.ok) +d = dist.grid(pos[,1],pos[2],labels) + +} diff --git a/man/dm.univariate.Rd b/man/dm.univariate.Rd new file mode 100644 index 0000000..b9c5196 --- /dev/null +++ b/man/dm.univariate.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mstat.R +\name{dm.univariate} +\alias{dm.univariate} +\title{Simulate null distribution of the M statistics by Monte-Carlo} +\usage{ +dm.univariate(w, groups, resampling = 100) +} +\arguments{ +\item{w}{the weight matrix indicating the presence probability of each motu +in 
each sample. Each line corresponds to a sample and each column +to a MOTU. \code{rownames} of the \code{w} matrix must be the sample +names.} + +\item{groups}{the list of considered groups as computed by the \code{\link{dist.center.group}} +function} + +\item{resampling}{the number of simulations to establish the null distribution} +} +\value{ +a matrix of M scores under the null hypothesis of random distribution of MOTUs + with a MOTU per line and a column per simulation +} +\description{ +Computes the null empirical distribution of the M statistics +by shuffling MOTUs among locations. +} +\examples{ + +data(termes) +termes.ok = termes[,colSums(termes$reads)>0] +pos = expand.grid(1:3 * 10,1:7 * 10) +labels = rownames(termes.ok) +d = dist.grid(pos[,1],pos[2],labels) +groups = dist.center.group(d,20) +w = m.weight(termes.ok) +dnull = dm.univariate(w,groups) + +} diff --git a/man/double-open-brace-methods.Rd b/man/double-open-brace-methods.Rd new file mode 100644 index 0000000..a1e2b4b --- /dev/null +++ b/man/double-open-brace-methods.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/layers.metabarcoding.R +\docType{methods} +\name{[[,metabarcoding.data-method} +\alias{[[,metabarcoding.data-method} +\alias{double-open-brace-methods,metabarcoding.data} +\title{Returns a layer associated to a \code{\link{metabarcoding.data}}} +\usage{ +\method{[[}{unmutable}(x,i) +} +\arguments{ +\item{x}{a \code{\link{metabarcoding.data}} instance} +} +\value{ +a matrix or a factor. +} +\description{ +The [[ operator extracts a layer +attached to a \code{\link{metabarcoding.data}} instance. 
+} diff --git a/man/extracts-obiclean-methods.Rd b/man/extracts-obiclean-methods.Rd new file mode 100644 index 0000000..a63cd4e --- /dev/null +++ b/man/extracts-obiclean-methods.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/obiclean.R +\docType{methods} +\name{extracts.obiclean,metabarcoding.data-method} +\alias{extracts.obiclean,metabarcoding.data-method} +\alias{extracts.obiclean-methods,metabarcoding.data} +\title{Extracts the obiclean results} +\usage{ +\S4method{extracts.obiclean}{metabarcoding.data}(obj) +} +\arguments{ +\item{obj}{the \code{\linkS4class{metabarcoding.data}} to analyze} +} +\value{ +the modified \code{\linkS4class{metabarcoding.data}} instance +} +\description{ +The method \code{extracts.obiclean} of the class \code{\linkS4class{metabarcoding.data}} +extracts \code{obiclean} results from the MOTU descriptions included in the +\code{\linkS4class{metabarcoding.data}} instance. +When an \code{obitab} file is imported using the \code{\link{import.metabarcoding.data}} function, +if \code{obiclean} results are present in the file they are stored in the +\code{motu} data.frame. By calling this method, MOTU descriptors describing +the \code{obiclean} status are moved to a set of layers. 
+} +\examples{ + +# load termite data set from the ROBITools sample data +data(termes) + +# shows the initial list of layer names +layer.names(t) + +# extracts the obiclean status +termes = extracts.obiclean(termes) + +# shows the name of the newly created layers +layer.names(t) + + + +} +\seealso{ +\code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}, \code{\link{normalize}} +} +\author{ +Eric Coissac +} diff --git a/man/extrapol.freq.Rd b/man/extrapol.freq.Rd new file mode 100644 index 0000000..e4189b3 --- /dev/null +++ b/man/extrapol.freq.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/distrib.extrapol.R +\name{extrapol.freq} +\alias{extrapol.freq} +\title{Read frequencies krigging} +\usage{ +extrapol.freq(x, min.coord, max.coord, grid.grain = 100, coords, otus.table, + cutoff = 0.001, return.metabarcoding.data = FALSE) +} +\arguments{ +\item{x}{a vector or matrix from a row-normalized read table +\code{\link{metabarcoding.data}} object} + +\item{min.coord}{a vector of length = 2 indicating the minimum values of x and y +coordinates to be used for the predicted grid} + +\item{max.coord}{a vector of length = 2 indicating the maximum values of x and y +coordinates to be used for the predicted grid} + +\item{grid.grain}{an integer indicating the resolution (i.e. nb of subpoints) in x and y +coordinates required for the predicted grid} + +\item{coords}{a dataframe containing the x and y coordinates of the abundances +from x to be extrapolated.} + +\item{otus.table}{a motus data.frame containing motus informations of x} + +\item{cutoff}{a cutoff below which abundances are set to 0. +This threshold also determines the value to be added to 0 values for log10 +transformation} + +\item{return.metabarcoding.data}{if \code{TRUE}, returns a \code{\link{metabarcoding.data}} object. 
Default is \code{FALSE}} +} +\value{ +either a dataframe or a S3 object with a structure similar to \code{\link{metabarcoding.data}} object. + The number of samples corresponds to the predicted points. + The two last columns (if \code{return.metabarcoding.data==F}) or sample data.frame contains x y coordinates of the predicted grid + The all but last two columns (if \code{return.metabarcoding.data==F}) or read matrix contains the predicted log10 transformed relative abundances + instead of reads counts + If \code{return.metabarcoding.data==F} the motus data.frame contains the motus informations from x +} +\description{ +Extrapolates read frequencies from a \code{\link{metabarcoding.data}} object in space for a finer resolution +} +\examples{ + +data(termes) +#Create dummy spatial coordinates +attr(termes, "samples")[c("x", "y")] = expand.grid(1:7,1:3) + +#compute frequencies +attr(termes, "layers")[["reads.freq"]] = normalize(termes, MARGIN=1)$reads + +# Getting extrapolations +termes.pred = extrapol.freq(attr(termes, "layers")[["reads.freq"]], min.coord=c(1,1), max.coord=c(7,3), + grid.grain=100,termes$samples[,c("x", "y")], termes$motus, cutoff=1e-3) + +head(termes.pred$reads) +} +\seealso{ +\code{\link{map.extrapol.freq}} as well as \code{sp} and \code{gstat} packages +} +\author{ +Lucie Zinger +} diff --git a/man/get.classic.taxonomy.Rd b/man/get.classic.taxonomy.Rd new file mode 100644 index 0000000..2023b6d --- /dev/null +++ b/man/get.classic.taxonomy.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/taxonomy_classic_table.R +\name{get.classic.taxonomy} +\alias{get.classic.taxonomy} +\title{Get classical taxonomy format} +\usage{ +get.classic.taxonomy(x, taxonomy, coltaxid) +} +\arguments{ +\item{x}{a \code{\link{metabarcoding.data}} object} + +\item{taxonomy}{a instance of \code{\linkS4class{taxonomy.obitools}}} + +\item{coltaxid}{a the name of the column containing taxids to be used for creating classical 
taxonomic description} +} +\value{ +returns a data.frame with the classical taxonomic description ("kingdom", "phylum", "class", "order", "family", "genus", "species"), as well as + sequence taxonomic assignment rank and scientific name for each sequences stored in the \code{\link{metabarcoding.data}} object +} +\description{ +Creates a table with the classical taxonomic description (from phylum to species) +} +\examples{ + +data(termes) + +taxo=default.taxonomy() + +termes.taxo.table = get.classic.taxonomy(termes, taxo, "taxid") +head(termes.taxo.table) + +attr(termes, "motus") = data.frame(termes$motus, termes.taxo.table) + + +} +\seealso{ +\code{\linkS4class{taxonomy.obitools}}, and methods \code{\link{species}},\code{\link{genus}}, \code{\link{family}},\code{\link{kingdom}}, + \code{\link{superkingdom}},\code{\link{taxonatrank}}, \code{\link{taxonmicank}} +} +\author{ +Lucie Zinger +} +\keyword{taxonomy} diff --git a/man/import.metabarcoding.data.Rd b/man/import.metabarcoding.data.Rd new file mode 100644 index 0000000..e1535fa --- /dev/null +++ b/man/import.metabarcoding.data.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/import.metabarcoding.R +\name{import.metabarcoding.data} +\alias{import.metabarcoding.data} +\title{Read a data file produced by the \code{obitab} command} +\usage{ +import.metabarcoding.data(file, sep = "\\t", sample = "sample", + sample.sep = "\\\\.", attribute = ":") +} +\arguments{ +\item{file}{a string containing the file name of the obitab file.} + +\item{sep}{Column separator in the obitab file. 
+The default separator is the tabulation.} + +\item{sample}{A regular expression allowing to identify columns +from the file describing abundances of sequences per sample} + +\item{sample.sep}{Separator between combined sample name.} + +\item{attribute}{Separator used to split between sample 'tag' and sample name.} +} +\value{ +a \code{\link{metabarcoding.data}} instance +} +\description{ +Read a data file issued from the conversion of a \strong{fasta} +file to a tabular file by the \code{obitab} command of the +\strong{OBITools} package +} +\examples{ +require(ROBITools) + +\dontshow{# switch the working directory to the data package directory} +\dontshow{setwd(system.file("extdata", package="ROBITools"))} + +# read the termes.tab file +termes=import.metabarcoding.data('termes.tab') + +# print the number of samples and motus described in the file +dim(termes) + +} +\seealso{ +\code{\link{metabarcoding.data}} +} +\author{ +Eric Coissac +} +\keyword{DNA} +\keyword{metabarcoding} diff --git a/man/import.ngsfilter.data.Rd b/man/import.ngsfilter.data.Rd new file mode 100644 index 0000000..6c8475f --- /dev/null +++ b/man/import.ngsfilter.data.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/import.ngsfilter.R +\name{import.ngsfilter.data} +\alias{import.ngsfilter.data} +\title{Read ngsfilter text file} +\usage{ +import.ngsfilter.data(file, platewell = NULL) +} +\arguments{ +\item{file}{a string containing the file name for the \code{ngsfilter} command.} + +\item{platewell}{a string corresponding to the tag used for storing the sample location +in the PCR plate. Should be of the form "nbPlate_Well" (e.g. "01_A02"). +Default is \code{NULL}} +} +\value{ +\code{\link{import.ngsfilter.data}} returns a \code{\link{data.frame}} instance +} +\description{ +Reads the text file used for assigning reads to samples with the + \code{ngsfilter} command of the \strong{OBITools} package. 
+} +\examples{ +\dontshow{# switch the working directory to the data package directory} +\dontshow{setwd(system.file("extdata", package="ROBITools"))} + +data(termes) + +# reading the termes_ngsfilt.txt file +termes.ngs=import.ngsfilter.data('termes_ngsfilt.txt', platewell="position") + +# including ngsfilter data into termes data +attr(termes, "samples") = termes.ngs[rownames(termes),] + +colnames(termes$samples) + +} +\seealso{ +\code{\link{import.metabarcoding.data}} and \code{\link{read.obitab}} for other methods of data importation +} +\author{ +Lucie Zinger +} +\keyword{DNA} +\keyword{metabarcoding} diff --git a/man/initialize-methods.Rd b/man/initialize-methods.Rd new file mode 100644 index 0000000..0918e25 --- /dev/null +++ b/man/initialize-methods.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/02_class_metabarcoding.data.R +\docType{methods} +\name{initialize,metabarcoding.data-method} +\alias{initialize,metabarcoding.data-method} +\alias{initialize-methods,metabarcoding.data} +\title{metabarcoding.data constructor} +\usage{ +\S4method{initialize}{metabarcoding.data}(.Object, reads, samples, motus, + taxonomy = NULL, taxid = NULL, sample.margin = NA, layers = list()) +} +\description{ +metabarcoding.data constructor +} diff --git a/man/layer.names-methods.Rd b/man/layer.names-methods.Rd new file mode 100644 index 0000000..d2886ef --- /dev/null +++ b/man/layer.names-methods.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/layers.metabarcoding.R +\docType{methods} +\name{layer.names,metabarcoding.data-method} +\alias{layer.names,metabarcoding.data-method} +\alias{layer.names-methods,metabarcoding.data} +\title{Returns the names of all the layers} +\usage{ +\S4method{layer.names}{metabarcoding.data}(obj) +} +\arguments{ +\item{obj}{a \code{\link{metabarcoding.data}} instance} +} +\value{ +a vector of type \code{character} containing the + list of all the 
layer names. +} +\description{ +\code{layer.names} extracts the list of all the layer +names attached to a \code{\link{metabarcoding.data}} instance. +} diff --git a/man/m.bivariate.Rd b/man/m.bivariate.Rd new file mode 100644 index 0000000..a9aa5e8 --- /dev/null +++ b/man/m.bivariate.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mstat.R +\name{m.bivariate} +\alias{m.bivariate} +\title{Computes the bivariate M statistics} +\usage{ +m.bivariate(w1, w2 = NULL, groups) +} +\arguments{ +\item{w1}{the weigth matrix indicating the presence probability of each motu +used as focus species in each samples. Each line corresponds to a sample and each column +to a MOTU. \code{rownames} of the \code{w} matrix must be the sample +names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.} + +\item{w2}{the weigth matrix indicating the presence probability of each motu +used as target species in each samples. Each line corresponds to a sample and each column +to a MOTU. \code{rownames} of the \code{w} matrix must be the sample +names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id. +if \code{w2} is not set, w1 is also used as target species. in this case the diagonal +of the matrix return contains the univariate M statistic for the diferent species.} + +\item{groups}{the list of considered groups as computed by the \code{\link{dist.center.group}} +function} +} +\value{ +a matrix of M bivariate statistics with one focus species by row and one target species + by columns If \code{w2} is not specified the diagonal of the matrix is equal to the univariate + M statistic of the corresponding species. +} +\description{ +The function computes the bivariate M statiscics for a set of target species around a set of +focus species. 
+} +\examples{ +data(termes) +termes.ok = termes[,colSums(termes$reads)>0] +pos = expand.grid(1:3 * 10,1:7 * 10) +labels = rownames(termes.ok) +d = dist.grid(pos[,1],pos[2],labels) +groups = dist.center.group(d,20) +w = m.weight(termes.ok) +m = m.bivariate(w,groups) + +} +\references{ +Marcon, E., Puech, F., and Traissac, S. (2012). + Characterizing the relative spatial structure of point patterns. + International Journal of Ecology, 2012. +} +\seealso{ +\code{\link{dist.center.group}} + +\code{\link{m.weight}} +} diff --git a/man/m.univariate.Rd b/man/m.univariate.Rd new file mode 100644 index 0000000..11306a2 --- /dev/null +++ b/man/m.univariate.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mstat.R +\name{m.univariate} +\alias{m.univariate} +\title{Computes the univariate M statistics} +\usage{ +m.univariate(w, groups) +} +\arguments{ +\item{w}{the weigth matrix indicating the presence probability of each motu +in each samples. Each line corresponds to a sample and each column +to a MOTU. \code{rownames} of the \code{w} matrix must be the sample +names. It is nice but not mandatory if the \code{colnames} refer to the MOTU id.} + +\item{groups}{the list of considered groups as computed by the \code{\link{dist.center.group}} +function} +} +\description{ +Computes the univariate M statistics +} +\examples{ +data(termes) +termes.ok = termes[,colSums(termes$reads)>0] +pos = expand.grid(1:3 * 10,1:7 * 10) +labels = rownames(termes.ok) +d = dist.grid(pos[,1],pos[2],labels) +groups = dist.center.group(d,20) +w = m.weight(termes.ok) +m = m.univariate(w,groups) + +} +\references{ +Marcon, E., Puech, F., and Traissac, S. (2012). + Characterizing the relative spatial structure of point patterns. + International Journal of Ecology, 2012. 
+} +\seealso{ +\code{\link{dist.center.group}} + +\code{\link{m.weight}} +} diff --git a/man/m.univariate.test.Rd b/man/m.univariate.test.Rd new file mode 100644 index 0000000..26c6a67 --- /dev/null +++ b/man/m.univariate.test.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mstat.R +\name{m.univariate.test} +\alias{m.univariate.test} +\title{Test the significance of the M statistics by Monte-Carlo} +\usage{ +m.univariate.test(w, groups, resampling = 100, alternative = "two.sided") +} +\arguments{ +\item{w}{the weight matrix indicating the presence probability of each motu +in each sample. Each line corresponds to a sample and each column +to a MOTU. \code{rownames} of the \code{w} matrix must be the sample +names.} + +\item{groups}{the list of considered groups as computed by the \code{\link{dist.center.group}} +function} + +\item{resampling}{the number of simulations to establish the null distribution} + +\item{alternative}{a character value in \code{c('two.sided','less','greater')} +- two.sided : the M stat is checked against the two sides of the empirical + M distribution +- less : test if the M stat is less than the M observed in the empirical + M distribution (exclusion hypothesis) +- greater : test if the M stat is greater than the M observed in the empirical + M distribution (aggregation hypothesis)} +} +\value{ +a vector of p-values with an attribute \code{m.stat} containing the actual M stat + for each MOTU +} +\description{ +Computes the p-value of the M statistic associated to a MOTU +by shuffling MOTUs among locations. 
+} +\examples{ + +data(termes) +termes.ok = termes[,colSums(termes$reads)>0] +pos = expand.grid(1:3 * 10,1:7 * 10) +labels = rownames(termes.ok) +d = dist.grid(pos[,1],pos[2],labels) +groups = dist.center.group(d,20) +w = m.weight(termes.ok) +pval = m.univariate.test(w,groups) + +} diff --git a/man/m.weight.Rd b/man/m.weight.Rd new file mode 100644 index 0000000..af8a56e --- /dev/null +++ b/man/m.weight.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mstat.R +\name{m.weight} +\alias{m.weight} +\title{Computes a weigth matrix from a \code{\linkS4class{metabarcoding.data}}} +\usage{ +m.weight(data) +} +\arguments{ +\item{data}{a \code{\linkS4class{metabarcoding.data}} instance} +} +\value{ +a weight matrix usable for M statistics +} +\description{ +The weight can be considered as a propability of presence of a MOTU in a +given sample. This function defines this probability as the fraction of +the maximal occurrence frequency over all samples. +} +\examples{ +data(termes) +termes.ok = termes[,colSums(termes$reads)>0] +w = m.weight(termes.ok) + +} diff --git a/man/map.extrapol.freq.Rd b/man/map.extrapol.freq.Rd new file mode 100644 index 0000000..1f30663 --- /dev/null +++ b/man/map.extrapol.freq.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/distrib.extrapol.R +\name{map.extrapol.freq} +\alias{map.extrapol.freq} +\title{Maps of krigged log10-transformed frequencies} +\usage{ +map.extrapol.freq(x, path = NULL, col.name = NULL, index, cutoff = 0.001, + add.points = NULL, adj = 4) +} +\arguments{ +\item{x}{an extrapol.freq output} + +\item{path}{the path of the folder to export the map. Default is \code{NULL} and map is printed in Rplot/quartz} + +\item{index}{an integer indicating column number of the motu/sequence to be plotted.} + +\item{cutoff}{lower motu frequency accepted to consider motu abundance as different +from 0. 
Should be the same as the one used in extrapol.freq} + +\item{add.points}{a 3-column data.frame containing factor levels and associated x and y coordinates +to be added to the map. Typically taxa observed in the field.} + +\item{adj}{a value used for adjusting text position in the map. Default is \code{4}} + +\item{col.name}{a vector containing the names of the columns to be used for defining the file name. Typically +the column names containing the taxonomic information and/or sequence/motus id.} +} +\value{ +a map/png file displaying motus distribution. +} +\description{ +Maps the output of extrapol.freq +} +\examples{ + +data(termes) +attr(termes, "samples")[c("x", "y")] = expand.grid(1:7,1:3) + +#compute frequencies +attr(termes, "layers")[["reads.freq"]] = normalize(termes, MARGIN=1)$reads + +# Getting extrapolations +termes.pred = extrapol.freq(attr(termes, "layers")[["reads.freq"]], +grid.grain=100,termes$samples[,c("x", "y")], termes$motus, cutoff=1e-3) + +#mapping the distribution of the 3 most abundant sequences (caution, mfrow does not work for lattice's levelplot) +map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 1, cutoff=1e-3) +map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 2, cutoff=1e-3) +map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 3, cutoff=1e-3) + +#dummy observational data +termes.obs = data.frame(x=c(2,3,5), y=c(2.7,2,2.6), taxa = rep("Isoptera Apicotermitinae", 3)) +map.extrapol.freq(termes.pred, path=NULL, col.name=NULL, 3, cutoff=1e-3, add.points=termes.obs) + +} +\seealso{ +\code{\link{extrapol.freq}}, and \code{levelplot} from \code{lattice} package +} +\author{ +Lucie Zinger +} diff --git a/man/marginalsum-methods.Rd b/man/marginalsum-methods.Rd new file mode 100644 index 0000000..d31fdd5 --- /dev/null +++ b/man/marginalsum-methods.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metabarcoding_threshold.R +\docType{methods}
+\name{marginalsum,metabarcoding.data-method} +\alias{marginalsum,metabarcoding.data-method} +\alias{marginalsum-methods,metabarcoding.data} +\title{Computes marginal sums over read counts.} +\usage{ +\S4method{marginalsum}{metabarcoding.data}(data, MARGIN = "sample", + na.rm = FALSE) +} +\arguments{ +\item{data}{The \code{\linkS4class{metabarcoding.data}} instance +on which marginal sums have to be computed.} + +\item{MARGIN}{Indicates if the sums have to be computed across +samples or motus. +Allowed values are : +\itemize{ + \item{'sample' or 1} for computing sum across samples + \item{'motu' or 2} for computing sum across motus + }} + +\item{na.rm}{Logical. Should missing values be omitted from the +calculations?} +} +\value{ +Returns the vector of marginal sums as a \code{numeric} vector +} +\description{ +Method \code{marginalsum} computes marginal sums over read counts of +a \code{\link{metabarcoding.data}} instance. +} +\examples{ +# load termite data set from the ROBITools sample data +data(termes) + +# Computes marginal sums per sample +ssum = marginalsum(termes,MARGIN="sample") + +# Computes marginal sums per MOTU +msum = marginalsum(termes,MARGIN="motu") + +} +\seealso{ +\code{\linkS4class{metabarcoding.data}} +} +\author{ +Aurelie Bonin +} diff --git a/man/metabarcoding-data-class.Rd b/man/metabarcoding-data-class.Rd new file mode 100644 index 0000000..c585c4d --- /dev/null +++ b/man/metabarcoding-data-class.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/02_class_metabarcoding.data.R +\docType{class} +\name{metabarcoding.data} +\alias{metabarcoding.data} +\title{DNA metabarcoding experiment description class} +\description{ +A S4 class describing a DNA metabarcoding experiment. 
It groups +three data frames describing samples, motus and occurrences of +MOTUs per sample +} +\section{Slots}{ + + \describe{ + \item{\code{reads}:}{Matrix of class \code{"numeric"}, + containing the counts of reads per sample + \itemize{ + \item{1 sample per line} + \item{1 sequence per column} + } + } + + \item{\code{samples}:}{Object of class \code{"data.frame"}, describing samples + \itemize{ + \item{1 sample per line} + \item{1 property per column} + } + } + + \item{\code{motus}:}{Object of class \code{"data.frame"}, describing MOTUs (sequences) + \itemize{ + \item{1 MOTU per line} + \item{1 property per column} + } + } + + \item{\code{layers}:}{Object of class \code{"list"}, containing a set of data layers + linking motus and samples. Each element of the list is a matrix + of the same size as the \code{reads} slot with + \itemize{ + \item{1 sample per line} + \item{1 sequence per column} + } + } + + \item{\code{scount}:}{Object of class \code{"integer"}, containing the count of samples} + + \item{\code{mcount}:}{Object of class \code{"integer"}, containing the count of MOTUs} + + \item{\code{sample.margin}:}{Vector of class \code{"numeric"}, describing the total count of + sequences per sample.
By default this slot is set by applying sum + to the reads data.frame lines} + + \item{\code{taxonomy}:}{Object of class \code{"taxonomy.obitools"}, linking the DNA metabarcoding + experiment to a taxonomy} + + \item{\code{taxid}:}{Vector of class \code{"character"}, list of MOTUs' attributes to manage as taxid} + } +} + +\seealso{ +\code{\link{taxonomy.obitools}}, +} +\author{ +Eric Coissac +} +\keyword{DNA} +\keyword{metabarcoding} diff --git a/man/motu-methods.Rd b/man/motu-methods.Rd new file mode 100644 index 0000000..96958b9 --- /dev/null +++ b/man/motu-methods.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/02_class_metabarcoding.data.R +\docType{methods} +\name{motus,metabarcoding.data-method} +\alias{motus,metabarcoding.data-method} +\alias{motu-methods,metabarcoding.data} +\title{Extracts the MOTU descriptions \code{data.frame}} +\usage{ +\S4method{motus}{metabarcoding.data}(obj) +} +\arguments{ +\item{obj}{a \code{\link{metabarcoding.data}} instance} +} +\value{ +a data.frame containing data about MOTU +} +\description{ +Extract the MOTUs description \code{data.frame} from a +\code{\link{metabarcoding.data}} instance. 
+} +\examples{ +# load termite data set from the ROBITools sample data +data(termes) + +# Extract the data.frame describing MOTUs +d = motus(termes) + +head(d) + +} +\seealso{ +\code{\link{metabarcoding.data}}, + \code{\link{reads}}, \code{\link{samples}} +} diff --git a/man/normalize-methods.Rd b/man/normalize-methods.Rd new file mode 100644 index 0000000..ecb3b18 --- /dev/null +++ b/man/normalize-methods.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metabarcoding_threshold.R +\docType{methods} +\name{normalize,metabarcoding.data-method} +\alias{normalize,metabarcoding.data-method} +\alias{normalize-methods,metabarcoding.data} +\title{Normalizes read counts by sample or by MOTU.} +\usage{ +\S4method{normalize}{metabarcoding.data}(data, MARGIN = "sample", + as.matrix = FALSE) +} +\arguments{ +\item{data}{The \code{\linkS4class{metabarcoding.data}} instance +on which normalisation has to be computed.} + +\item{MARGIN}{Indicates if the sums have to be computed across +samples or motus. +Allowed values are : +\itemize{ + \item{'sample' or 1} for computing sum across samples + \item{'motu' or 2} for computing sum across motus + }} + +\item{as.matrix}{Logical indicating if the normalized abundances +must be returned as a simple \code{matrix} (TRUE) or as a new +instance of the \code{\linkS4class{metabarcoding.data}} class +(FALSE, the default case).} +} +\value{ +Returns a new instance of \code{\linkS4class{metabarcoding.data}} + or a \code{numeric} matrix according to the \code{as.matrix} + parameter. +} +\description{ +Method \code{normalize} computes a normalized read abundance matrix +(relative frequency matrix) of a \code{\link{metabarcoding.data}} instance. +Normalization can be done according to abundances per sample or per MOTU.
+} +\examples{ +# load termite data set from the ROBITools sample data +data(termes) + +# Computes normalized abundances per sample +termes.norm = normalize(termes,MARGIN="sample") + +# Computes normalized abundances per sample and +# stores the result as a new layer into the termes +# structure +termes$normalized = normalize(termes,MARGIN="sample",as.matrix=TRUE) + +} +\seealso{ +\code{\linkS4class{metabarcoding.data}} +} +\author{ +Aurelie Bonin +} diff --git a/man/plot.PCRplate.Rd b/man/plot.PCRplate.Rd new file mode 100644 index 0000000..76710b0 --- /dev/null +++ b/man/plot.PCRplate.Rd @@ -0,0 +1,65 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.PCRplate.R +\name{plot.PCRplate} +\alias{plot.PCRplate} +\title{Plot PCR plates} +\usage{ +\method{plot}{PCRplate}(x, samples = NULL, col = "cyan2", different = T, + ...) +} +\arguments{ +\item{x}{a \code{\link{metabarcoding.data}} object} + +\item{samples}{a character vector containing names of problematic samples. Default is \code{NULL}} + +\item{different}{a boolean indicating whether different tags were used in forward and reverse to identify samples. Default is \code{TRUE}} + +\item{...}{arguments to be passed to methods, such as graphical parameters} +} +\value{ +\code{\link{plot.PCRplate}} returns a plot displaying no more than 4 PCR plates, with problematic sample localization +} +\description{ +Plots samples localization in PCR plates, and points out problematic samples if provided.
+} +\examples{ +\dontshow{# switch the working directory to the data package directory} +\dontshow{setwd(system.file("extdata", package="ROBITools"))} + +data(termes) + +# reading the termes_ngsfilt.txt file +termes.ngs=import.ngsfilter.data('termes_ngsfilt.txt', platewell="position") + +# including ngsfilter data into termes data +attr(termes, "samples") = termes.ngs[rownames(termes),] + +#plot PCR plate plan +col = rep("green", nrow(termes)) +col[grep("r", rownames(termes))] = "red" +plot.PCRplate(termes, col=col) + +#highlighting location of samples with low identification score + +#low quality taxonomic assignments identification +library(plotrix) +weighted.hist(termes$motus$best_identity, colSums(termes$reads), breaks = 20, ylab = "Nb reads", xlab = "Ecotag scores", xaxis=F) +axis(1, labels = T) +lowqual.seq = rownames(termes$motus)[termes$motus$best_identity < 0.7] + +#identification and localization (in PCR plate) of samples with high proportions of low quality taxonomic assignments +termes.freq= normalize(termes, MARGIN=1)$reads +hist(log10(rowSums(termes.freq[,lowqual.seq]) + 1e-05), breaks = 20, xlab = "Prop low quality reads") +lowqual.sample = rownames(termes)[log10(rowSums(termes.freq[, lowqual.seq]) + 1e-05) > -0.5] + +plot.PCRplate(termes, lowqual.sample, col=col) + +} +\seealso{ +\code{\link{import.metabarcoding.data}} +} +\author{ +Lucie Zinger +} +\keyword{DNA} +\keyword{metabarcoding} diff --git a/man/plot.seqinsample.Rd b/man/plot.seqinsample.Rd new file mode 100644 index 0000000..1c87f9c --- /dev/null +++ b/man/plot.seqinsample.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot.seqinsample.R +\name{plot.seqinsample} +\alias{plot.seqinsample} +\title{Plot sequence abundance in samples} +\usage{ +\method{plot}{seqinsample}(x, seqset, seqtype, controls = NULL) +} +\arguments{ +\item{x}{a \code{\link{metabarcoding.data}} object} + +\item{seqset}{a vector with sequence names} +
+\item{seqtype}{a string indicating what type of sequences are displayed} + +\item{controls}{a vector indicating the negative control names in the x object. +Default is \code{NULL}} +} +\value{ +returns a plot with the log10 transformed relative proportion of + selected MOTUs in each sample. If the number of samples is > 96, + then the plot is displayed in 4 panels +} +\description{ +Plots relative abundances of a set of sequences in all samples (log10 transformed) +} +\examples{ + +data(termes) + +seqset = rownames(termes$motus)[which(termes$motus$genus_name=="Anoplotermes")] +plot.seqinsample(termes, seqset, "Anoplotermes") + +controls = rownames(termes)[grep("r", rownames(termes))] +seqset = rownames(termes$motus)[which(termes$motus$best_identity<0.7)] +plot.seqinsample(termes, seqset, "Not assigned", controls) + +} +\seealso{ +\code{\linkS4class{taxonomy.obitools}}, and method \code{\link{taxonmicank}} +} +\author{ +Lucie Zinger +} +\keyword{metabarcoding} diff --git a/man/read-methods.Rd b/man/read-methods.Rd new file mode 100644 index 0000000..f3510bc --- /dev/null +++ b/man/read-methods.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/02_class_metabarcoding.data.R +\docType{methods} +\name{reads,metabarcoding.data-method} +\alias{reads,metabarcoding.data-method} +\alias{read-methods,metabarcoding.data} +\title{Extracts the matrix describing MOTUs abundances} +\usage{ +\S4method{reads}{metabarcoding.data}(obj) +} +\arguments{ +\item{obj}{a \code{\link{metabarcoding.data}} instance} +} +\value{ +a matrix containing data about reads +} +\description{ +Extract the matrix describing MOTUs abundances (read counts) +from a \code{\link{metabarcoding.data}} instance.
+} +\examples{ +# load termite data set from the ROBITools sample data +data(termes) + +# Extract the matrix describing MOTUs abundances +d = reads(termes) + +head(d) + +} +\seealso{ +\code{\link{metabarcoding.data}}, + \code{\link{motus}}, \code{\link{samples}} +} +\author{ +Eric Coissac +} diff --git a/man/read.ngsfilter.Rd b/man/read.ngsfilter.Rd new file mode 100644 index 0000000..57f9c1c --- /dev/null +++ b/man/read.ngsfilter.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read.ngsfilter.R +\name{read.ngsfilter} +\alias{read.ngsfilter} +\title{Read an OBITools ngsfilter file} +\usage{ +read.ngsfilter(filename, decimal = ".", as.is = !stringsAsFactors, + stringsAsFactors = default.stringsAsFactors()) +} +\description{ +Reads a ngsfilter file as formatted for the OBITools. For now, needs to be tab delimited up to the "F" column. +Any additional information needs to be space delimited. +} +\seealso{ +\code{\link{import.metabarcoding.data}} +} +\author{ +Lucie Zinger +} +\keyword{data} +\keyword{import} diff --git a/man/read.obitab.Rd b/man/read.obitab.Rd new file mode 100644 index 0000000..77c77c8 --- /dev/null +++ b/man/read.obitab.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read.obitab.R +\name{read.obitab} +\alias{read.obitab} +\title{Reads a data file produced by the obitab command} +\usage{ +read.obitab(filename, sep = "\\t") +} +\arguments{ +\item{sep}{Column separator in the obitab file.
+The default separator is the tabulation.} + +\item{filename}{a string containing the file name of the obitab file.} +} +\value{ +a \code{data.frame} instance containing the obitab file +} +\description{ +Read a data file issued from the conversion of a fasta +file to a tabular file by the obitab command +} +\examples{ +require(ROBITools) + +\dontshow{# switch the working directory to the data package directory} +\dontshow{setwd(system.file("extdata", package="ROBITools"))} + +# read the termes.tab file +termes=read.obitab('termes.tab') + +# print the dimensions of the data.frame +dim(termes) + +} +\seealso{ +\code{\link{import.metabarcoding.data}} +} +\author{ +Eric Coissac +} diff --git a/man/rmS3Class.Rd b/man/rmS3Class.Rd new file mode 100644 index 0000000..c35c4cf --- /dev/null +++ b/man/rmS3Class.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/s3objects.R +\name{rmS3Class} +\alias{rmS3Class} +\title{Removes a class from the class hierarchy attribute.} +\usage{ +rmS3Class(object, classname) +} +\arguments{ +\item{object}{the object to modify} + +\item{classname}{the name of the class to remove} +} +\value{ +the object given as parameter. +} +\description{ +\code{rmS3Class} removes a class name from the vector +of classes associated to the object. This is the way to +remove the association between an object and an S3 class.
+} +\note{ +for efficiency purposes no check is done on the input + parameters +} +\examples{ +x = c(1,3,2,5) +x = addS3Class(x,"my.vector") +class(x) +x = rmS3Class(x,"my.vector") +class(x) + +} +\seealso{ +\code{\link{addS3Class}} +} +\author{ +Eric Coissac +} +\keyword{function} +\keyword{system} diff --git a/man/samples-methods.Rd b/man/samples-methods.Rd new file mode 100644 index 0000000..d3783ed --- /dev/null +++ b/man/samples-methods.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/02_class_metabarcoding.data.R +\docType{methods} +\name{samples,metabarcoding.data-method} +\alias{samples,metabarcoding.data-method} +\alias{samples-methods,metabarcoding.data} +\title{Extracts the samples description data.frame} +\usage{ +\S4method{samples}{metabarcoding.data}(obj) +} +\arguments{ +\item{obj}{a \code{\link{metabarcoding.data}} instance} +} +\value{ +a data.frame containing data about samples +} +\description{ +Extract the sample description data.frame from a +\code{\link{metabarcoding.data}} instance.
+} +\examples{ +# load termite data set from the ROBITools sample data +data(termes) + +# Extract the data frame describing samples +d = samples(termes) + +head(d) + +} +\seealso{ +\code{\link{metabarcoding.data}}, + \code{\link{motus}}, \code{\link{reads}} +} +\author{ +Eric Coissac +} diff --git a/man/summary.taxores.Rd b/man/summary.taxores.Rd new file mode 100644 index 0000000..9b5d4df --- /dev/null +++ b/man/summary.taxores.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/taxonomic.resolution.R +\name{summary.taxores} +\alias{summary.taxores} +\title{Dataset taxonomic resolution summary.} +\usage{ +\method{summary}{taxores}(x, colranks, colscores, thresh = 0.7) +} +\arguments{ +\item{x}{a \code{\link{metabarcoding.data}} object} + +\item{colranks}{a string indicating column name where ranks are stored in \code{x}} + +\item{colscores}{a string indicating column name where taxonomic identification scores are stored in \code{x}} + +\item{thresh}{a threshold for defining at which taxonomic identification scores a sequence can be considered as "not assigned". 
+Default is \code{0.7}} +} +\value{ +returns a data.frame and piecharts of the number/proportion of MOTUs/reads assigned to each taxonomic level +} +\description{ +Summarizes the taxonomic resolution of reads and MOTUs over the entire dataset +} +\examples{ + +data(termes) +taxo=default.taxonomy() + +termes.taxo.table = get.classic.taxonomy(termes, taxo, "taxid") +attr(termes, "motus") = data.frame(termes$motus, termes.taxo.table) +attr(termes, "motus")["count"] = colSums(termes$reads) + +summary.taxores(termes, "taxonomic_rank_ok","best_identity") + +} +\seealso{ +\code{\linkS4class{taxonomy.obitools}}, and method \code{\link{taxonmicank}} +} +\author{ +Lucie Zinger +} +\keyword{taxonomy} diff --git a/man/taxo.decider.Rd b/man/taxo.decider.Rd new file mode 100644 index 0000000..803cd2e --- /dev/null +++ b/man/taxo.decider.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/choose.taxonomy.R +\name{taxo.decider} +\alias{taxo.decider} +\title{Choose between databases for taxonomic classifications} +\usage{ +taxo.decider(x, taxonomy, dbrank, thresh = 0.95) +} +\arguments{ +\item{x}{a \code{\link{metabarcoding.data}} object} + +\item{taxonomy}{a \code{\linkS4class{taxonomy.obitools}} instance} + +\item{dbrank}{string or vector indicating reference database names ranked by order of preference} + +\item{thresh}{a best_identity threshold for applying priority. Default is \code{0.95}} +} +\value{ +returns a data.frame with the refined taxonomic assignment and classic taxonomy description.
+} +\description{ +Chooses a sequence taxonomic assignment in order of preference for the different +reference databases that have been used when the assignment is above a certain threshold +} +\examples{ + +data(termes) + +taxo=default.taxonomy() + +#create artificial taxonomic assignments +attr(termes, "motus")["best_identity:DB1"] = sample(seq(0.5,1,0.001),size=nrow(termes$motus), replace=T) +attr(termes, "motus")["best_identity:DB2"] = sample(seq(0.5,1,0.001),size=nrow(termes$motus), replace=T) +attr(termes, "motus")["best_identity:DB3"] = sample(seq(0.5,1,0.001),size=nrow(termes$motus), replace=T) +attr(termes, "motus")["taxid_by_db:DB1"] = termes$motus$taxid +attr(termes, "motus")["taxid_by_db:DB2"] = sample(termes$motus$taxid,size=nrow(termes$motus), replace=F) +attr(termes, "motus")["taxid_by_db:DB3"] = sample(termes$motus$taxid,size=nrow(termes$motus), replace=F) + +#Run taxo.decider +termes.ok = taxo.decider(termes, taxo, "DB2", 0.95) +head(termes.ok$motus[union(grep("DB", colnames(termes.ok$motus)), grep("_ok", colnames(termes.ok$motus)))]) + +termes.ok = taxo.decider(termes, taxo, c("DB3", "DB1"), 0.95) +head(termes.ok$motus[union(grep("DB", colnames(termes.ok$motus)), grep("_ok", colnames(termes.ok$motus)))]) + +#Quick look at the enhancement in taxonomic assignements +par(mfrow=c(1,4)) +for(i in grep("best_identity.", colnames(termes.ok$motus))){ +hist(termes.ok$motus[,i], breaks=20, ylim=c(1,21), main=colnames(termes.ok$motus)[i], xlab="assignment score") +} + +} +\seealso{ +\code{\linkS4class{taxonomy.obitools}}, and methods \code{\link{species}},\code{\link{genus}}, \code{\link{family}},\code{\link{kingdom}}, + \code{\link{superkingdom}},\code{\link{taxonatrank}}, \code{\link{taxonmicank}} +} +\author{ +Lucie Zinger +} +\keyword{taxonomy} diff --git a/man/threshold-mask-methods.Rd b/man/threshold-mask-methods.Rd new file mode 100644 index 0000000..50f643b --- /dev/null +++ b/man/threshold-mask-methods.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: 
do not edit by hand +% Please edit documentation in R/metabarcoding_threshold.R +\docType{methods} +\name{threshold.mask,metabarcoding.data-method} +\alias{threshold.mask,metabarcoding.data-method} +\alias{threshold.mask-methods,metabarcoding.data} +\title{Computes a cumulative threshold mask for filtering read abundances.} +\usage{ +\S4method{threshold.mask}{metabarcoding.data}(data, MARGIN, threshold = 0.97, + operator = "<") +} +\arguments{ +\item{data}{The \code{\linkS4class{metabarcoding.data}} instance +on which the computation has to be performed.} + +\item{MARGIN}{Indicates if the sums have to be computed across +samples or motus. +Allowed values are : +\itemize{ + \item{'sample' or 1} for computing sum across samples + \item{'motu' or 2} for computing sum across motus + }} + +\item{threshold}{a numeric value between 0 and 1 indicating which part of +the signal must be conserved. Default value is set to +0.97 (97\% of the total signal).} + +\item{operator}{is a logical comparison operator.} +} +\value{ +A logical matrix usable for selecting cells in the read abundance matrix. +} +\description{ +The method \code{threshold.mask} of the class \code{\linkS4class{metabarcoding.data}} +computes a logical matrix of the same size as the read matrix of the data parameter. +Each cell of this matrix contains a \code{TRUE} or a \code{FALSE} value according to the +relationship existing between the read abundance and the corresponding threshold as computed +by the \code{\link{threshold}} method. +} +\details{ +(computed value) = (read abundance) operator (threshold value) + +for a cell in the result matrix, \code{(read abundance)} is extracted from the read layer. +\code{operator} is a comparison operator and \code{(threshold value)} is estimated with the +\code{\link{threshold}} method.
+} +\seealso{ +\code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}}, \code{\link{threshold}} +} +\author{ +Aurelie Bonin +} diff --git a/man/threshold-methods.Rd b/man/threshold-methods.Rd new file mode 100644 index 0000000..1ff9acb --- /dev/null +++ b/man/threshold-methods.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/metabarcoding_threshold.R +\docType{methods} +\name{threshold,metabarcoding.data-method} +\alias{threshold,metabarcoding.data-method} +\alias{threshold-methods,metabarcoding.data} +\title{Compute the cumulative threshold of read abundances.} +\usage{ +\S4method{threshold}{metabarcoding.data}(data, MARGIN = "sample", + threshold = 0.97) +} +\arguments{ +\item{data}{The \code{\linkS4class{metabarcoding.data}} instance +on which the computation has to be performed.} + +\item{MARGIN}{Indicates if the sums have to be computed across +samples or motus. +Allowed values are : +\itemize{ + \item{'sample' or 1} for computing sum across samples + \item{'motu' or 2} for computing sum across motus + }} + +\item{threshold}{a numeric value between 0 and 1 indicating which part of +the signal must be conserved. Default value is set to +0.97 (97\% of the total signal).} +} +\value{ +a numeric vector containing the limit abundance to consider for + each sample or each MOTU according to the value of the \code{MARGIN} + parameter. +} +\description{ +The method \code{threshold} of the class \code{\linkS4class{metabarcoding.data}} +computes the threshold to be used for conserving just a part of the global +signal. This threshold is computed by ranking abundances in decreasing order. +The cumulative sums of these ranked abundances are computed and the abundance +corresponding to the first sum greater than the threshold is returned as result.
+} +\examples{ +# load termite data set from the ROBITools sample data +data(termes) + +# computes the threshold value to use for keeping 95\% of +# the reads per MOTU + +t = threshold(termes,MARGIN='motu',threshold=0.95) + +} +\seealso{ +\code{\linkS4class{metabarcoding.data}}, \code{\link{threshold.mask}} +} +\author{ +Aurelie Bonin +} diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..656836b --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,2 @@ +ROBITools.so +*.o